diff --git a/R/gmum.errors.R b/R/gmum.errors.R new file mode 100644 index 0000000..5fbe781 --- /dev/null +++ b/R/gmum.errors.R @@ -0,0 +1,7 @@ +# Error codes + +GMUM_WRONG_LIBRARY = "Error 20" +GMUM_WRONG_KERNEL = "Error 21" +GMUM_BAD_PREPROCESS = "Error 22" +GMUM_NOT_SUPPORTED = "Error 23" +GMUM_WRONG_PARAMS = "Error 24" \ No newline at end of file diff --git a/R/gng-visualize.R b/R/gng-visualize.R index 0292ba1..6f4a0b4 100644 --- a/R/gng-visualize.R +++ b/R/gng-visualize.R @@ -2,7 +2,7 @@ library(igraph) .gng.plot3d<-function(gngServer){ if("rgl" %in% rownames(installed.packages()) == TRUE){ - g <- .gng.construct_igraph(gngServer) + g <- convertToGraph(gngServer) .visualizeIGraphRGL(g) }else{ warning("Please install rgl package to plot 3d graphs") @@ -81,9 +81,7 @@ library(igraph) } } .gng.plot2d.errors<-function(gngServer, vertex.color, layout){ - tmp_name <- paste("tmp",sample(1:1000, 1),".graphml", sep="") - gngServer$exportToGraphML(tmp_name) - ig = .readFromGraphML(tmp_name ) + ig <- convertToGraph(gngServer) if(length(V(ig))==0) return @@ -97,14 +95,12 @@ library(igraph) } .visualizeIGraph2dWithErrors(ig, vertex.color, layout, gngServer) - - file.remove(tmp_name) } .gng.plot2d<-function(gngServer, vertex.color, layout){ tmp_name <- paste("tmp",sample(1:1000, 1),".graphml", sep="") - gngServer$exportToGraphML(tmp_name) - ig = .readFromGraphML(tmp_name ) + + ig <- convertToGraph(gngServer) if(length(V(ig))==0) return @@ -118,7 +114,6 @@ library(igraph) } .visualizeIGraph2d(ig, vertex.color, layout) - file.remove(tmp_name) } #' Visualize igraph using igraph plot @@ -154,7 +149,9 @@ library(igraph) .visualizeIGraph2d(ig, vertex.color, layout_2d) title("Graph visualization") errors_raw = gng$getErrorStatistics() - errors = log((errors_raw+1)/min(errors_raw+1)) - plot(errors, type="l", lty=2, lwd=2, xlab="Time [s]", ylab="Mean error (log)", frame.plot=F) + errors_raw = errors_raw[5:length(errors_raw)] + errors = errors_raw + #errors = log((errors_raw)/min(errors_raw+1e-4)) + plot(errors, type="l", lty=2, lwd=2, xlab="Batch", ylab="Mean batch error", frame.plot=F) title("Mean error (log)") } diff --git a/R/gng.R b/R/gng.R index 0639798..09695bd 100644 --- a/R/gng.R +++ b/R/gng.R @@ -1,5 +1,3 @@ -#dev note: I have no idea how to document S4 methods using roxygen, I will have to assign someone to this task - library(igraph) library(methods) @@ -49,7 +47,7 @@ gng.train.online <- function(dim){ .gng.dataset.bagging <- 2 .gng.dataset.sequential <-1 -gng.train.offline <- function(max.iter = 100, min.improvement = 1e-2){ +gng.train.offline <- function(max.iter = 100, min.improvement = 1e-3){ c(.gng.train.offline, max.iter , min.improvement) } @@ -336,6 +334,26 @@ errorStatistics.gng <- NULL OptimizedGNG <- NULL +#' @title clustering +#' +#' @description Gets vector with node indexes assigned to examples in the dataset +#' +#' @usage +#' clustering(gng) +#' +#' @export +#' +#' @rdname clustering-methods +#' +#' @docType methods +#' +#' @examples +#' clustering(gng) +#' +#' @aliases clustering +#' +clustering.gng <- NULL + #' @title errorStatistics #' #' @description Gets vector with errors for every second of execution @@ -424,6 +442,11 @@ summary.gng <- NULL convertToGraph.gng <- NULL + + + + + generateExamples <- NULL #' @title insertExamples @@ -474,6 +497,8 @@ evalqOnLoad({ ){ + + config <- new(GNGConfiguration) # Fill in configuration @@ -481,36 +506,36 @@ evalqOnLoad({ config$dim = ncol(x) }else{ - config$dim = training[2] - print(config$dim) + config$dim = training[2] } if(type[1] == .gng.type.optimized){ - config$uniformgrid_optimization = TRUE - config$lazyheap_optimization = TRUE - config$set_bounding_box(type[2], type[3]) + config$.uniformgrid_optimization = TRUE + config$.lazyheap_optimization = TRUE + config$.set_bounding_box(type[2], type[3]) if(training[1] == .gng.train.offline){ - if(!max(df) <= type[3] && !min(df) >= type[2]){ - gmum.error("Passed incorrect parameters. The dataset is not in the defined range") + if(!max(x) <= type[3] && !min(x) >= type[2]){ + gmum.error(ERROR_BAD_PARAMS, "Passed incorrect parameters. The dataset is not in the defined range") } } }else{ - config$uniformgrid_optimization = FALSE - config$lazyheap_optimization = FALSE + config$.uniformgrid_optimization = FALSE + config$.lazyheap_optimization = FALSE } if(type[1] == .gng.type.utility){ - config$experimental_utility_k = type[2] - config$experimental_utility_option = 1 + config$.experimental_utility_k = type[2] + config$.experimental_utility_option = 1 } else{ - config$experimental_utility_option = 0 + config$.experimental_utility_option = 0 } - config$dataset_type=.gng.dataset.bagging + + config$.dataset_type=.gng.dataset.bagging config$beta = beta config$max_edge_age = max.edge.age config$alpha = alpha @@ -521,7 +546,7 @@ evalqOnLoad({ config$lambda = lambda config$verbosity = verbosity - if(!config$check_correctness()){ + if(!config$.check_correctness()){ gmum.error(ERROR_BAD_PARAMS, "Passed incorrect parameters.") } @@ -530,6 +555,7 @@ evalqOnLoad({ # Perform training on passed dataset if(training[1] == .gng.train.offline){ + print("Training offline") if(is.null(x)){ gmum.error(ERROR, "Passed null data and requested training offline") @@ -541,34 +567,63 @@ evalqOnLoad({ print(max_iter) min_relative_dif = training[3] iter = 0 - errors_calculated = 0 - while(iter < max_iter || errors_calculated == 0){ - Sys.sleep(0.1) - iter = server$getCurrentIteration() - - if(iter %% (max_iter/100) == 0){ - print(paste("Iteration", iter)) - } - - # Iter 5 = 5 times passed whole dataset. - if(iter > 5){ - errors_calculated = 1 - errors = server$getErrorStatistics() - best_previously = min(errors[(length(errors)-5):length(errors)-1]) - current = errors[length(errors)] - if(best_previously != 0){ - change = 1.0 - current/best_previously - if(change < min_relative_dif){ - print(best_previously) - print(errors[(length(errors)-5):length(errors)-1]) - print("Patience bailed out") - break + previous_iter = -1 + best_so_far = 1e10 + initial_patience = 3 + error_index = -1 # always bigger than 0 + patience = initial_patience + + tryCatch({ + while(iter < max_iter && server$isRunning()){ + Sys.sleep(0.1) + iter = server$getCurrentIteration() + + if(previous_iter != iter && iter %% (max_iter/100) == 0){ + print(paste("Iteration", iter)) + } + + if(length(server$getErrorStatistics()) > 5){ + errors = server$getErrorStatistics() + + best_previously = min(errors[(length(errors)-5):length(errors)]) + + #this is same as (best_so_far-best_previously)/best_so_far < min_relative_di + #we get minimum of window 5 and look at the history + if( (error_index - server$.getGNGErrorIndex()) > 4 && + (best_so_far - best_previously) < best_so_far*min_relative_dif){ + patience = patience - 1 + if(patience <= 0){ + print(sprintf("Best error during training: %f", best_so_far)) + print(sprintf("Best error in 5 previous iterations %f", best_previously)) + print(errors[(length(errors)-5):length(errors)]) + print("Patience (which you can control) elapsed, bailing out") + break + } + }else{ + patience = initial_patience } + + + error_index = server$.getGNGErrorIndex() + best_so_far = min(best_previously, best_so_far) } } - } - - terminate(server) + + previous_iter = iter + + if(server$isRunning()){ + terminate(server) + } + else{ + gmum.error(ERROR, "Training failed") + } + }, interrupt= + function(interrupt){ + if(server$isRunning()){ + terminate(server) + } + + }) } } @@ -591,13 +646,21 @@ evalqOnLoad({ verbosity=0, k=NULL ){ + + if(is(x, "data.frame")){ + x = data.matrix(x); + } + gng <- NULL + call <- match.call(expand.dots = TRUE) if(is.null(k)){ - .GNG(x=x, labels=labels, beta=beta, alpha=alpha, max.nodes=max.nodes, + gng <- .GNG(x=x, labels=labels, beta=beta, alpha=alpha, max.nodes=max.nodes, eps.n=eps.n, eps.w=eps.w, max.edge.age=max.edge.age, type=gng.type.default(), training=training, lambda=lambda, verbosity=verbosity) }else{ - .GNG(x=x, labels=labels, beta=beta, alpha=alpha, max.nodes=max.nodes, + gng <- .GNG(x=x, labels=labels, beta=beta, alpha=alpha, max.nodes=max.nodes, eps.n=eps.n, eps.w=eps.w, max.edge.age=max.edge.age, type=gng.type.utility(k=k), training=training, lambda=lambda, verbosity=verbosity) } + assign("call", call, gng) + gng } OptimizedGNG <<- function(x=NULL, labels=c(), @@ -616,17 +679,28 @@ evalqOnLoad({ gmum.error(ERROR, "Incorrect range") return } - .GNG(x=x, labels=labels, beta=beta, alpha=alpha, max.nodes=max.nodes, -eps.n=eps.n, eps.w=eps.w, max.edge.age=max.edge.age, type=gng.type.optimized(min=value.range[1]*1.1, max=value.range[2]*1.1), training=training, lambda=lambda, verbosity=verbosity) - + if(is(x, "data.frame")){ + x = data.matrix(x); + } + call <- match.call(expand.dots = TRUE) + gng <- .GNG(x=x, labels=labels, beta=beta, alpha=alpha, max.nodes=max.nodes, +eps.n=eps.n, eps.w=eps.w, max.edge.age=max.edge.age, type=gng.type.optimized(min=value.range[1], max=value.range[2]), training=training, lambda=lambda, verbosity=verbosity) + assign("call", call, gng) + gng } setGeneric("node", function(x, gng_id, ...) standardGeneric("node")) - + + setGeneric("clustering", + function(object) standardGeneric("clustering")) + + setGeneric("convertToGraph", function(object, ...) standardGeneric("convertToGraph")) - + + + setGeneric("run", function(object, ...) standardGeneric("run")) @@ -689,14 +763,34 @@ eps.n=eps.n, eps.w=eps.w, max.edge.age=max.edge.age, type=gng.type.optimized(min summary.gng <<- function(object){ print(sprintf("Growing Neural Gas, nodes %d with mean error %f", object$getNumberNodes(), object$getMeanError())) + print(sprintf("Trained %d iterations", object$getCurrentIteration())) print("Mean errors[s]: ") - print(object$getErrorStatistics()) + errors = object$getErrorStatistics() + if(length(errors) > 10){ + errors = errors[(length(errors)-10):length(errors)] + } + + print(errors) + } + + + # Autocompletion fix + + .GlobalEnv$`.DollarNames.C++Object` <- function( x, pattern ){ + grep(pattern, asNamespace("Rcpp")$complete(x), value = TRUE)[! (substr(grep(pattern, asNamespace("Rcpp")$complete(x), value = TRUE),1,1)==".")] } + #.GlobalEnv$DollarNamesGmumr <- function( x, pattern ){ + # asNamespace("Rcpp")$`.DollarNames.C++Object`(x, pattern)[! (substr(asNamespace("Rcpp")$`.DollarNames.C++Object`(x, pattern),1,1)==".")] + #} + #environment(.GlobalEnv$DollarNamesGmumr) <- .GlobalEnv + #setMethod( ".DollarNames", "C++Object", .GlobalEnv$DollarNamesGmumr ) + setMethod("plot", "Rcpp_GNGServer", plot.gng) setMethod("print", "Rcpp_GNGServer", print.gng) setMethod("summary", "Rcpp_GNGServer", summary.gng) + setMethod("show", "Rcpp_GNGServer", summary.gng) node.gng <<- function(x, gng_id){ x$getNode(gng_id) @@ -708,6 +802,16 @@ eps.n=eps.n, eps.w=eps.w, max.edge.age=max.edge.age, type=gng.type.optimized(min pause.gng <<- function(object){ object$pause() + n = 0.0 + sleep = 0.1 + while(object$isRunning()){ + Sys.sleep(sleep) + n = n + 1 + if(n > 2/sleep){ + print("Warning: GNG has not paused! Check status with gng$isRunning(). Something is wrong.") + return() + } + } } terminate.gng <<- function(object){ @@ -721,13 +825,19 @@ eps.n=eps.n, eps.w=eps.w, max.edge.age=max.edge.age, type=gng.type.optimized(min errorStatistics.gng <<- function(object){ object$getErrorStatistics() } - + + clustering.gng <<- function(object){ + object$clustering() + } + save.gng <<- function(object, filename){ + warning("Saving does not preserve currently training history") object$save(filename) } load.gng <<- function(filename){ - new(GNGServer, filename) + warning("Saving does not preserve currently training history") + fromFileGNG(filename) } @@ -750,7 +860,7 @@ eps.n=eps.n, eps.w=eps.w, max.edge.age=max.edge.age, type=gng.type.optimized(min setMethod("meanError", "Rcpp_GNGServer", meanError.gng) setMethod("errorStatistics", "Rcpp_GNGServer", errorStatistics.gng) - #' Get number of nodes + #'Get number of nodes setMethod("numberNodes" , "Rcpp_GNGServer", function(object){ @@ -758,33 +868,114 @@ eps.n=eps.n, eps.w=eps.w, max.edge.age=max.edge.age, type=gng.type.optimized(min }) - + convertToGraph.gng <- function(object){ - .gng.construct_igraph(object) + pause(object) + + if(object$getNumberNodes() == 0){ + return(graph.empty(n=0, directed=FALSE)) + } + + #Prepare index map. Rarely there is a difference in indexing + #due to a hole in memory representation of GNG graph (i.e. + #indexing in gng can be non-continuous) + + # Warning: This is a hack. If there is a bug look for it here + indexesGNGToIGraph <- 1:(2*object$.getLastNodeIndex()) + indexesIGraphToGNG <- 1:object$getNumberNodes() + + if(object$.getLastNodeIndex() != object$getNumberNodes()){ + igraph_index = 1 + for(i in (1:object$.getLastNodeIndex())){ + node <- node(object, i) + if(length(node) != 0){ + indexesGNGToIGraph[i] = igraph_index + indexesIGraphToGNG[igraph_index] = i + igraph_index = igraph_index + 1 + } + } + } + + adjlist<-list() + for(i in 1:object$.getLastNodeIndex()){ + node <- node(object, i) + if(length(node) != 0){ + + igraph_index = indexesGNGToIGraph[i] + #print(paste(object$.getLastNodeIndex(), length(indexesGNGToIGraph), object$isRunning())) + #print(paste(igraph_index, node$neighbours)) + neighbours = node$neighbours[node$neighbours > i] + adjlist[[igraph_index]] <- sapply(neighbours, function(x){ indexesGNGToIGraph[x] }) + } else{ + #print("Empty node") + } + } + + #print("Creating the graph") + + g <- graph.adjlist(adjlist, mode = "all", duplicate=FALSE) + for(i in 1:object$.getLastNodeIndex()){ + node <- node(object, i) + if(length(node) != 0){ + igraph_index = indexesGNGToIGraph[i] + #TODO: it is more efficient to assign whole vectors + #TODO: refactor in whole code v0 v1 v2 to pos_1 pos_2 pos_3 + V(g)[igraph_index]$v0 <- node$pos[1] + V(g)[igraph_index]$v1 <- node$pos[2] + V(g)[igraph_index]$v2 <- node$pos[3] + V(g)[igraph_index]$label <- node$label + V(g)[igraph_index]$error <- node$error + if(!is.null(node$utility)){ + V(g)[igraph_index]$utility = node$utility + } + } + } + + # Add distance information + dists <- apply(get.edges(g, E(g)), 1, function(x){ + object$nodeDistance(indexesIGraphToGNG[x[1]], indexesIGraphToGNG[x[2]]) + }) + E(g)$dists = dists + + g } + - - #' Get node descriptor from graph - #' - #' @note This function will dump graph to .graphml file on this first and then will remove - #' the file. Be cautious with huge graphs! - #' - #' @param gng_id gng id of the node NOTE: nmight differ from one in exported igraph + setMethod("convertToGraph" , "Rcpp_GNGServer", convertToGraph.gng) - - - #' Find closest example - #' @param x Vector of dimensionality of vertex - #' @return gng_index of the closest example + + + setMethod("clustering" , + "Rcpp_GNGServer", + clustering.gng) + setMethod("predict" , "Rcpp_GNGServer", function(object, x){ - object$predict(x) + if( is.vector(x)){ + object$predict(x) + }else{ + if ( !is(x, "data.frame") && !is(x, "matrix") && !is(x,"numeric") ) { + gmum.error(ERROR_BAD_PARAMS, "Wrong target class, please provide data.frame, matrix or numeric vector") + } + + if (!is(x, "matrix")) { + x <- data.matrix(x) + } + + y <- rep(NA, nrow(x)) + + for(i in 1:nrow(x)){ + y[i] <- object$predict(x[i,]) + } + + y + } }) - - + + insertExamples.gng <<- function(object, examples, labels=c()){ if(length(labels) == 0){ object$insertExamples(examples, vector(mode="numeric", length=0)) @@ -820,4 +1011,18 @@ eps.n=eps.n, eps.w=eps.w, max.edge.age=max.edge.age, type=gng.type.optimized(min "Rcpp_GNGServer", insertExamples.gng) + + methods = list() + for(name in names(GNGConfiguration@methods)){ + methods[[name]] = eval(substitute( + function(...) .CppObject$WHAT(...), list(WHAT = as.name(name)))) + } + + methods[["initialize"]] <- function(...){ + + } + + + }) + diff --git a/R/scripts/wine_dataset.R b/R/scripts/wine_dataset.R index 117fc4e..3d924b8 100644 --- a/R/scripts/wine_dataset.R +++ b/R/scripts/wine_dataset.R @@ -1,14 +1,12 @@ -library("GrowingNeuralGas") +library("gmum.r") + data(wine, package="rattle") scaled.wine <- scale(wine[-1]) -#TODO: not 200 but 126 - # Train in an offline manner -gng <- GNG(scaled.wine, labels=wine$Type, max.nodes=200, - training=gng.train.offline(max.iter=1000, min.improvement=0)) -devtools::install(".") -devtools::load_all(".") +gng <- GNG(scaled.wine, labels=wine$Type, max.nodes=20, + training=gng.train.offline(max.iter=10000, min.improvement=1e-1)) + # Print number of nodes numberNodes(gng) @@ -20,3 +18,14 @@ mean(degree(ig)) # Plot using igraph layout plot(gng, mode = gng.plot.2d, vertex.color=gng.plot.color.label, layout=igraph::layout.fruchterman.reingold) + +# Print summary of trained object +print(summary(gng)) + +# Print prediction accuracy +labels = as.vector(wine[,c("Type")], mode="double") +preds <- c() +for(i in 1:nrow(scaled.wine)){ + preds <- c(preds,round(node(gng, round(predict(gng, as.vector(scaled.wine[i,], mode="double")))+1)$label)) +} +print(table(preds, labels)) diff --git a/README.md b/README.md index ea5cd32..30a9ef8 100644 --- a/README.md +++ b/README.md @@ -53,17 +53,22 @@ You can also refer to R package documentation (pdf version ### Cluster wine dataset + +Clustering of the UCI wine dataset +
+ + In this example we will construct a clustering of UCI wine dataset using offline GNG. ```R -library("GrowingNeuralGas") +library(gmum.r) # Load data data(wine, package="rattle") scaled_wine <- scale(wine[-1]) # Train in an offline manner -gng <- GNG(scaled_wine, labels=wine$Type, max_nodes=20) +gng <- GNG(scaled_wine, labels=wine$Type, max.nodes=20) # Find closest node to vector [1,1,1] predict(gng, c(1,1,1)) @@ -77,13 +82,9 @@ meanError(gng) # Plot with first 2 coordinates as position plot(gng, mode=gng.plot.2d.errors, vertex.color=gng.plot.color.cluster, - layout=gng.plot.layout.v2d) + layout=gng.plot.layout.igraph.fruchterman) ``` -Reconstruction of the Buddha figure from Standford Repositories -
- - ##List of functions This is not a full documentation. Please refer to R package documentation (pdf version @@ -161,12 +162,3 @@ Feel free to contribute to the code. Contributions should be posted as pull requ ##Known issues --------- * Package is not released for Windows yet. - -* Igraph plotting issues - - * Due to bug in R (https://bugs.r-project.org/bugzilla/show_bug.cgi?id=15327) - on some OS you have to install liblzma-dev additionally. - - * Sometimes after installation of igraph you might have disabled graphml support - (http://lists.gnu.org/archive/html/igraph-help/2011-03/msg00101.html). Try - installing libxml2-dev package and reinstalling igraph.a diff --git a/demo/extra/mnist.R b/demo/extra/mnist.R index 0e5d61d..f4cded0 100644 --- a/demo/extra/mnist.R +++ b/demo/extra/mnist.R @@ -1,17 +1,12 @@ -#################################################################### -# Clustering MNIST dataset with GNG algorithm and running RF on it # -# note: make sure you have in data mnist dataset # - #################################################################### +library(gmum.r) +library(igraph) +#################################################################### +# Clustering MNIST dataset with GNG algorithm # +#################################################################### -# Load the MNIST digit recognition dataset into R -# http://yann.lecun.com/exdb/mnist/ -# assume you have all 4 files and gunzip'd them -# creates train$n, train$x, train$y and test$n, test$x, test$y -# e.g. train$x is a 60000 x 784 matrix, each row is one digit (28x28) -# call: show_digit(train$x[5,]) to see a digit. -# brendan o'connor - gist.github.com/39760 - anyall.org +### Helper functions ### load_mnist <- function() { load_image_file <- function(filename) { ret = list() @@ -35,155 +30,68 @@ load_mnist <- function() { ret = y ret } - train <<- load_image_file('./data/train-images.idx3-ubyte') - test <<- load_image_file('./data/t10k-images.idx3-ubyte') + train <<- load_image_file('./data/train-images-idx3-ubyte') + test <<- load_image_file('./data/t10k-images-idx3-ubyte') train <- train/255.0 test <- test/255.0 data = list() - data$train = cbind(train, as.matrix(load_label_file('./data/train-labels.idx1-ubyte'))) - data$test = cbind(test, as.matrix(load_label_file('./data/t10k-labels.idx1-ubyte'))) - + data$train = cbind(train, as.matrix(load_label_file('./data/train-labels-idx1-ubyte'))) + data$test = cbind(test, as.matrix(load_label_file('./data/t10k-labels-idx1-ubyte'))) data } -data <- load_mnist() - -#write.csv(data$train, 'mnist-train.csv') -#write.csv(data$test, 'mnist-test.csv') - show_digit <- function(arr784, col=gray(12:1/12), ...) { print(matrix(arr784, nrow=28, ncol=28)[1,]) image(matrix(arr784, nrow=28, ncol=28)[,28:1], col=col, ...) } -library("GrowingNeuralGas") -library(igraph) -library(testthat) - -max_nodes <- 1500 +### Configure and load examples ### +train.examples <- 10000 +max.nodes <- 100 +max.iter = 500 +data <- load_mnist() +X = data$train[1:train.examples,-785] +Y = data$train[1:train.examples,785] +X.test = data$test[,-785] +Y.test = data$test[,785] -# Construct gng object, NOTE: adding last column (target) as extra_data - this data won't be used -# in training, but will be assigned to close vertex in the graph (technically speaking it WILL be used in training, -# but will bear no effect on convergence) -gng <- GNG(dataset_type=gng.dataset.bagging, max_nodes=max_nodes, dim=784, lazyheap_optimization=TRUE, - experimental_vertex_extra_data=TRUE - ) +### Train Optimized GNG ### +gng <- OptimizedGNG(max.nodes=max.nodes, x=X, value.range=c(0,1), + labels=Y, training = gng.train.offline(max.iter, 1e-2)) -data <- load_mnist() -data0 <- data$train -data0[data0[,785]!=0.0,785] = 1.0 -gng$insert_examples(data0) -### Run algorithm ### -run(gng) - -number_nodes(gng) -mean_error(gng) - -### Pause and dump ### -pause(gng) -GrowingNeuralGas::dump_model(gng, "mnist.trained.1500.bin") +### Print some variables and save ### +numberNodes(gng) +meanError(gng) +save.gng(gng, "mnist.trained.100.bin") ### Plot using igraph layout and coloring from extra vertex ### plot(gng, mode=gng.plot.2d.errors, - vertex.color=gng.plot.color.cluster, layout=gng.plot.layout.igraph.fruchterman.fast) + vertex.color=gng.plot.color.label, layout=gng.plot.layout.igraph.fruchterman.fast) -# layout.fruchterman.reingold) +### Show closest to some examples ### +id=200 +show_digit(X.test[id,]) +show_digit(node(gng, predict(gng, X.test[id,])+1)$pos) -### Test prediction of 0 ### -for(i in 1:nrow(data$test)){ - if(data$test[i, 785]==0){ - predict(gng, data$test[i,-1]) - node(gng, predict(gng, data$test[i,-1])) - print(node(gng, predict(gng, data$test[i,-1]))$extra_data) - break - } -} +id=300 +show_digit(X.test[id,]) +show_digit(node(gng, predict(gng, X.test[id,])+1)$pos) + +id=400 +show_digit(X.test[id,]) +show_digit(node(gng, predict(gng, X.test[id,])+1)$pos) -### Test infomap community ### -plot(gng, vertex.color=gng.plot.color.cluster, - mode=gng.plot.2d, layout=igraph::layout.fruchterman.reingold) -centr <- centroids2.gng(gng) ### Plot centroids ### +centr <- centroids.gng(gng) centroids_pos = lapply(centr, function(x){ node(gng, x)$pos}) par(mfrow=c(2,2)) show_digit(node(gng, centr[1])$pos) show_digit(node(gng, centr[2])$pos) show_digit(node(gng, centr[3])$pos) -show_digit(node(gng, centr[4])$pos) - - -##################################################################### -# Code training classifier, not pertaining to Growing-Neural-Gas API# -##################################################################### - -function train_classifier(){ - ### Transform data ### - data <-load_mnist() - data_transformed_train <- matrix(0, ncol=(length(centroids_pos) + 1), nrow=nrow(data$train)) - data_transformed_test <- matrix(0, ncol=(length(centroids_pos) + 1), nrow=nrow(data$test)) - - for(i in 1:nrow(data$train)){ - data_transformed_train[i, 1:length(centroids_pos)] = unlist(lapply(centroids_pos, - function(x){ sqrt(sum((x - data$train[i, 1:784]) ^ 2)) } )) - data_transformed_train[i, length(centroids_pos)+1 ] = data$train[i, 785] - } - for(i in 1:nrow(data$test)){ - data_transformed_test[i, 1:length(centroids_pos)] = unlist(lapply(centroids_pos, - function(x){ sqrt(sum((x - data$test[i, 1:784]) ^ 2)) } )) - data_transformed_test[i, length(centroids_pos)+1 ] = data$test[i, 785] - } - - write.csv(data_transformed_train, file='mnist.transformed.train.csv') - write.csv(data_transformed_test, file='mnist.transformed.test.csv') - - ### Construct formula for nnet ### - data_transformed_train = read.csv(file='mnist.transformed.train.csv') - data_transformed_test = read.csv(file='mnist.transformed.test.csv') - - colnames(data_transformed_train) <- paste0("V", seq_len(ncol(data_transformed_train))) - colnames(data_transformed_test) <- paste0("V", seq_len(ncol(data_transformed_test))) - n <- colnames(data_transformed_train) - last_col = paste("V", ncol(data_transformed_train), sep="") - f <- as.formula(paste(paste(last_col, " ~", sep=""), paste(n[!n %in% last_col], collapse = " + "))) - print(f) - - - ### Train nnet ### - install.packages("randomForest") - library(randomForest) - library(kernlab) - library(klaR) - library("nnet") - - rf <- randomForest(x=data_transformed_train[,1:26], - y=as.factor(data_transformed_train[,27]), ntree=50) - - as.double(predict(rf, data_transformed_test[1,1:26]))-1 - - k <- 0 - cor <- 0 - for(i in 1:nrow(data_transformed_test)){ - - if(i%%100 == 0){ - print((cor+0.0)/k) - print(i) - } - - if((as.double(predict(rf, data_transformed_test[i,1:(ncol(data_transformed_test)-1)]))-1) - == data_transformed_test[i,ncol(data_transformed_test)]){ - cor <- cor + 1 - } - - k <- k + 1 - } - -} - - - +show_digit(node(gng, centr[4])$pos) \ No newline at end of file diff --git a/doc/img/gng_readme.png b/doc/img/gng_readme.png new file mode 100644 index 0000000..fd78680 Binary files /dev/null and b/doc/img/gng_readme.png differ diff --git a/inst/include/gng/GNG.h b/inst/include/gng/GNG.h deleted file mode 100644 index f65b4b7..0000000 --- a/inst/include/gng/GNG.h +++ /dev/null @@ -1,24 +0,0 @@ -/* - * File: GNGInclude.h - * Author: staszek - * - * Created on 12 sierpień 2012, 11:56 - */ -#ifndef GNGINCLUDE_H -#define GNGINCLUDE_H - -#include "utils/logger.h" - - -#include "GNGGlobals.h" -#include "UniformGrid.h" -#include "GNGNode.h" -#include "GNGGraph.h" -#include "GNGDataset.h" -#include "GNGAlgorithm.h" -#include "GNGDefines.h" -#include "GNGConfiguration.h" -#include "GNGServer.h" - -#endif /* GNGINCLUDE_H */ - diff --git a/inst/include/gng/GNGAlgorithm.h b/inst/include/gng/GNGAlgorithm.h deleted file mode 100644 index cc6844d..0000000 --- a/inst/include/gng/GNGAlgorithm.h +++ /dev/null @@ -1,350 +0,0 @@ -/* - * File: GNGAlgorithm.h - * Author: Stanislaw "kudkudak" Jastrzebski - * - * Created on 11 sierpień 2012, 10:02 - */ - - -#ifndef GNGALGORITHM_H -#define GNGALGORITHM_H - -#include - -#include "utils/threading.h" -#include "utils/circular_buffer.h" - -#include "GNGGlobals.h" -#include "GNGGraph.h" -#include "GNGDataset.h" -#include "UniformGrid.h" -#include "GNGLazyErrorHeap.h" -#include -#include -using namespace std; - -namespace gmum { - -/** - * The main class of the implementation dealing with computations. - * It should be agnostic of inner working (memory management etc.) of the graph and database. - * Also should not be concerned with locking logic. - */ -class GNGAlgorithm { -public: - typedef std::list Node; - - circular_buffer m_mean_error; //error of the network - int m_lambda; //lambda parameter - double m_eps_w, m_eps_n; //epsilon of the winner and of the neighbour - int m_max_age; - int m_max_nodes; - int m_iteration; - - bool m_toggle_uniformgrid, m_toggle_lazyheap; - - double m_utility_k; - int m_utility_option; - - - double m_alpha, m_betha; - double * m_betha_powers; - int m_betha_powers_to_n_length; - double * m_betha_powers_to_n; - int m_betha_powers_size; - double m_accumulated_error; - - int dim; - boost::shared_ptr m_logger; - - std::map times; - - double m_density_threshold, m_grow_rate; - - /** Constants used by lazy heap implementation */ - int s, c; - - GNGGraph & m_g; - GNGDataset * g_db; - UniformGrid, Node, int> * ug; - GNGLazyErrorHeap errorHeap; - - enum GngStatus { - GNG_PREPARING, GNG_RUNNING, GNG_PAUSED, GNG_TERMINATED - }; - - GngStatus m_gng_status; - bool running; - - enum UtilityOptions { - None, BasicUtility - }; - - - //For each iteration - gmum::fast_mutex m_statistics_mutex; - - - gmum::recursive_mutex status_change_mutex; - gmum::gmum_condition status_change_condition; - - GngStatus gng_status() { - return m_gng_status; - } - -public: - /** Run main loop of the algorithm*/ - void runAlgorithm(); - - /**Construct main algorithm object, that will hold mid-results - * @param alg_memory_lock When locked algorithm is not running anything that is memory dangerous - * @param g GNGGraph object implementing graph interface - * @param db GNGDataset object - * @param boundingbox_origin Starting point for reference system - * @param boundingbox_axis Axis lengths for reference system - * @param l Starting box size for uniform grid. Advised to be set to axis[0]/4 (TODO: move to the end of parameters list) - * @param max_nodes Maximum number of nodes - * @param max_age Maximum age of edge - * @param alpha See original paper(TODO: add description) - * @param betha See original paper (TODO: add description) - * @param lambda Every lambda new vertex is added - * @param eps_v See original paper(TODO: add description) - * @param eps_n See original paper (TODO: add description) - * @param dim Dimensionality - */ - GNGAlgorithm(GNGGraph * g, GNGDataset * db, double * boundingbox_origin, - double * boundingbox_axis, double l, int max_nodes = 1000, - int max_age = 200, double alpha = 0.95, double betha = 0.9995, - double lambda = 200, double eps_w = 0.05, double eps_n = 0.0006, - int dim = 3, bool uniformgrid_optimization = true, - bool lazyheap_optimization = true, unsigned int utility_option = - GNGConfiguration::UtilityOff, double utility_k = -1, - boost::shared_ptr logger = boost::shared_ptr()); - - ///Retrieve closest node's gng_index to the example - int predict(const std::vector &); - - /** Start algorithm loop */ - void run() { - this->m_gng_status = GNG_RUNNING; - this->status_change_condition.notify_all(); - } - - /** Pause algorithm loop */ - void pause() { - this->m_gng_status = GNG_PAUSED; - this->status_change_condition.notify_all(); - } - - /** Terminate the algorithm */ - void terminate() { - this->m_gng_status = GNG_TERMINATED; - this->status_change_condition.notify_all(); - } - - void setMaxNodes(int value) { - m_max_nodes = value; - } - - int getIteration() const{ - return m_iteration; - } - - double getMeanError() { - - gmum::scoped_lock alg_lock(m_statistics_mutex); - DBG(m_logger, 3, gmum::to_string(m_mean_error.size())); - if(m_mean_error.size() == 0){ - - return std::numeric_limits::max(); - }else{ - - return m_mean_error[m_mean_error.size()-1]; - } - } - - vector getMeanErrorStatistics() { - gmum::scoped_lock alg_lock(m_statistics_mutex); - if(m_mean_error.size() == 0){ - return vector(1, std::numeric_limits::max()); - }else{ - return vector(m_mean_error.begin(), m_mean_error.end()); - } - } - - double calculateAccumulatedError(); - - void testAgeCorrectness(); - - virtual ~GNGAlgorithm() { - delete[] m_betha_powers_to_n; - delete[] m_betha_powers; - } - -private: - - void resetUniformGrid(double * orig, double *axis, double l) { - ug->purge(orig, axis, l); - int maximum_index = m_g.get_maximum_index(); - - REP(i, maximum_index + 1) - { - if (m_g.existsNode(i)) - ug->insert(m_g[i].position, m_g[i].nr); - } - } - - GNGNode ** LargestErrorNodesLazy(); - - GNGNode ** LargestErrorNodes(); - - GNGNode ** TwoNearestNodes(const double * position); - - void randomInit(); - - void addNewNode(); - - double adapt(const double * ex, const double * extra); - - void resizeUniformGrid(); - - bool stoppingCriterion() { - return m_g.get_number_nodes() > m_max_nodes; - } - - void increaseErrorNew(GNGNode * node, double error) { - fixErrorNew(node); - assert(m_lambda - s <= m_betha_powers_size -1); - node->error += m_betha_powers[m_lambda - s] * error; - errorHeap.updateLazy(node->nr); - } - - void fixErrorNew(GNGNode * node) { - - if (node->error_cycle == c) - return; - - while(c - node->error_cycle > m_betha_powers_to_n_length - 1){ - DBG_2(m_logger, 5, "Recreating m_betha_powers_to_n"); - delete[] m_betha_powers_to_n; - m_betha_powers_to_n_length *= 2; - m_betha_powers_to_n = new double[m_betha_powers_to_n_length]; - REP(i, m_betha_powers_to_n_length) - m_betha_powers_to_n[i] = std::pow(m_betha, m_lambda * (double) (i)); - } - - assert(c - node->error_cycle <= m_betha_powers_to_n_length -1); - - node->error = m_betha_powers_to_n[c - node->error_cycle] * node->error; - node->error_cycle = c; - - } - - double getMaximumError() const { - double max_error = 0; - int maximum_index = m_g.get_maximum_index(); - REP(i,maximum_index+1) - if (m_g.existsNode(i)) - max_error = std::max(max_error, m_g[i].error); - return max_error; - } - - void decreaseAllErrorsNew() { - return; - } - - void decreaseErrorNew(GNGNode * node) { - fixErrorNew(node); - node->error = m_alpha * node->error; - errorHeap.updateLazy(node->nr); - } - - void setErrorNew(GNGNode * node, double error) { - node->error = error; - node->error_cycle = c; - errorHeap.insertLazy(node->nr); - } - - void increaseError(GNGNode * node, double error) { - node->error += error; - } - - void decreaseAllErrors() { - int maximum_index = m_g.get_maximum_index(); - REP(i,maximum_index+1) - if (m_g.existsNode(i)) - m_g[i].error = m_betha * m_g[i].error; - } - - void decreaseError(GNGNode * node) { - node->error = m_alpha * node->error; - } - - void setError(GNGNode * node, double error) { - node->error = error; - } - - // Note: this code is not optimal and is inserted only for research purposes - - double getUtility(int i) { - return m_g[i].utility; - } - - void setUtility(int i, double u) { - m_g[i].utility = u; - } - - void utilityCriterionCheck() { - - if (m_g.get_number_nodes() < 10) - return; //just in case - - double max_error = this->getMaximumError(); - int maximum_index = m_g.get_maximum_index(); - - double min_utility = 100000000; - int min_utility_index = -1; - - for (int i = 0; i <= maximum_index; ++i) - if (min_utility > getUtility(i)) { - min_utility = getUtility(i); - min_utility_index = i; - } - - if (m_g.existsNode(min_utility_index) && max_error / getUtility(min_utility_index) > m_utility_k) { - - DBG(m_logger,2, "GNGAlgorithm:: removing node with utility "+gmum::to_string(getUtility(min_utility_index)) + " max error "+gmum::to_string(max_error)); - - DBG(m_logger,2,gmum::to_string(max_error)); - - GNGNode::EdgeIterator edg = m_g[min_utility_index].begin(); - while (edg != m_g[min_utility_index].end()) { - int nr = (*edg)->nr; - edg = m_g.removeUDEdge(min_utility_index, nr); - } - - m_g.deleteNode(min_utility_index); - setUtility(min_utility_index, 0); - } - - } - void decreaseAllUtility() { - int maximum_index = m_g.get_maximum_index(); - for (int i = 0; i <= maximum_index; ++i) - if (m_g.existsNode(i)) - setUtility(i, getUtility(i) * (m_betha)); - } -}; - -/**Design hack for passing distance function dist(index, position)*/ -struct GNGGraphAccessHack { - static GNGGraph * pool; - static double dist(int index, double *position) { - return pool->get_euclidean_dist((*pool)[index].position, position); - } -}; - - -} - -#endif diff --git a/inst/include/gng/GNGConfiguration.h b/inst/include/gng/GNGConfiguration.h deleted file mode 100644 index 5fa28ba..0000000 --- a/inst/include/gng/GNGConfiguration.h +++ /dev/null @@ -1,290 +0,0 @@ -/* - * File: GNGConfiguration.h - * Author: staszek - * - * Created on October 17, 2013, 8:11 PM - */ - -#ifndef GNGCONFIGURATION_H -#define GNGCONFIGURATION_H - -#ifdef RCPP_INTERFACE -#include -using namespace Rcpp; -#endif - -#include "utils/utils.h" -#include - - - - /** - * - * Configuration of GNG algorithm/server - * TODO: add detailed description for parameters - */ - class GNGConfiguration{ - public: - enum GraphNodeStorage{ - NoneGraphNodeStorage, - SharedMemory, - RAMMemory - } graph_storage; - - - enum DatasetType{ - NoneDatasetTypeinit, - DatasetSeq, - DatasetSampling, - DatasetSamplingProb - }; - - enum ExperimentalUtility{ - UtilityOff, - UtilityBasicOn - }; - - - /**Maximum number of nodes*/ - int max_nodes;//=1000; - /**Uniform grid optimization*/ - bool uniformgrid_optimization;//=true,lazyheap=true; - /**Lazy heap optimization*/ - bool lazyheap_optimization; - /**Bounding box specification*/ - - - /**Dimensionality of examples*/ - int dim; - - - std::vector orig; - std::vector axis; - /**Max edge age*/ - int max_age;//=200; - /**Alpha coefficient*/ - double alpha;//=0.95; - /**Beta coefficient*/ - double beta;//=0.9995; - /**Lambda coefficient*/ - double lambda;//=200; - /**Epsilion v. How strongly move winning node*/ - double eps_w;//=0.05; - /**Memory bound*/ - int graph_memory_bound; - /**Epsilion n*/ - double eps_n;//=0.0006; - - int verbosity; - - /**Pseudodistance function used (might be non metric)*/ - int distance_function; - - /**Type of used database, unsgined int for compabititlity with Rcpp**/ - unsigned int datasetType; - - /**Initial reserve memory for nodes */ - int starting_nodes; - - ///Utility constant - double experimental_utility_k; - - ///Utility option. Currently supported simples utility - int experimental_utility_option; - - public: - - - GNGConfiguration(){ - - verbosity = 10; - - starting_nodes = 100; - - experimental_utility_option = (int)UtilityOff; - experimental_utility_k = 1.5; - - graph_storage = RAMMemory; - - dim = 3; - setBoundingBox(0, 1); - - datasetType = DatasetSampling; - max_nodes=1000; - uniformgrid_optimization=false; - graph_memory_bound = 200000*sizeof(double); - - lazyheap_optimization=false; - max_age=200; - alpha=0.95; - beta=0.9995; - lambda=200; - eps_w=0.05; - eps_n=0.0006; - - distance_function = gmum::GNGGraph::Euclidean; - - - } - - - void deserialize(std::istream & in){ - ///Utility constant - in >> experimental_utility_k; - - ///Utility option. Currently supported simples utility - in >> experimental_utility_option; - - /**Maximum number of nodes*/ - in >> max_nodes;//=1000; - /**Uniform grid optimization*/ - in >> uniformgrid_optimization;//=true,lazyheap=true; - /**Lazy heap optimization*/ - in >> lazyheap_optimization; - /**Bounding box specification*/ - - /**Dimensionality of examples*/ - in >> dim; - - REPORT(dim); - - orig = vector(dim, 0); - axis = vector(dim, 0); - - for(int i=0;i>axis[i]>>orig[i]; - } - /**Max edge age*/ - in >> max_age;//=200; - /**Alpha coefficient*/ - in >> alpha;//=0.95; - /**Beta coefficient*/ - in >> beta;//=0.9995; - /**Lambda coefficient*/ - in >> lambda;//=200; - /**Epsilion v. How strongly move winning node*/ - in >> eps_w;//=0.05; - /**Memory bound*/ - in >> graph_memory_bound; - /**Epsilion n*/ - in >> eps_n;//=0.0006; - - in >> verbosity; - - /**Pseudodistance function used (might be non metric)*/ - in >> distance_function; - - - /**Type of used database, unsgined int for compabititlity with Rcpp**/ - in >> datasetType; - - /**Initial reserve memory for nodes */ - in >> starting_nodes; - } - - void serialize(std::ostream & out){ - ///Utility constant - out << experimental_utility_k << endl; - - ///Utility option. Currently supported simples utility - out << experimental_utility_option<< endl; - - /**Maximum number of nodes*/ - out << max_nodes<< endl;//=1000; - /**Uniform grid optimization*/ - out << uniformgrid_optimization<< endl;//=true,lazyheap=true; - /**Lazy heap optimization*/ - out << lazyheap_optimization<< endl; - /**Bounding box specification*/ - - /**Dimensionality of examples*/ - out << dim<< endl; - - REPORT(dim); - - for(int i=0;i(); - axis = vector(); - for(int i=0;i 3 or datasetType <= 0){ - cerr<<"ERROR: wrong database specified\n"; - - return false; - } - if(! (dim < 20 || ! uniformgrid_optimization)){ - - cerr<<"WARNING: It might be too big dimensionality for OptimizedGNG." - "OptimizedGNG works best for smaller dimensionality dataset" - "Consider using PCA or other dim. reduction technique" - "\n"; - - } - if(! (distance_function==gmum::GNGGraph::Euclidean || ! uniformgrid_optimization)){ - - cerr<<"ERROR: You can use only Euclidean distance function with uniformgrid optimization\n"; - return false; - } - if(! (!uniformgrid_optimization or (dim == axis.size() && dim == orig.size()))){ - - cerr<<"ERROR: dimensionality doesn't agree with axis and orig"< -#include -#include -#include -#include -#include -#include -#include - -#include "utils/threading.h" -#include "utils/utils.h" - -#include "GNGNode.h" -#include "GNGGlobals.h" - -using namespace std; - -namespace gmum { -/** Graph interface for GNGAlgorithm. - * - * - */ -class GNGGraph { -public: - - enum GNGDistanceFunction { - Euclidean, Cosine - }; - - virtual ~ GNGGraph() { - } - /** Lock from unsafe operations - * @note It ensures that operations won't fail (in worst case block) - * Mostly used for blocking regrowing - */ - virtual void lock() { - //Do nothing by default - } - - /** Unlock for unsafe operations */ - virtual void unlock() { - //Do nothing by default - } - - /** This is specific for GNG Graph - e - * each node is assigned index. It fetches maximum node index - */ - virtual unsigned int get_maximum_index() const = 0; - - /* - * @return True if exists node in the graph - */ - virtual bool existsNode(unsigned int) const = 0; - - virtual int get_dim() const = 0; - - virtual GNGNode & operator[](int i) = 0; - - virtual unsigned int get_number_nodes() const = 0; - - //TODO: move it to GNGNode - virtual double get_dist(int a, int b) = 0; - - //TODO: move it to GNGNode - virtual double get_euclidean_dist(const double * pos_1, const double * pos_2) const= 0; - - //TODO: move it to GNGNode - virtual double get_dist(const double *pos_a, const double *pos_b) const = 0; - - /* Initialize node with position attribute */ - virtual int newNode(const double *position) = 0; - - virtual bool deleteNode(int x) = 0; - - virtual bool isEdge(int a, int b) const = 0; - - //Again:dependency on GNGNode:: - virtual typename GNGNode::EdgeIterator removeUDEdge(int a, int b) = 0; - - virtual void addUDEdge(int a, int b) = 0; - - virtual void addDEdge(int a, int b) = 0; - - virtual std::string reportPool() { - return ""; - } - - virtual void load(std::istream & in) = 0; - virtual void serialize(std::ostream & out) = 0; - - -}; - -/* @note: Not thread safe. To be used from one thread only! - * - * Can be used by external thread by has to be locked. All operations moving - * whole memory sectory will unlock. Elegant solution: locking interface - * - * It allows for easy erasing of nodes. - * - * Node: implements GNGNode interface - * Edge: implements GNGEdge interface - * Mutex: implements lock and unlock interface - * - * TODO: change GNGEdge* to GNGEdge (problems with rev) - * TODO: edges ~ gng_dim - maybe use this for better efficiency? - */ -template class RAMGNGGraph: public GNGGraph { - /** Mutex provided externally for synchronization*/ - Mutex * mutex; - - std::vector g; - std::vector occupied; - - //TODO: change to vector - std::vector positions; //as continuous array for speed/caching purposes, could be vector - - int maximum_index; - unsigned int nodes; - - unsigned int gng_dim; - - boost::shared_ptr m_logger; - -public: - /** Indicates next free vertex */ - std::vector next_free; //TODO: has to be public : / - int first_free; - - GNGDistanceFunction dist_fnc; - - typedef typename Node::EdgeIterator EdgeIterator; - - RAMGNGGraph(Mutex * mutex, unsigned int dim, int initial_pool_size, GNGDistanceFunction dist_fnc = Euclidean, - boost::shared_ptr logger = boost::shared_ptr()) : - maximum_index(-1), mutex(mutex), gng_dim(dim), first_free(-1), nodes(0), dist_fnc(dist_fnc), m_logger(logger) { - - positions.resize(initial_pool_size * gng_dim); - - //Initialize graph data structures - g.resize(initial_pool_size); - - for (int i = 0; i < initial_pool_size; ++i) - g[i].reserve(gng_dim); - - occupied.resize(initial_pool_size); - - for (int i = 0; i < initial_pool_size; ++i) - occupied[i] = false; - next_free.resize(initial_pool_size); - - for (int i = 0; i < initial_pool_size - 1; ++i) - next_free[i] = i + 1; - next_free[initial_pool_size - 1] = -1; - first_free = 0; - - } - - /** This is specific for GNG Graph - e - * each node is assigned index. It fetches maximum node index - */ - - virtual unsigned int get_maximum_index() const { - return this->maximum_index; - } - - /* @note NOT THREAD SAFE - USE ONLY FROM ALGORITHM THREAD OR LOCK - * @return True if exists node in the graph - */ - virtual bool existsNode(unsigned i) const { - return i < nodes && occupied[i]; - } - - ///NOT THREAD SAFE - USE ONLY FROM ALGORITHM THREAD OR LOCK - bool isEdge(int a, int b) const { - - FOREACH(edg, g[a]) - { - if ((*edg)->nr == b) - return true; - } - return false; - } - - ///NOT THREAD SAFE - USE ONLY FROM ALGORITHM THREAD OR LOCK - const double *getPosition(int nr) const { - return g[nr].position; - } - - unsigned int get_number_nodes() const { - return this->nodes; - } - - ///NOT THREAD SAFE - USE ONLY FROM ALGORITHM THREAD OR LOCK - Node & - operator[](int i) { - return g[i]; - } - - ///NOT THREAD SAFE - USE ONLY FROM ALGORITHM THREAD OR LOCK - double get_dist(int a, int b) { - return get_dist(g[a].position, g[b].position); - } - - double get_euclidean_dist(const double *pos_a, const double *pos_b) const { - double distance = 0; - for (int i = 0; i < this->gng_dim; ++i) - distance += (pos_a[i] - pos_b[i]) * (pos_a[i] - pos_b[i]); - - return distance; - } - - ///NOT THREAD SAFE - USE ONLY FROM ALGORITHM THREAD OR LOCK - double get_dist(const double *pos_a, const double *pos_b) const { - if (dist_fnc == Euclidean) { - double distance = 0; - for (int i = 0; i < this->gng_dim; ++i) - distance += (pos_a[i] - pos_b[i]) * (pos_a[i] - pos_b[i]); - - return distance; - } else if (dist_fnc == Cosine) { - double norm_1 = 0, norm_2 = 0, distance = 0; - - for (int i = 0; i < this->gng_dim; ++i) { - norm_1 += (pos_a[i]) * (pos_a[i]); - norm_2 += (pos_b[i]) * (pos_b[i]); - distance += pos_a[i] * pos_b[i]; - } - - norm_1 = sqrt(norm_1); - norm_2 = sqrt(norm_2); - return 1.0 - distance / (norm_1 * norm_2); - } - } - - ///NOT THREAD SAFE - USE ONLY FROM ALGORITHM THREAD OR LOCK - int newNode(const double *position) { - if (first_free == -1) { - DBG(m_logger,10, "RAMGNGGraph::newNode() growing pool"); - this->resizeGraph(); - - } - - int createdNode = first_free; //taki sam jak w g_node_pool - - maximum_index = createdNode > maximum_index ? createdNode : maximum_index; - - //Assuming it is clear here -#ifdef GMUM_DEBUG - assert(g[createdNode].size() == 0); -#endif - - // Initialize node - g[createdNode].position = &positions[createdNode * gng_dim]; - occupied[createdNode] = true; - g[createdNode].nr = createdNode; - g[createdNode].edgesCount = 0; - g[createdNode].utility = 0.0; - g[createdNode]._position_owner = false; - g[createdNode].dim = gng_dim; - g[createdNode].extra_data = 0.0; - - first_free = next_free[createdNode]; - - //zwiekszam licznik wierzcholkow //na koncu zeby sie nie wywalil przypadkowo - ++this->nodes; - memcpy(&(g[createdNode].position[0]), position, - sizeof(double) * (this->gng_dim)); //param - - //TODO: this should be tracked by GNGAlgorithm - g[createdNode].error = 0.0; - g[createdNode].error_cycle = 0; - - return createdNode; - - } - - ///NOT THREAD SAFE - USE ONLY FROM ALGORITHM THREAD OR LOCK - bool deleteNode(int x) { - - this->lock(); - if (existsNode(x)) { - //TODO: add automatic erasing edges - assert(g[x].size() == 0); - - --nodes; - if (maximum_index == x) - maximum_index = maximum_index - 1; - - occupied[x] = false; - next_free[x] = first_free; - first_free = x; - this->unlock(); - return true; - - } - - this->unlock(); - return false; - - } - - ///NOT THREAD SAFE - USE ONLY FROM ALGORITHM THREAD OR LOCK - EdgeIterator removeUDEdge(int a, int b) { - - this->lock(); - - FOREACH(edg, g[a]) - { - if ((*edg)->nr == b) { - Edge *ptr_rev = (Edge *) ((**edg).rev); - Edge *ptr = (Edge *) (&(**edg)); - - g[b].erase(find(g[b].begin(), g[b].end(), (*edg)->rev)); - edg = g[a].erase(edg); - - delete ptr; - delete ptr_rev; - - g[a].edgesCount--; - g[b].edgesCount--; - this->unlock(); - return edg; - } - } - - this->unlock(); - DBG(m_logger,10, "ExtGraphNodeManager()::removeEdge Not found edge!"); - return g[a].end(); - - } - - ///NOT THREAD SAFE - USE ONLY FROM ALGORITHM THREAD OR LOCK - void addUDEdge(int a, int b) { - - this->lock(); - - if (a == b) - throw "Added loop to the graph"; - - g[a].push_back(new Edge(b)); - g[b].push_back(new Edge(a)); - - g[a].back()->rev = g[b].back(); - g[b].back()->rev = g[a].back(); - - g[a].edgesCount++; - g[b].edgesCount++; - this->unlock(); - - } - - ///NOT THREAD SAFE - USE ONLY FROM ALGORITHM THREAD OR LOCK - - void addDEdge(int a, int b) { - throw BasicException("Not implemented"); - } - - ///NOT THREAD SAFE - USE ONLY FROM ALGORITHM THREAD OR LOCK - std::string reportPool() { - std::stringstream ss; - for (unsigned int i = 0; i < g.size(); ++i) { - string tmp = ""; - if (occupied[i]) { - tmp = tmp + to_str(g[i]) + ":"; - FOREACH(it2, g[i]) - { - tmp += to_str((*it2)->nr) + "[" - + to_str((((*it2)->rev))->nr) + "],"; - } - tmp = tmp + "\n"; - } - ss << tmp; - } - return ss.str(); - } - - ~RAMGNGGraph() { - for (int i = 0; i < g.size(); ++i) { - if (occupied[i]) { - FOREACH(edg, g[i]) - delete *edg; - } - } - - } - - virtual int get_dim() const { - return gng_dim; - } - - virtual void lock() { - mutex->lock(); - } - - virtual void unlock() { - mutex->unlock(); - } - - - /* - * format is [N] [gng_dim] N* [0/1 + vertex] N*[ [l] l*[gng_idx]] - */ - void serialize(std::ostream & output) { - this->lock(); - - - vector S; - S.reserve(10000); - - //Header - S.push_back((double) (g.size())); - S.push_back((double) (maximum_index + 1)); - S.push_back((double) gng_dim); - S.push_back((double) first_free); - S.push_back((double) nodes); - - DBG(m_logger,7, "GNGGraph::Serializing nodes"); - //Nodes - for (int i = 0; i < g.size(); ++i) { - if (existsNode(i)) { - S.push_back((double) 1); - vector serialized_node = g[i].dumpVertexData(); - - std::copy(serialized_node.begin(), serialized_node.end(), - std::back_inserter(S)); - } else { - S.push_back((double) 0); - } - } DBG(m_logger,7, "GNGGraph::Serializing edges"); - //Edges - for (int i = 0; i < g.size(); ++i) { - if (existsNode(i)) { - vector serialized_node = g[i].dumpEdges(); - std::copy(serialized_node.begin(), serialized_node.end(), - std::back_inserter(S)); - } else { - S.push_back((double) 0); - } - } DBG(m_logger,7, "GNGGraph::Serializing nextFree"); - //NextFree - for (int i = 0; i < g.size(); ++i) { - S.push_back((double) next_free[i]); - } DBG(m_logger,7, "GNGGraph::Serialize;:writing out"); - - _write_bin_vect(output, S); - - - this->unlock(); - } - void load(std::istream & input) { - this->lock(); - - DBG(m_logger,7, "GNGGraph:: loading "); - - vector S = _load_bin_vector(input); - vector::iterator itr = S.begin(); - //Header - unsigned int bufor_size = (int) *itr; - maximum_index = (int) *(++itr) - 1; - gng_dim = (int) *(++itr); - first_free = (int) *(++itr); - nodes = (int) *(++itr); - - DBG(m_logger,5, "Read in "+to_str(bufor_size) +" sized graph with "+ - " max_index="+to_str(maximum_index)+" gng_dim="+to_str(gng_dim)+" "+ - "first_free="+to_str(first_free)+" nodes="+to_str(nodes) - ); - - positions.clear(); - g.clear(); - next_free.clear(); - occupied.clear(); - - occupied.resize(bufor_size); - g.resize(bufor_size); - next_free.resize(bufor_size); - positions.resize((bufor_size + 1) * gng_dim); - - for (int i = 0; i < bufor_size; ++i) { - occupied[i] = false; - g[i].reserve(gng_dim + 2); - } - - //Deserialize nodes - for (int i = 0; i < g.size(); ++i) { - int tmp = (int) *(++itr); - occupied[i] = (bool) tmp; - if (occupied[i]) - g[i].loadVertexData(itr, gng_dim, &positions[i * gng_dim]); - - } - - //Deserialize edges - for (int i = 0; i < g.size(); ++i) { - int edges_length = (int) *(++itr); - - for (int j = 0; j < edges_length; ++j) { - int gng_endpoint_index = (int) *(++itr); - if (gng_endpoint_index > i) - this->addUDEdge(i, gng_endpoint_index); - } - } - - //Deserialize nextFree - for (int i = 0; i < g.size(); ++i) { - next_free[i] = (int) *(++itr); - } - - - this->unlock(); - } - -private: - ///NOT THREAD SAFE - USE ONLY FROM ALGORITHM THREAD OR LOCK - void resizeGraph() { - //DBG(m_logger,5, "GNGGraph::resizing graph from "+to_string(g.size())); - DBG_2(m_logger,5, "GNGGraph::resizing"); - unsigned int previous_size = g.size(); - //Grow positions pool - - positions.resize(2 * previous_size * gng_dim); - - //Reassign memory pointers - for (int i = 0; i < previous_size; ++i) { - g[i].position = &positions[i * gng_dim]; - - } - - g.resize(2 * previous_size); - - for (int i = 0; i < previous_size; ++i) { - g[i].position = &positions[i * gng_dim]; - } - - occupied.resize(2 * previous_size); - for (int i = previous_size; i < 2 * previous_size; ++i) { - // g[i].reset(); - // g[i].reserve(gng_dim); //for speed purposes - occupied[i] = false; - } - - next_free.resize(2 * previous_size); - for (int i = previous_size - 1; i < 2 * previous_size - 1; ++i) { - next_free[i] = i + 1; - } - next_free[g.size() - 1] = -1; - first_free = previous_size; - - DBG_2(m_logger,5, "GNGGraph::resizing done"); DBG(m_logger,5, to_str(first_free)); DBG(m_logger,5, to_str(next_free[previous_size])); - //DBG(m_logger,5, "GNGGraph::resizing graph from "+to_string(g.size())+" done"); - } -}; - - -std::string writeToGraphML(GNGGraph &g, string filename = ""); - -} -#endif diff --git a/inst/include/gng/GNGServer.h b/inst/include/gng/GNGServer.h deleted file mode 100644 index f38c7d9..0000000 --- a/inst/include/gng/GNGServer.h +++ /dev/null @@ -1,334 +0,0 @@ -/* - * File: GNGServer.h - * Author: staszek - * - * Created on October 17, 2013, 8:12 PM - */ -#ifndef GNGSERVER_H -#define GNGSERVER_H - - -#include -#include -#include -#include -#include - -#include "utils/threading.h" -#include "utils/utils.h" - -#include "GNGDefines.h" -#include "GNGConfiguration.h" -#include "GNGGlobals.h" -#include "GNGGraph.h" -#include "GNGDataset.h" -#include "GNGAlgorithm.h" - - -#ifdef RCPP_INTERFACE -#include -using namespace Rcpp; -using namespace arma; -#endif - -using namespace gmum; - -/** Holds together all logic and objects.*/ -class GNGServer { - bool m_current_dataset_memory_was_set; - bool m_running_thread_created; - - gmum::gmum_thread * algorithm_thread; - - /** Mutex used for synchronization of graph access*/ - gmum::recursive_mutex grow_mutex; - /** Mutex used for synchronization of graph access*/ - gmum::recursive_mutex database_mutex; - /** Mutex used for synchronization of graph access*/ - gmum::recursive_mutex stat_mutex; - - GNGConfiguration current_configuration; - - std::auto_ptr gngAlgorithm; - std::auto_ptr gngGraph; - std::auto_ptr gngDataset; - - //Called from constructors - void init(GNGConfiguration configuration, std::istream * input_graph = 0); - -public: - boost::shared_ptr m_logger; - - /**Construct GNGServer using configuration*/ - GNGServer(GNGConfiguration configuration, std::istream * input_graph); - - GNGServer(std::string filename){ - std::ifstream input; - input.open(filename.c_str(), ios::in | ios::binary); - - GNGConfiguration conf; - conf.deserialize(input); - - init(conf, &input); - } - - - - - void run() { - DBG(m_logger,10, "GNGServer::runing algorithm thread"); - algorithm_thread = new gmum::gmum_thread(&GNGServer::_run, - (void*) this); - DBG(m_logger,10, "GNGServer::runing collect_statistics thread"); - - m_running_thread_created = true; - } - - - GNGConfiguration getConfiguration(){ - return current_configuration; - } - - static GNGServer * constructTestServer(GNGConfiguration config) { - return new GNGServer(config, 0 /*input_graph*/); - } - - void save(std::string filename){ - if(current_configuration.experimental_utility_option == GNGConfiguration::UtilityBasicOn){ - cerr<<"Saving is not supported for gng.type.utility\n"; - return; - } - - - std::ofstream output; - output.open(filename.c_str(), ios::out | ios::binary); - - current_configuration.serialize(output); - - try{ - gngGraph->lock(); - assert(filename != ""); - gngGraph->serialize(output); - }catch(...){ - cerr<<"Failed exporting to GraphML\n"; - #ifdef DEBUG_GMUM - throw BasicException("Failed exporting to GraphML\n"); - #endif - gngGraph->unlock(); //No RAII, yes.. - return; - } - gngGraph->unlock(); - } - - unsigned int getCurrentIteration() const{ - return gngAlgorithm->getIteration(); - } - - ///Exports GNG state to file - void exportToGraphML(std::string filename) { - try{ - gngGraph->lock(); - assert(filename != ""); - writeToGraphML(getGraph(), filename); - }catch(...){ - cerr<<"Failed exporting to GraphML\n"; - #ifdef DEBUG_GMUM - throw BasicException("Failed exporting to GraphML\n"); - #endif - gngGraph->unlock(); //No RAII, yes.. - return; - } - gngGraph->unlock(); - } - - ///Insert examples - void insertExamples(double * positions, double * extra, double * probability, - unsigned int count, unsigned int dim) { - gmum::scoped_lock lock(gngDataset.get()); - - - if (dim != current_configuration.dim) { - DBG(m_logger,10, "Wrong dimensionality is "+gmum::to_string(count*dim)+" expected "+ - gmum::to_string(count*gngDataset->getDataDim()) + \ - " data dim " + gmum::to_string(gngDataset->size())); - throw BasicException("Wrong dimensionality. " - "Check if you have added all field to " - "position (for instance probability)"); - } - - - - gngDataset->insertExamples(positions, extra, probability, count); - DBG(m_logger,7, "GNGServer::Database size "+gmum::to_string(gngDataset->size())); - - } - - unsigned int getNumberNodes() const { - int nr = this->gngGraph->get_number_nodes(); - return nr; - } - - double getMeanError(){ - return gngAlgorithm->getMeanError(); - } - - vector getMeanErrorStatistics() { - return gngAlgorithm->getMeanErrorStatistics(); - } - -#ifdef RCPP_INTERFACE - //Constructor needed for RCPPInterface - GNGServer(GNGConfiguration * configuration){ - init(*configuration, 0 /*input_graph*/); - } - SEXP m_current_dataset_memory; //will be deleted in ~delete - ///Moderately slow function returning node descriptors - Rcpp::List getNode(unsigned int index) { - unsigned int gng_index = index - 1; //1 based - - gngGraph->lock(); - - if(!gngGraph->existsNode(gng_index)) { - List ret; - return ret; - } - - GNGNode & n = getGraph()[gng_index]; - NumericVector pos(n.position, n.position + gngDataset->getGNGDim()); - - List ret; - ret["pos"] = pos; - ret["error"] = n.error; - ret["label"] = n.extra_data; - - vector neigh(n.size()); - GNGNode::EdgeIterator edg = n.begin(); - unsigned i = 0; - while(edg!=n.end()) { - neigh[i++] = (*edg)->nr + 1; - ++edg; - } - - ret["neighbours"] = IntegerVector(neigh.begin(), neigh.end()); - - gngGraph->unlock(); - - return ret; - } - - int Rpredict(Rcpp::NumericVector & r_ex) { - return gngAlgorithm->predict(std::vector(r_ex.begin(), r_ex.end()) ); - } - - Rcpp::NumericVector RgetErrorStatistics() { - vector x = getMeanErrorStatistics(); - return NumericVector(x.begin(), x.end()); - } - void RinsertExamples(Rcpp::NumericMatrix & r_points, - Rcpp::NumericVector r_extra = Rcpp::NumericVector()) { - std::vector extra(r_extra.begin(), r_extra.end()); - arma::mat * points = new arma::mat(r_points.begin(), r_points.nrow(), r_points.ncol(), false); - - - - arma::Row mean_colwise = arma::mean(*points, 0 /*dim*/); - arma::Row std_colwise = arma::stddev(*points, 0 /*dim*/); - arma::Row diff_std = arma::abs(std_colwise - 1.0); - float max_diff_std = arma::max(diff_std), max_mean = arma::max(mean_colwise); - if(max_diff_std > 0.1 || max_mean > 0.1){ - cerr< max_colwise = arma::max(*points, 0 /*dim*/); - arma::Row min_colwise = arma::min(*points, 0 /*dim*/); - arma::Row diff = max_colwise - min_colwise; - float max = arma::max(diff), min = arma::min(diff); - - for(int i=0;i min_colwise[i] || current_configuration.orig[i]+current_configuration.axis[i] < max_colwise[i]){ - cerr<<"Error: each feature has to be in range passed to gng.type.optimized \n"; - cerr<<"Error: returning, did not insert examples\n"; - return; - } - } - } - - - arma::inplace_trans( *points, "lowmem"); - - if(extra.size()){ - insertExamples(points->memptr(), &extra[0], 0 /*probabilty vector*/, - (unsigned int)points->n_cols, (unsigned int)points->n_rows); - }else{ - insertExamples(points->memptr(), 0 /* extra vector */, 0 /*probabilty vector*/, - (unsigned int)points->n_cols, (unsigned int)points->n_rows); - } - - arma::inplace_trans( *points, "lowmem"); - } - - - -#endif - - ///Pause algorithm - void pause() { - gngAlgorithm->pause(); - } - - ///Terminate algorithm - void terminate() { - getAlgorithm().terminate(); - DBG(m_logger,20, "GNGServer::getAlgorithm terminated, joining algorithm thread"); - if (algorithm_thread) - algorithm_thread->join(); - DBG(m_logger,20, "GNGServer::algorithm thread terminated, joining statistic thread"); - gmum::sleep(100); - } - - GNGAlgorithm & getAlgorithm() { - return *this->gngAlgorithm.get(); - } - GNGGraph & getGraph() { - return *this->gngGraph.get(); - } - GNGDataset & getDatabase() { - return *this->gngDataset.get(); - } - - ~GNGServer() { - DBG(m_logger,10, "GNGServer::destructor called"); -#ifdef RcppInterface - - if(m_current_dataset_memory_was_set) { - R_ReleaseObject(m_current_dataset_memory); - - } - //R Matrix will be deleted from R level -#endif - } - -private: - - /** Run GNG Server - runs in separate thread and returns control - * @note Runs one extra threads for communication. - */ - static void _run(void * server) { - GNGServer * gng_server = (GNGServer*) server; - try { - DBG(gng_server->m_logger,10, "GNGServer::run::proceeding to algorithm"); - gng_server->getAlgorithm().run(); - gng_server->getAlgorithm().runAlgorithm(); - } catch (std::exception & e) { - cerr << "GNGServer::failed _run with " << e.what() << endl; - DBG(gng_server->m_logger,10, e.what()); - } - } - -}; - -#endif -/* GNGSERVER_H */ diff --git a/inst/include/gng/GNGGlobals.h b/inst/include/gng/globals.h similarity index 100% rename from inst/include/gng/GNGGlobals.h rename to inst/include/gng/globals.h diff --git a/inst/include/gng/gng.h b/inst/include/gng/gng.h new file mode 100644 index 0000000..13f024c --- /dev/null +++ b/inst/include/gng/gng.h @@ -0,0 +1,24 @@ +/* + * File: GNGInclude.h + * Author: staszek + * + * Created on 12 sierpień 2012, 11:56 + */ +#ifndef GNGINCLUDE_H +#define GNGINCLUDE_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "utils/logger.h" + + + +#endif /* GNGINCLUDE_H */ + diff --git a/inst/include/gng/gng_algorithm.h b/inst/include/gng/gng_algorithm.h new file mode 100644 index 0000000..1375189 --- /dev/null +++ b/inst/include/gng/gng_algorithm.h @@ -0,0 +1,197 @@ +/* + * File: GNGAlgorithm.h + * Author: Stanislaw "kudkudak" Jastrzebski + * + * Created on 11 sierpień 2012, 10:02 + */ + +#ifndef GNGALGORITHM_H +#define GNGALGORITHM_H + +#include +#include +#include +#include +#include +#include + +#include "utils/threading.h" +#include "utils/circular_buffer.h" + +#include +#include + +using namespace std; + +namespace gmum { + +/** + * The main class of the implementation dealing with computations. + * It should be agnostic of inner working (memory management etc.) of the graph and database. + * Also should not be concerned with locking logic. + * + * + * @note Many algorithm results are retrievable only when it is paused for performance reasons + */ +class GNGAlgorithm { +public: + /**Construct main algorithm object, that will hold mid-results + * @param alg_memory_lock When locked algorithm is not running anything that is memory dangerous + * @param g GNGGraph object implementing graph interface + * @param db GNGDataset object + * @param boundingbox_origin Starting point for reference system + * @param boundingbox_axis Axis lengths for reference system + * @param l Starting box size for uniform grid. Advised to be set to axis[0]/4 (TODO: move to the end of parameters list) + * @param max_nodes Maximum number of nodes + * @param max_age Maximum age of edge + * @param alpha See original paper(TODO: add description) + * @param betha See original paper (TODO: add description) + * @param lambda Every lambda new vertex is added + * @param eps_v See original paper(TODO: add description) + * @param eps_n See original paper (TODO: add description) + * @param dim Dimensionality + */ + GNGAlgorithm(GNGGraph * g, GNGDataset * db, double * boundingbox_origin, + double * boundingbox_axis, double l, int max_nodes = 1000, + int max_age = 200, double alpha = 0.95, double betha = 0.9995, + double lambda = 200, double eps_w = 0.05, double eps_n = 0.0006, + int dim = 3, bool uniformgrid_optimization = true, + bool lazyheap_optimization = true, unsigned int utility_option = + GNGConfiguration::UtilityOff, double utility_k = -1, + boost::shared_ptr logger = boost::shared_ptr()); + + /** Run main loop of the algorithm*/ + void runAlgorithm(); + + ///Retrieve closest node's gng_index to the example + int predict(const std::vector &); + + void run(); + void pause(); + bool isRunning(); + void terminate(); + + unsigned getErrorIndex() const; + void setMaxNodes(int value); + int getIteration() const; + double getMeanError(); + vector > getMeanErrorStatistics(); + + //Retrieve clustering result. + //@note pauses algorithm as many + const vector & get_clustering(); + + virtual ~GNGAlgorithm(); +public: + //TODO: don't use list in UniformGrid + typedef std::list Node; + + int calculated_errors; //for convergence checking + circular_buffer > m_mean_error; //error of the network + int m_lambda; //lambda parameter + double m_eps_w, m_eps_n; //epsilon of the winner and of the neighbour + int m_max_age; + int m_max_nodes; + int m_iteration; + + bool m_toggle_uniformgrid, m_toggle_lazyheap; + + double m_utility_k; + int m_utility_option; + + double m_alpha, m_betha; + double * m_betha_powers; + int m_betha_powers_to_n_length; + double * m_betha_powers_to_n; + int m_betha_powers_size; + double m_accumulated_error; + + int dim; + boost::shared_ptr m_logger; + + + double m_density_threshold, m_grow_rate; + + /** Constants used by lazy heap implementation */ + int s, c; + + GNGGraph & m_g; + GNGDataset * g_db; + UniformGrid, Node, int> * ug; + GNGLazyErrorHeap errorHeap; + + enum GngStatus { + GNG_PREPARING, GNG_RUNNING, GNG_PAUSED, GNG_TERMINATED + }; + + GngStatus m_gng_status; + bool running; + + enum UtilityOptions { + None, BasicUtility + }; + + gmum::recursive_mutex status_change_mutex; + //rewrite to the same locking logic as get_clustering + gmum::fast_mutex m_statistics_mutex; + gmum::gmum_condition status_change_condition; + + vector clustering_result; +private: + //Main algorithm methods + + //@return error and closest node index + std::pair adapt(const double * ex, const double * extra); + std::pair _getNearestNeurons(const double *ex); + + void randomInit(); + void addNewNode(); + + GNGNode ** LargestErrorNodesLazy(); + GNGNode ** LargestErrorNodes(); + + bool stoppingCriterion(); + + //Utility functions + + double calculateAccumulatedError(); + void testAgeCorrectness(); + void resetUniformGrid(double * orig, double *axis, double l); + void resizeUniformGrid(); + + //sets clustering assignment of given node + void set_clustering(unsigned int ex, unsigned int node_idx); + + void increaseErrorNew(GNGNode * node, double error); + void fixErrorNew(GNGNode * node); + double getMaximumError() const; + void decreaseAllErrorsNew(); + void decreaseErrorNew(GNGNode * node); + void setErrorNew(GNGNode * node, double error); + void increaseError(GNGNode * node, double error); + void decreaseAllErrors(); + void decreaseError(GNGNode * node); + void setError(GNGNode * node, double error); + + // Note: this code is not optimal and is inserted only for research purposes + double getUtility(int i); + void setUtility(int i, double u); + void utilityCriterionCheck(); + void decreaseAllUtility(); + + GngStatus gng_status() { + return m_gng_status; + } +}; + +/**Design hack for passing distance function dist(index, position)*/ +struct GNGGraphAccessHack { + static GNGGraph * pool; + static double dist(int index, double *position) { + return pool->get_euclidean_dist((*pool)[index].position, position); + } +}; + +} + +#endif diff --git a/inst/include/gng/gng_configuration.h b/inst/include/gng/gng_configuration.h new file mode 100644 index 0000000..4215daf --- /dev/null +++ b/inst/include/gng/gng_configuration.h @@ -0,0 +1,278 @@ +/* + * File: GNGConfiguration.h + * Author: staszek + * + * Created on October 17, 2013, 8:11 PM + */ + +#ifndef GNGCONFIGURATION_H +#define GNGCONFIGURATION_H + +#ifdef RCPP_INTERFACE +#include +using namespace Rcpp; +#endif + +#include "utils/utils.h" +#include "gng_graph.h" +#include + +/** + * + * Configuration of GNG algorithm/server + */ +class GNGConfiguration { +public: + enum GraphNodeStorage { + NoneGraphNodeStorage, SharedMemory, RAMMemory + } graph_storage; + + enum DatasetType { + NoneDatasetTypeinit, DatasetSeq, DatasetSampling, DatasetSamplingProb + }; + + enum ExperimentalUtility { + UtilityOff, UtilityBasicOn + }; + + /**Maximum number of nodes*/ + int max_nodes; //=1000; + /**Uniform grid optimization*/ + bool uniformgrid_optimization; //=true,lazyheap=true; + /**Lazy heap optimization*/ + bool lazyheap_optimization; + /**Bounding box specification*/ + + /**Dimensionality of examples*/ + unsigned dim; + + std::vector orig; + std::vector axis; + /**Max edge age*/ + int max_age; //=200; + /**Alpha coefficient*/ + double alpha; //=0.95; + /**Beta coefficient*/ + double beta; //=0.9995; + /**Lambda coefficient*/ + double lambda; //=200; + /**Epsilion v. How strongly move winning node*/ + double eps_w; //=0.05; + /**Memory bound*/ + int graph_memory_bound; + /**Epsilion n*/ + double eps_n; //=0.0006; + + int verbosity; + + /**Pseudodistance function used (might be non metric)*/ + int distance_function; + + /**Type of used database, unsgined int for compabititlity with Rcpp**/ + unsigned int datasetType; + + /**Initial reserve memory for nodes */ + int starting_nodes; + + ///Utility constant + double experimental_utility_k; + + ///Utility option. Currently supported simples utility + int experimental_utility_option; + +public: + + GNGConfiguration() { + + verbosity = 10; + + starting_nodes = 100; + + experimental_utility_option = (int) UtilityOff; + experimental_utility_k = 1.5; + + graph_storage = RAMMemory; + + dim = 3; + setBoundingBox(0, 1); + + datasetType = DatasetSampling; + max_nodes = 1000; + uniformgrid_optimization = false; + graph_memory_bound = 200000 * sizeof(double); + + lazyheap_optimization = false; + max_age = 200; + alpha = 0.95; + beta = 0.9995; + lambda = 200; + eps_w = 0.05; + eps_n = 0.0006; + + distance_function = gmum::GNGGraph::Euclidean; + + } + + void deserialize(std::istream & in) { + ///Utility constant + in >> experimental_utility_k; + + ///Utility option. Currently supported simples utility + in >> experimental_utility_option; + + /**Maximum number of nodes*/ + in >> max_nodes; //=1000; + /**Uniform grid optimization*/ + in >> uniformgrid_optimization; //=true,lazyheap=true; + /**Lazy heap optimization*/ + in >> lazyheap_optimization; + /**Bounding box specification*/ + + /**Dimensionality of examples*/ + in >> dim; + + REPORT(dim); + + orig = vector(dim, 0); + axis = vector(dim, 0); + + for (size_t i = 0; i < dim; ++i) { + in >> axis[i] >> orig[i]; + } + /**Max edge age*/ + in >> max_age; //=200; + /**Alpha coefficient*/ + in >> alpha; //=0.95; + /**Beta coefficient*/ + in >> beta; //=0.9995; + /**Lambda coefficient*/ + in >> lambda; //=200; + /**Epsilion v. How strongly move winning node*/ + in >> eps_w; //=0.05; + /**Memory bound*/ + in >> graph_memory_bound; + /**Epsilion n*/ + in >> eps_n; //=0.0006; + + in >> verbosity; + + /**Pseudodistance function used (might be non metric)*/ + in >> distance_function; + + /**Type of used database, unsgined int for compabititlity with Rcpp**/ + in >> datasetType; + + /**Initial reserve memory for nodes */ + in >> starting_nodes; + } + + void serialize(std::ostream & out) { + ///Utility constant + out << experimental_utility_k << endl; + + ///Utility option. Currently supported simples utility + out << experimental_utility_option << endl; + + /**Maximum number of nodes*/ + out << max_nodes << endl; //=1000; + /**Uniform grid optimization*/ + out << uniformgrid_optimization << endl; //=true,lazyheap=true; + /**Lazy heap optimization*/ + out << lazyheap_optimization << endl; + /**Bounding box specification*/ + + /**Dimensionality of examples*/ + out << dim << endl; + + REPORT(dim); + + for (size_t i = 0; i < dim; ++i) { + out << axis[i] << endl << orig[i] << endl; + } + /**Max edge age*/ + out << max_age << endl; //=200; + /**Alpha coefficient*/ + out << alpha << endl; //=0.95; + /**Beta coefficient*/ + out << beta << endl; //=0.9995; + /**Lambda coefficient*/ + out << lambda << endl; //=200; + /**Epsilion v. How strongly move winning node*/ + out << eps_w << endl; //=0.05; + /**Memory bound*/ + out << graph_memory_bound << endl; + /**Epsilion n*/ + out << eps_n << endl; //=0.0006; + + out << verbosity << endl; + + /**Pseudodistance function used (might be non metric)*/ + out << distance_function << endl; + + /**Type of used database, unsgined int for compabititlity with Rcpp**/ + out << datasetType << endl; + + /**Initial reserve memory for nodes */ + out << starting_nodes; //imporant not to add endl for binary correctness + } + + //This is a simplification - we assume square box + void setBoundingBox(double min, double max) { + orig = vector(); + axis = vector(); + for (size_t i = 0; i < dim; ++i) { + orig.push_back(min); + axis.push_back(max - min); + } + } + + /** Get default configuration of GNG Server */ + static GNGConfiguration getDefaultConfiguration() { + GNGConfiguration default_configuration; + return default_configuration; + } + + /**Validate server configuration. *Not working now**/ + bool check_correctness() { + if (experimental_utility_option != UtilityOff + && (uniformgrid_optimization || lazyheap_optimization)) { + cerr + << "ERROR: please turn OFF optimization when using experimental utility option\n"; + + return false; + } + + if (datasetType > 3 or datasetType <= 0) { + cerr << "ERROR: wrong database specified\n"; + + return false; + } + if (!(dim < 20 || !uniformgrid_optimization)) { + + cerr + << "WARNING: It might be too big dimensionality for OptimizedGNG." + "OptimizedGNG works best for smaller dimensionality dataset" + "Consider using PCA or other dim. reduction technique" + "\n"; + + } + if (!(distance_function == gmum::GNGGraph::Euclidean + || !uniformgrid_optimization)) { + + cerr + << "ERROR: You can use only Euclidean distance function with uniformgrid optimization\n"; + return false; + } + if (!(!uniformgrid_optimization + or (dim == axis.size() && dim == orig.size()))) { + + cerr << "ERROR: dimensionality doesn't agree with axis and orig" + << endl; + return false; + } + + return true; + } +}; +#endif /* GNGCONFIGURATION_H */ + diff --git a/inst/include/gng/GNGDataset.h b/inst/include/gng/gng_dataset.h similarity index 76% rename from inst/include/gng/GNGDataset.h rename to inst/include/gng/gng_dataset.h index 7314f44..26b8c26 100644 --- a/inst/include/gng/GNGDataset.h +++ b/inst/include/gng/gng_dataset.h @@ -11,11 +11,11 @@ #include #include #include +#include +#include #include "utils/threading.h" #include "utils/utils.h" -#include "GNGGlobals.h" -#include "GNGConfiguration.h" namespace gmum { @@ -30,8 +30,6 @@ namespace gmum { class GNGDataset { public: - - virtual int getDataDim() const=0; virtual int getGNGDim() const =0; @@ -46,8 +44,8 @@ class GNGDataset { virtual const double * getExtraData(unsigned int) const=0; ///Inserts examples to the dataset - virtual void insertExamples(const double *, const double*, - const double *, unsigned int count)=0; + virtual void insertExamples(const double *, const double*, const double *, + unsigned int count)=0; virtual void removeExample(unsigned int)=0; @@ -56,14 +54,12 @@ class GNGDataset { virtual ~GNGDataset() { } - virtual void lock() = 0; virtual void unlock() = 0; }; - ///Storage :< GNGDatabaseStorage -template +template class GNGDatasetSimple: public GNGDataset { protected: @@ -71,22 +67,18 @@ class GNGDatasetSimple: public GNGDataset { gmum::recursive_mutex * mutex_; - vector storage_; vector storage_extra_; vector storage_probability_; - bool store_extra_; unsigned int current_example_; boost::shared_ptr logger_; public: - enum AccessMethod{ - Sequential, - Sampling, - SamplingProbability + enum AccessMethod { + Sequential, Sampling, SamplingProbability } access_method_; /* @@ -96,8 +88,8 @@ class GNGDatasetSimple: public GNGDataset { GNGDatasetSimple(gmum::recursive_mutex *mutex, unsigned int dim, bool store_extra = false, AccessMethod access_method = Sequential, boost::shared_ptr logger = boost::shared_ptr()) : - mutex_(mutex), store_extra_(store_extra), dim_(dim), access_method_(access_method), - current_example_(0), logger_(logger){ + mutex_(mutex), store_extra_(store_extra), dim_(dim), access_method_( + access_method), current_example_(0), logger_(logger) { __init_rnd(); } @@ -114,10 +106,9 @@ class GNGDatasetSimple: public GNGDataset { ///Retrieves pointer to position const T * getPosition(unsigned int i) const { - return &storage_[i*dim_]; + return &storage_[i * dim_]; } - const T * getExtraData(unsigned int i) const { if (!store_extra_) return 0; @@ -136,8 +127,7 @@ class GNGDatasetSimple: public GNGDataset { do { index = __int_rnd(0, size() - 1); ex = getPosition(index); - } while (storage_probability_[index] - < __double_rnd(0, 1.0)); + } while (storage_probability_[index] < __double_rnd(0, 1.0)); return index; } @@ -154,37 +144,40 @@ class GNGDatasetSimple: public GNGDataset { void insertExamples(const double * positions, const double *extra, const double *probability, unsigned int count) { - if(storage_.capacity() < storage_.size() + count*dim_){ + if (storage_.capacity() < storage_.size() + count * dim_) { DBG(logger_,10, "Resizing storage_"); - storage_.reserve(storage_.size() + count*dim_); + storage_.reserve(storage_.size() + count * dim_); DBG(logger_,10, "Resized storage_"); } - storage_.insert(storage_.end(), positions, positions + count*dim_); + storage_.insert(storage_.end(), positions, positions + count * dim_); - if(store_extra_){ - if(storage_extra_.capacity() < storage_extra_.size() + count){ + if (store_extra_) { + if (storage_extra_.capacity() < storage_extra_.size() + count) { DBG(logger_,10, "Resizing store_extra_"); storage_extra_.reserve(storage_extra_.size() + count); } - if(!extra){ - for(int i=0;i #include namespace gmum { diff --git a/inst/include/gng/gng_graph.h b/inst/include/gng/gng_graph.h new file mode 100644 index 0000000..b6d5f75 --- /dev/null +++ b/inst/include/gng/gng_graph.h @@ -0,0 +1,168 @@ +/* + * File: SHGraph.h + * Author: staszek + * + * Created on 11 sierpień 2012, 09:07 + */ + +#ifndef GNGGraph_H +#define GNGGraph_H + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace std; + +namespace gmum { + +class GNGGraph { +public: + + enum GNGDistanceFunction { + Euclidean, Cosine + }; + + virtual ~ GNGGraph() { + } + + /** Lock from unsafe operations + * @note It ensures that operations won't fail (in worst case block) + * Mostly used for blocking regrowing + */ + virtual void lock() { + //Do nothing by default + } + + /** Unlock for unsafe operations */ + virtual void unlock() { + //Do nothing by default + } + + /** This is specific for GNG Graph - e + * each node is assigned index. It fetches maximum node index + */ + virtual unsigned int get_maximum_index() const = 0; + + /* + * @return True if exists node in the graph + */ + virtual bool existsNode(unsigned int) const = 0; + + virtual int get_dim() const = 0; + + virtual GNGNode & operator[](int i) = 0; + + virtual unsigned int get_number_nodes() const = 0; + + virtual double get_dist(int a, int b) = 0; + + virtual double get_euclidean_dist(const double * pos_1, + const double * pos_2) const= 0; + + virtual double get_dist(const double *pos_a, const double *pos_b) const = 0; + + /* Initialize node with position attribute */ + virtual int newNode(const double *position) = 0; + + virtual bool deleteNode(int x) = 0; + + virtual bool isEdge(int a, int b) const = 0; + + virtual GNGNode::EdgeIterator removeUDEdge(int a, int b) = 0; + + virtual void addUDEdge(int a, int b) = 0; + + virtual void addDEdge(int a, int b) = 0; + + virtual std::string reportPool() { + return ""; + } + + virtual void load(std::istream & in) = 0; + virtual void serialize(std::ostream & out) = 0; + +}; + +/* + * Node: implements GNGNode interface + * Edge: implements GNGEdge interface + * Mutex: implements lock and unlock interface + */ +template +class RAMGNGGraph: public GNGGraph { +public: + // Indicates next free vertex + std::vector next_free; + int first_free; + GNGDistanceFunction dist_fnc; + + typedef typename Node::EdgeIterator EdgeIterator; + + RAMGNGGraph(Mutex * mutex, unsigned int dim, int initial_pool_size, + GNGDistanceFunction dist_fnc = Euclidean, + boost::shared_ptr logger = boost::shared_ptr()); + + Node & operator[](int i); + + int newNode(const double *position); + bool deleteNode(int x); + EdgeIterator removeUDEdge(int a, int b); + void addUDEdge(int a, int b); + void addDEdge(int a, int b); + + std::string reportPool(); + + virtual int get_dim() const; + double get_dist(int a, int b); + double get_euclidean_dist(const double *pos_a, const double *pos_b) const; + double get_dist(const double *pos_a, const double *pos_b) const; + virtual unsigned int get_maximum_index() const; + virtual bool existsNode(unsigned i) const; + bool isEdge(int a, int b) const; + const double *getPosition(int nr) const; + unsigned int get_number_nodes() const; + + virtual void lock(); + virtual void unlock(); + + /* + * format is [N] [gng_dim] N* [0/1 + vertex] N*[ [l] l*[gng_idx]] + */ + void serialize(std::ostream & output); + void load(std::istream & input); + + ~RAMGNGGraph(); +private: + Mutex * mutex; + + std::vector g; + std::vector occupied; + std::vector positions; //as continuous array for speed/caching purposes, could be vector + + int maximum_index; + unsigned int nodes, gng_dim; + + boost::shared_ptr m_logger; + +private: + void resizeGraph(); +}; + +#include + +static std::string writeToGraphML(GNGGraph &g, string filename = ""); + +} +#endif diff --git a/inst/include/gng/gng_graph.hpp b/inst/include/gng/gng_graph.hpp new file mode 100644 index 0000000..27b15b0 --- /dev/null +++ b/inst/include/gng/gng_graph.hpp @@ -0,0 +1,521 @@ +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace std; +using namespace gmum; + +template +RAMGNGGraph::RAMGNGGraph(Mutex * mutex, unsigned int dim, + int initial_pool_size, GNGDistanceFunction dist_fnc, + boost::shared_ptr logger) : + maximum_index(-1), mutex(mutex), gng_dim(dim), first_free(-1), nodes(0), dist_fnc( + dist_fnc), m_logger(logger) { + + positions.resize(initial_pool_size * gng_dim); + + //Initialize graph data structures + g.resize(initial_pool_size); + + for (int i = 0; i < initial_pool_size; ++i) + g[i].reserve(gng_dim); + + occupied.resize(initial_pool_size); + + for (int i = 0; i < initial_pool_size; ++i) + occupied[i] = false; + next_free.resize(initial_pool_size); + + for (int i = 0; i < initial_pool_size - 1; ++i) + next_free[i] = i + 1; + next_free[initial_pool_size - 1] = -1; + first_free = 0; + +} + +template +RAMGNGGraph::~RAMGNGGraph() { + for (int i = 0; i < g.size(); ++i) { + if (occupied[i]) { + BOOST_FOREACH(GNGEdge * edg, g[i]) + delete edg; + } + } + +} + +template +unsigned int RAMGNGGraph::get_maximum_index() const { + return this->maximum_index; +} + +template +bool RAMGNGGraph::existsNode(unsigned i) const { + return i < nodes && occupied[i]; +} + +template +bool RAMGNGGraph::isEdge(int a, int b) const { + + BOOST_FOREACH(GNGEdge * edg, g[a]) { + if ((edg)->nr == b) + return true; + } + return false; +} + +template +const double *RAMGNGGraph::getPosition(int nr) const { + return g[nr].position; +} + +template +unsigned int RAMGNGGraph::get_number_nodes() const { + return this->nodes; +} + +template +Node & +RAMGNGGraph::operator[](int i) { + return g[i]; +} + +template +double RAMGNGGraph::get_dist(int a, int b) { + return get_dist(g[a].position, g[b].position); +} + +template +double RAMGNGGraph::get_euclidean_dist(const double *pos_a, + const double *pos_b) const { + double distance = 0; + for (int i = 0; i < this->gng_dim; ++i) + distance += (pos_a[i] - pos_b[i]) * (pos_a[i] - pos_b[i]); + + return distance; +} + +template +double RAMGNGGraph::get_dist(const double *pos_a, + const double *pos_b) const { + if (dist_fnc == Euclidean) { + double distance = 0; + for (size_t i = 0; i < this->gng_dim; ++i) + distance += (pos_a[i] - pos_b[i]) * (pos_a[i] - pos_b[i]); + + return distance; + } else if (dist_fnc == Cosine) { + double norm_1 = 0, norm_2 = 0, distance = 0; + + for (size_t i = 0; i < this->gng_dim; ++i) { + norm_1 += (pos_a[i]) * (pos_a[i]); + norm_2 += (pos_b[i]) * (pos_b[i]); + distance += pos_a[i] * pos_b[i]; + } + + norm_1 = sqrt(norm_1); + norm_2 = sqrt(norm_2); + return 1.0 - distance / (norm_1 * norm_2); + } +} + +template +int RAMGNGGraph::newNode(const double *position) { + if (first_free == -1) { + DBG(m_logger,10, "RAMGNGGraph::newNode() growing pool"); + this->resizeGraph(); + + } + + int createdNode = first_free; //taki sam jak w g_node_pool + + maximum_index = createdNode > maximum_index ? createdNode : maximum_index; + + //Assuming it is clear here +#ifdef GMUM_DEBUG + assert(g[createdNode].size() == 0); +#endif + + // Initialize node + g[createdNode].position = &positions[createdNode * gng_dim]; + occupied[createdNode] = true; + g[createdNode].nr = createdNode; + g[createdNode].edgesCount = 0; + g[createdNode].utility = 0.0; + g[createdNode]._position_owner = false; + g[createdNode].dim = gng_dim; + g[createdNode].extra_data = 0.0; + + first_free = next_free[createdNode]; + + //zwiekszam licznik wierzcholkow //na koncu zeby sie nie wywalil przypadkowo + ++this->nodes; + memcpy(&(g[createdNode].position[0]), position, + sizeof(double) * (this->gng_dim)); //param + + //TODO: this should be tracked by GNGAlgorithm + g[createdNode].error = 0.0; + g[createdNode].error_cycle = 0; + + return createdNode; + +} + +template +bool RAMGNGGraph::deleteNode(int x) { + + this->lock(); + if (existsNode(x)) { + //TODO: add automatic erasing edges + assert(g[x].size() == 0); + + --nodes; + if (maximum_index == x) + maximum_index = maximum_index - 1; + + occupied[x] = false; + next_free[x] = first_free; + first_free = x; + this->unlock(); + return true; + + } + + this->unlock(); + return false; + +} + +template +typename RAMGNGGraph::EdgeIterator RAMGNGGraph::removeUDEdge(int a, int b) { + + this->lock(); + + for (typename Node::iterator edg = g[a].begin(); edg != g[a].end(); ++edg) { + if ((*edg)->nr == b) { + Edge *ptr_rev = (Edge *) ((**edg).rev); + Edge *ptr = (Edge *) (&(**edg)); + + g[b].erase(find(g[b].begin(), g[b].end(), (*edg)->rev)); + + edg = g[a].erase(edg); + + delete ptr; + delete ptr_rev; + + g[a].edgesCount--; + g[b].edgesCount--; + this->unlock(); + return edg; + } + } + + this->unlock(); + DBG(m_logger,10, "ExtGraphNodeManager()::removeEdge Not found edge!"); + return g[a].end(); + +} + +template +void RAMGNGGraph::addUDEdge(int a, int b) { + + this->lock(); + + if (a == b) + throw "Added loop to the graph"; + + g[a].push_back(new Edge(b)); + g[b].push_back(new Edge(a)); + + g[a].back()->rev = g[b].back(); + g[b].back()->rev = g[a].back(); + + g[a].edgesCount++; + g[b].edgesCount++; + this->unlock(); + +} + +template +void RAMGNGGraph::addDEdge(int a, int b) { + throw BasicException("Not implemented"); +} + +template +std::string RAMGNGGraph::reportPool() { + std::stringstream ss; + for (unsigned int i = 0; i < g.size(); ++i) { + string tmp = ""; + if (occupied[i]) { + tmp = tmp + to_str(g[i]) + ":"; + BOOST_FOREACH(GNGEdge * it2, g[i]) { + tmp += to_str((it2)->nr) + "[" + to_str((((it2)->rev))->nr) + + "],"; + } + tmp = tmp + "\n"; + } + ss << tmp; + } + return ss.str(); +} + +template +int RAMGNGGraph::get_dim() const { + return gng_dim; +} +template +void RAMGNGGraph::lock() { + mutex->lock(); +} + +template +void RAMGNGGraph::unlock() { + mutex->unlock(); +} + +template +void RAMGNGGraph::serialize(std::ostream & output) { + this->lock(); + + vector S; + S.reserve(10000); + + //Header + S.push_back((double) (g.size())); + S.push_back((double) (maximum_index + 1)); + S.push_back((double) gng_dim); + S.push_back((double) first_free); + S.push_back((double) nodes); + + DBG(m_logger,7, "GNGGraph::Serializing nodes"); + //Nodes + for (int i = 0; i < g.size(); ++i) { + if (existsNode(i)) { + S.push_back((double) 1); + vector serialized_node = g[i].dumpVertexData(); + + std::copy(serialized_node.begin(), serialized_node.end(), + std::back_inserter(S)); + } else { + S.push_back((double) 0); + } + }DBG(m_logger,7, "GNGGraph::Serializing edges"); + //Edges + for (int i = 0; i < g.size(); ++i) { + if (existsNode(i)) { + vector serialized_node = g[i].dumpEdges(); + std::copy(serialized_node.begin(), serialized_node.end(), + std::back_inserter(S)); + } else { + S.push_back((double) 0); + } + }DBG(m_logger,7, "GNGGraph::Serializing nextFree"); + //NextFree + for (int i = 0; i < g.size(); ++i) { + S.push_back((double) next_free[i]); + }DBG(m_logger,7, "GNGGraph::Serialize;:writing out"); + + _write_bin_vect(output, S); + + this->unlock(); +} + +template +void RAMGNGGraph::load(std::istream & input) { + this->lock(); + + DBG(m_logger,7, "GNGGraph:: loading "); + + vector S = _load_bin_vector(input); + vector::iterator itr = S.begin(); + //Header + unsigned int bufor_size = (int) *itr; + maximum_index = (int) *(++itr) - 1; + gng_dim = (int) *(++itr); + first_free = (int) *(++itr); + nodes = (int) *(++itr); + + DBG(m_logger,5, "Read in "+to_str(bufor_size) +" sized graph with "+ + " max_index="+to_str(maximum_index)+" gng_dim="+to_str(gng_dim)+" "+ + "first_free="+to_str(first_free)+" nodes="+to_str(nodes) + ); + + positions.clear(); + g.clear(); + next_free.clear(); + occupied.clear(); + + occupied.resize(bufor_size); + g.resize(bufor_size); + next_free.resize(bufor_size); + positions.resize((bufor_size + 1) * gng_dim); + + for (size_t i = 0; i < bufor_size; ++i) { + occupied[i] = false; + g[i].reserve(gng_dim + 2); + } + + //Deserialize nodes + for (size_t i = 0; i < g.size(); ++i) { + int tmp = (int) *(++itr); + occupied[i] = (bool) tmp; + if (occupied[i]) + g[i].loadVertexData(itr, gng_dim, &positions[i * gng_dim]); + + } + + //Deserialize edges + for (size_t i = 0; i < g.size(); ++i) { + int edges_length = (int) *(++itr); + + for (int j = 0; j < edges_length; ++j) { + int gng_endpoint_index = (int) *(++itr); + if (gng_endpoint_index > i) + this->addUDEdge(i, gng_endpoint_index); + } + } + + //Deserialize nextFree + for (size_t i = 0; i < g.size(); ++i) { + next_free[i] = (int) *(++itr); + } + + this->unlock(); +} + +template +void RAMGNGGraph::resizeGraph() { + //DBG(m_logger,5, "GNGGraph::resizing graph from "+to_string(g.size())); + DBG_2(m_logger,5, "GNGGraph::resizing"); + unsigned int previous_size = g.size(); + //Grow positions pool + + positions.resize(2 * previous_size * gng_dim); + + //Reassign memory pointers + for (size_t i = 0; i < previous_size; ++i) { + g[i].position = &positions[i * gng_dim]; + + } + + g.resize(2 * previous_size); + + for (size_t i = 0; i < previous_size; ++i) { + g[i].position = &positions[i * gng_dim]; + } + + occupied.resize(2 * previous_size); + for (int i = previous_size; i < 2 * previous_size; ++i) { + // g[i].reset(); + // g[i].reserve(gng_dim); //for speed purposes + occupied[i] = false; + } + + next_free.resize(2 * previous_size); + for (size_t i = previous_size - 1; i < 2 * previous_size - 1; ++i) { + next_free[i] = i + 1; + } + next_free[g.size() - 1] = -1; + first_free = previous_size; + + DBG_2(m_logger,5, "GNGGraph::resizing done");DBG(m_logger,5, to_str(first_free));DBG(m_logger,5, to_str(next_free[previous_size])); +} + +static void writeToGraphML(GNGGraph &g, std::ostream & out) { + + g.lock(); + + out << "\n"; + out + << "\n"; + out + << "\n"; + out + << "\n"; + out + << "\n"; + out + << "\n"; + out + << "\n"; + out + << "\n"; + out + << "\n"; + out + << "\n"; + out + << "\n"; + + std::map gng_index_to_graph_index; + + unsigned int k = 0; + for (int i = 0; i <= g.get_maximum_index(); ++i) { + + if (g.existsNode(i)) { + gng_index_to_graph_index[g[i].nr] = k; //TODO:To be honest I dnt remember purpose of g[i].nr.. + + out << "\n"; + out << "" << g[i].error << "\n"; + out << "" << g[i].extra_data << "\n"; + out << "" << g[i].nr << "\n"; + out << "" << g[i].utility << "\n"; + out << "" << g[i].position[0] << "\n"; + out << "" << g[i].position[1] << "\n"; + out << "" << g[i].position[2] << "\n"; + out << "\n"; + } + } + + unsigned int l = 0; + + for (unsigned int i = 0; i <= g.get_maximum_index(); ++i) + if (g.existsNode(i)) { + FOREACH(GNGEdge * edg, g[i]) { + if (g[i].nr > (edg)->nr) { //directed! + out << "nr] + << "\" target=\"n" + << gng_index_to_graph_index[g[i].nr] << "\">\n"; + out << "" << g.get_dist(i, (edg)->nr) + << ""; + out << "\n"; + } + } + + } + + out << "\n\n"; + g.unlock(); + +} + +static std::string writeToGraphML(GNGGraph &g, string filename) { + + if (filename == "") { + std::stringstream ss; + writeToGraphML(g, ss); + return ss.str(); + } else { + ofstream myfile(filename.c_str()); + writeToGraphML(g, myfile); + myfile.close(); + return ""; + } + +} + diff --git a/inst/include/gng/GNGLazyErrorHeap.h b/inst/include/gng/gng_lazy_error_heap.h similarity index 98% rename from inst/include/gng/GNGLazyErrorHeap.h rename to inst/include/gng/gng_lazy_error_heap.h index ebb3a74..85b2e0a 100644 --- a/inst/include/gng/GNGLazyErrorHeap.h +++ b/inst/include/gng/gng_lazy_error_heap.h @@ -8,8 +8,8 @@ #ifndef GNGLAZYERRORHEAP_H #define GNGLAZYERRORHEAP_H -#include "Heap.h" -#include "GNGGraph.h" +#include +#include namespace gmum { struct ErrorNode { double error; diff --git a/inst/include/gng/GNGNode.h b/inst/include/gng/gng_node.h similarity index 92% rename from inst/include/gng/GNGNode.h rename to inst/include/gng/gng_node.h index 0c4f9ae..c797b3c 100644 --- a/inst/include/gng/GNGNode.h +++ b/inst/include/gng/gng_node.h @@ -8,6 +8,7 @@ #ifndef SHGRAPHDEFS_H #define SHGRAPHDEFS_H +#include #include #include #include @@ -15,7 +16,6 @@ #include #include -#include "GNGGlobals.h" /** * Basic interface for Edge in GNGGraph. */ @@ -76,7 +76,7 @@ class GNGNode: public std::vector { double dist(GNGNode * gnode) const { //dist doesnt account for param using namespace std; double ret = 0; - for (int i = 0; i < dim; ++i) + for (size_t i = 0; i < dim; ++i) ret += (this->position[i] - gnode->position[i]) * (this->position[i] - gnode->position[i]); return sqrt(ret); @@ -84,7 +84,7 @@ class GNGNode: public std::vector { friend std::ostream& operator<<(std::ostream& out, const GNGNode & node) { out << node.nr << "(" << node.error << ")("; - for (int i = 0; i < node.dim; ++i) { + for (size_t i = 0; i < node.dim; ++i) { out << node.position[i] << ","; } out << ")"; @@ -95,7 +95,7 @@ class GNGNode: public std::vector { vector dumpEdges() { vector dump(1 + this->size(), 0.0); dump[0] = this->size(); - for (int i = 0; i < this->size(); ++i) + for (size_t i = 0; i < this->size(); ++i) dump[i + 1] = (*this)[i]->nr; return dump; } @@ -111,8 +111,8 @@ class GNGNode: public std::vector { dump[5] = (int) _position_owner; dump[6] = dim; dump[7] = extra_data; - for (int i = 0; i < dim; ++i) { - dump[i + 7] = position[i]; + for (size_t i = 0; i < dim; ++i) { + dump[i + 8] = position[i]; } return dump; } @@ -128,7 +128,7 @@ class GNGNode: public std::vector { dim = x[6]; extra_data = x[7]; position = position_ptr; - for (int i = 0; i < dim; ++i) { + for (size_t i = 0; i < dim; ++i) { position[i] = x[i + 8]; } } diff --git a/inst/include/gng/gng_server.h b/inst/include/gng/gng_server.h new file mode 100644 index 0000000..d6f3998 --- /dev/null +++ b/inst/include/gng/gng_server.h @@ -0,0 +1,146 @@ +/* + * File: GNGServer.h + * Author: staszek + * + * Created on October 17, 2013, 8:12 PM + */ +#ifndef GNGSERVER_H +#define GNGSERVER_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "utils/threading.h" +#include "utils/utils.h" + + +#ifdef RCPP_INTERFACE +#include +using namespace Rcpp; +using namespace arma; +#endif + +using namespace gmum; + +/** Holds together all logic and objects.*/ +class GNGServer { +public: + boost::shared_ptr m_logger; + + /**Construct GNGServer using configuration*/ + GNGServer(GNGConfiguration configuration, std::istream * input_graph); + + GNGServer(std::string filename); + + void run(); + + void pause(); + + void terminate(); + + double nodeDistance(int id1, int id2) const; + + void save(std::string filename); + + unsigned int getCurrentIteration() const; + + ///Exports GNG state to file + void exportToGraphML(std::string filename); + + ///Insert examples + void insertExamples(double * positions, double * extra, + double * probability, unsigned int count, unsigned int dim); + + unsigned getGNGErrorIndex() const; + bool isRunning() const; + vector getMeanErrorStatistics(); + unsigned int getNumberNodes() const; + double getMeanError(); + GNGConfiguration getConfiguration(); + GNGAlgorithm & getAlgorithm(); + GNGGraph & getGraph(); + GNGDataset & getDatabase(); + + ~GNGServer(); + +#ifdef RCPP_INTERFACE + //Constructor needed for RCPPInterface + GNGServer(GNGConfiguration * configuration); + + SEXP m_current_dataset_memory;//will be deleted in ~delete + + ///Moderately slow function returning node descriptors + Rcpp::List getNode(int index); + + int Rpredict(Rcpp::NumericVector & r_ex); + + Rcpp::NumericVector RgetClustering(); + + Rcpp::NumericVector RgetErrorStatistics(); + + void RinsertExamples(Rcpp::NumericMatrix & r_points, + Rcpp::NumericVector r_extra = Rcpp::NumericVector()); + + //This is tricky - used only by convertToIGraph in R, because + //it might happen that we delete nodes and have bigger index of the last node + //than actual nodes (especially in the case of utility version of GNG) + unsigned int _getLastNodeIndex() const; + +#endif + +private: + bool m_current_dataset_memory_was_set; + bool m_running_thread_created; + + gmum::gmum_thread * algorithm_thread; + + /** Mutex used for synchronization of graph access*/ + gmum::recursive_mutex grow_mutex; + /** Mutex used for synchronization of graph access*/ + gmum::recursive_mutex database_mutex; + /** Mutex used for synchronization of graph access*/ + gmum::recursive_mutex stat_mutex; + + GNGConfiguration current_configuration; + + std::auto_ptr gngAlgorithm; + std::auto_ptr gngGraph; + std::auto_ptr gngDataset; + + //Called from constructors + void init(GNGConfiguration configuration, std::istream * input_graph = 0); + +private: + /** Run GNG Server - runs in separate thread and returns control + * @note Runs one extra threads for communication. + */ + static void _run(void * server) { + GNGServer * gng_server = (GNGServer*) server; + try { + DBG(gng_server->m_logger,10, "GNGServer::run::proceeding to algorithm"); + gng_server->getAlgorithm().run(); + gng_server->getAlgorithm().runAlgorithm(); + } catch (std::exception & e) { + cerr << "GNGServer::failed _run with " << e.what() << endl; + DBG(gng_server->m_logger,10, e.what()); + } + } + +public: + //legacy code + static GNGServer * constructTestServer(GNGConfiguration config) { + return new GNGServer(config, 0 /*input_graph*/); + } +}; + +#endif +/* GNGSERVER_H */ diff --git a/inst/include/gng/Heap.h b/inst/include/gng/heap.h similarity index 100% rename from inst/include/gng/Heap.h rename to inst/include/gng/heap.h diff --git a/inst/include/gng/UniformGrid.h b/inst/include/gng/uniform_grid.h similarity index 51% rename from inst/include/gng/UniformGrid.h rename to inst/include/gng/uniform_grid.h index 3221b05..635dae5 100644 --- a/inst/include/gng/UniformGrid.h +++ b/inst/include/gng/uniform_grid.h @@ -1,4 +1,3 @@ - /* * File: UniformGrid.h * Author: staszek @@ -9,14 +8,12 @@ #ifndef UNIFORM_GRID_H #define UNIFORM_GRID_H -#include -#include -#include -#include +#include +#include +#include +#include #include - -#include "GNGGlobals.h" -#include "utils/utils.h" +#include using namespace std; @@ -30,6 +27,76 @@ class UniformGrid { typedef VectorContainer NodeArray; public: + UniformGrid(double * origin, int *dim, int gng_dim, double m_grow_factor = + 1.5, double m_density_threshold = 2.0, + double m_density_threshold_min = 0.4, + boost::shared_ptr logger = boost::shared_ptr()); + + UniformGrid(double * origin, double *axis, double l, int gng_dim, + double m_grow_factor = 1.5, double m_density_threshold = 2.0, + double m_density_threshold_min = 0.4, + boost::shared_ptr logger = boost::shared_ptr()); + + vector findNearest(const double *p, int n = 2); + + // Calculates new size given growth factor + long int calculate_new_size(double *origin, double *axis, double l); + + void purge(double *origin, int* dim, double l); + + void purge(double *origin, double *axis, double l); + + void new_l(double l); + + int insert(double *p, T x); + + bool remove(double *p); + + // Calculates if growing the grid will payoff + bool check_grow(); + + // Print the grid + void print3d(); + + double getCellLength() const { + return m_l; + } + + double getDensity() const { + return m_density; + } + + int getCapacity() const { + return SIZE(m_grid); + } + + int getNodes() const { + return m_nodes; + } + + int getDimension(int axis) const { + return m_dim[axis]; + } + + void setDistFunction(double (*dist_fnc)(T, double*)) { + m_dist_fnc = dist_fnc; + } + + ~UniformGrid(); + +private: + // Check if search was successful + bool searchSuccessful(double min_dist = -1); + + void scanCell(int k, double* query); + + void crawl(int current_dim, int fixed_dim); + + bool scanCorners(); + + T find(double *p); + +private: /*** Maximum size pass which UG won't grow */ static const int MAX_SIZE = 1000000; @@ -59,6 +126,7 @@ class UniformGrid { int gng_dim; + //TODO: erase GNG_MAX_DIM double m_axis[GNG_MAX_DIM]; int m_nodes; @@ -71,6 +139,7 @@ class UniformGrid { vector m_neigh; + //TODO: erase GNG_MAX_DIM double m_origin[GNG_MAX_DIM]; int getIndex(int *p) { @@ -98,162 +167,13 @@ class UniformGrid { bool isZero(double x) { return x > -EPS && x < EPS; } - void purge(double *origin, int* dim, double l); unsigned int calculate_cell_side(double axis, double l, int old_dim) { return max(old_dim + 1, (int) ((axis) / (l)) + 1); } -public: - ~UniformGrid() { - delete[] s_center; - delete[] s_pos; - delete[] s_query; - delete[] m_dim; - delete[] m_tmp_int; - } - - //TODO: extract constructor base - UniformGrid(double * origin, int *dim, int gng_dim, double m_grow_factor = - 1.5, double m_density_threshold = 2.0, - double m_density_threshold_min = 0.4, - boost::shared_ptr logger = boost::shared_ptr()) : - m_dist_fnc(0), gng_dim(gng_dim), m_density_threshold( - m_density_threshold), m_density_threshold_min( - m_density_threshold_min), m_grow_factor(m_grow_factor), m_logger( - logger) { - neighbourhood_size = int(pow(3.0, (double) gng_dim)); - - this->m_density_threshold = m_density_threshold; - this->m_density_threshold_min = m_density_threshold_min; - this->m_grow_factor = m_grow_factor; - - s_center = new int[this->gng_dim]; - - s_pos = new int[this->gng_dim]; - s_query = new double[this->gng_dim]; - m_dim = new int[this->gng_dim]; //number of uniform cells along certain axis - m_tmp_int = new int[this->gng_dim]; //avoid alloc on heap all the time in calccell <- one thread! - - //Zero - for (int i = 0; i < this->gng_dim; ++i) - s_center[i] = s_pos[i] = s_query[i] = m_dim[i] = m_tmp_int[i] = 0; - - purge(origin, dim, -1.0); - } - - UniformGrid(double * origin, double *axis, double l, int gng_dim, - double m_grow_factor = 1.5, double m_density_threshold = 2.0, - double m_density_threshold_min = 0.4, - boost::shared_ptr logger = boost::shared_ptr()) : - m_dist_fnc(0), gng_dim(gng_dim), m_density_threshold( - m_density_threshold), m_density_threshold_min( - m_density_threshold_min), m_grow_factor(m_grow_factor), m_logger( - logger) { - neighbourhood_size = int(pow(3.0, (double) gng_dim)); - - s_center = new int[this->gng_dim]; - - s_pos = new int[this->gng_dim]; - s_query = new double[this->gng_dim]; - m_dim = new int[this->gng_dim]; //number of uniform cells along certain axis - m_tmp_int = new int[this->gng_dim]; //avoid alloc on heap all the time in calccell <- one thread! - - //Zero - for (int i = 0; i < this->gng_dim; ++i) - s_center[i] = s_pos[i] = s_query[i] = m_dim[i] = m_tmp_int[i] = 0; - - purge(origin, axis, l); - - } - - /** Calculates new size given growth factor */ - long int calculate_new_size(double *origin, double *axis, double l) { - unsigned long int result = 1; - - REP(i, this->gng_dim) - { - result *= calculate_cell_side(axis[i], l, m_dim[i]); - if (result > UniformGrid::MAX_SIZE) - return -1; - } - return result; - } - - /** Calculates if growing the grid will payoff*/ - bool check_grow() { - unsigned long int result = this->calculate_new_size(m_origin, m_axis, - m_l / m_grow_factor); - - if (result == -1) - return false; - - double avg_density = m_nodes / (double) m_grid.size(); - double new_avg_density = m_nodes - / (double) (this->calculate_new_size(m_origin, m_axis, - m_l / m_grow_factor)); - - DBG(m_logger,2, "avg_desnity = "+to_str(avg_density)); DBG(m_logger,2, "new_avg_density = "+to_str(new_avg_density)); - - return avg_density > m_density_threshold - && new_avg_density > m_density_threshold_min; - } - - void print3d(); - - bool searchSuccessful(double min_dist = -1) { - REP(i, s_search_query) - { - if (s_found_cells[i] == -1 || s_found_cells_dist[i] > min_dist) - return false; - } - return true; - } - - double getCellLength() const { - return m_l; - } - - double getDensity() const { - return m_density; - } - - int getCapacity() const { - return SIZE(m_grid); - } - - int getNodes() const { - return m_nodes; - } - - int getDimension(int axis) const { - return m_dim[axis]; - } - - void new_l(double l); - - void purge(double *origin, double *axis, double l); - - //mutates pos! - - int insert(double *p, T x); - bool remove(double *p); - T find(double *p); - - vector findNearest(const double *p, int n = 2); - - void setDistFunction(double (*dist_fnc)(T, double*)) { - m_dist_fnc = dist_fnc; - } - - //jedna funkcja uzywa m_tmp_int - - void scanCell(int k, double* query); - void crawl(int current_dim, int fixed_dim); - bool scanCorners(); - }; -#include "UniformGrid.hpp" +#include } diff --git a/inst/include/gng/UniformGrid.hpp b/inst/include/gng/uniform_grid.hpp similarity index 68% rename from inst/include/gng/UniformGrid.hpp rename to inst/include/gng/uniform_grid.hpp index e3db991..ae581e7 100644 --- a/inst/include/gng/UniformGrid.hpp +++ b/inst/include/gng/uniform_grid.hpp @@ -28,8 +28,8 @@ void UniformGrid::print3d() { cout << inner_index; //if(TMP[inner_index]) cout<<"::"; cout << ":"; - FOREACH(x, m_grid[inner_index]) - cout << *x << ","; + BOOST_FOREACH(int x, m_grid[inner_index]) + cout << x << ","; cout << "\t"; } @@ -112,19 +112,19 @@ void UniformGrid::scanCell(int k, if (s_search_query != 2) throw "Not implemented for >2 search query.."; - FOREACH(node, m_grid[k]) + BOOST_FOREACH(int node, m_grid[k]) { - dist_candidate = m_dist_fnc(*node, query); + dist_candidate = m_dist_fnc(node, query); // - if (*node != s_found_cells[1] + if (node != s_found_cells[1] && (s_found_cells_dist[0] < 0 || dist_candidate <= s_found_cells_dist[0])) { //Overwrite worst s_found_cells_dist[0] = dist_candidate; - s_found_cells[0] = *node; + s_found_cells[0] = node; //Swap it to the right place for (int j = 1; j < s_search_query; ++j) { @@ -343,7 +343,8 @@ bool UniformGrid::remove(double *p) { //retur int * cell = calculateCell(p); int index = getIndex(cell); - FOREACH(node, m_grid[index]) + for(typename Node::iterator node = m_grid[index].begin(); + node != m_grid[index].end(); ++node) { if (isZero(m_dist_fnc(*node, p))) { m_grid[index].erase(node); @@ -353,3 +354,115 @@ bool UniformGrid::remove(double *p) { //retur } return false; } + +template +UniformGrid::~UniformGrid() { + delete[] s_center; + delete[] s_pos; + delete[] s_query; + delete[] m_dim; + delete[] m_tmp_int; +} + +//TODO: extract constructor base +template + UniformGrid::UniformGrid(double * origin, int *dim, int gng_dim, double m_grow_factor, double m_density_threshold, + double m_density_threshold_min, + boost::shared_ptr logger) : + m_dist_fnc(0), gng_dim(gng_dim), m_density_threshold( + m_density_threshold), m_density_threshold_min( + m_density_threshold_min), m_grow_factor(m_grow_factor), m_logger( + logger) { + neighbourhood_size = int(pow(3.0, (double) gng_dim)); + + this->m_density_threshold = m_density_threshold; + this->m_density_threshold_min = m_density_threshold_min; + this->m_grow_factor = m_grow_factor; + + s_center = new int[this->gng_dim]; + + s_pos = new int[this->gng_dim]; + s_query = new double[this->gng_dim]; + m_dim = new int[this->gng_dim]; //number of uniform cells along certain axis + m_tmp_int = new int[this->gng_dim]; //avoid alloc on heap all the time in calccell <- one thread! + + //Zero + for (int i = 0; i < this->gng_dim; ++i) + s_center[i] = s_pos[i] = s_query[i] = m_dim[i] = m_tmp_int[i] = 0; + + + purge(origin, dim, -1.0); +} + +template +bool UniformGrid::searchSuccessful(double min_dist) { + REP(i, s_search_query) + { + if (s_found_cells[i] == -1 || s_found_cells_dist[i] > min_dist) + return false; + } + return true; + } + +template +UniformGrid::UniformGrid(double * origin, double *axis, double l, int gng_dim, + double m_grow_factor, double m_density_threshold, + double m_density_threshold_min, + boost::shared_ptr logger) : + m_dist_fnc(0), gng_dim(gng_dim), m_density_threshold( + m_density_threshold), m_density_threshold_min( + m_density_threshold_min), m_grow_factor(m_grow_factor), m_logger( + logger) { + neighbourhood_size = int(pow(3.0, (double) gng_dim)); + + s_center = new int[this->gng_dim]; + + s_pos = new int[this->gng_dim]; + s_query = new double[this->gng_dim]; + m_dim = new int[this->gng_dim]; //number of uniform cells along certain axis + m_tmp_int = new int[this->gng_dim]; //avoid alloc on heap all the time in calccell <- one thread! + + //Zero + for (int i = 0; i < this->gng_dim; ++i) + s_center[i] = s_pos[i] = s_query[i] = m_dim[i] = m_tmp_int[i] = 0; + + purge(origin, axis, l); + +} + + +template +long int UniformGrid::calculate_new_size(double *origin, double *axis, double l) { + unsigned long int result = 1; + + REP(i, this->gng_dim) + { + result *= calculate_cell_side(axis[i], l, m_dim[i]); + if (result > UniformGrid::MAX_SIZE) + return -1; + } + return result; +} + + +template +bool UniformGrid::check_grow() { + unsigned long int result = this->calculate_new_size(m_origin, m_axis, + m_l / m_grow_factor); + + if (result == -1) + return false; + + double avg_density = m_nodes / (double) m_grid.size(); + double new_avg_density = m_nodes + / (double) (this->calculate_new_size(m_origin, m_axis, + m_l / m_grow_factor)); + + DBG(m_logger,2, "avg_desnity = "+to_str(avg_density)); DBG(m_logger,2, "new_avg_density = "+to_str(new_avg_density)); + + return avg_density > m_density_threshold + && new_avg_density > m_density_threshold_min; +} + + + diff --git a/inst/include/utils/circular_buffer.h b/inst/include/utils/circular_buffer.h index 63296a6..ee58953 100644 --- a/inst/include/utils/circular_buffer.h +++ b/inst/include/utils/circular_buffer.h @@ -386,7 +386,7 @@ class circular_buffer reference at_checked(size_type index) const { - if (size >= contents_size_) + if (size() >= contents_size_) { throw std::exception(); } diff --git a/inst/include/utils/log.hpp b/inst/include/utils/log.hpp new file mode 100644 index 0000000..c324009 --- /dev/null +++ b/inst/include/utils/log.hpp @@ -0,0 +1,62 @@ +#ifndef LOG_HPP +#define LOG_HPP + +#include +#include +#include + +#ifdef DEBUG +#define LOG(logger, level, text) logger.log(level, text); +#define DBG(logger, level, text) logger.log(level, text); +#else +#define LOG(logger, level, text) logger.log(level, text); +#define DBG(logger, level, text) +#endif + +class LogLevel { +public: + + static const int NO_LOGGING = 0; + static const int FATAL = 1; + static const int ERR = 2; + static const int WARNING = 3; + static const int INFO = 4; + static const int DEBUG = 5; + static const int TRACE = 6; + +}; + +class Logger { +public: + + int verbosity; + + Logger() { + verbosity = LogLevel::INFO; + } + + void log(int level, std::string text) { + if (level <= verbosity) { + std::cout << text << std::endl << std::flush; + } + } + + void log(int level, int number) { + if (level <= verbosity) { + std::stringstream ss; + ss << number; + std::cout << ss.str() << std::endl << std::flush; + } + } + + void log(int level, double number) { + if (level <= verbosity) { + std::stringstream ss; + ss << number; + std::cout << ss.str() << std::endl << std::flush; + } + } + +}; + +#endif /* LOG_HPP */ diff --git a/inst/include/utils/threading.h b/inst/include/utils/threading.h index 536d8d9..f98de4c 100644 --- a/inst/include/utils/threading.h +++ b/inst/include/utils/threading.h @@ -10,7 +10,7 @@ #ifndef THREADING_H #define THREADING_H -#include "utils/tinythread.h" //TODO: path problems +#include "utils/tinythread.h" #include "utils/fast_mutex.h" namespace gmum{ diff --git a/inst/include/utils/tinythread.h b/inst/include/utils/tinythread.h index ae43337..14afc30 100644 --- a/inst/include/utils/tinythread.h +++ b/inst/include/utils/tinythread.h @@ -631,7 +631,9 @@ namespace chrono { /// Construct a duration object with the given duration. template - explicit duration(const _Rep2& r) : rep_(r) {}; + explicit duration(const _Rep2& r) : rep_(r) { + + } /// Return the value of the duration object. rep count() const diff --git a/inst/include/utils/utils.h b/inst/include/utils/utils.h index 953dd84..75ce18b 100644 --- a/inst/include/utils/utils.h +++ b/inst/include/utils/utils.h @@ -10,16 +10,16 @@ #include #include +#include "boost/foreach.hpp" using namespace std; typedef vector VI; -typedef long long LL; -#define FOR(x, b, e) for(int x=b; x<=(e); ++x) -#define FORD(x, b, e) for(int x=b; x>=(e); ––x) -#define REP(x, n) for(int x=0; x<(n); ++x) +#define FOR(x, b, e) for(size_t x=b; x<=(e); ++x) +#define FORD(x, b, e) for(size_t x=b; x>=(e); ––x) +#define REP(x, n) for(size_t x=0; x<(n); ++x) #define VAR(v,n) typeof(n) v=(n) #define ALL(c) c.begin(),c.end() #define SIZE(x) (int)(x).size() -#define FOREACH(i,c) for(VAR(i,(c).begin());i!=(c).end();++i) +#define FOREACH(i,c) BOOST_FOREACH(i, c) //for(VAR(i,(c).begin());i!=(c).end();++i) #define IWAS(x) cout< #include #include - -//c-like random functions +#include #include #include #include -#ifdef DEBUG_GMUM_2 -#define DBG_2(logger, level, text) logger->log(level, text); -#define REPORT_2(x) cerr<<#x<<"="<<(x)<log(level, text); + #define REPORT_2(x) cerr<<#x<<"="<<(x)<log(level, text); -#define REPORT(x) cout<<#x<<"="<<(x)<log(level, text); + #define REPORT(x) cout<<#x<<"="<<(x)< , for example c(0.3, 0.6, 0.7, 100, 0.7)} +\item{training}{Can be either gng.train.offline(max.iter, min.improvement), or gng.train.online()} \item{lambda}{Every lambda iteration is added new vertex. Default 200} -\item{load_model_filename}{Set to path to file from which load serialized model} - -\item{uniformgrid.optimization}{TRUE/FALSE. You cannot use utility option with this, so please pass FALSE here then.} - -\item{lazyheap.optimization}{TRUE/FALSE. You cannot use utility option with this, so please pass FALSE here then.} - -\item{max.nodes}{Maximum number of nodes (after reaching this size it will continue running, but won't add new nodes)} - -\item{dataset.type}{Dataset type. Possibilities gng.dataset.bagging, gng.dataset.bagging.prob, gng.dataset.sequential} +\item{k}{Utility constant, by default turned off. Good value is 1.3. Constant controlling speed of erasing obsolete nodes, see http://sund.de/netze/applets/gng/full/tex/DemoGNG/node20.html} } \description{ Construct GNG object } \examples{ -# Default GNG instance, without optimitzations and vertex dimensionality 3 - -gng <- GNG(dataset_type=gng.dataset.bagging.prob, max_nodes=max_nodes, dim=3) - -# Construct GNG loaded from file with uniform grid - -gng <- GNG(dataset_type=gng.dataset.bagging.prob, max_nodes=max_nodes, dim=3, -uniformgrid_optimization=TRUE, lazyheap_optimization=FALSE, -uniformgrid_boundingbox_sides=c(3,3,3), uniformgrid_boundingbox_origin=c(-0.5,-0.5,-0.5), -load_model_filename="sphere_simple.bin") +data(wine, package="rattle") +scaled.wine <- scale(wine[-1]) +# Train in an offline manner +gng <- GNG(scaled.wine, labels=wine$Type, max.nodes=20) +# Plot +plot(gng, mode=gng.plot.2d.cluster) + +# Train in an online manner with utility (erasing obsolete nodes) +gng <- GNG(scaled.wine, labels=wine$Type, max.nodes=20, training=gng.train.online(), k=1.3) +insertExamples(gng, scale(wine[-1]) +run(gng) +Sys.sleep(10) +terminate(gng) +# Plot +plot(gng, mode=gng.plot.2d.cluster) } diff --git a/man/OptimizedGNG.Rd b/man/OptimizedGNG.Rd new file mode 100644 index 0000000..48ec9fa --- /dev/null +++ b/man/OptimizedGNG.Rd @@ -0,0 +1,41 @@ +% Generated by roxygen2 (4.0.2): do not edit by hand +\name{OptimizedGNG} +\alias{OptimizedGNG} +\title{Constructor of Optimized GrowingNeuralGas object.} +\usage{ +OptimizedGNG(x = NULL, labels = c(), beta = 0.99, alpha = 0.5, + max.nodes = 1000, eps.n = 6e-04, eps.w = 0.05, max.edge.age = 200, + training = gng.train.offline(), lambda = 200, verbosity = 0, + value.range = c(0, 1)) +} +\arguments{ +\item{beta}{coefficient. Decrease the error variables of all node nodes by this fraction. Forgetting rate. Default 0.99} + +\item{alpha}{Alpha coefficient. Decrease the error variables of the nodes neighboring to the newly inserted node by this fraction. Default 0.5} + +\item{max.nodes}{Maximum number of nodes (after reaching this size it will continue running, but won't add new nodes)} + +\item{eps.n}{Default 0.0006. How strongly adapt neighbour node} + +\item{eps.w}{Default 0.05. How strongly adapt winning node} + +\item{training}{Can be either gng.train.offline(max.iter, min.improvement), or gng.train.online()} + +\item{lambda}{Every lambda iteration is added new vertex. Default 200} + +\item{value.range}{Default [0,1]. All example features should be in this range} +} +\description{ +Construct simplified and optimized GNG object. Can be used to train offline, or online. Data dimensionality shouldn't be too big, if +it is consider using dimensionality reduction (for instance PCA). Cannot use utility. +} +\examples{ +# Train online optimizedGNG. All values in this dataset are in the range (-4.3, 4.3) +data(wine, package="rattle") +gng <- OptimizedGNG(training = gng.train.online(), value.range=c(min(scale(wine[-1])),max(scale(wine[-1]))), max.nodes=20) +insertExamples(gng, scale(wine[-1])) +run(gng) +Sys.sleep(10) +pause(gng) +} + diff --git a/man/clustering-methods.Rd b/man/clustering-methods.Rd new file mode 100644 index 0000000..11344ac --- /dev/null +++ b/man/clustering-methods.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2 (4.0.2): do not edit by hand +\docType{methods} +\name{clustering.gng} +\alias{clustering} +\alias{clustering.gng} +\title{clustering} +\usage{ +clustering(gng) +} +\description{ +Gets vector with node indexes assigned to examples in the dataset +} +\examples{ +clustering(gng) +} + diff --git a/man/convert_igraph-methods.Rd b/man/convertToGraph-methods.Rd similarity index 61% rename from man/convert_igraph-methods.Rd rename to man/convertToGraph-methods.Rd index 6bfe7d8..9fe2285 100644 --- a/man/convert_igraph-methods.Rd +++ b/man/convertToGraph-methods.Rd @@ -1,19 +1,19 @@ % Generated by roxygen2 (4.0.2): do not edit by hand \docType{methods} -\name{convert_igraph.gng} -\alias{convert_igraph} -\alias{convert_igraph.gng} -\title{convert_igraph} +\name{convertToGraph.gng} +\alias{convertToGraph} +\alias{convertToGraph.gng} +\title{convertToGraph} \format{\preformatted{ NULL }} \usage{ -convert_igraph(gng) +convertToGraph(gng) } \description{ Converts to igraph (O(n) method, writing intermediately to disk) } \examples{ -convert_igraph(gng) +convertToGraph(gng) } \keyword{datasets} diff --git a/man/dump_model-methods.Rd b/man/dump_model-methods.Rd deleted file mode 100644 index d28dc82..0000000 --- a/man/dump_model-methods.Rd +++ /dev/null @@ -1,23 +0,0 @@ -% Generated by roxygen2 (4.0.2): do not edit by hand -\docType{methods} -\name{dump_model.gng} -\alias{dump_model} -\alias{dump_model.gng} -\title{dump_model} -\usage{ -dump_model(gng) -} -\arguments{ -\item{filename}{Dump destination} -} -\description{ -Writes graph to a disk space efficient binary format. It can be used in GNG constructor -to reconstruct graph from file. -} -\details{ -Dump model to binary -} -\examples{ -dump_model(gng, 'graph.bin') -} - diff --git a/man/errorStatistics-methods.Rd b/man/errorStatistics-methods.Rd new file mode 100644 index 0000000..f40b320 --- /dev/null +++ b/man/errorStatistics-methods.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2 (4.0.2): do not edit by hand +\docType{methods} +\name{errorStatistics.gng} +\alias{errorStatistics} +\alias{errorStatistics.gng} +\title{errorStatistics} +\usage{ +errorStatistics(gng) + +errorStatistics(gng) +} +\description{ +Gets vector with errors for every second of execution + +Gets vector with errors for every second of execution +} +\examples{ +errorStatistics(gng) +errorStatistics(gng) +} + diff --git a/man/error_statistics-methods.Rd b/man/error_statistics-methods.Rd deleted file mode 100644 index ba23b0e..0000000 --- a/man/error_statistics-methods.Rd +++ /dev/null @@ -1,16 +0,0 @@ -% Generated by roxygen2 (4.0.2): do not edit by hand -\docType{methods} -\name{error_statistics.gng} -\alias{error_statistics} -\alias{error_statistics.gng} -\title{error_statistics} -\usage{ -error_statistics(gng) -} -\description{ -Gets vector with errors for every second of execution -} -\examples{ -error_statistics(gng) -} - diff --git a/man/insertExamples-methods.Rd b/man/insertExamples-methods.Rd new file mode 100644 index 0000000..6c3d37f --- /dev/null +++ b/man/insertExamples-methods.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2 (4.0.2): do not edit by hand +\docType{methods} +\name{insertExamples.gng} +\alias{insertExamples} +\alias{insertExamples.gng} +\title{insertExamples} +\usage{ +insertExamples(gng, M, L=c()) +} +\arguments{ +\item{examples}{Matrix with examples} +} +\description{ +Insert (inefficiently) examples to algorithm dataset. For +efficient insertion use gng$inser_examples +} +\examples{ +#Add preset examples +M = generateExamples(preset=gng.preset.sphere) +insertExamples(gng, M) +} + diff --git a/man/insert_examples-methods.Rd b/man/insert_examples-methods.Rd deleted file mode 100644 index 332b726..0000000 --- a/man/insert_examples-methods.Rd +++ /dev/null @@ -1,41 +0,0 @@ -% Generated by roxygen2 (4.0.2): do not edit by hand -\docType{methods} -\name{insert_examples.gng} -\alias{insert_examples} -\alias{insert_examples.gng} -\title{insert_examples} -\usage{ -insert_examples(gng, M) -} -\arguments{ -\item{examples}{Matrix with examples of dimensionality N rows x C columns, where -C columns = dim (passed as parameter to GNG object) + 1 or 0 (1 if vertex_extra_data is TRUE) -+ 1 or 0 (1 if dataset_type=gng.dataset.bagging.prob).} - -\item{preset}{Use only if you are adding exemplary dataset. Possibilities: gng.preset.sphere, -gng.preset.box. You can specify preset params: N=1000, center=c(0.5,0.5,0.5), prob=-1.} -} -\description{ -Insert (inefficiently) examples to algorithm dataset. For -efficient insertion use gng$inser_examples, and for setting memory pointer -use gng$set_memory_move_examples -} -\note{ -Complicated memory layout of the matrix is due to need for memory efficiency. -In the future versions you can expect wrapper simplifying addition and also -streaming from disk file -} -\examples{ -#Add preset examples with probability of being sampled (this assumed GNG was created with gng.dataset.bagging.prob dataset) -insert_examples(gng, preset=gng.preset.sphere) - -#Insert efficiently examples -M <- matrix(0, ncol=3, nrow=10) -M[1,] = c(4,5,6) -gng$insert_examples(M) - -#Set memory of the algorithm to point in memory. Note: you cannot remove this matrix while -in execution or you will experience memory error -gng$set_memory_move_examples(M) -} - diff --git a/man/load.gng-methods.Rd b/man/load.gng-methods.Rd new file mode 100644 index 0000000..3520d37 --- /dev/null +++ b/man/load.gng-methods.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2 (4.0.2): do not edit by hand +\docType{methods} +\name{load.gng} +\alias{load.gng} +\title{load.gng} +\usage{ +load.gng(gng) +} +\arguments{ +\item{filename}{Dump location} +} +\description{ +Writes model to a disk space efficient binary format. +} +\details{ +Load model from binary format +} +\examples{ +load.gng('model.bin') +} + diff --git a/man/mean_error-methods.Rd b/man/meanError-methods.Rd similarity index 60% rename from man/mean_error-methods.Rd rename to man/meanError-methods.Rd index 14a8888..6968b54 100644 --- a/man/mean_error-methods.Rd +++ b/man/meanError-methods.Rd @@ -1,16 +1,16 @@ % Generated by roxygen2 (4.0.2): do not edit by hand \docType{methods} -\name{mean_error.gng} -\alias{mean_error} -\alias{mean_error.gng} -\title{mean_error} +\name{meanError.gng} +\alias{meanError} +\alias{meanError.gng} +\title{meanError} \usage{ -mean_error(gng) +meanError(gng) } \description{ Gets mean error of the graph (note: blocks the execution, O(n)) } \examples{ -mean_error(gng) +meanError(gng) } diff --git a/man/plot-methods.Rd b/man/plot-methods.Rd index 7769e63..c7aa669 100644 --- a/man/plot-methods.Rd +++ b/man/plot-methods.Rd @@ -8,7 +8,7 @@ plot(gng) } \arguments{ \item{vertex.color}{how to color vertexes. Possible values: gng.plot.color.cluster(vertex color is set to fastgreedy.community clustering), -gng.plot.color.extra(rounds to integer extra dim if present), gng.plot.color.none(every node is white),} +gng.plot.color.label(rounds to integer label if present), gng.plot.color.none(every node is white),} \item{layout}{layout to be used when plotting. Possible values: gng.plot.layour.igraph.v2d (first two dimensions), gng.plot.layout.igraph.auto (auto layout from igraph) gng.plot.layout.igraph.fruchterman.fast (fast fruchterman reingold layout),or any function accepting igraph graph and returning layout} @@ -24,8 +24,7 @@ Plot GNG } \note{ If you want to "power-use" plotting and plot for instance a subgraph, you might be interested in -exporting igraph with convert_igraph function and plotting it using/reusing function from this package: -.visualizeIGraph2d +exporting igraph with convertToGraph function } \examples{ # Plots igraph using first 2 coordinates and colors according to clusters diff --git a/man/save.gng-methods.Rd b/man/save.gng-methods.Rd new file mode 100644 index 0000000..b184f23 --- /dev/null +++ b/man/save.gng-methods.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2 (4.0.2): do not edit by hand +\docType{methods} +\name{save.gng} +\alias{save.gng} +\title{save.gng} +\usage{ +save.gng(gng) +} +\arguments{ +\item{filename}{Dump destination} +} +\description{ +Writes model to a disk space efficient binary format. +} +\details{ +Save model to binary format +} +\examples{ +save.gng(gng, 'graph.bin') +} + diff --git a/src/Makevars b/src/Makevars index d6823d2..5b8e825 100644 --- a/src/Makevars +++ b/src/Makevars @@ -1,22 +1,23 @@ -SOURCES = $(wildcard utils/*.cpp gng/*.cpp) -OBJECTS=$(SOURCES:.cpp=.o) - -#OBJECTS go to different folder depending on the target -#OBJECTS2=$(notdir $(SOURCES)) -#OBJECTS3=$(OBJECTS2:.cpp=.o) -#OBJECTS4=$(addprefix ../build/release/,$(OBJECTS3)) - -R_HOME := $(shell R RHOME) -DEBUG := -DNDEBUG_GMUM -DNDEBUG_GMUM_2 -GNG_FLAGS := -D RCPP_INTERFACE - -CPPFLAGS := $(shell $(R_HOME)/bin/R CMD config --cppflags) -RLDFLAGS := $(shell $(R_HOME)/bin/R CMD config --ldflags) -RCPPINCL := $(shell $(R_HOME)/bin/Rscript -e "Rcpp:::CxxFlags()") -RCPPLIBS := $(shell $(R_HOME)/bin/Rscript -e "Rcpp:::LdFlags()") -RCPPARMAINCL := $(shell $(R_HOME)/bin/Rscript -e "RcppArmadillo:::CxxFlags()") - -PKG_CPPFLAGS := -O4 $(DEBUG) -I../inst/include -I../inst/include/gng -I../inst/include/utils -I. $(CPPFLAGS) $(RCPPINCL) $(RCPPARMAINCL) -PKG_CXXFLAGS := -O4 $(GNG_FLAGS) $(DEBUG) $(RCPPFLAGS) $(RCPPINCL) $(RCPPARMAINCL) $(RINSIDEINCL) $(shell $(R_HOME)/bin/R CMD config CXXFLAGS) -g -pthread -fpic -PKG_LIBS := -O4 $(DEBUG) $(RLDFLAGS) $(RRPATH) $(RBLAS) $(RLAPACK) $(RCPPLIBS) $(RLDFLAGS) $(RINSIDELIBS) -lpthread +# Note!: R is overwriting all optimization flags, if you are a dev you can change flags in ~/.R/Makevars + +CPP_SOURCES := $(wildcard utils/*.cpp gng/*.cpp) +SOURCES := $(CPP_SOURCES) +INCLUDES := -I ../inst/include/utils -I ../inst/include -I ../inst/include/gng + +OBJECTS = $(CPP_SOURCES:.cpp=.o) + +DEBUG := -DNDEBUG_GMUM -DNDEBUG_GMUM_2 +PREPROCESS := $(DEBUG) -DARMA_DONT_USE_CXX11 -DRCPP_INTERFACE + +GCC_STD := + +R_HOME := $(shell R RHOME) +R_LIBS := $(shell $(R_HOME)/bin/R CMD config --ldflags) $(shell echo 'Rcpp:::LdFlags()' | $(R_HOME)/bin/R --vanilla --slave) +R_CPPFLAGS := $(shell $(R_HOME)/bin/R CMD config --cppflags) $(shell echo 'Rcpp:::CxxFlags()' | $(R_HOME)/bin/R --vanilla --slave) $(shell echo 'RcppArmadillo:::CxxFlags()' | $(R_HOME)/bin/R --vanilla --slave) +LAPACK_LIBS := $(shell ${R_HOME}/bin/R CMD config LAPACK_LIBS) +BLAS_LIBS := $(shell ${R_HOME}/bin/R CMD config BLAS_LIBS) + +PKG_CPPFLAGS := $(GCC_STD) $(PREPROCESS) $(R_CPPFLAGS) $(INCLUDES) -pthread -mtune=native +PKG_LIBS := $(LAPACK_LIBS) $(BLAS_LIBS) $(R_LIBS) + diff --git a/src/gng/GNGAlgorithm.cpp b/src/gng/GNGAlgorithm.cpp deleted file mode 100644 index 84cc36e..0000000 --- a/src/gng/GNGAlgorithm.cpp +++ /dev/null @@ -1,749 +0,0 @@ -/* - * File: GNGAlgorithm.cpp - * Author: staszek "kudkudak" jastrzebski gmail.com> - * - * Created on 11 sierpień 2012, 10:02 - */ - -//TODO: refactor getExample - -#include "GNGAlgorithm.h" -#include - -using namespace gmum; -using namespace std; - -namespace gmum { - -GNGNode ** GNGAlgorithm::LargestErrorNodesLazy() { - GNGNode ** largest = new GNGNode*[2]; - GNGNode * gng_node; - - FOREACH(it, errorHeap.getLazyList()){ - gng_node = &m_g[*it]; - errorHeap.insert(gng_node->nr, gng_node->error); - } - - errorHeap.getLazyList().clear(); - - ErrorNode max; - //Extract max until you get correct one (that is not lazy) - do { - max = errorHeap.extractMax(); - - DBG_2(m_logger, 4, "GNGAlgorithm::LargestErrorLazy::found max " + to_string(max.i)); - - GNGNode * gng_node = &m_g[max.i]; - if (gng_node->error_cycle != c) { - fixErrorNew(gng_node); - errorHeap.update(gng_node->nr, gng_node->error); - } else { - largest[0] = gng_node; - int j = 0; - double error = 0.0; - DBG_2(m_logger, 4, "GNGAlgorithm::LargestErrorLazy::found max " + to_string(max.i)); - - FOREACH(edg, *largest[0]) { - ++j; - fixErrorNew(&m_g[(*edg)->nr]); - - if (j == 1) { - largest[1] = &m_g[(*edg)->nr]; - error = largest[1]->error; - ; - continue; - } - - double new_error = m_g[(*edg)->nr].error; - - if (error < new_error) { - error = new_error; - largest[1] = &m_g[(*edg)->nr]; - } - } - break; - } - - - - } while (1); - DBG_2(m_logger, 3, "GNGAlgorithm::LargestErrorLazy::returning"); - return largest; - -} - -GNGGraph* GNGGraphAccessHack::pool = 0; - -GNGAlgorithm::GNGAlgorithm(GNGGraph * g, GNGDataset* db, - double * boundingbox_origin, double * boundingbox_axis, double l, - int max_nodes, int max_age, double alpha, double betha, double lambda, - double eps_w, double eps_n, int dim, bool uniformgrid_optimization, - bool lazyheap_optimization, unsigned int utility_option, - double utility_k, boost::shared_ptr logger) : - m_g(*g), g_db(db), c(0), s(0), m_max_nodes(max_nodes), m_max_age( - max_age), m_alpha(alpha), m_betha(betha), m_lambda(lambda), m_eps_w( - eps_w), m_eps_n(eps_n), m_density_threshold(0.1), m_grow_rate( - 1.5), errorHeap(), dim(dim), m_toggle_uniformgrid( - uniformgrid_optimization), m_toggle_lazyheap( - lazyheap_optimization), running(false), m_utility_option( - utility_option), m_mean_error(1000), m_utility_k(utility_k), m_logger( - logger), m_iteration(0) { - - DBG(m_logger, 1, "GNGAlgorithm:: Constructing object");DBG(m_logger, 10, "GNGAlgorithm::Constructed object with utility "+to_string(utility_option)+" "+to_string(utility_k)); - - if (m_toggle_uniformgrid) { - ug = new UniformGrid, Node, int>(boundingbox_origin, boundingbox_axis, l, dim, m_grow_rate, m_density_threshold, 0.4, m_logger); - - GNGGraphAccessHack::pool = &m_g; - - ug->setDistFunction(GNGGraphAccessHack::dist); - - // Restore uniform grid state - int maximum_index = m_g.get_maximum_index(); - REP(i, maximum_index + 1) - if (m_g.existsNode(i)) - ug->insert(m_g[i].position, m_g[i].nr); - - } - - if (m_toggle_lazyheap) { - int maximum_index = m_g.get_maximum_index(); - REP(i, maximum_index + 1) - if (m_g.existsNode(i)) - setErrorNew(&m_g[i], m_g[i].error); - } - - m_betha_powers_size = m_lambda * 10; - m_betha_powers = new double[m_betha_powers_size]; - - REP(i, m_betha_powers_size) - m_betha_powers[i] = std::pow(m_betha, (double) (i)); - - m_betha_powers_to_n_length = m_max_nodes * 2; - m_betha_powers_to_n = new double[m_max_nodes * 2]; - - REP(i, m_max_nodes * 2) - m_betha_powers_to_n[i] = std::pow(m_betha, m_lambda * (double) (i)); - DBG(m_logger, 1, "GNGAlgorithm:: Constructed object"); -} - -void GNGAlgorithm::randomInit() { - - DBG(m_logger, 3, "randomInit::Drawing examples"); - - int ex1 = g_db->drawExample(); - int ex2 = g_db->drawExample(); - - DBG(m_logger, 3, "randomInit::Drawn 2"); - int index = 0; - while (ex2 == ex1 && index < 100) { - ++index; - ex2 = g_db->drawExample(); - } - DBG(m_logger, 3, "randomInit::database_size = "+to_string(g_db->size()));DBG(m_logger, 3, "randomInit::drawn "+to_string(ex1)+" "+to_string(ex2)); - - const double * ex1_ptr = g_db->getPosition(ex1); - const double * ex1_extra_ptr = g_db->getExtraData(ex1); - const double * ex2_ptr = g_db->getPosition(ex2); - const double * ex2_extra_ptr = g_db->getExtraData(ex2); - - m_g.newNode(ex1_ptr); - m_g.newNode(ex2_ptr); - - if (ex1_extra_ptr) - m_g[0].extra_data = ex1_extra_ptr[0]; - if (ex2_extra_ptr) - m_g[1].extra_data = ex2_extra_ptr[0]; - - DBG(m_logger, 3, "randomInit::created nodes graph size="+to_string(m_g.get_number_nodes())); - -#ifdef GMUM_DEBUG_2 - assert(m_g.get_number_nodes()==2); -#endif - - if (m_toggle_uniformgrid) { - ug->insert(m_g[0].position, 0); - ug->insert(m_g[1].position, 1); - } - - if (m_toggle_lazyheap) { - setErrorNew(&m_g[0], 0.0); - setErrorNew(&m_g[1], 0.0); - } - if (this->m_utility_option == BasicUtility) { - setUtility(0, 0.001); - setUtility(1, 0.001); - } -} - -void GNGAlgorithm::addNewNode() { - using namespace std; - - DBG(m_logger, 4, "GNGAlgorith::AddNewNode::start search"); - - if (m_toggle_lazyheap) - DBG(m_logger, 4, "GNGAlgorithm::AddNewNode:: "+to_string(m_toggle_lazyheap)+" : )= toggle_lazyheap"); - - GNGNode ** error_nodes_new; - - if (m_toggle_lazyheap) - error_nodes_new = LargestErrorNodesLazy(); - else - error_nodes_new = LargestErrorNodes(); - - DBG_2(m_logger, 4, "GNGAlgorith::AddNewNode::search completed"); - - if (!error_nodes_new[0] || !error_nodes_new[1]) - return; - - DBG_2(m_logger, 4, "GNGAlgorith::AddNewNode::search completed and successful"); - - if (m_max_nodes <= m_g.get_number_nodes()) { - DBG(m_logger, 4, "GNGAlgorith::AddNewNode:: achieved maximum number of nodes"); - delete[] error_nodes_new; - return; - } - - double position[this->dim]; //param - - //TODO: < GNG_DIM? - for (int i = 0; i < this->dim; ++i) //param - position[i] = (error_nodes_new[0]->position[i] + error_nodes_new[1]->position[i]) / 2; - - //In case pool has been reallocated - int er_nr1 = error_nodes_new[0]->nr, er_nr2 = error_nodes_new[1]->nr; - int new_node_index = m_g.newNode(&position[0]); - error_nodes_new[0] = &m_g[er_nr1]; - error_nodes_new[1] = &m_g[er_nr2]; - - //Vote for extra data - m_g[new_node_index].extra_data = (error_nodes_new[0]->extra_data - + error_nodes_new[1]->extra_data) / 2.0; - - if (m_toggle_uniformgrid) - ug->insert(m_g[new_node_index].position, new_node_index); - - DBG_2(m_logger, 4, "GNGAlgorith::AddNewNode::added " + to_string(m_g[new_node_index])); - - m_g.removeUDEdge(error_nodes_new[0]->nr, error_nodes_new[1]->nr); - - DBG_2(m_logger, 3, "GNGAlgorith::AddNewNode::removed edge beetwen " + - to_string(error_nodes_new[0]->nr) + " and" + to_string(error_nodes_new[1]->nr)); - DBG(m_logger, 2, "GNGAlgorithm::AddNewNode::largest error node after removing edge : " - + to_string(*error_nodes_new[0])); - - m_g.addUDEdge(error_nodes_new[0]->nr, new_node_index); - - m_g.addUDEdge(new_node_index, error_nodes_new[1]->nr); - - DBG_2(m_logger, 3, "GNGAlgorith::AddNewNode::add edge beetwen " + - to_string(error_nodes_new[0]->nr) + " and" + to_string(new_node_index)); - - if (!m_toggle_lazyheap) { - decreaseError(error_nodes_new[0]); - decreaseError(error_nodes_new[1]); - setError(&m_g[new_node_index], - (error_nodes_new[0]->error + error_nodes_new[1]->error) / 2); - } else { - decreaseErrorNew(error_nodes_new[0]); - decreaseErrorNew(error_nodes_new[1]); - setErrorNew(&m_g[new_node_index], - (error_nodes_new[0]->error + error_nodes_new[1]->error) / 2); - } - - if (this->m_utility_option == BasicUtility) - this->setUtility(new_node_index,0.5 * (getUtility(error_nodes_new[0]->nr) + getUtility(error_nodes_new[1]->nr))); - - delete[] error_nodes_new; - DBG_2(m_logger, 3, "GNGAlgorith::AddNewNode::delete done"); -} - -int GNGAlgorithm::predict(const std::vector & ex) { - - if (m_g.get_number_nodes() == 0) - return -1; //No node - - if (ex.size() != g_db->getGNGDim()) - throw BasicException("Wrong example dimensionality"); - - - if (m_toggle_uniformgrid) { - std::vector nearest_index = ug->findNearest(&ex[0], 2); //TwoNearestNodes(ex->position); - return nearest_index[0]; - } else { - GNGNode ** tmp = TwoNearestNodes(&ex[0]); - int ret_index = tmp[0]->nr; - delete[] tmp; - return ret_index; - } -} - -double GNGAlgorithm::adapt(const double * ex, const double * extra) { - DBG_2(m_logger, 4, "GNGAlgorith::Adapt::commence search"); - - GNGNode * nearest[2]; - - if (m_toggle_uniformgrid) { - - DBG(m_logger, 1, "GNGAlgorithm::Adapt::Graph size "+to_string(m_g.get_number_nodes())); - - std::vector nearest_index = ug->findNearest(ex, 2); //TwoNearestNodes(ex->position); - - DBG(m_logger, 1, "GNGAlgorithm::Adapt::Found nearest"); - -#ifdef GMUM_DEBUG_2 - if (nearest_index[0] == nearest_index[1]) { - cerr<<"Adapt::Found same nearest_indexes!~! "+to_string(nearest_index[0])<position, ex); - double error1 = m_g.get_dist(nearest[1]->position, ex); - - if(error1 < error0) - throw BasicException("Failed UG - returned nodes in reversed order ?"); - -#endif - - } else { - DBG(m_logger, 1, "GNGAlgorith::Adapt::calling TwoNearestNodes"); - - GNGNode ** tmp = TwoNearestNodes(ex); - DBG(m_logger, 1, "GNGAlgorith::Adapt::found TwoNearestNodes"); - - nearest[0] = tmp[0]; - nearest[1] = tmp[1]; - delete[] tmp; - } - - - DBG_2(m_logger, 4, "GNGAlgorith::Adapt::found nearest nodes to the drawn example " + to_string(*nearest[0]) + " " + to_string(*nearest[1])); - - double error = m_g.get_dist(nearest[0]->position, ex); - - if (this->m_utility_option == BasicUtility) { - - DBG(m_logger, 4, "GNGAlgorithm::Adapt::setting utility"); - - double error_2 = m_g.get_dist(nearest[1]->position, ex); - - this->setUtility(nearest[0]->nr, this->getUtility(nearest[0]->nr) + error_2 - error); - } - - DBG_2(m_logger, 3, "GNGAlgorith::Adapt::increasing error"); - - if (!m_toggle_lazyheap) - increaseError(nearest[0], error); - else - increaseErrorNew(nearest[0], error); - - DBG_2(m_logger, 3, "GNGAlgorith::Adapt::accounted for the error"); - - if (m_toggle_uniformgrid) - ug->remove(nearest[0]->position); - for (int i = 0; i < this->dim; ++i) - nearest[0]->position[i] += m_eps_w * (ex[i] - nearest[0]->position[i]); - - //Adapt to extra dimensionality if present (TODO: refactor) - if (extra) - nearest[0]->extra_data = (nearest[0]->extra_data + extra[0]) / 2.0; - - if (m_toggle_uniformgrid) - ug->insert(nearest[0]->position, nearest[0]->nr); - - if (nearest[0]->edgesCount) { - FOREACH(edg, *nearest[0]){ - if (m_toggle_uniformgrid) - ug->remove(m_g[(*edg)->nr].position); - - for (int i = 0; i < this->dim; ++i) { //param accounting - m_g[(*edg)->nr].position[i] += m_eps_n - * (ex[i] - m_g[(*edg)->nr].position[i]); - } - - //Adapt to extra dimensionality if present (TODO: refactor) - if (extra) { - m_g[(*edg)->nr].extra_data = (0.9 * m_g[(*edg)->nr].extra_data - + extra[0] * 0.1); - } - - if (m_toggle_uniformgrid) - ug->insert(m_g[(*edg)->nr].position, (*edg)->nr); - } - } - - DBG(m_logger, 4, "GNGAlgorith::Adapt::position of the winner and neighbour mutated"); - - if (!m_g.isEdge(nearest[0]->nr, nearest[1]->nr)) { - m_g.addUDEdge(nearest[0]->nr, nearest[1]->nr); - DBG(m_logger, 4, "GNGAlgorith::Adapt::added edge beetwen " + to_string(nearest[0]->nr) + " and " + to_string(nearest[1]->nr)); - } - - bool BYPASS = false; - - DBG_2(m_logger, 4, "GNGAlgorith::Adapt::commence scan of edges"); - - //TODO: assuming here GNGNode not any arbitrary node :/ - GNGNode::EdgeIterator edg = nearest[0]->begin(); - while (edg != nearest[0]->end()) { - DBG(m_logger, 2, "Currently on edge to"+to_string((*edg)->nr)); - - (*edg)->age++; - (((*edg)->rev))->age++; - - if ((*edg)->nr == nearest[1]->nr) { - (*edg)->age = 0; - (((*edg)->rev))->age = 0; - } - - if ((*edg)->age > m_max_age) { - - DBG(m_logger, 3, "GNGAlgorith::Adapt::Removing aged edge " + to_string(nearest[0]->nr) + " - " + to_string((*edg)->nr)); - - int nr = (*edg)->nr; - - //Note that this is O(E), but average number of edges is very small, so it is OK - edg = m_g.removeUDEdge(nearest[0]->nr, nr); - - if (m_g[nr].edgesCount == 0 && this->m_utility_option == None) { - - DBG(m_logger, 8, "GNGAlgorith:: remove node because no edges"); - -#ifdef DEBUG_GMUM_2 - FOREACH(edg, m_g[nr]) - { - cerr<<"WARNING: GNGAlgorithm:: edges count of neighbours of erased node, shouldn't happen! " + to_string(m_g[(*edg)->nr].edgesCount)<remove(m_g[nr].position); - - DBG(m_logger, 8, "GNGAlgorithm::Adapt() Erasing node " + to_string(nr)); - DBG(m_logger, 8, "GNGAlgorithm::Adapt() First coordinate " + to_string(m_g[nr].position[0])); - - m_g.deleteNode(nr); - } - if (m_g[nearest[0]->nr].edgesCount == 0 - && this->m_utility_option == None) { - - DBG(m_logger, 49, "WARNING: GNGAlgorithm::Adapt() remove node because no edges, shouldn't happen"); //Shouldn't happen - - if (m_toggle_uniformgrid) - ug->remove(m_g[nearest[0]->nr].position); - m_g.deleteNode(nearest[0]->nr); - break; - } - if (edg != nearest[0]->end()) - --edg; - else - break;DBG(m_logger, 3, "GNGAlgorith::Adapt::Removal completed"); - } - ++edg; - } - //erase nodes - if (this->m_utility_option == BasicUtility) - this->utilityCriterionCheck(); - - return error; -} - -double GNGAlgorithm::calculateAccumulatedError() { - - int maximum_index = m_g.get_maximum_index(); - m_accumulated_error = 0.0; - - if (this->m_toggle_lazyheap) { - - m_g.lock(); - int maximum_index = m_g.get_maximum_index(); - m_accumulated_error = 0.0; - - REP(i, maximum_index + 1) - if (m_g.existsNode(i)) - m_accumulated_error += m_g[i].error; - - m_g.unlock(); - return m_accumulated_error; - } else { - m_g.lock(); - m_accumulated_error = 0.0; - REP(i, maximum_index + 1) - if (m_g.existsNode(i)) - m_accumulated_error += m_g[i].error; - - m_g.unlock(); - return m_accumulated_error; - } -} - -void GNGAlgorithm::testAgeCorrectness() { - - int maximum_index = m_g.get_maximum_index(); - - REP(i, maximum_index + 1) - if (m_g.existsNode(i) && m_g[i].edgesCount) - FOREACH(edg, m_g[i]) - if ((*edg)->age > m_max_age) { - //cout << "XXXXXXXXXXXXXXXXX\n"; - //cout << (m_g[i]) << endl; - } - -} - -void GNGAlgorithm::resizeUniformGrid() { - - DBG_2(m_logger, 6, "GNGAlgorithm::Resize Uniform Grid");DBG(m_logger, 6, "GNGAlgorithm::Resize Uniform Grid old_l=" + to_string(ug->getCellLength()));DBG(m_logger, 6, "GNGAlgorithm::Resize Uniform Grid new_l=" + to_string(ug->getCellLength() / m_grow_rate)); - - ug->new_l(ug->getCellLength() / m_grow_rate); - - int maximum_index = m_g.get_maximum_index(); - - REP(i, maximum_index + 1) - if (m_g.existsNode(i)) - ug->insert(m_g[i].position, m_g[i].nr); - -} - -GNGNode ** GNGAlgorithm::LargestErrorNodes() { - - DBG(m_logger, 2, "LargestErrorNodes::started procedure"); - - GNGNode ** largest = new GNGNode*[2]; - - largest[0] = 0; - largest[1] = 0; - double error = -1.0; - - REP(i, m_g.get_maximum_index() + 1) - if (m_g.existsNode(i)) - error = std::max(error, m_g[i].error); - - DBG(m_logger, 2, "LargestErrorNodes::found maximum error"); - - REP(i, m_g.get_maximum_index() + 1) - if (m_g.existsNode(i)) - if (m_g[i].error == error) - largest[0] = &m_g[i]; - - - DBG(m_logger, 2, "LargestErrorNodes::largest picked"); - - if (largest[0]->edgesCount == 0) { //{largest[0]->error=0; return largest;} //error? - m_g.deleteNode(largest[0]->nr); - return largest; - } - - int j = 0; - - FOREACH(edg, *largest[0]) { - ++j; - - if (j == 1) { - largest[1] = &m_g[(*edg)->nr]; - error = largest[1]->error; - continue; - } - - double new_error = m_g[(*edg)->nr].error; - - if (error < new_error) { - error = new_error; - largest[1] = &m_g[(*edg)->nr]; - } - } - - return largest; - -} - -GNGNode ** GNGAlgorithm::TwoNearestNodes(const double * position) { //to the example - - DBG(m_logger, 1, "GNGAlgorithm::just called TwoNearestNodes"); - //no unique_ptr in this C++, nearest is returned! - DBG(m_logger, 1, "GNGAlgorithm::just called TwoNearestNodes"); - - DBG(m_logger, 1, "GNGAlgorithm::just called TwoNearestNodes"); - - GNGNode** nearest = new GNGNode*[2]; - nearest[0] = 0; - nearest[1] = 0; - - DBG(m_logger, 1, "GNGAlgorithm::just called TwoNearestNodes"); - int start_index = 0; - DBG(m_logger, 1, "GNGAlgorithm::just called TwoNearestNodes");DBG(m_logger, 1, "GNGAlgorithm::starting TwoNearestNodes"); - while (!m_g.existsNode(start_index)) - ++start_index; - - DBG(m_logger, 1, "GNGAlgorithm::just called TwoNearestNodes"); - double dist = m_g.get_dist(position, m_g[start_index].position); - nearest[0] = &m_g[start_index]; - - DBG(m_logger, 1, "GNGAlgorithm::starting search of nearest node from " + to_string(start_index)); - - for (int i = start_index + 1; i <= m_g.get_maximum_index(); ++i) { //another idea for storing list of actual nodes? - - if (m_g.existsNode(i)) { - DBG(m_logger, 1, "calculating new_dist\n"); - - double new_dist = m_g.get_dist(position, m_g[i].position); - - if (dist > new_dist) { - dist = new_dist; - nearest[0] = &m_g[i]; - } - } - } - - DBG(m_logger, 1, "finding next\n"); - - start_index = 0; - - while (!m_g.existsNode(start_index) || start_index == nearest[0]->nr) - ++start_index; - dist = m_g.get_dist(position, m_g[start_index].position); - nearest[1] = &m_g[start_index]; - - for (int i = start_index + 1; i <= m_g.get_maximum_index(); ++i) { //another idea for storing list of actual nodes? - if (m_g.existsNode(i) && i != nearest[0]->nr) { - double new_dist = m_g.get_dist(position, m_g[i].position); - - if (dist > new_dist) { - dist = new_dist; - nearest[1] = &m_g[i]; - } - } - } - DBG(m_logger, 1, "search successful");DBG(m_logger, 1, "search successful and nearest[1]= " + to_string(nearest[1])); - - return nearest; -} - -void GNGAlgorithm::runAlgorithm() { //1 thread needed to do it (the one that computes) - this->running = true; - int size = g_db->size(); - - DBG_2(m_logger, 3, "GNGAlgorithm::runAlgorithm()"); - - //Initialize global counters - s = 0; - c = 0; // cycle variable for lazyheap optimization - - DBG_2(m_logger, 3, "GNGAlgorithm::check size of the db " + to_string(size)); - - while (g_db->size() < 2) { - while (this->m_gng_status != GNG_RUNNING) { - DBG(m_logger, 1, "GNGAlgorithm::status in database loop = "+to_string(this->m_gng_status)); - if (this->m_gng_status == GNG_TERMINATED) - return; - this->status_change_condition.wait(this->status_change_mutex); - } - } - - if (m_g.get_number_nodes() == 0) { - gmum::scoped_lock db_lock(*g_db); - gmum::scoped_lock graph_lock(m_g); - randomInit(); - } else if (m_g.get_number_nodes() == 1) { - cerr << "Incorrect passed graph to GNGAlgorithm. Aborting\n"; - throw BasicException("Incorrect passed graph to GNGAlgorithm"); - } - - - //We have to calculate error so we will collect error from adapt - //and when count is > dataset size we will set m_mean_error - double accumulated_error=0.0; - int accumulated_error_count=0, accumulated_error_count_last = 0; - - DBG(m_logger, 3, "GNGAlgorithm::init successful, starting the loop"); - DBG_2(m_logger, 1, "GNGAlgorithm::gng_status="+to_string(this->m_gng_status)); - while (this->m_gng_status != GNG_TERMINATED) { - - - while (this->m_gng_status != GNG_RUNNING) { - DBG(m_logger, 1, "GNGAlgorithm::status in main loop = "+to_string(this->m_gng_status)); - if (this->m_gng_status == GNG_TERMINATED) - break; - this->status_change_condition.wait(this->status_change_mutex); - } - - - for (s = 0; s < m_lambda; ++s) { //global counter!! - - const double * position, *vertex_data; - { - //Fined grained locks are necessary to prevent deadlocks - gmum::scoped_lock db_lock(*g_db); - unsigned int ex = g_db->drawExample(); - position = g_db->getPosition(ex); - vertex_data = g_db->getExtraData(ex); - DBG(m_logger, 0, "GNGAlgorithm::draw example"); - } - - gmum::scoped_lock graph_lock(m_g); - accumulated_error += adapt(position, vertex_data); - accumulated_error_count += 1; - } - - -#ifdef GMUM_DEBUG_2 - for (int i = 0; i <= m_g.get_maximum_index(); ++i) { //another idea for storing list of actual nodes? - if (m_g.existsNode(i) && m_g[i].edgesCount == 0 && m_utility_option == None) { - cerr<<"Error at " + to_string(i))< 10000 || - accumulated_error_count > g_db->size() - ){ - gmum::scoped_lock stat_lock(m_statistics_mutex); - - m_mean_error.push_back(accumulated_error/(float)accumulated_error_count); - - accumulated_error_count_last = accumulated_error_count; - - if(accumulated_error_count > g_db->size()){ - accumulated_error = 0.0; - accumulated_error_count = 0; - } - } - - DBG_2(m_logger, 4, "GNGAlgorithm::add new node"); - - { - gmum::scoped_lock graph_lock(m_g); - addNewNode(); - - - if (m_toggle_uniformgrid && ug->check_grow()) { - DBG_2(m_logger, 10, "GNGAlgorithm:: resizing uniform grid"); - resizeUniformGrid(); - } - - ++c; //epoch - if (!m_toggle_lazyheap) - decreaseAllErrors(); - if (this->m_utility_option == BasicUtility) - decreaseAllUtility(); - } - ++m_iteration; - - DBG_2(m_logger, 9, "GNGAlgorithm::iteration "+to_string(m_iteration)); - } - DBG(m_logger, 30, "GNGAlgorithm::Terminated server"); - this->running = false; -} - -} diff --git a/src/gng/GNGGraph.cpp b/src/gng/GNGGraph.cpp deleted file mode 100644 index 4b33323..0000000 --- a/src/gng/GNGGraph.cpp +++ /dev/null @@ -1,190 +0,0 @@ -#include "GNGGraph.h" - -#include -#include - -using namespace std; - -namespace gmum { - -void writeToGraphML(GNGGraph &g, std::ostream & out) { - - g.lock(); - - out << "\n"; - out << "\n"; - out << "\n"; - out << "\n"; - out << "\n"; - out << "\n"; - out << "\n"; - out << "\n"; - out << "\n"; - out << "\n"; - out << "\n"; - - std::map gng_index_to_graph_index; - - unsigned int k = 0; - for (int i = 0; i <= g.get_maximum_index(); ++i) { - - if (g.existsNode(i)) { - gng_index_to_graph_index[g[i].nr] = k; //TODO:To be honest I dnt remember purpose of g[i].nr.. - - out << "\n"; - out << "" << g[i].error << "\n"; - out << "" << g[i].extra_data << "\n"; - out << "" << g[i].nr << "\n"; - out << "" << g[i].utility << "\n"; - out << "" << g[i].position[0] << "\n"; - out << "" << g[i].position[1] << "\n"; - out << "" << g[i].position[2] << "\n"; - out << "\n"; - } - } - - unsigned int l = 0; - - for (unsigned int i = 0; i <= g.get_maximum_index(); ++i) - if (g.existsNode(i)) { - FOREACH(edg, g[i]) - { - if (g[i].nr > (*edg)->nr) { //directed! - out << "nr] - << "\" target=\"n" - << gng_index_to_graph_index[g[i].nr] << "\">\n"; - out << "" << g.get_dist(i, (*edg)->nr) << ""; - out << "\n"; - } - } - - } - - out << "\n\n"; - g.unlock(); - -} - -std::string writeToGraphML(GNGGraph &g, string filename) { - - if (filename == "") { - std::stringstream ss; - writeToGraphML(g, ss); - return ss.str(); - } else { - ofstream myfile(filename.c_str()); - writeToGraphML(g, myfile); - myfile.close(); - return ""; - } - -} -// -// std::string writeToGraphMLBoost(GNGGraph &g, string filename){ -// -// DBG(4, "writeToGraphML::locking"); -// g.lock(); -// -// DBG(4, "writeToGraphML::locked"); -// -// typedef adjacency_list Graph; -// -// -// DBG(4, "writeToGraphML::boost_g defining"); -// -// Graph boost_g(0); -// -// DBG(4, "writeToGraphML::defined boost_g"); -// -// boost::graph_traits::vertex_descriptor u, v; -// typedef boost::property VID_prop; -// -// map::vertex_descriptor> map_desc; -// -// DBG(4, "writeToGraphML::adding verticeS"); -// //Add vertexes -// for(int i=0;i<=g.getMaximumIndex();++i) -// if(g.existsNode(i)) -// { -// DBG(1, gmum::to_string(i)); -// map_desc[g[i].nr] = add_vertex(boost_g); -// boost_g[map_desc[g[i].nr]].index = g[i].nr; -// boost_g[map_desc[g[i].nr]].error = g[i].error; -// boost_g[map_desc[g[i].nr]].extra_data = g[i].extra_data; -// boost_g[map_desc[g[i].nr]].utility = g[i].utility; -// -//// std::string dump = ""; -//// //TODO: string builder -//// for(int j=0;j= 3){ -// boost_g[map_desc[g[i].nr]].v0 = g[i].position[0]; -// boost_g[map_desc[g[i].nr]].v1 = g[i].position[1]; -// boost_g[map_desc[g[i].nr]].v2 = g[i].position[2]; -// } -// } -// -// boost::property_map< Graph, double boost_edge_desc::* >::type edge_dist( boost::get(&boost_edge_desc::dist, boost_g) ); -// DBG(4, "writeToGraphML::adding edges"); -// //Add edges -// for (unsigned int i = 0; i <= g.getMaximumIndex(); ++i) -// if(g.existsNode(i)) -// { -// typedef boost::graph_traits::edge_descriptor desc; -// desc e; -// bool b; -// FOREACH(edg, g[i]){ -// if(g[i].nr > (*edg)->nr){ //directed! -// add_edge(map_desc[g[i].nr], map_desc[(*edg)->nr], boost_g); -// boost::tie(e, b) = edge(map_desc[g[i].nr], map_desc[(*edg)->nr], boost_g); -// edge_dist[e] = g.getDist(i, (*edg)->nr); -// } -// } -// -// } -// -// -// -// //Construct properties getters -// dynamic_properties p; -// p.property("gng_index", boost::get(&boost_vertex_desc::index, boost_g)); -// p.property("v0", boost::get(&boost_vertex_desc::v0, boost_g)); -// p.property("v1", boost::get(&boost_vertex_desc::v1, boost_g)); -// p.property("v2", boost::get(&boost_vertex_desc::v2, boost_g)); -// p.property("error", boost::get(&boost_vertex_desc::error, boost_g)); -// p.property("extra_data", boost::get(&boost_vertex_desc::extra_data, boost_g)); -// p.property("dist", boost::get(&boost_edge_desc::dist, boost_g)); -// p.property("utility", boost::get(&boost_vertex_desc::utility, boost_g)); -// -// -// g.unlock(); -// -// if(filename==""){ -// -// std::stringstream ss; -// write_graphml(ss, boost_g, p, true); -// -// return ss.str(); -// } -// else{ -// DBG(4, "writeToGraphML::writing to file"); -// ofstream myfile; -// myfile.open (filename.c_str()); -// std::stringstream ss; -// write_graphml(ss, boost_g, p, true) ; -// -// myfile << ss.str(); -// myfile.close(); -// -// return ""; -// } -// -// } -} - diff --git a/src/gng/GNGServer.cpp b/src/gng/GNGServer.cpp deleted file mode 100644 index 8a43823..0000000 --- a/src/gng/GNGServer.cpp +++ /dev/null @@ -1,113 +0,0 @@ -#include "GNGServer.h" - -GNGServer::GNGServer(GNGConfiguration configuration, std::istream * input_graph){ - init(configuration, input_graph); -} - -void GNGServer::init(GNGConfiguration configuration, std::istream * input_graph){ - - algorithm_thread = 0; - m_current_dataset_memory_was_set = false; - m_running_thread_created = false; - - m_logger = boost::shared_ptr(new Logger(configuration.verbosity)); - - DBG(m_logger,10, "GNGServer()::constructing GNGServer"); - - if (!configuration.check_correctness()) - throw BasicException("Invalid configuration passed to GNGServer"); - - this->current_configuration = configuration; //assign configuration - - if (current_configuration.graph_storage == GNGConfiguration::RAMMemory) { - //Nothing to do here - } else { - throw BasicException("Not supported GNGConfiguration type"); - } - - /** Construct database **/ - if (current_configuration.datasetType - == GNGConfiguration::DatasetSampling) { - DBG(m_logger,11, "GNGServer::Constructing Normal Sampling Prob Dataset"); - this->gngDataset = std::auto_ptr( - new GNGDatasetSimple(&database_mutex, - current_configuration.dim, - true /* store_extra */, GNGDatasetSimple::Sampling, - m_logger)); - } else if (current_configuration.datasetType - == GNGConfiguration::DatasetSamplingProb) { - //Add probability to layout - DBG(m_logger,11, "GNGServer::Constructing Sampling Prob Dataset"); - this->gngDataset = std::auto_ptr( - new GNGDatasetSimple(&database_mutex, - current_configuration.dim, - true /* store_extra */, GNGDatasetSimple::SamplingProbability, - m_logger)); - } else if (current_configuration.datasetType - == GNGConfiguration::DatasetSeq) { - DBG(m_logger,11, "GNGServer::Constructing Normal Seq Dataset"); - this->gngDataset = std::auto_ptr( - new GNGDatasetSimple(&database_mutex, - current_configuration.dim, - true /* store_extra */, GNGDatasetSimple::Sequential, - m_logger)); - } else { - cerr << "Passed dataset type " << current_configuration.datasetType - << endl; - cerr << GNGConfiguration::DatasetSampling << endl; - cerr << GNGConfiguration::DatasetSamplingProb << endl; - DBG(m_logger,11, "GNGServer::Not recognized dataset"); - throw BasicException( - "Database type not supported " - + to_string(current_configuration.datasetType)); - } - - DBG(m_logger,10, "GNGServer()::gngDatabase constructed"); - - /** Construct graph **/ - if (current_configuration.graph_storage == GNGConfiguration::SharedMemory) { - throw BasicException("Not supported SharedMemory configuration"); - } else if (current_configuration.graph_storage - == GNGConfiguration::RAMMemory) { - REPORT(current_configuration.starting_nodes); - - this->gngGraph = - std::auto_ptr >( - new RAMGNGGraph(&grow_mutex, - current_configuration.dim, - current_configuration.starting_nodes, - (gmum::GNGGraph::GNGDistanceFunction) current_configuration.distance_function, - m_logger)); - } else { - throw BasicException("Not supported GNGConfiguration type"); - } - - - if(input_graph){ - this->gngGraph->load(*input_graph); - } - - DBG(m_logger,10, "GNGServer()::constructing algorithm object"); - - /** Initiliaze main computing object **/ - this->gngAlgorithm = std::auto_ptr( - new GNGAlgorithm( - this->gngGraph.get(), //I do not want algorithm to depend on boost - this->gngDataset.get(), ¤t_configuration.orig[0], - ¤t_configuration.axis[0], - current_configuration.axis[0] * 1.1, //only 2^dim //TODO: min - current_configuration.max_nodes, - current_configuration.max_age, current_configuration.alpha, - current_configuration.beta, current_configuration.lambda, - current_configuration.eps_w, current_configuration.eps_n, - current_configuration.dim, - current_configuration.uniformgrid_optimization, - current_configuration.lazyheap_optimization, - current_configuration.experimental_utility_option, - current_configuration.experimental_utility_k, m_logger)); - - DBG(m_logger,10, "GNGServer()::constructed algorithm object"); - -} - - diff --git a/src/gng/gng_algorithm.cpp b/src/gng/gng_algorithm.cpp new file mode 100644 index 0000000..f72efd7 --- /dev/null +++ b/src/gng/gng_algorithm.cpp @@ -0,0 +1,1010 @@ +/* + * File: GNGAlgorithm.cpp + * Author: staszek "kudkudak" jastrzebski gmail.com> + * + * Created on 11 sierpień 2012, 10:02 + */ + +//TODO: refactor getExample +#include +#include +#include +#include +#include + +using namespace boost; +using namespace gmum; +using namespace std; + +namespace gmum { + +GNGNode ** GNGAlgorithm::LargestErrorNodesLazy() { + GNGNode ** largest = new GNGNode*[2]; + GNGNode * gng_node; + + FOREACH(int it, errorHeap.getLazyList()) + { + gng_node = &m_g[it]; + errorHeap.insert(gng_node->nr, gng_node->error); + } + + errorHeap.getLazyList().clear(); + + ErrorNode max; + //Extract max until you get correct one (that is not lazy) + do { + max = errorHeap.extractMax(); + + DBG_2(m_logger, 4, "GNGAlgorithm::LargestErrorLazy::found max " + to_string(max.i)); + + GNGNode * gng_node = &m_g[max.i]; + if (gng_node->error_cycle != c) { + fixErrorNew(gng_node); + errorHeap.update(gng_node->nr, gng_node->error); + } else { + largest[0] = gng_node; + int j = 0; + double error = 0.0; + DBG_2(m_logger, 4, "GNGAlgorithm::LargestErrorLazy::found max " + to_string(max.i)); + + BOOST_FOREACH(GNGEdge * edg, *largest[0]) + { + ++j; + fixErrorNew(&m_g[(edg)->nr]); + + if (j == 1) { + largest[1] = &m_g[(edg)->nr]; + error = largest[1]->error; + ; + continue; + } + + double new_error = m_g[(edg)->nr].error; + + if (error < new_error) { + error = new_error; + largest[1] = &m_g[(edg)->nr]; + } + } + break; + } + + } while (1); DBG_2(m_logger, 3, "GNGAlgorithm::LargestErrorLazy::returning"); + return largest; + +} + +GNGGraph* GNGGraphAccessHack::pool = 0; + +GNGAlgorithm::GNGAlgorithm(GNGGraph * g, GNGDataset* db, + double * boundingbox_origin, double * boundingbox_axis, double l, + int max_nodes, int max_age, double alpha, double betha, double lambda, + double eps_w, double eps_n, int dim, bool uniformgrid_optimization, + bool lazyheap_optimization, unsigned int utility_option, + double utility_k, boost::shared_ptr logger) : + m_g(*g), g_db(db), c(0), s(0), m_max_nodes(max_nodes), m_max_age( + max_age), m_alpha(alpha), m_betha(betha), m_lambda(lambda), m_eps_w( + eps_w), m_eps_n(eps_n), m_density_threshold(0.1), m_grow_rate( + 1.5), errorHeap(), dim(dim), m_toggle_uniformgrid( + uniformgrid_optimization), m_toggle_lazyheap( + lazyheap_optimization), running(false), m_utility_option( + utility_option), m_mean_error(1000), m_utility_k(utility_k), m_logger( + logger), m_iteration(0) { + + DBG(m_logger, 1, "GNGAlgorithm:: Constructing object"); + DBG(m_logger, 10, + "GNGAlgorithm::Constructed object with utility " + + to_string(utility_option) + " " + to_string(utility_k)); + + if (m_toggle_uniformgrid) { + ug = new UniformGrid, Node, int>(boundingbox_origin, + boundingbox_axis, l, dim, m_grow_rate, m_density_threshold, 0.4, + m_logger); + + GNGGraphAccessHack::pool = &m_g; + + ug->setDistFunction(GNGGraphAccessHack::dist); + + // Restore uniform grid state + int maximum_index = m_g.get_maximum_index(); + REP(i, maximum_index + 1) + if (m_g.existsNode(i)) + ug->insert(m_g[i].position, m_g[i].nr); + + } + + if (m_toggle_lazyheap) { + int maximum_index = m_g.get_maximum_index(); + REP(i, maximum_index + 1) + if (m_g.existsNode(i)) + setErrorNew(&m_g[i], m_g[i].error); + } + + m_betha_powers_size = m_lambda * 10; + m_betha_powers = new double[m_betha_powers_size]; + + REP(i, m_betha_powers_size) + m_betha_powers[i] = std::pow(m_betha, (double) (i)); + + m_betha_powers_to_n_length = m_max_nodes * 2; + m_betha_powers_to_n = new double[m_max_nodes * 2]; + + REP(i, m_max_nodes * 2) + m_betha_powers_to_n[i] = std::pow(m_betha, m_lambda * (double) (i)); + DBG(m_logger, 1, "GNGAlgorithm:: Constructed object"); +} + +void GNGAlgorithm::randomInit() { + + DBG(m_logger, 3, "randomInit::Drawing examples"); + + int ex1 = g_db->drawExample(); + int ex2 = g_db->drawExample(); + + DBG(m_logger, 3, "randomInit::Drawn 2"); + int index = 0; + while (ex2 == ex1 && index < 100) { + ++index; + ex2 = g_db->drawExample(); + } + DBG(m_logger, 3, "randomInit::database_size = " + to_string(g_db->size())); + DBG(m_logger, 3, + "randomInit::drawn " + to_string(ex1) + " " + to_string(ex2)); + + const double * ex1_ptr = g_db->getPosition(ex1); + const double * ex1_extra_ptr = g_db->getExtraData(ex1); + const double * ex2_ptr = g_db->getPosition(ex2); + const double * ex2_extra_ptr = g_db->getExtraData(ex2); + + m_g.newNode(ex1_ptr); + m_g.newNode(ex2_ptr); + + if (ex1_extra_ptr) + m_g[0].extra_data = ex1_extra_ptr[0]; + if (ex2_extra_ptr) + m_g[1].extra_data = ex2_extra_ptr[0]; + + DBG(m_logger, 3, + "randomInit::created nodes graph size=" + + to_string(m_g.get_number_nodes())); + +#ifdef GMUM_DEBUG_2 + assert(m_g.get_number_nodes()==2); +#endif + + if (m_toggle_uniformgrid) { + ug->insert(m_g[0].position, 0); + ug->insert(m_g[1].position, 1); + } + + if (m_toggle_lazyheap) { + setErrorNew(&m_g[0], 0.0); + setErrorNew(&m_g[1], 0.0); + } + if (this->m_utility_option == BasicUtility) { + setUtility(0, 0.001); + setUtility(1, 0.001); + } +} + +void GNGAlgorithm::addNewNode() { + using namespace std; + + DBG(m_logger, 4, "GNGAlgorith::AddNewNode::start search"); + + if (m_toggle_lazyheap) + DBG(m_logger, 4, + "GNGAlgorithm::AddNewNode:: " + to_string(m_toggle_lazyheap) + + " : )= toggle_lazyheap"); + + GNGNode ** error_nodes_new; + + if (m_toggle_lazyheap) + error_nodes_new = LargestErrorNodesLazy(); + else + error_nodes_new = LargestErrorNodes(); + + DBG_2(m_logger, 4, "GNGAlgorith::AddNewNode::search completed"); + + if (!error_nodes_new[0] || !error_nodes_new[1]) + return; + + DBG_2(m_logger, 4, "GNGAlgorith::AddNewNode::search completed and successful"); + + if (m_max_nodes <= m_g.get_number_nodes()) { + DBG(m_logger, 4, + "GNGAlgorith::AddNewNode:: achieved maximum number of nodes"); + delete[] error_nodes_new; + return; + } + + double position[this->dim]; //param + + //TODO: < GNG_DIM? + for (int i = 0; i < this->dim; ++i) //param + position[i] = (error_nodes_new[0]->position[i] + + error_nodes_new[1]->position[i]) / 2; + + //In case pool has been reallocated + int er_nr1 = error_nodes_new[0]->nr, er_nr2 = error_nodes_new[1]->nr; + int new_node_index = m_g.newNode(&position[0]); + error_nodes_new[0] = &m_g[er_nr1]; + error_nodes_new[1] = &m_g[er_nr2]; + + //Vote for extra data + m_g[new_node_index].extra_data = (error_nodes_new[0]->extra_data + + error_nodes_new[1]->extra_data) / 2.0; + + if (m_toggle_uniformgrid) + ug->insert(m_g[new_node_index].position, new_node_index); + + DBG_2(m_logger, 4, "GNGAlgorith::AddNewNode::added " + to_string(m_g[new_node_index])); + + m_g.removeUDEdge(error_nodes_new[0]->nr, error_nodes_new[1]->nr); + + DBG_2(m_logger, 3, "GNGAlgorith::AddNewNode::removed edge beetwen " + + to_string(error_nodes_new[0]->nr) + " and" + to_string(error_nodes_new[1]->nr)); + DBG(m_logger, 2, + "GNGAlgorithm::AddNewNode::largest error node after removing edge : " + + to_string(*error_nodes_new[0])); + + m_g.addUDEdge(error_nodes_new[0]->nr, new_node_index); + + m_g.addUDEdge(new_node_index, error_nodes_new[1]->nr); + + DBG_2(m_logger, 3, "GNGAlgorith::AddNewNode::add edge beetwen " + + to_string(error_nodes_new[0]->nr) + " and" + to_string(new_node_index)); + + if (!m_toggle_lazyheap) { + decreaseError(error_nodes_new[0]); + decreaseError(error_nodes_new[1]); + setError(&m_g[new_node_index], + (error_nodes_new[0]->error + error_nodes_new[1]->error) / 2); + } else { + decreaseErrorNew(error_nodes_new[0]); + decreaseErrorNew(error_nodes_new[1]); + setErrorNew(&m_g[new_node_index], + (error_nodes_new[0]->error + error_nodes_new[1]->error) / 2); + } + + if (this->m_utility_option == BasicUtility) + this->setUtility(new_node_index, + 0.5 + * (getUtility(error_nodes_new[0]->nr) + + getUtility(error_nodes_new[1]->nr))); + + delete[] error_nodes_new; + DBG_2(m_logger, 3, "GNGAlgorith::AddNewNode::delete done"); +} + +int GNGAlgorithm::predict(const std::vector & ex) { + + if (m_g.get_number_nodes() == 0) + return -1; //No node + + if (ex.size() != g_db->getGNGDim()) + throw BasicException("Wrong example dimensionality"); + + return _getNearestNeurons(&ex[0]).first; +} + +std::pair GNGAlgorithm::adapt(const double * ex, + const double * extra) { + DBG_2(m_logger, 4, "GNGAlgorith::Adapt::commence search"); + + std::pair nearest = _getNearestNeurons(ex); + GNGNode * nearest_0 = &m_g[nearest.first], * nearest_1 = &m_g[nearest.second]; + + + DBG_2(m_logger, 4, "GNGAlgorith::Adapt::found nearest nodes to the drawn example " + to_string(*nearest_0) + " " + to_string(*nearest_1)); + + double error = m_g.get_dist(nearest_0->position, ex); + + if (this->m_utility_option == BasicUtility) { + + DBG(m_logger, 4, "GNGAlgorithm::Adapt::setting utility"); + + double error_2 = m_g.get_dist(nearest_1->position, ex); + + this->setUtility(nearest_0->nr, + this->getUtility(nearest_0->nr) + error_2 - error); + } + + DBG_2(m_logger, 3, "GNGAlgorith::Adapt::increasing error"); + + if (!m_toggle_lazyheap) + increaseError(nearest_0, error); + else + increaseErrorNew(nearest_0, error); + + DBG_2(m_logger, 3, "GNGAlgorith::Adapt::accounted for the error"); + + if (m_toggle_uniformgrid) + ug->remove(nearest_0->position); + for (int i = 0; i < this->dim; ++i) + nearest_0->position[i] += m_eps_w * (ex[i] - nearest_0->position[i]); + + //Adapt to extra dimensionality if present (TODO: refactor) + if (extra) + nearest_0->extra_data = (nearest_0->extra_data + extra[0]) / 2.0; + + if (m_toggle_uniformgrid) + ug->insert(nearest_0->position, nearest_0->nr); + + if (nearest_0->edgesCount) { + FOREACH(GNGEdge * edg, *nearest_0) + { + if (m_toggle_uniformgrid) + ug->remove(m_g[(edg)->nr].position); + + for (int i = 0; i < this->dim; ++i) { //param accounting + m_g[(edg)->nr].position[i] += m_eps_n + * (ex[i] - m_g[(edg)->nr].position[i]); + } + + //Adapt to extra dimensionality if present (TODO: refactor) + if (extra) { + m_g[(edg)->nr].extra_data = (0.9 * m_g[(edg)->nr].extra_data + + extra[0] * 0.1); + } + + if (m_toggle_uniformgrid) + ug->insert(m_g[(edg)->nr].position, (edg)->nr); + } + } + + DBG(m_logger, 4, + "GNGAlgorith::Adapt::position of the winner and neighbour mutated"); + + if (!m_g.isEdge(nearest_0->nr, nearest_1->nr)) { + m_g.addUDEdge(nearest_0->nr, nearest_1->nr); + DBG(m_logger, 4, + "GNGAlgorith::Adapt::added edge beetwen " + + to_string(nearest_0->nr) + " and " + + to_string(nearest_1->nr)); + } + + bool BYPASS = false; + + DBG_2(m_logger, 4, "GNGAlgorith::Adapt::commence scan of edges"); + + //TODO: assuming here GNGNode not any arbitrary node :/ + GNGNode::EdgeIterator edg = nearest_0->begin(); + while (edg != nearest_0->end()) { + DBG(m_logger, 2, "Currently on edge to" + to_string((edg)->nr)); + + (*edg)->age++; + (((*edg)->rev))->age++; + + if ((*edg)->nr == nearest_1->nr) { + (*edg)->age = 0; + (((*edg)->rev))->age = 0; + } + + if ((*edg)->age > m_max_age) { + + DBG(m_logger, 3, + "GNGAlgorith::Adapt::Removing aged edge " + + to_string(nearest_0->nr) + " - " + + to_string((edg)->nr)); + + int nr = (*edg)->nr; + + //Note that this is O(E), but average number of edges is very small, so it is OK + edg = m_g.removeUDEdge(nearest_0->nr, nr); + + if (m_g[nr].edgesCount == 0 && this->m_utility_option == None) { + + DBG(m_logger, 8, "GNGAlgorith:: remove node because no edges"); + +#ifdef DEBUG_GMUM_2 + FOREACH(GNGEdge* edg2, m_g[nr]) + { + cerr<<"WARNING: GNGAlgorithm:: edges count of neighbours of erased node, shouldn't happen! " + to_string(m_g[(edg2)->nr].edgesCount)<remove(m_g[nr].position); + + DBG(m_logger, 8, + "GNGAlgorithm::Adapt() Erasing node " + + to_string(nr)); + DBG(m_logger, 8, + "GNGAlgorithm::Adapt() First coordinate " + + to_string(m_g[nr].position[0])); + + m_g.deleteNode(nr); + } + if (m_g[nearest_0->nr].edgesCount == 0 + && this->m_utility_option == None) { + + DBG(m_logger, 49, + "WARNING: GNGAlgorithm::Adapt() remove node because no edges, shouldn't happen"); //Shouldn't happen + + if (m_toggle_uniformgrid) + ug->remove(m_g[nearest_0->nr].position); + m_g.deleteNode(nearest_0->nr); + break; + } + if (edg != nearest_0->end()) + --edg; + else + break; + DBG(m_logger, 3, "GNGAlgorith::Adapt::Removal completed"); + } + ++edg; + } + + //erase nodes + if (this->m_utility_option == BasicUtility) + this->utilityCriterionCheck(); + + return std::pair(error, nearest.first); +} + +double GNGAlgorithm::calculateAccumulatedError() { + + int maximum_index = m_g.get_maximum_index(); + m_accumulated_error = 0.0; + + if (this->m_toggle_lazyheap) { + + m_g.lock(); + int maximum_index = m_g.get_maximum_index(); + m_accumulated_error = 0.0; + + REP(i, maximum_index + 1) + if (m_g.existsNode(i)) + m_accumulated_error += m_g[i].error; + + m_g.unlock(); + return m_accumulated_error; + } else { + m_g.lock(); + m_accumulated_error = 0.0; + REP(i, maximum_index + 1) + if (m_g.existsNode(i)) + m_accumulated_error += m_g[i].error; + + m_g.unlock(); + return m_accumulated_error; + } +} + +void GNGAlgorithm::testAgeCorrectness() { + int maximum_index = m_g.get_maximum_index(); + REP(i, maximum_index + 1) + if (m_g.existsNode(i) && m_g[i].edgesCount) + BOOST_FOREACH(GNGEdge* edg, m_g[i]) + if ((edg)->age > m_max_age) { + //cout << "XXXXXXXXXXXXXXXXX\n"; + //cout << (m_g[i]) << endl; + } +} + +void GNGAlgorithm::resizeUniformGrid() { + + DBG_2(m_logger, 6, "GNGAlgorithm::Resize Uniform Grid"); + DBG(m_logger, 6, + "GNGAlgorithm::Resize Uniform Grid old_l=" + + to_string(ug->getCellLength())); + DBG(m_logger, 6, + "GNGAlgorithm::Resize Uniform Grid new_l=" + + to_string(ug->getCellLength() / m_grow_rate)); + + ug->new_l(ug->getCellLength() / m_grow_rate); + + int maximum_index = m_g.get_maximum_index(); + + REP(i, maximum_index + 1) + if (m_g.existsNode(i)) + ug->insert(m_g[i].position, m_g[i].nr); + +} + +GNGNode ** GNGAlgorithm::LargestErrorNodes() { + DBG(m_logger, 2, "LargestErrorNodes::started procedure"); + + GNGNode ** largest = new GNGNode*[2]; + + largest[0] = 0; + largest[1] = 0; + double error = -1.0; + + REP(i, m_g.get_maximum_index() + 1) + if (m_g.existsNode(i)) + error = std::max(error, m_g[i].error); + + DBG(m_logger, 2, "LargestErrorNodes::found maximum error"); + + REP(i, m_g.get_maximum_index() + 1) + if (m_g.existsNode(i)) + if (m_g[i].error == error) + largest[0] = &m_g[i]; + + DBG(m_logger, 2, "LargestErrorNodes::largest picked"); + + if (largest[0]->edgesCount == 0) { //{largest[0]->error=0; return largest;} //error? + m_g.deleteNode(largest[0]->nr); + return largest; + } + + int j = 0; + + FOREACH(GNGEdge* edg, *largest[0]) + { + ++j; + + if (j == 1) { + largest[1] = &m_g[(edg)->nr]; + error = largest[1]->error; + continue; + } + + double new_error = m_g[(edg)->nr].error; + + if (error < new_error) { + error = new_error; + largest[1] = &m_g[(edg)->nr]; + } + } + + return largest; +} + +void GNGAlgorithm::runAlgorithm() { //1 thread needed to do it (the one that computes) + this->running = true; + int size = g_db->size(); + + DBG_2(m_logger, 3, "GNGAlgorithm::runAlgorithm()"); + + //Initialize global counters + s = 0; + c = 0; // cycle variable for lazyheap optimization + + DBG_2(m_logger, 3, "GNGAlgorithm::check size of the db " + to_string(size)); + + while (g_db->size() < 2) { + while (this->m_gng_status != GNG_RUNNING) { + DBG(m_logger, 1, + "GNGAlgorithm::status in database loop = " + + to_string(this->m_gng_status)); + if (this->m_gng_status == GNG_TERMINATED) + return; + this->status_change_condition.wait(this->status_change_mutex); + } + } + + if (m_g.get_number_nodes() == 0) { + gmum::scoped_lock db_lock(*g_db); + gmum::scoped_lock graph_lock(m_g); + randomInit(); + } else if (m_g.get_number_nodes() == 1) { + cerr << "Incorrect passed graph to GNGAlgorithm. Aborting\n"; + throw BasicException("Incorrect passed graph to GNGAlgorithm"); + } + + //We have to calculate error so we will collect error from adapt + //and when count is > dataset size we will set m_mean_error + double accumulated_error = 0.0; + double time_elapsed =0., time_elapsed_last_error=0.; + int accumulated_error_count = 0, accumulated_error_count_last = 0; + + DBG(m_logger, 3, "GNGAlgorithm::init successful, starting the loop"); DBG_2(m_logger, 1, "GNGAlgorithm::gng_status="+to_string(this->m_gng_status)); + while (true) { + + while (this->m_gng_status != GNG_RUNNING) { + DBG(m_logger, 1, + "GNGAlgorithm::status in main loop = " + + to_string(this->m_gng_status)); + if (this->m_gng_status == GNG_TERMINATED) + break; + this->status_change_condition.wait(this->status_change_mutex); + } + if (this->m_gng_status == GNG_TERMINATED) + break; + + double dt =0.; + boost::posix_time::ptime start = boost::posix_time::microsec_clock::local_time(); + + for (s = 0; s < m_lambda; ++s) { //global counter!! + + const double * position, *vertex_data; + unsigned int ex = 0; + { + //Fined grained locks are necessary to prevent deadlocks + gmum::scoped_lock db_lock(*g_db); + ex = g_db->drawExample(); + position = g_db->getPosition(ex); + vertex_data = g_db->getExtraData(ex); + DBG(m_logger, 0, "GNGAlgorithm::draw example"); + } + + gmum::scoped_lock graph_lock(m_g); + std::pair adapt_result = adapt(position, vertex_data); + +#ifdef GMUM_DEBUG + assert(adapt_result.second >= 0); +#endif + + set_clustering(ex, adapt_result.second); + accumulated_error += adapt_result.first; + accumulated_error_count += 1; + } + +#ifdef GMUM_DEBUG_2 + for (int i = 0; i <= m_g.get_maximum_index(); ++i) { //another idea for storing list of actual nodes? + if (m_g.existsNode(i) && m_g[i].edgesCount == 0 && m_utility_option == None) { + cerr<<"Error at " + to_string(i))< 0.1 && accumulated_error_count > 5 * m_g.get_number_nodes()) || + accumulated_error_count > 15 * m_g.get_number_nodes()) { + gmum::scoped_lock stat_lock(m_statistics_mutex); + + m_mean_error.push_back(make_pair(time_elapsed, + accumulated_error/(double)accumulated_error_count + )); + + accumulated_error_count_last = accumulated_error_count; + time_elapsed_last_error = 0.0; + accumulated_error = 0.0; + accumulated_error_count = 0; + } + + DBG_2(m_logger, 4, "GNGAlgorithm::add new node"); + + { + gmum::scoped_lock graph_lock(m_g); + addNewNode(); + + if (m_toggle_uniformgrid && ug->check_grow()) { + DBG_2(m_logger, 10, "GNGAlgorithm:: resizing uniform grid"); + resizeUniformGrid(); + } + + ++c; //epoch + if (!m_toggle_lazyheap) + decreaseAllErrors(); + if (this->m_utility_option == BasicUtility) + decreaseAllUtility(); + } + ++m_iteration; + + DBG_2(m_logger, 9, "GNGAlgorithm::iteration "+to_string(m_iteration)); + } + DBG(m_logger, 30, "GNGAlgorithm::Terminated server"); + this->running = false; +} + + + + + + + + + + +/** Start algorithm loop */ +void GNGAlgorithm::run() { + this->m_gng_status = GNG_RUNNING; + this->status_change_condition.notify_all(); +} + +bool GNGAlgorithm::isRunning(){ + return this->m_gng_status == GNG_RUNNING; +} + +/** Pause algorithm loop */ +void GNGAlgorithm::pause() { + this->m_gng_status = GNG_PAUSED; + this->status_change_condition.notify_all(); +} + +/** Terminate the algorithm */ +void GNGAlgorithm::terminate() { + this->m_gng_status = GNG_TERMINATED; + this->status_change_condition.notify_all(); +} + +void GNGAlgorithm::setMaxNodes(int value) { + m_max_nodes = value; +} + +int GNGAlgorithm::getIteration() const{ + return m_iteration; +} + +unsigned GNGAlgorithm::getErrorIndex() const{ + return m_mean_error.size(); +} + +double GNGAlgorithm::getMeanError() { + + gmum::scoped_lock alg_lock(m_statistics_mutex); + DBG(m_logger, 3, gmum::to_string(m_mean_error.size())); + if(m_mean_error.size() == 0){ + return std::numeric_limits::max(); + }else{ + + return m_mean_error[m_mean_error.size()-1].second; + } +} + +vector > GNGAlgorithm::getMeanErrorStatistics() { + gmum::scoped_lock alg_lock(m_statistics_mutex); + if(m_mean_error.size() == 0){ + return vector >(1, make_pair(0., std::numeric_limits::max())); + }else{ + return vector >(m_mean_error.begin(), m_mean_error.end()); + } +} + +//Retrieve clustering result. +//@note pauses algorithm as many +const vector & GNGAlgorithm::get_clustering(){ + bool was_running = false; + if(isRunning()){ + was_running = true; + pause(); + while(isRunning()){ + DBG(m_logger, 2, "get_clustering waiting for pause"); + this->status_change_condition.wait(this->status_change_mutex); + } + } + vector & result = clustering_result; + if(was_running) + run(); + + return result; +} + + GNGAlgorithm::~GNGAlgorithm() { + delete[] m_betha_powers_to_n; + delete[] m_betha_powers; +} + + + +std::pair GNGAlgorithm::_getNearestNeurons(const double *ex){ + if (m_toggle_uniformgrid) { + DBG(m_logger, 1, "GNGAlgorithm::Adapt::Graph size " + to_string(m_g.get_number_nodes())); + std::vector nearest_index = ug->findNearest(ex, 2); //TwoNearestNodes(ex->position); + DBG(m_logger, 1, "GNGAlgorithm::Adapt::Found nearest"); + + #ifdef GMUM_DEBUG_2 + if (nearest_index[0] == nearest_index[1]) { + cerr<<"Adapt::Found same nearest_indexes!~! "+to_string(nearest_index[0])< m_g.get_dist(m_g[nearest_index[0]].position, ex)); + #endif + + return std::pair(nearest_index[0], nearest_index[1]); + } else { + DBG(m_logger, 1, "GNGAlgorithm::just called TwoNearestNodes"); + int start_index = 0; + while (!m_g.existsNode(start_index)) + ++start_index; + + double dist0 = m_g.get_dist(ex, m_g[start_index].position); + int best_0 = start_index, best_1 = -1; + for (int i = start_index + 1; i <= m_g.get_maximum_index(); ++i) { + if (m_g.existsNode(i)) { + double new_dist = m_g.get_dist(ex, m_g[i].position); + if (dist0 > new_dist) { + dist0 = new_dist; + best_0 = i; + } + } + } + + DBG(m_logger, 1, "finding next\n"); + + start_index = 0; + while (!m_g.existsNode(start_index) || start_index == best_0) + ++start_index; + double dist1 = m_g.get_dist(ex, m_g[start_index].position); + best_1 = start_index; + + for (int i = start_index + 1; i <= m_g.get_maximum_index(); ++i) { //another idea for storing list of actual nodes? + if (m_g.existsNode(i) && i != best_0) { + double new_dist = m_g.get_dist(ex, m_g[i].position); + if (dist1 > new_dist) { + dist1 = new_dist; + best_1 = i; + } + } + } + + + #ifdef GMUM_DEBUG_2 + assert(dist1 > dist0); + #endif + + return std::pair(best_0, best_1); + } +} + + +void GNGAlgorithm::resetUniformGrid(double * orig, double *axis, double l) { + ug->purge(orig, axis, l); + int maximum_index = m_g.get_maximum_index(); + + REP(i, maximum_index + 1) + { + if (m_g.existsNode(i)) + ug->insert(m_g[i].position, m_g[i].nr); + } +} + +bool GNGAlgorithm::stoppingCriterion() { + return m_g.get_number_nodes() > m_max_nodes; +} + +void GNGAlgorithm::increaseErrorNew(GNGNode * node, double error) { + fixErrorNew(node); + assert(m_lambda - s <= m_betha_powers_size -1); + node->error += m_betha_powers[m_lambda - s] * error; + errorHeap.updateLazy(node->nr); +} + +void GNGAlgorithm::fixErrorNew(GNGNode * node) { + + if (node->error_cycle == c) + return; + + while(c - node->error_cycle > m_betha_powers_to_n_length - 1){ + DBG_2(m_logger, 5, "Recreating m_betha_powers_to_n"); + delete[] m_betha_powers_to_n; + m_betha_powers_to_n_length *= 2; + m_betha_powers_to_n = new double[m_betha_powers_to_n_length]; + REP(i, m_betha_powers_to_n_length) + m_betha_powers_to_n[i] = std::pow(m_betha, m_lambda * (double) (i)); + } + + assert(c - node->error_cycle <= m_betha_powers_to_n_length -1); + + node->error = m_betha_powers_to_n[c - node->error_cycle] * node->error; + node->error_cycle = c; + +} + + +void GNGAlgorithm::set_clustering(unsigned int ex, unsigned int node_idx){ + + if(ex + 1 > clustering_result.size()){ + DBG(m_logger, 6, "Resizing clustering_result to "+to_string(g_db->size())); + clustering_result.resize(g_db->size()); + } + + //Can potentially happen in case of shrinkage of dataset size + if(ex + 1 > clustering_result.size()){ + cerr<<"g_db->size mismatch with ex index?\n"; + return; + } + + + clustering_result[ex] = node_idx; +} + +double GNGAlgorithm::getMaximumError() const { + double max_error = 0; + int maximum_index = m_g.get_maximum_index(); + REP(i,maximum_index+1) + if (m_g.existsNode(i)) + max_error = std::max(max_error, m_g[i].error); + return max_error; +} + +void GNGAlgorithm::decreaseAllErrorsNew() { + return; +} + +void GNGAlgorithm::decreaseErrorNew(GNGNode * node) { + fixErrorNew(node); + node->error = m_alpha * node->error; + errorHeap.updateLazy(node->nr); +} + +void GNGAlgorithm::setErrorNew(GNGNode * node, double error) { + node->error = error; + node->error_cycle = c; + errorHeap.insertLazy(node->nr); +} + +void GNGAlgorithm::increaseError(GNGNode * node, double error) { + node->error += error; +} + +void GNGAlgorithm::decreaseAllErrors() { + int maximum_index = m_g.get_maximum_index(); + REP(i,maximum_index+1) + if (m_g.existsNode(i)) + m_g[i].error = m_betha * m_g[i].error; +} + +void GNGAlgorithm::decreaseError(GNGNode * node) { + node->error = m_alpha * node->error; +} + +void GNGAlgorithm::setError(GNGNode * node, double error) { + node->error = error; +} + +// Note: this code is not optimal and is inserted only for research purposes + +double GNGAlgorithm::getUtility(int i) { + return m_g[i].utility; +} + +void GNGAlgorithm::setUtility(int i, double u) { + m_g[i].utility = u; +} + +void GNGAlgorithm::utilityCriterionCheck() { + + if (m_g.get_number_nodes() < 10) + return; //just in case + + double max_error = this->getMaximumError(); + int maximum_index = m_g.get_maximum_index(); + + double min_utility = 100000000; + int min_utility_index = -1; + + for (int i = 0; i <= maximum_index; ++i) + if (min_utility > getUtility(i)) { + min_utility = getUtility(i); + min_utility_index = i; + } + + if (m_g.existsNode(min_utility_index) && max_error / getUtility(min_utility_index) > m_utility_k) { + + DBG(m_logger,2, "GNGAlgorithm:: removing node with utility "+gmum::to_string(getUtility(min_utility_index)) + " max error "+gmum::to_string(max_error)); + + DBG(m_logger,2,gmum::to_string(max_error)); + + GNGNode::EdgeIterator edg = m_g[min_utility_index].begin(); + while (edg != m_g[min_utility_index].end()) { + int nr = (*edg)->nr; + edg = m_g.removeUDEdge(min_utility_index, nr); + } + + m_g.deleteNode(min_utility_index); + setUtility(min_utility_index, 0); + } + +} +void GNGAlgorithm::decreaseAllUtility() { + int maximum_index = m_g.get_maximum_index(); + for (int i = 0; i <= maximum_index; ++i) + if (m_g.existsNode(i)) + setUtility(i, getUtility(i) * (m_betha)); +} + + + + + + + + + + + + +} diff --git a/src/gng/RInterface.cpp b/src/gng/gng_module.cpp similarity index 60% rename from src/gng/RInterface.cpp rename to src/gng/gng_module.cpp index 8ccd4ff..48ec828 100644 --- a/src/gng/RInterface.cpp +++ b/src/gng/gng_module.cpp @@ -4,31 +4,38 @@ #ifdef RCPP_INTERFACE -#include +#include using namespace Rcpp; class GNGConfiguration; class GNGServer; -RCPP_EXPOSED_CLASS(GNGConfiguration); -RCPP_EXPOSED_CLASS(GNGServer); +RCPP_EXPOSED_CLASS(GNGConfiguration) +RCPP_EXPOSED_CLASS(GNGServer) -#include "GNG.h" -#include "GNGServer.h" -#include "GNGConfiguration.h" +#include +#include +#include using namespace gmum; - +GNGServer * loadFromFile(std::string filename){ + GNGServer * out = new GNGServer(filename); + return out; +} RCPP_MODULE(gng_module){ + //TODO: Rcpp doesn't accept dot starting name so no way to hide it easily + Rcpp::function("fromFileGNG", &loadFromFile); + + class_("GNGConfiguration" ) .constructor() - .field("uniformgrid_optimization", &GNGConfiguration::uniformgrid_optimization, "Uniform grid optimization" ) - .field("lazyheap_optimization", &GNGConfiguration::lazyheap_optimization ) + .field(".uniformgrid_optimization", &GNGConfiguration::uniformgrid_optimization, "Uniform grid optimization" ) + .field(".lazyheap_optimization", &GNGConfiguration::lazyheap_optimization ) .field("alpha", &GNGConfiguration::alpha, "Alpha coefficient. " "Decrease the error variables of the nodes neighboring to the newly inserted node by this fraction. Default 0.5") @@ -36,38 +43,45 @@ RCPP_MODULE(gng_module){ "Decrease the error variables of all node nodes by this fraction. Forgetting rate. Default 0.99") .field("eps_n", &GNGConfiguration::eps_n, "How strongly move neighbour node. Default 0.0006") - .field("experimental_utility_option", &GNGConfiguration::experimental_utility_option, "Default 0 (off). You can turn it on to 1, but remember to turn off optimizations. Likely will change in the future.") - .field("experimental_utility_k", + .field(".experimental_utility_option", &GNGConfiguration::experimental_utility_option, "Default 0 (off). You can turn it on to 1, but remember to turn off optimizations. Likely will change in the future.") + .field(".experimental_utility_k", &GNGConfiguration::experimental_utility_k, "Default 1.3 (note: option is off by default). ") .field("eps_w", &GNGConfiguration::eps_w, "How strongly move winner node. Default 0.05") .field("max_edge_age", &GNGConfiguration::max_age, "Max edge age") .field("dim", &GNGConfiguration::dim, "Vertex position dimensionality") .field("lambda", &GNGConfiguration::lambda, "Every lambda iteration is added new vertex. Default 200") - .field("dataset_type", &GNGConfiguration::datasetType, "Dataset type. Currently supported:" + .field(".dataset_type", &GNGConfiguration::datasetType, "Dataset type. Currently supported:" "2: DatasetBagging - examples are sampled from dataset with equal probability, " "3: DatasetBaggingProbability - examples are sampled with probability equal to pos_dim+vertex_dim coordinate (last number in vector)") .field("max_nodes", &GNGConfiguration::max_nodes) .field("verbosity", &GNGConfiguration::verbosity) - .method("check_correctness", &GNGConfiguration::check_correctness) - .method("set_bounding_box", &GNGConfiguration::setBoundingBox); + .method(".check_correctness", &GNGConfiguration::check_correctness) + .method(".set_bounding_box", &GNGConfiguration::setBoundingBox); + + + class_("GNGServer") .constructor() - .constructor() .method("save", &GNGServer::save) + .method("isRunning", &GNGServer::isRunning) .method("run", &GNGServer::run) .method("getCurrentIteration", &GNGServer::getCurrentIteration) .method("pause", &GNGServer::pause) .method("terminate", &GNGServer::terminate) .method("getMeanError", &GNGServer::getMeanError) + .method("nodeDistance", &GNGServer::nodeDistance) + .method("clustering", &GNGServer::RgetClustering) .method("getConfiguration", &GNGServer::getConfiguration) .method("getNumberNodes", &GNGServer::getNumberNodes) - .method("exportToGraphML", &GNGServer::exportToGraphML) + .method(".exportToGraphML", &GNGServer::exportToGraphML) + .method(".getGNGErrorIndex", &GNGServer::getGNGErrorIndex) .method("getNode", &GNGServer::getNode) .method("insertExamples", &GNGServer::RinsertExamples) .method("getErrorStatistics", &GNGServer::RgetErrorStatistics) - .method("predict", &GNGServer::Rpredict); + .method("predict", &GNGServer::Rpredict) + .method(".getLastNodeIndex", &GNGServer::_getLastNodeIndex); } #include diff --git a/src/gng/gng_server.cpp b/src/gng/gng_server.cpp new file mode 100644 index 0000000..5809088 --- /dev/null +++ b/src/gng/gng_server.cpp @@ -0,0 +1,402 @@ +#include +#include +#include +#include +#include +#include + +GNGServer::GNGServer(std::string filename) { + cerr << filename << endl; + + std::ifstream input; + input.open(filename.c_str(), ios::in | ios::binary); + + GNGConfiguration conf; + conf.deserialize(input); + + init(conf, &input); +} + +GNGServer::GNGServer(GNGConfiguration configuration, + std::istream * input_graph) { + init(configuration, input_graph); +} + +void GNGServer::init(GNGConfiguration configuration, + std::istream * input_graph) { + + algorithm_thread = 0; + m_current_dataset_memory_was_set = false; + m_running_thread_created = false; + + m_logger = boost::shared_ptr(new Logger(configuration.verbosity)); + + DBG(m_logger,10, "GNGServer()::constructing GNGServer"); + + if (!configuration.check_correctness()) + throw BasicException("Invalid configuration passed to GNGServer"); + + this->current_configuration = configuration; //assign configuration + + if (current_configuration.graph_storage == GNGConfiguration::RAMMemory) { + //Nothing to do here + } else { + throw BasicException("Not supported GNGConfiguration type"); + } + + /** Construct database **/ + if (current_configuration.datasetType + == GNGConfiguration::DatasetSampling) { + DBG(m_logger,11, "GNGServer::Constructing Normal Sampling Prob Dataset"); + this->gngDataset = std::auto_ptr( + new GNGDatasetSimple(&database_mutex, + current_configuration.dim, true /* store_extra */, + GNGDatasetSimple::Sampling, m_logger)); + } else if (current_configuration.datasetType + == GNGConfiguration::DatasetSamplingProb) { + //Add probability to layout + DBG(m_logger,11, "GNGServer::Constructing Sampling Prob Dataset"); + this->gngDataset = std::auto_ptr( + new GNGDatasetSimple(&database_mutex, + current_configuration.dim, true /* store_extra */, + GNGDatasetSimple::SamplingProbability, + m_logger)); + } else if (current_configuration.datasetType + == GNGConfiguration::DatasetSeq) { + DBG(m_logger,11, "GNGServer::Constructing Normal Seq Dataset"); + this->gngDataset = std::auto_ptr( + new GNGDatasetSimple(&database_mutex, + current_configuration.dim, true /* store_extra */, + GNGDatasetSimple::Sequential, m_logger)); + } else { + cerr << "Passed dataset type " << current_configuration.datasetType + << endl; + cerr << GNGConfiguration::DatasetSampling << endl; + cerr << GNGConfiguration::DatasetSamplingProb << endl; + DBG(m_logger,11, "GNGServer::Not recognized dataset"); + throw BasicException( + "Database type not supported " + + to_string(current_configuration.datasetType)); + } + + DBG(m_logger,10, "GNGServer()::gngDatabase constructed"); + + /** Construct graph **/ + if (current_configuration.graph_storage == GNGConfiguration::SharedMemory) { + throw BasicException("Not supported SharedMemory configuration"); + } else if (current_configuration.graph_storage + == GNGConfiguration::RAMMemory) { + REPORT(current_configuration.starting_nodes); + + this->gngGraph = + std::auto_ptr >( + new RAMGNGGraph(&grow_mutex, + current_configuration.dim, + current_configuration.starting_nodes, + (gmum::GNGGraph::GNGDistanceFunction) current_configuration.distance_function, + m_logger)); + } else { + throw BasicException("Not supported GNGConfiguration type"); + } + + if (input_graph) { + this->gngGraph->load(*input_graph); + } + + DBG(m_logger,10, "GNGServer()::constructing algorithm object"); + + /** Initiliaze main computing object **/ + this->gngAlgorithm = std::auto_ptr( + new GNGAlgorithm( + this->gngGraph.get(), //I do not want algorithm to depend on boost + this->gngDataset.get(), ¤t_configuration.orig[0], + ¤t_configuration.axis[0], + current_configuration.axis[0] * 1.1, //only 2^dim //TODO: min + current_configuration.max_nodes, + current_configuration.max_age, current_configuration.alpha, + current_configuration.beta, current_configuration.lambda, + current_configuration.eps_w, current_configuration.eps_n, + current_configuration.dim, + current_configuration.uniformgrid_optimization, + current_configuration.lazyheap_optimization, + current_configuration.experimental_utility_option, + current_configuration.experimental_utility_k, m_logger)); + + DBG(m_logger,10, "GNGServer()::constructed algorithm object"); + +} + +void GNGServer::run() { + DBG(m_logger,10, "GNGServer::runing algorithm thread"); + algorithm_thread = new gmum::gmum_thread(&GNGServer::_run, (void*) this); + DBG(m_logger,10, "GNGServer::runing collect_statistics thread"); + + m_running_thread_created = true; +} + +GNGConfiguration GNGServer::getConfiguration() { + return current_configuration; +} + +bool GNGServer::isRunning() const { + if (!gngAlgorithm.get()) { + return false; + } + return gngAlgorithm->isRunning(); +} + +double GNGServer::nodeDistance(int id1, int id2) const { + if (gngAlgorithm->isRunning()) { + cerr + << "nodeDistance: Please pause algorithm before calling nodeDistance function\n"; + return -1.0; + } + if (id1 <= 0 || id2 <= 0) { + cerr << "nodeDistance: Indexing starts from 1\n"; + return -1.0; + } + return gngGraph->get_dist(id1 - 1, id2 - 1); +} + +void GNGServer::save(std::string filename) { + + std::ofstream output; + output.open(filename.c_str(), ios::out | ios::binary); + + current_configuration.serialize(output); + + try { + gngGraph->lock(); + assert(filename != ""); + gngGraph->serialize(output); + } catch (...) { + cerr << "Failed exporting to GraphML\n"; +#ifdef DEBUG_GMUM + throw BasicException("Failed exporting to GraphML\n"); +#endif + gngGraph->unlock(); //No RAII, yes.. + return; + } + gngGraph->unlock(); +} + +unsigned int GNGServer::getCurrentIteration() const { + return gngAlgorithm->getIteration(); +} + +///Exports GNG state to file +void GNGServer::exportToGraphML(std::string filename) { + try { + gngGraph->lock(); + assert(filename != ""); + writeToGraphML(getGraph(), filename); + } catch (...) { + cerr << "Failed exporting to GraphML\n"; +#ifdef DEBUG_GMUM + throw BasicException("Failed exporting to GraphML\n"); +#endif + gngGraph->unlock(); //No RAII, yes.. + return; + } + gngGraph->unlock(); +} + +///Insert examples +void GNGServer::insertExamples(double * positions, double * extra, + double * probability, unsigned int count, unsigned int dim) { + gmum::scoped_lock lock(gngDataset.get()); + + if (dim != current_configuration.dim) { + DBG(m_logger,10, "Wrong dimensionality is "+gmum::to_string(count*dim)+" expected "+ + gmum::to_string(count*gngDataset->getDataDim()) + + " data dim " + gmum::to_string(gngDataset->size())); + throw BasicException("Wrong dimensionality. " + "Check if you have added all field to " + "position (for instance probability)"); + } + + gngDataset->insertExamples(positions, extra, probability, count); + DBG(m_logger,7, "GNGServer::Database size "+gmum::to_string(gngDataset->size())); + +} + +unsigned int GNGServer::getNumberNodes() const { + int nr = this->gngGraph->get_number_nodes(); + return nr; +} + +double GNGServer::getMeanError() { + return gngAlgorithm->getMeanError(); +} + +vector GNGServer::getMeanErrorStatistics() { + vector > errors = + gngAlgorithm->getMeanErrorStatistics(); + vector out; + out.reserve(errors.size()); + for (unsigned i = 0; i < errors.size(); ++i) { + out.push_back(errors[i].second); + } + return out; +} + +unsigned GNGServer::getGNGErrorIndex() const{ + return gngAlgorithm->getErrorIndex(); +} + +#ifdef RCPP_INTERFACE + +//This is tricky - used only by convertToIGraph in R, because +//it might happen that we delete nodes and have bigger index of the last node +//than actual nodes (especially in the case of utility version of GNG) +unsigned int GNGServer::_getLastNodeIndex() const { + return gngGraph->get_maximum_index() + 1; +} + +//Constructor needed for RCPPInterface +GNGServer::GNGServer(GNGConfiguration * configuration) { + init(*configuration, 0 /*input_graph*/); +} + +///Moderately slow function returning node descriptors +Rcpp::List GNGServer::getNode(int index) { + int gng_index = index - 1; //1 based + + if(index <= 0) { + cerr<<"Indexing of nodes starts from 1 (R convention)\n"; + List ret; + return ret; + } + + gngGraph->lock(); + + if(!gngGraph->existsNode(gng_index)) { + List ret; + return ret; + } + + GNGNode & n = getGraph()[gng_index]; + NumericVector pos(n.position, n.position + gngDataset->getGNGDim()); + + List ret; + ret["pos"] = pos; + ret["error"] = n.error; + ret["label"] = n.extra_data; + + if(getConfiguration().experimental_utility_option != GNGConfiguration::UtilityOff) { + ret["utility"] = n.utility; + } + + vector neigh(n.size()); + GNGNode::EdgeIterator edg = n.begin(); + unsigned i = 0; + while(edg!=n.end()) { + neigh[i++] = (*edg)->nr + 1; + ++edg; + } + + ret["neighbours"] = IntegerVector(neigh.begin(), neigh.end()); + + gngGraph->unlock(); + + return ret; +} + +int GNGServer::Rpredict(Rcpp::NumericVector & r_ex) { + return 1+gngAlgorithm->predict(std::vector(r_ex.begin(), r_ex.end()) ); +} + +Rcpp::NumericVector GNGServer::RgetClustering() { + const vector & x = gngAlgorithm->get_clustering(); + Rcpp::NumericVector out = NumericVector(x.begin(), x.end()); + for(size_t i=0;i x = getMeanErrorStatistics(); + return NumericVector(x.begin(), x.end()); +} +void GNGServer::RinsertExamples(Rcpp::NumericMatrix & r_points, + Rcpp::NumericVector r_extra ) { + std::vector extra(r_extra.begin(), r_extra.end()); + arma::mat * points = new arma::mat(r_points.begin(), r_points.nrow(), r_points.ncol(), false); + + arma::Row mean_colwise = arma::mean(*points, 0 /*dim*/); + arma::Row std_colwise = arma::stddev(*points, 0 /*dim*/); + arma::Row diff_std = arma::abs(std_colwise - 1.0); + float max_diff_std = arma::max(diff_std), max_mean = arma::max(mean_colwise); + if(max_diff_std > 0.1 || max_mean > 0.1) { + cerr< max_colwise = arma::max(*points, 0 /*dim*/); + arma::Row min_colwise = arma::min(*points, 0 /*dim*/); + arma::Row diff = max_colwise - min_colwise; + float max = arma::max(diff), min = arma::min(diff); + + for(size_t i=0;i min_colwise[i] || current_configuration.orig[i]+current_configuration.axis[i] < max_colwise[i]) { + cerr<<"Error: each feature has to be in range passed to gng.type.optimized \n"; + cerr<<"Error: returning, did not insert examples\n"; + return; + } + } + } + + arma::inplace_trans( *points, "lowmem"); + + if(extra.size()) { + insertExamples(points->memptr(), &extra[0], 0 /*probabilty vector*/, + (unsigned int)points->n_cols, (unsigned int)points->n_rows); + } else { + insertExamples(points->memptr(), 0 /* extra vector */, 0 /*probabilty vector*/, + (unsigned int)points->n_cols, (unsigned int)points->n_rows); + } + + arma::inplace_trans( *points, "lowmem"); +} + +#endif + +///Pause algorithm +void GNGServer::pause() { + gngAlgorithm->pause(); +} + +///Terminate algorithm +void GNGServer::terminate() { + getAlgorithm().terminate(); + DBG(m_logger,20, "GNGServer::getAlgorithm terminated, joining algorithm thread"); + if (algorithm_thread) + algorithm_thread->join(); + DBG(m_logger,20, "GNGServer::algorithm thread terminated, joining statistic thread"); + gmum::sleep(100); +} + +GNGAlgorithm & GNGServer::getAlgorithm() { + return *this->gngAlgorithm.get(); +} +GNGGraph & GNGServer::getGraph() { + return *this->gngGraph.get(); +} +GNGDataset & GNGServer::getDatabase() { + return *this->gngDataset.get(); +} + +GNGServer::~GNGServer() { + DBG(m_logger,10, "GNGServer::destructor called"); +#ifdef RCPP_INTERFACE + + if(m_current_dataset_memory_was_set) { + R_ReleaseObject(m_current_dataset_memory); + + } + //R Matrix will be deleted from R level +#endif +} + + diff --git a/src/utils/utils.cpp b/src/utils/utils.cpp deleted file mode 100644 index a8d3a2b..0000000 --- a/src/utils/utils.cpp +++ /dev/null @@ -1,62 +0,0 @@ -#include "utils.h" - -void __init_rnd() { - srand(time(NULL)); -} - -int __rnd(int min, int max) { - return (rand() % (max - min + 1) + min); -} - -int __int_rnd(int min, int max) { - return (rand() % (max - min + 1) + min); -} - -double __double_rnd(double min, double max) { - return min + (max - min) * ((double) rand()) / RAND_MAX; -} -void _write_bin(ostream & out, double v) { - if (isCpuLittleEndian ^ isFileLittleEndian) { - // Switch between the two - char data[8], *pDouble = (char*) (double*) (&v); - for (int i = 0; i < 8; ++i) { - data[i] = pDouble[7 - i]; - } - out.write(data, 8); - } else - out.write((char*) (&v), 8); -} - -void _write_bin_vect(ostream & out, vector & v) { - _write_bin(out, (double) v.size()); - for (int i = 0; i < v.size(); ++i) { - _write_bin(out, v[i]); - } -} -double _load_bin(istream & in) { - char data[8]; - double res; - in.read(data, 8); - if (isCpuLittleEndian ^ isFileLittleEndian) { - char data_load[8]; - // Switch between the two - for (int i = 0; i < 8; ++i) { - data_load[i] = data[7 - i]; - } - memcpy((char*) &res, &data_load[0], 8); - } else - memcpy((char*) &res, &data[0], 8); - - return res; -} - -vector _load_bin_vector(istream & in) { - int N = (int) _load_bin(in); - vector x; - x.reserve(N); - REPORT(N); - for (int i = 0; i < N; ++i) { - x.push_back(_load_bin(in)); - } - return x; -} diff --git a/tests/cpp/Makefile b/tests/cpp/Makefile index 61be32e..133ee35 100644 --- a/tests/cpp/Makefile +++ b/tests/cpp/Makefile @@ -2,9 +2,8 @@ include ../../src/Makevars # Explicit Makevars inheritances adn overrides CXX := $(CXX) -CPPFLAGS := $(PKG_CPPFLAGS) -CXXFLAGS := $(PKG_CXXFLAGS) -O2 -g -s -LDLIBS := $(PKG_LIBS) -lpthread -lgtest -lgtest_main +CPPFLAGS := $(PKG_CPPFLAGS) -O2 -g -s +LDLIBS := $(PKG_LIBS) -lpthread -lgtest -lgtest_main -larmadillo ### # Compilation parameters @@ -63,20 +62,15 @@ ROOT_INCLUDE := -I $(ROOT_INCLUDE_PATH) # Generic branch includes BRANCH_INCLUDES := $(patsubst %, -I $(ROOT_INCLUDE_PATH)/%, $(GMUMR_BRANCHES)) -# Sum up +# Sum up all INCLUDES := $(ROOT_INCLUDE) $(BRANCH_INCLUDES) ### -# Internally included libs +# Compilation rules ### -SVMLIGHT_INCLUDE := $(ROOT_INCLUDE_PATH)/svmlight -SVMLIGHT_LIBS := $(SVMLIGHT_INCLUDE)/svm_learn.o $(SVMLIGHT_INCLUDE)/svm_common.o $(SVMLIGHT_INCLUDE)/svm_hideo.o CPPFLAGS := $(CPPFLAGS) $(INCLUDES) - -### -# Compilation rules -### +CFLAGS := $(CPPFLAGS) all: $(TEST_MAIN_BIN) helper_scripts @echo ... Done! @@ -84,16 +78,16 @@ all: $(TEST_MAIN_BIN) helper_scripts @echo or use helper scripts: $(PRIMARY_TESTS_SCRIPT) $(SECONDARY_TESTS_SCRIPT) clean: - rm -f $(TEST_MAIN_BIN) $(TEST_OBJECTS) $(PRIMARY_TESTS_SCRIPT) \ - $(SECONDARY_TESTS_SCRIPT) $(ADDITIONAL_CLEANING) + rm -f $(TEST_MAIN_BIN) $(TEST_OBJECTS) $(COMPILED_LIBRARIES_OBJECTS)\ + $(PRIMARY_TESTS_SCRIPT) $(SECONDARY_TESTS_SCRIPT) $(ADDITIONAL_CLEANING) -$(TEST_MAIN_BIN): $(TEST_OBJECTS) $(OBJECTS) +$(TEST_MAIN_BIN): $(TEST_OBJECTS) $(OBJECTS) $(COMPILED_LIBRARIES_OBJECTS) @echo Linking all tests... $(CXX) $^ -lgtest_main -o $@ $(LDLIBS) %.o: %.cpp %.hpp @echo Compiling $@ ... - $(CXX) $(CPPFLAGS) $(CXXFLAGS) -c $< -o $@ + $(CXX) $(CPPFLAGS) -c $< -o $@ helper_scripts: @echo Making helper scripts... diff --git a/tests/cpp/gng/basic_integration_tests.cpp b/tests/cpp/gng/basic_integration_tests.cpp index 3e41a1c..0b592f0 100644 --- a/tests/cpp/gng/basic_integration_tests.cpp +++ b/tests/cpp/gng/basic_integration_tests.cpp @@ -1,5 +1,5 @@ -#include "gng/GNG.h" //TODO: path problems -#include "gng/GNGServer.h" +#include "gng/gng.h" +#include "gng/gng_server.h" #include "utils/utils.h" #include "gtest/gtest.h" @@ -203,7 +203,7 @@ TEST(GNGNumericTest, FewDimsSkewedUGConvergence) { "fewdims.graphml", extra_examples, num_extra * (config.dim + 1)); ASSERT_GE(results.first, 10.0); - ASSERT_LE(fabs(results.second), 1e-1); + ASSERT_LE(fabs(results.second), 3e-1); } TEST(GNGNumericTest, FewDimsUGConvergence) { diff --git a/tests/cpp/gng/parts_tests.cpp b/tests/cpp/gng/parts_tests.cpp index 5d23b4c..080d3be 100644 --- a/tests/cpp/gng/parts_tests.cpp +++ b/tests/cpp/gng/parts_tests.cpp @@ -1,7 +1,7 @@ -#include "gng/GNG.h" +#include "gng/gng.h" #include "utils/utils.h" -#include "gng/GNGGraph.h" -#include "gng/GNGDataset.h" +#include "gng/gng_graph.h" +#include "gng/gng_dataset.h" #include "utils/threading.h" #include @@ -209,4 +209,3 @@ TEST(DatabaseTests, BasicDatasetTest) { ASSERT_LE(dataset2.getPosition(c)[0], 0.9); } } - diff --git a/tests/cpp/run_primary_tests.sh b/tests/cpp/run_primary_tests.sh new file mode 100755 index 0000000..02ef855 --- /dev/null +++ b/tests/cpp/run_primary_tests.sh @@ -0,0 +1,2 @@ +#!/usr/bin/env bash +./run_tests --gtest_filter=-*NumericTest* diff --git a/tests/cpp/run_secondary_tests.sh b/tests/cpp/run_secondary_tests.sh new file mode 100755 index 0000000..2773e90 --- /dev/null +++ b/tests/cpp/run_secondary_tests.sh @@ -0,0 +1,2 @@ +#!/usr/bin/env bash +./run_tests --gtest_filter=*NumericTest* diff --git a/tests/testthat/test_gng_basic.R b/tests/testthat/test_gng_basic.R index 93fb24d..37a2574 100644 --- a/tests/testthat/test_gng_basic.R +++ b/tests/testthat/test_gng_basic.R @@ -1,37 +1,49 @@ library(testthat) -library("GrowingNeuralGas") -library(igraph) -for(k in 1:2){ +test_that("GNG converges on simple cases", { + +for(k in 1:3){ + library(igraph) + print(paste("Iteration", k)) max_nodes <- 600 - - if(k %% 2 == 0){ + ex <- gng.preset.sphere(N=90000) + if(k == 1){ # Construct gng object gng <- GNG(max.nodes=max_nodes, training=gng.train.online(dim=3), verbosity=10) + insertExamples(gng, ex) + # Run algorithm in parallel + run(gng) + } + else if(k == 2){ + # Construct gng object + gng <- GNG(ex, max.nodes=max_nodes, training=gng.train.offline(max.iter=1000), verbosity=10) } else{ # Construct gng object gng <- OptimizedGNG(max.nodes=max_nodes, training=gng.train.online(dim=3), verbosity=10, value.range=c(-2,2)) + insertExamples(gng, ex) + + # Run algorithm in parallel + run(gng) } # Construct examples, here we will use a sphere - ex <- gng.preset.sphere(N=90000) - insertExamples(gng, ex) - # Run algorithm in parallel - run(gng) - - # Wait for the graph to converge - n <- 0 - print("Waiting to converge") - while(numberNodes(gng) != gng$getConfiguration()$max_nodes && n < 100) { - Sys.sleep(1.0) - n <- n + 1 + + if(k!=2){ + # Wait for the graph to converge + n <- 0 + print("Waiting to converge") + while(numberNodes(gng) != gng$getConfiguration()$max_nodes && n < 100) { + Sys.sleep(1.0) + n <- n + 1 + } + test_that("GNG has reached expected number of vertexes", { + expect_that(n < 100, is_true() ) + }) } - test_that("GNG has reached expected number of vertexes", { - expect_that(n < 100, is_true() ) - }) + print("Test::Completness test") # Find closest node @@ -59,3 +71,11 @@ for(k in 1:2){ print(paste("Finished ",k)) } + +}) + +test_that("GNG clustering and predict are returning the same", { + X <- replicate(10, rnorm(20)) + gng <- GNG(X) + expect_that(all(gng$clustering() == predict(gng,X)), is_true()) +}) diff --git a/tests/testthat/test_gng_utility.R b/tests/testthat/test_gng_utility.R index 27259a7..3811094 100644 --- a/tests/testthat/test_gng_utility.R +++ b/tests/testthat/test_gng_utility.R @@ -1,8 +1,7 @@ -library(GrowingNeuralGas) library(igraph) library(testthat) -max_nodes <- 600 +max_nodes <- 500 # Construct gng object gng <- GNG(max.nodes=max_nodes, training = gng.train.online(dim=3), verbosity=10, k=1.3) @@ -21,10 +20,10 @@ meanError(gng) # Wait for it to converge -Sys.sleep(20.0) +Sys.sleep(25.0) print("Adding jumped distribution") pause(gng) -plot(gng, mode=gng.plot.2d.errors) #0.003 without utility +plot(gng, mode=gng.plot.2d.errors) #0.068 without utility , 10 times less with ex2 <- gng.preset.box(N=90000, r=1.0, center=c(3.0,3.0,3.0), prob=-1) insertExamples(gng, ex2, labels)