diff --git a/R/word2vec.R b/R/word2vec.R index 28d29f5..781b9d7 100644 --- a/R/word2vec.R +++ b/R/word2vec.R @@ -106,10 +106,9 @@ #' x <- subset(x, grepl(xpos, pattern = paste(LETTERS, collapse = "|"))) #' x$text <- sprintf("%s/%s", x$lemma, x$xpos) #' x <- subset(x, !is.na(lemma)) -#' x <- paste.data.frame(x, term = "text", group = "doc_id", collapse = " ") -#' x <- x$text +#' x <- split(x$text, list(x$doc_id, x$sentence_id)) #' -#' model <- word2vec(x = x, dim = 15, iter = 20, split = c(" ", ".\n?!")) +#' model <- word2vec(x = x, dim = 15, iter = 20) #' emb <- as.matrix(model) #' nn <- predict(model, c("cuisine/NN", "rencontrer/VB"), type = "nearest") #' nn diff --git a/man/word2vec.Rd b/man/word2vec.Rd index bca8f80..ee8ce9d 100644 --- a/man/word2vec.Rd +++ b/man/word2vec.Rd @@ -144,10 +144,9 @@ x <- subset(brussels_reviews_anno, language == "fr") x <- subset(x, grepl(xpos, pattern = paste(LETTERS, collapse = "|"))) x$text <- sprintf("\%s/\%s", x$lemma, x$xpos) x <- subset(x, !is.na(lemma)) -x <- paste.data.frame(x, term = "text", group = "doc_id", collapse = " ") -x <- x$text +x <- split(x$text, list(x$doc_id, x$sentence_id)) -model <- word2vec(x = x, dim = 15, iter = 20, split = c(" ", ".\n?!")) +model <- word2vec(x = x, dim = 15, iter = 20) emb <- as.matrix(model) nn <- predict(model, c("cuisine/NN", "rencontrer/VB"), type = "nearest") nn diff --git a/man/word2vec.character.Rd b/man/word2vec.character.Rd index 05c95ac..6a4aaa9 100644 --- a/man/word2vec.character.Rd +++ b/man/word2vec.character.Rd @@ -148,10 +148,9 @@ x <- subset(brussels_reviews_anno, language == "fr") x <- subset(x, grepl(xpos, pattern = paste(LETTERS, collapse = "|"))) x$text <- sprintf("\%s/\%s", x$lemma, x$xpos) x <- subset(x, !is.na(lemma)) -x <- paste.data.frame(x, term = "text", group = "doc_id", collapse = " ") -x <- x$text +x <- split(x$text, list(x$doc_id, x$sentence_id)) -model <- word2vec(x = x, dim = 15, iter = 20, split = c(" ", ".\n?!")) +model <- word2vec(x = x, dim = 15, iter = 20) emb <- as.matrix(model) nn <- predict(model, c("cuisine/NN", "rencontrer/VB"), type = "nearest") nn