Skip to content

Commit

Permalink
Changes to address comments by @bockthom
Browse files Browse the repository at this point in the history
Fix style issues, modify README.md, add small test and add some comments
for clarity

Signed-off-by: Leo Sendelbach <[email protected]>
  • Loading branch information
Leo-Send committed Apr 4, 2024
1 parent 113ba6b commit ed0f265
Show file tree
Hide file tree
Showing 10 changed files with 116 additions and 80 deletions.
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,8 @@ Alternatively, you can run `Rscript install.R` to install the packages.
- `jsonlite`: For parsing the issue data
- `rTensor`: For calculating EDCPTD centrality
- `Matrix`: For sparse matrix representation of large adjacency matrices
- `fastmap`: For fast implementation of a map
- `purrr`: For fast implementation of a mapping function

### Submodule

Expand Down Expand Up @@ -597,6 +599,9 @@ There is no way to update the entries, except for the revision-based parameters.
- `custom.event.timestamps.locked`:
* Lock custom event timestamps to prevent them from being read if empty or not yet present when calling the getter.
* [`TRUE`, *`FALSE`*]
- `commit.interactions`:
* Allow construction of author and artifact networks using commit interaction data
* [`TRUE`, *`FALSE`*]

### NetworkConf

Expand Down
4 changes: 3 additions & 1 deletion install.R
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,9 @@ packages = c(
"viridis",
"jsonlite",
"rTensor",
"Matrix"
"Matrix",
"fastmap",
"purrr"
)


Expand Down
1 change: 1 addition & 0 deletions tests/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ We have two test projects you can use when writing your tests:
* Commit messages
* Pasta
* Synchronicity
* Commit Interactions
* Custom event timestamps in `custom-events.list`
* Revisions
2. - Casestudy: `test_empty`
Expand Down
8 changes: 8 additions & 0 deletions tests/test-data.R
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,13 @@ test_that("Compare two ProjectData objects on empty data", {
proj.data.two$set.project.conf.entry("commit.messages", "message")
proj.data.two$get.commit.messages()
expect_true(proj.data.one$equals(proj.data.two), "Two identical ProjectData objects (commit.messages).")

proj.data.one$set.project.conf.entry("commit.interactions", TRUE)
proj.data.one$get.commit.interactions()
expect_false(proj.data.one$equals(proj.data.two), "Two non-identical ProjectData objects (commit.interactions).")
proj.data.two$set.project.conf.entry("commit.interactions", TRUE)
proj.data.two$get.commit.interactions()
expect_true(proj.data.one$equals(proj.data.two), "Two identical ProjectData objects (commit.interactions).")
})

test_that("Compare two ProjectData objects on non-empty data", {
Expand Down Expand Up @@ -540,6 +547,7 @@ test_that("Compare two ProjectData Objects with commit.interactions", {
commit.data[["hash"]][[5]] = 1
proj.data.one$set.commits(commit.data)

## use isTRUE to compress result of all.equal into a single boolean
expect_false(isTRUE(all.equal(proj.data.one$get.commit.interactions(),
proj.data.two$get.commit.interactions())))

Expand Down
8 changes: 4 additions & 4 deletions tests/test-networks-artifact.R
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,7 @@ test_that("Network construction with commit-interactions as relation, artifact t
proj.data = ProjectData$new(project.conf = proj.conf)

net.conf = NetworkConf$new()
net.conf$update.value("artifact.relation", "interaction")
net.conf$update.value("artifact.relation", "commit.interaction")

network.builder = NetworkBuilder$new(project.data = proj.data, network.conf = net.conf)
network.built = network.builder$get.artifact.network()
Expand All @@ -249,7 +249,7 @@ test_that("Network construction with commit-interactions as relation, artifact t
interacting.author = c("Thomas", "Karl", "Olaf", "Thomas"),
weight = c(1, 1, 1, 1),
type = c(TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA),
relation = c("interaction", "interaction", "interaction", "interaction")
relation = c("commit.interaction", "commit.interaction", "commit.interaction", "commit.interaction")
)
network = igraph::graph.data.frame(edges, directed = FALSE, vertices = vertices)

Expand All @@ -265,7 +265,7 @@ test_that("Network construction with commit-interactions as relation, artifact t
proj.data = ProjectData$new(project.conf = proj.conf)

net.conf = NetworkConf$new()
net.conf$update.value("artifact.relation", "interaction")
net.conf$update.value("artifact.relation", "commit.interaction")

network.builder = NetworkBuilder$new(project.data = proj.data, network.conf = net.conf)
network.built = network.builder$get.artifact.network()
Expand All @@ -292,7 +292,7 @@ test_that("Network construction with commit-interactions as relation, artifact t
interacting.author = c("Thomas", "Karl", "Olaf", "Thomas"),
weight = c(1, 1, 1, 1),
type = c(TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA),
relation = c("interaction", "interaction", "interaction", "interaction")
relation = c("commit.interaction", "commit.interaction", "commit.interaction", "commit.interaction")
)
network = igraph::graph.data.frame(edges, directed = FALSE, vertices = vertices)

Expand Down
4 changes: 2 additions & 2 deletions tests/test-networks-author.R
Original file line number Diff line number Diff line change
Expand Up @@ -688,7 +688,7 @@ test_that("Network construction with commit-interactions as relation", {
proj.data = ProjectData$new(project.conf = proj.conf)

net.conf = NetworkConf$new()
net.conf$update.value("author.relation", "interaction")
net.conf$update.value("author.relation", "commit.interaction")

network.builder = NetworkBuilder$new(project.data = proj.data, network.conf = net.conf)
network.built = network.builder$get.author.network()
Expand Down Expand Up @@ -716,7 +716,7 @@ test_that("Network construction with commit-interactions as relation", {
base.file = c("test2.c", "test2.c", "test3.c", "test2.c"),
weight = c(1, 1, 1, 1),
type = c(TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA),
relation = c("interaction", "interaction", "interaction", "interaction")
relation = c("commit.interaction", "commit.interaction", "commit.interaction", "commit.interaction")
)
network = igraph::graph.data.frame(edges, directed = FALSE, vertices = vertices)

Expand Down
4 changes: 2 additions & 2 deletions util-conf.R
Original file line number Diff line number Diff line change
Expand Up @@ -790,7 +790,7 @@ NetworkConf = R6::R6Class("NetworkConf", inherit = Conf,
author.relation = list(
default = "mail",
type = "character",
allowed = c("mail", "cochange", "issue", "interaction"),
allowed = c("mail", "cochange", "issue", "commit.interaction"),
allowed.number = Inf
),
author.directed = list(
Expand Down Expand Up @@ -821,7 +821,7 @@ NetworkConf = R6::R6Class("NetworkConf", inherit = Conf,
artifact.relation = list(
default = "cochange",
type = "character",
allowed = c("cochange", "callgraph", "mail", "issue", "interaction"),
allowed = c("cochange", "callgraph", "mail", "issue", "commit.interaction"),
allowed.number = Inf
),
artifact.directed = list(
Expand Down
119 changes: 66 additions & 53 deletions util-data.R
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,8 @@ DATASOURCE.TO.ADDITIONAL.ARTIFACT.FUNCTION = list(
"synchronicity" = "get.synchronicity",
"pasta" = "get.pasta",
"gender" = "get.gender",
"custom.event.timestamps" = "get.custom.event.timestamps",
"commit.interactions" = "get.commit.interactions"
"commit.interactions" = "get.commit.interactions",
"custom.event.timestamps" = "get.custom.event.timestamps"
)

#' Applies a function to list keys
Expand Down Expand Up @@ -125,7 +125,8 @@ CONF.PARAMETERS.NO.RESET.ENVIRONMENT = c("commit.messages",
"issues.locked",
"mails.locked",
"custom.event.timestamps",
"custom.event.timestamps.locked")
"custom.event.timestamps.locked",
"commit.interactions")


## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / /
Expand Down Expand Up @@ -164,7 +165,7 @@ ProjectData = R6::R6Class("ProjectData",
commits = create.empty.commits.list(), # data.frame
commits.unfiltered = create.empty.commits.list(), # data.frame
commit.messages = create.empty.commit.message.list(), # data.frame
commit.interactions = create.empty.commit.interaction.list(),
commit.interactions = create.empty.commit.interaction.list(), # data.frame
## mails
mails.unfiltered = create.empty.mails.list(), # data.frame
mails = create.empty.mails.list(), # data.frame
Expand Down Expand Up @@ -414,46 +415,49 @@ ProjectData = R6::R6Class("ProjectData",
#'
#' This method should be called whenever the field \code{commit.interactions} is changed.
update.commit.interactions = function() {
if (!self$is.data.source.cached("commits.unfiltered")) {
self$get.commits()
}
if (self$is.data.source.cached("commit.interactions")) {
if (!self$is.data.source.cached("commits.unfiltered")) {
self$get.commits()
}

## remove existing columns named 'base.author' and 'interacting.author'
indices.to.remove = which("base.author" == colnames(private$commit.interactions))
if (length(indices.to.remove) > 0) {
private$commit.interactions = private$commit.interactions[, -indices.to.remove]
}
indices.to.remove = which("interacting.author" == colnames(private$commit.interactions))
if (length(indices.to.remove) > 0) {
private$commit.interactions = private$commit.interactions[, -indices.to.remove]
}
## remove existing columns named 'base.author' and 'interacting.author'
indices.to.remove = which("base.author" == colnames(private$commit.interactions))
if (length(indices.to.remove) > 0) {
private$commit.interactions = private$commit.interactions[, -indices.to.remove]
}
indices.to.remove = which("interacting.author" == colnames(private$commit.interactions))
if (length(indices.to.remove) > 0) {
private$commit.interactions = private$commit.interactions[, -indices.to.remove]
}

## get relevant data from commits
commit.data.subset = data.frame(hash = private$commits.unfiltered[["hash"]],
author.name = private$commits.unfiltered[["author.name"]])
commit.data.subset = commit.data.subset[!duplicated(commit.data.subset[["hash"]]),]
## get relevant data from commits
commit.data.subset = data.frame(hash = private$commits.unfiltered[["hash"]],
author.name = private$commits.unfiltered[["author.name"]])
commit.data.subset = commit.data.subset[!duplicated(commit.data.subset[["hash"]]),]

## merge commit interactions with commits and change colnames to avoid duplicates
commit.interaction.data = merge(private$commit.interactions, commit.data.subset,
by.x = "base.hash", by.y = "hash", all.x = TRUE)
## merge commit interactions with commits and change colnames to avoid duplicates
commit.interaction.data = merge(private$commit.interactions, commit.data.subset,
by.x = "base.hash", by.y = "hash", all.x = TRUE)

author.index = match("author.name", colnames(commit.interaction.data))
colnames(commit.interaction.data)[[author.index]] = "base.author"
author.index = match("author.name", colnames(commit.interaction.data))
colnames(commit.interaction.data)[[author.index]] = "base.author"

commit.interaction.data = merge(commit.interaction.data, commit.data.subset,
by.x = "commit.hash", by.y = "hash", all.x = TRUE)
commit.interaction.data = merge(commit.interaction.data, commit.data.subset,
by.x = "commit.hash", by.y = "hash", all.x = TRUE)

author.index = match("author.name", colnames(commit.interaction.data))
colnames(commit.interaction.data)[[author.index]] = "interacting.author"
author.index = match("author.name", colnames(commit.interaction.data))
colnames(commit.interaction.data)[[author.index]] = "interacting.author"

## warning if we have interactions without authors
if (anyNA(commit.interaction.data[["base.author"]]) ||
anyNA(commit.interaction.data[["interacting.author"]])) {
logging::logwarn("There are authors in the commit-interactions that are not in the commit data!
This results in the commit-interactions having empty entries.
To clean up these entries, call cleanup.commit.interactions.")
## warning if we have interactions without authors
if (anyNA(commit.interaction.data[["base.author"]]) ||
anyNA(commit.interaction.data[["interacting.author"]])) {
logging::logwarn("There are commits in the commit-interactions that are not in
the commit data, possibly due to incomplete commit data or deleted users.
This results in the commit-interactions having empty entries.
To clean up these entries, call cleanup.commit.interactions.")
}
private$commit.interactions = commit.interaction.data
}
private$commit.interactions = commit.interaction.data

},
## * * Gender data --------------------------------------------------
Expand Down Expand Up @@ -856,6 +860,7 @@ ProjectData = R6::R6Class("ProjectData",
private$pasta.commits = create.empty.pasta.list()
private$gender = create.empty.gender.list()
private$synchronicity = create.empty.synchronicity.list()
private$commit.interactions = create.empty.commit.interaction.list()
},

## * * configuration -----------------------------------------------
Expand Down Expand Up @@ -1256,19 +1261,26 @@ ProjectData = R6::R6Class("ProjectData",
get.commit.interactions = function(data.path = NULL) {
logging::loginfo("Getting commit interactions.")

## if the commit-interaction data have not yet been read do this
if (!self$is.data.source.cached("commit.interactions")) {
if (is.null(data.path)) {
commit.interaction.data = read.commit.interactions(self$get.data.path())
} else {
commit.interaction.data = read.commit.interactions(data.path)
}
## if commit-interaction data are to be read, do this
if (private$project.conf$get.value("commit.interactions")) {
## if the commit-interaction data have not yet been read do this
if (!self$is.data.source.cached("commit.interactions")) {
if (is.null(data.path)) {
commit.interaction.data = read.commit.interactions(self$get.data.path())
} else {
commit.interaction.data = read.commit.interactions(data.path)
}

## cache the result
private$commit.interactions = commit.interaction.data
private$update.commit.interactions()
## cache the result
private$commit.interactions = commit.interaction.data
private$update.commit.interactions()
}
} else {
logging::logwarn("You have not set the ProjectConf parameter
'commit.interactions' to 'TRUE'! Ignoring...")
## mark commit-interaction data as empty
private$commit.interactions = NULL
}

return(private$commit.interactions)
},

Expand All @@ -1289,9 +1301,10 @@ ProjectData = R6::R6Class("ProjectData",
private$commit.interactions = data
},

#' Remove lines in the commit-interactions data that do not contain authors.
#' This should only be called AFTER 'update.commit.interactions' has already been called, as otherwise
#' all commit-interactions data will be removed
#' Remove lines in the commit-interaction data for which the corresponding commit is missing in the
#' commit data, indicated by a missing author in the commit-interaction data.
#' This should only be called AFTER \code{update.commit.interactions} has already been called, as otherwise
#' all commit-interactions data will be removed.
cleanup.commit.interactions = function() {
logging::loginfo("Cleaning up commit-interactions")

Expand Down Expand Up @@ -1877,8 +1890,8 @@ ProjectData = R6::R6Class("ProjectData",
"commit.messages" = "commit.messages",
"synchronicity" = "synchronicity",
"pasta" = "pasta",
"custom.event.timestamps" = "custom.event.timestamps",
"commit.interactions" = "commit.interactions"
"commit.interactions" = "commit.interactions",
"custom.event.timestamps" = "custom.event.timestamps"
)
)
sources = self$get.cached.data.sources.internal(source.type)
Expand Down Expand Up @@ -1910,7 +1923,7 @@ ProjectData = R6::R6Class("ProjectData",
## define the data sources
unfiltered.data.sources = c("commits.unfiltered", "mails.unfiltered", "issues.unfiltered")
additional.data.sources = c("authors", "commit.messages", "synchronicity", "pasta",
"gender", "custom.event.timestamps", "commit.interactions")
"gender", "commit.interactions", "custom.event.timestamps")
main.data.sources = c("issues", "commits", "mails")

## set the right data sources to look for according to the argument
Expand Down
Loading

0 comments on commit ed0f265

Please sign in to comment.