From 8ebcf20b0aba0cb82dcd7e1d1b95e261a866d04e Mon Sep 17 00:00:00 2001 From: Thomas Bock Date: Wed, 18 Apr 2018 19:07:33 +0200 Subject: [PATCH 01/51] Enable contraction of imperfect ranges in the end When constructing ranges, in some situations, we might need to avoid that the last range is not a perfect range any more, i.e., it is smaller than the specified time period. Therefore, add a parameter 'imperfect.range.ratio' which quantifies the allowed imperfectness (e.g., at least 70% of the specified time period are needed to keep an imperfect range, otherwise it will be combined with the previous range.) This fixes #104. In addition to that, add a parameter 'include.end.date' to specify whether to add 1 second to the end (for including the end date as the end of ranges is exclusive) or not. (The default should be to add 1 second to the end, as hitherto. However, we came across situations where we explicitly don't want that behavior, which is the reason for adding the additional parameter.) Also add some additional tests for the newly introduced behavior. Signed-off-by: Thomas Bock --- tests/test-misc.R | 93 ++++++++++++++++++++++++++++++++++++++++++++++- util-misc.R | 68 +++++++++++++++++++++++++++++----- 2 files changed, 151 insertions(+), 10 deletions(-) diff --git a/tests/test-misc.R b/tests/test-misc.R index e14981cb..2c79fc1a 100644 --- a/tests/test-misc.R +++ b/tests/test-misc.R @@ -274,6 +274,49 @@ test_that("Construct consecutive and overlapping ranges.", { expect_equal(result.raw, expected.raw, info = "Standard overlapping ranges (raw).") ## TODO use expect_identical here? why failing? 
+ ## overlapping ranges without imperfect ranges: + ## 1) expected results + expected.formatted = c( + "2018-01-01 00:00:00-2018-01-01 02:00:00", + "2018-01-01 01:30:00-2018-01-01 03:30:00", + "2018-01-01 03:00:00-2018-01-01 06:05:01" + ) + expected.raw = lapply(expected.formatted, get.range.bounds) + names(expected.raw) = expected.formatted + ## 2) formatted + result.formatted = construct.overlapping.ranges(start, end, time.period = "2 hours", overlap = "30 minutes", + imperfect.range.ratio = 1.0, raw = FALSE) + expect_identical(result.formatted, expected.formatted, + info = "Overlapping ranges without imperfect ranges (formatted).") + ## 3) raw + result.raw = construct.overlapping.ranges(start, end, time.period = "2 hours", overlap = "30 minutes", + imperfect.range.ratio = 1.0, raw = TRUE) + expect_equal(result.raw, expected.raw, + info = "Overlapping ranges without imperfect ranges (raw).") + ## TODO use expect_identical here? why failing? + + ## overlapping ranges with imperfect ranges that last at least 8% of the time period: + ## 1) expected results + expected.formatted = c( + "2018-01-01 00:00:00-2018-01-01 02:00:00", + "2018-01-01 01:30:00-2018-01-01 03:30:00", + "2018-01-01 03:00:00-2018-01-01 05:00:00", + "2018-01-01 04:30:00-2018-01-01 06:05:01" + ) + expected.raw = lapply(expected.formatted, get.range.bounds) + names(expected.raw) = expected.formatted + ## 2) formatted + result.formatted = construct.overlapping.ranges(start, end, time.period = "2 hours", overlap = "30 minutes", + imperfect.range.ratio = 0.08, raw = FALSE) + expect_identical(result.formatted, expected.formatted, + info = "Overlapping ranges with imperfect ranges (at least 8% of the time period) (formatted).") + ## 3) raw + result.raw = construct.overlapping.ranges(start, end, time.period = "2 hours", overlap = "30 minutes", + imperfect.range.ratio = 0.08, raw = TRUE) + expect_equal(result.raw, expected.raw, + info = "Overlapping ranges with imperfect ranges (at least 8% of the time 
period) (raw).") + ## TODO use expect_identical here? why failing? + ## non-overlapping/consecutive ranges: ## 1) expected results expected.formatted = c( @@ -295,6 +338,32 @@ test_that("Construct consecutive and overlapping ranges.", { results.raw = construct.consecutive.ranges(start.date, end.date, time.period = "2 hours", raw = FALSE) expect_equal(result.raw, expected.raw, info = "Non-overlapping ranges (consecutive).") + ## non-overlapping/consecutive ranges without imperfect ranges and exclusive end: + ## 1) expected results + expected.formatted = c( + "2018-01-01 00:00:00-2018-01-01 02:00:00", + "2018-01-01 02:00:00-2018-01-01 04:00:00", + "2018-01-01 04:00:00-2018-01-01 06:05:00" + ) + expected.raw = lapply(expected.formatted, get.range.bounds) + names(expected.raw) = expected.formatted + ## 2) formatted + result.formatted = construct.overlapping.ranges(start.date, end.date, time.period = "2 hours", overlap = 0, + imperfect.range.ratio = 1.0, include.end.date = FALSE, raw = FALSE) + expect_identical(result.formatted, expected.formatted, + info = "Non-overlapping ranges without imperfect ranges and exclusive end (formatted).") + ## 3) raw + result.raw = construct.overlapping.ranges(start.date, end.date, time.period = "2 hours", overlap = 0, + imperfect.range.ratio = 1.0, include.end.date = FALSE, raw = TRUE) + expect_equal(result.raw, expected.raw, + info = "Non-overlapping ranges without imperfect ranges and exclusive end (raw).") + ## TODO use expect_identical here? why failing? 
+ ## 4) matching with consecutive ranges + results.raw = construct.consecutive.ranges(start.date, end.date, time.period = "2 hours", + imperfect.range.ratio = 1.0, include.end.date = FALSE, raw = FALSE) + expect_equal(result.raw, expected.raw, + info = "Non-overlapping ranges without imperfect ranges and exclusive end (consecutive).") + ## illegal overlap expect_error( construct.overlapping.ranges(start.date, end.date, time.period = "2 hours", overlap = "1 year", raw = FALSE), @@ -314,7 +383,7 @@ test_that("Construct cumulative ranges.", { end.date = get.date.from.string(end) end.including = end.date + 1 - ## standard overlapping ranges: + ## standard cumulative ranges: ## 1) expected results expected.formatted = c( "2018-01-01 00:00:00-2018-01-01 02:00:00", @@ -331,6 +400,28 @@ test_that("Construct cumulative ranges.", { result.raw = construct.cumulative.ranges(start, end, time.period = "2 hours", raw = TRUE) expect_equal(result.raw, expected.raw, info = "Cumulative ranges (raw).") ## TODO use expect_identical here? why failing? + + ## cumulative ranges without imperfect ranges: + ## 1) expected results + expected.formatted = c( + "2018-01-01 00:00:00-2018-01-01 02:00:00", + "2018-01-01 00:00:00-2018-01-01 04:00:00", + "2018-01-01 00:00:00-2018-01-01 06:05:01" + ) + expected.raw = lapply(expected.formatted, get.range.bounds) + names(expected.raw) = expected.formatted + ## 2) formatted + result.formatted = construct.cumulative.ranges(start, end, time.period = "2 hours", + imperfect.range.ratio = 1.0, raw = FALSE) + expect_identical(result.formatted, expected.formatted, + info = "Cumulative ranges without imperfect ranges (formatted).") + ## 3) raw + result.raw = construct.cumulative.ranges(start, end, time.period = "2 hours", + imperfect.range.ratio = 1.0, raw = TRUE) + expect_equal(result.raw, expected.raw, + info = "Cumulative ranges without imperfect ranges (raw).") + ## TODO use expect_identical here? why failing? 
+ }) ## diff --git a/util-misc.R b/util-misc.R index d3f24535..fd244029 100644 --- a/util-misc.R +++ b/util-misc.R @@ -361,7 +361,7 @@ construct.ranges = function(revs, sliding.window = FALSE, raw = FALSE) { #' #' Important: As the start of each range is supposed to be inclusive and the end of each range #' exclusive, 1 second is added to \code{end}. This way, the date \code{end} will be *included* -#' in the last range. +#' in the last range. To avoid that behavior, set parameter \code{include.end.date} to \code{FALSE}. #' #' Note: You may want to use the function \code{ProjectData$get.data.timestamps} with this #' function here. @@ -371,15 +371,25 @@ construct.ranges = function(revs, sliding.window = FALSE, raw = FALSE) { #' last range (see above) #' @param time.period The time period describing the length of the ranges, a character #' string, e.g., "3 mins" or "15 days" +#' @param imperfect.range.ratio The ratio of the \code{time.period} the last range has to last at least. +#' That is, if the last range would be shorter than ratio * \code{time.period}, +#' the last range will be combined the the second last range. +#' A ratio of 0.0 means that the last range could be as small as possible. +#' A ratio of 1.0 means that the last range has to last at least \code{time.period}. 
+#' [default: 0.0] +#' @param include.end.date whether to include the end date or not [default: TRUE] #' @param raw whether to return pairs of POSIXct objects or strings rather than #' formatted strings [default: FALSE] #' #' @return the constructed ranges, either formatted or raw; the raw ranges are a named list, #' for which the formatted ranges are the names -construct.consecutive.ranges = function(start, end, time.period, raw = FALSE) { +construct.consecutive.ranges = function(start, end, time.period, imperfect.range.ratio = 0.0, + include.end.date = TRUE, raw = FALSE) { ## just construct overlapping ranges without any overlap ;) - ranges = construct.overlapping.ranges(start, end, time.period, overlap = 0, raw) + ranges = construct.overlapping.ranges(start, end, time.period, overlap = 0, + imperfect.range.ratio = imperfect.range.ratio, + include.end.date = include.end.date, raw) return(ranges) } @@ -399,7 +409,7 @@ construct.consecutive.ranges = function(start, end, time.period, raw = FALSE) { #' #' Important: As the start of each range is supposed to be inclusive and the end of each range #' exclusive, 1 second is added to \code{end}. This way, the date \code{end} will be *included* -#' in the last range. +#' in the last range. To avoid that behavior, set parameter \code{include.end.date} to \code{FALSE}. #' #' Note: You may want to use the function \code{ProjectData$get.data.timestamps} with this #' function here. @@ -413,12 +423,20 @@ construct.consecutive.ranges = function(start, end, time.period, raw = FALSE) { #' (e.g., "3 mins" or "15 days") or a numeric indication the percentage of #' overlap (e.g., 1/4). Should be more than 0 seconds and must not be larger #' than the given \code{time.period}. +#' @param imperfect.range.ratio The ratio of the \code{time.period} the last range has to last at least. +#' That is, if the last range would be shorter than ratio * \code{time.period}, +#' the last range will be combined the the second last range. 
+#' A ratio of 0.0 means that the last range could be as small as possible. +#' A ratio of 1.0 means that the last range has to last at least \code{time.period}. +#' [default: 0.0] +#' @param include.end.date whether to include the end date or not [default: TRUE] #' @param raw whether to return pairs of POSIXct objects or strings rather than #' formatted strings [default: FALSE] #' #' @return the constructed ranges, either formatted or raw; the raw ranges are a named list, #' for which the formatted ranges are the names -construct.overlapping.ranges = function(start, end, time.period, overlap, raw = FALSE) { +construct.overlapping.ranges = function(start, end, time.period, overlap, imperfect.range.ratio = 0.0, + include.end.date = TRUE, raw = FALSE) { ## convert given periods to lubridate stuff: ## 1) time period @@ -431,7 +449,11 @@ construct.overlapping.ranges = function(start, end, time.period, overlap, raw = } ## 3) the dates for theirselves start.date = get.date.from.string(start) - end.date = get.date.from.string(end) + 1 ## add 1 for inclusion of end.date + if(include.end.date) { + end.date = get.date.from.string(end) + 1 ## add 1 for inclusion of end.date + } else { + end.date = get.date.from.string(end) + } ## check the breaking case if (overlap >= time.period) { @@ -459,10 +481,28 @@ construct.overlapping.ranges = function(start, end, time.period, overlap, raw = if (seq.end == seq.start) { ranges.approx = c(seq.start, end.date) } else { + + # check for imperfect range in the end + if (imperfect.range.ratio > 0.0 + && end.date - ranges.approx[[length(ranges.approx)]] < imperfect.range.ratio * time.period) { + + ranges.approx = ranges.approx[-length(ranges.approx)] + bins.number = bins.number - 1 + } ranges.approx = c(ranges.approx, end.date) } } + if(ranges.approx[[length(ranges.approx)]] > end.date) { + # check for imperfect range in the end + if (imperfect.range.ratio > 0.0 + && ranges.approx[[length(ranges.approx)]] - end.date < imperfect.range.ratio * 
time.period) { + + ranges.approx = ranges.approx[-(length(ranges.approx)-1)] + bins.number = bins.number - 1 + } + } + ## construct the raw ranges from the approximate ones ranges.raw = lapply(seq_len(bins.number), function(bin.index) { ## combine start and end dates @@ -508,7 +548,7 @@ construct.overlapping.ranges = function(start, end, time.period, overlap, raw = #' #' Important: As the start of each range is supposed to be inclusive and the end of each range #' exclusive, 1 second is added to \code{end}. This way, the date \code{end} will be *included* -#' in the last range. +#' in the last range. To avoid that behavior, set parameter \code{include.end.date} to \code{FALSE}. #' #' Note: You may want to use the function \code{ProjectData$get.data.timestamps} with this #' function here. @@ -518,15 +558,25 @@ construct.overlapping.ranges = function(start, end, time.period, overlap, raw = #' last range (see above) #' @param time.period The time period describing the length of the ranges, a character #' string, e.g., "3 mins" or "15 days" +#' @param imperfect.range.ratio The ratio of the \code{time.period} the last range has to last at least. +#' That is, if the last range would be shorter than ratio * \code{time.period}, +#' the last range will be combined the the second last range. +#' A ratio of 0.0 means that the last range could be as small as possible. +#' A ratio of 1.0 means that the last range has to last at least \code{time.period}. 
+#' [default: 0.0] +#' @param include.end.date whether to include the end date or not [default: TRUE] #' @param raw whether to return pairs of POSIXct objects or strings rather than #' formatted strings [default: FALSE] #' #' @return the constructed ranges, either formatted or raw; the raw ranges are a named list, #' for which the formatted ranges are the names -construct.cumulative.ranges = function(start, end, time.period, raw = FALSE) { +construct.cumulative.ranges = function(start, end, time.period, imperfect.range.ratio = 0.0, + include.end.date = TRUE, raw = FALSE) { ## get the consecutive ranges to alter them afterwards - ranges.consecutive = construct.overlapping.ranges(start, end, time.period, overlap = 0, raw = TRUE) + ranges.consecutive = construct.overlapping.ranges(start, end, time.period, overlap = 0, + imperfect.range.ratio = imperfect.range.ratio, + include.end.date = include.end.date, raw = TRUE) ## set the start of each range to global start date ranges.raw = lapply(ranges.consecutive, function(range.bounds) { From 7c0183cb3e1dcb1adbea0eeab2358cf73c46f284 Mon Sep 17 00:00:00 2001 From: Thomas Bock Date: Wed, 18 Apr 2018 19:16:28 +0200 Subject: [PATCH 02/51] Update changelog Signed-off-by: Thomas Bock --- NEWS.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/NEWS.md b/NEWS.md index 65cb0639..bb3bd807 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,12 @@ # codeface-extraction-r – Changelog +## unversioned + +### Added +- Possibility to contract imperfect ranges in the end (#104, 8ebcf20b0aba0cb82dcd7e1d1b95e261a866d04e) + + ## 3.1.2 ### Changed/Improved From 5f9c75a4eecb6df9cace78e1d9efef830d2a83fa Mon Sep 17 00:00:00 2001 From: Thomas Bock Date: Thu, 19 Apr 2018 13:14:46 +0200 Subject: [PATCH 03/51] Minor fixes from review in PR #117 Signed-off-by: Thomas Bock --- util-misc.R | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/util-misc.R b/util-misc.R index fd244029..1137b09e 100644 --- a/util-misc.R +++ 
b/util-misc.R @@ -373,7 +373,7 @@ construct.ranges = function(revs, sliding.window = FALSE, raw = FALSE) { #' string, e.g., "3 mins" or "15 days" #' @param imperfect.range.ratio The ratio of the \code{time.period} the last range has to last at least. #' That is, if the last range would be shorter than ratio * \code{time.period}, -#' the last range will be combined the the second last range. +#' the last range will be combined with the second last range. #' A ratio of 0.0 means that the last range could be as small as possible. #' A ratio of 1.0 means that the last range has to last at least \code{time.period}. #' [default: 0.0] @@ -425,7 +425,7 @@ construct.consecutive.ranges = function(start, end, time.period, imperfect.range #' than the given \code{time.period}. #' @param imperfect.range.ratio The ratio of the \code{time.period} the last range has to last at least. #' That is, if the last range would be shorter than ratio * \code{time.period}, -#' the last range will be combined the the second last range. +#' the last range will be combined with the second last range. #' A ratio of 0.0 means that the last range could be as small as possible. #' A ratio of 1.0 means that the last range has to last at least \code{time.period}. 
#' [default: 0.0] @@ -449,7 +449,7 @@ construct.overlapping.ranges = function(start, end, time.period, overlap, imperf } ## 3) the dates for theirselves start.date = get.date.from.string(start) - if(include.end.date) { + if (include.end.date) { end.date = get.date.from.string(end) + 1 ## add 1 for inclusion of end.date } else { end.date = get.date.from.string(end) @@ -493,7 +493,7 @@ construct.overlapping.ranges = function(start, end, time.period, overlap, imperf } } - if(ranges.approx[[length(ranges.approx)]] > end.date) { + if (ranges.approx[[length(ranges.approx)]] > end.date) { # check for imperfect range in the end if (imperfect.range.ratio > 0.0 && ranges.approx[[length(ranges.approx)]] - end.date < imperfect.range.ratio * time.period) { @@ -560,7 +560,7 @@ construct.overlapping.ranges = function(start, end, time.period, overlap, imperf #' string, e.g., "3 mins" or "15 days" #' @param imperfect.range.ratio The ratio of the \code{time.period} the last range has to last at least. #' That is, if the last range would be shorter than ratio * \code{time.period}, -#' the last range will be combined the the second last range. +#' the last range will be combined with the second last range. #' A ratio of 0.0 means that the last range could be as small as possible. #' A ratio of 1.0 means that the last range has to last at least \code{time.period}. #' [default: 0.0] From 15d2072e42236506d4ea5556246089b42e4c4171 Mon Sep 17 00:00:00 2001 From: Claus Hunsen Date: Fri, 20 Apr 2018 21:47:36 +0200 Subject: [PATCH 04/51] Adapt 'imperfect.range.ratio' in function 'construct.overlapping.ranges' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To improve the current implementation of the function 'construct.overlapping.ranges' regarding the parameter 'imperfect.range.ratio', we now use one single compact if-block for correcting the ranges *after* their construction. 
The test suite is adapted to catch the corner case where we obtain a single range which is already shorter than the chosen time period – while also passing 'imperfect.range.ratio' in a manner that would cause the adaptation of the last range. Signed-off-by: Claus Hunsen Reviewed-by: Thomas Bock --- tests/test-misc.R | 6 +++--- util-misc.R | 35 +++++++++++++++++------------------ 2 files changed, 20 insertions(+), 21 deletions(-) diff --git a/tests/test-misc.R b/tests/test-misc.R index 2c79fc1a..a74c3f67 100644 --- a/tests/test-misc.R +++ b/tests/test-misc.R @@ -443,7 +443,7 @@ test_that("Construct ranges when difference between start and end is smaller tha ## consecutive ## 1) formatted - result.formatted = construct.consecutive.ranges(start, end, time.period = "1 week", raw = FALSE) + result.formatted = construct.consecutive.ranges(start, end, time.period = "1 week", imperfect.range.ratio = 0.2, raw = FALSE) expect_identical(result.formatted, expected.formatted, info = "Consecutive range (formatted).") ## 2) raw result.raw = construct.consecutive.ranges(start, end, time.period = "1 week", raw = TRUE) @@ -452,7 +452,7 @@ test_that("Construct ranges when difference between start and end is smaller tha ## cumulative ## 1) formatted - result.formatted = construct.cumulative.ranges(start, end, time.period = "1 week", raw = FALSE) + result.formatted = construct.cumulative.ranges(start, end, time.period = "1 week", imperfect.range.ratio = 0.2, raw = FALSE) expect_identical(result.formatted, expected.formatted, info = "Cumulative range (formatted).") ## 2) raw result.raw = construct.cumulative.ranges(start, end, time.period = "1 week", raw = TRUE) @@ -461,7 +461,7 @@ test_that("Construct ranges when difference between start and end is smaller tha ## overlapping ## 1) formatted - result.formatted = construct.overlapping.ranges(start, end, time.period = "1 week", + result.formatted = construct.overlapping.ranges(start, end, time.period = "1 week", imperfect.range.ratio = 
0.2, overlap = "1 day", raw = FALSE) expect_identical(result.formatted, expected.formatted, info = "Overlapping range (formatted).") ## 2) raw diff --git a/util-misc.R b/util-misc.R index 1137b09e..4442870b 100644 --- a/util-misc.R +++ b/util-misc.R @@ -481,28 +481,10 @@ construct.overlapping.ranges = function(start, end, time.period, overlap, imperf if (seq.end == seq.start) { ranges.approx = c(seq.start, end.date) } else { - - # check for imperfect range in the end - if (imperfect.range.ratio > 0.0 - && end.date - ranges.approx[[length(ranges.approx)]] < imperfect.range.ratio * time.period) { - - ranges.approx = ranges.approx[-length(ranges.approx)] - bins.number = bins.number - 1 - } ranges.approx = c(ranges.approx, end.date) } } - if (ranges.approx[[length(ranges.approx)]] > end.date) { - # check for imperfect range in the end - if (imperfect.range.ratio > 0.0 - && ranges.approx[[length(ranges.approx)]] - end.date < imperfect.range.ratio * time.period) { - - ranges.approx = ranges.approx[-(length(ranges.approx)-1)] - bins.number = bins.number - 1 - } - } - ## construct the raw ranges from the approximate ones ranges.raw = lapply(seq_len(bins.number), function(bin.index) { ## combine start and end dates @@ -518,6 +500,23 @@ construct.overlapping.ranges = function(start, end, time.period, overlap, imperf return(c(bin.start, bin.end)) }) + # if wanted, check for imperfect range in the end: + if (imperfect.range.ratio > 0) { + ## 1) get the last range + last.range = ranges.raw[[bins.number]] + ## 2) get the last range's duration + last.range.duration = lubridate::as.duration(lubridate::interval(last.range[1], last.range[2])) + ## 3) check if the last range is too short + is.too.short = last.range.duration < imperfect.range.ratio * time.period + ## 4) combine the last range with the second-last one, if the last one is too short + if (bins.number > 1 && is.too.short) { + ## extend second-last range until end.date + ranges.raw[[bins.number - 1]][2] = end.date + ## 
remove last range + ranges.raw = ranges.raw[-bins.number] + } + } + ## construct actual range strings (without names) ranges = sapply(ranges.raw, construct.ranges, sliding.window = FALSE, raw = FALSE) ranges = unname(ranges) From 33871a7adbe6bd465d915a87e02bd193e9a457ac Mon Sep 17 00:00:00 2001 From: Thomas Bock Date: Mon, 23 Apr 2018 09:30:43 +0200 Subject: [PATCH 05/51] Update possible relations in showcase.R Signed-off-by: Thomas Bock --- showcase.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/showcase.R b/showcase.R index c3f62609..4dfe6abc 100644 --- a/showcase.R +++ b/showcase.R @@ -49,10 +49,10 @@ CF.DATA = "/path/to/codeface-data" # path to codeface data CF.SELECTION.PROCESS = "threemonth" # releases, threemonth(, testing) CASESTUDY = "busybox" -ARTIFACT = "feature" # function, feature, file, featureexpression +ARTIFACT = "feature" # function, feature, file, featureexpression (only relevant for cochange) -AUTHOR.RELATION = "mail" # mail, cochange -ARTIFACT.RELATION = "cochange" # cochange, callgraph +AUTHOR.RELATION = "mail" # mail, cochange, issue +ARTIFACT.RELATION = "cochange" # cochange, callgraph, mail, issue ## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / From 2f1b4d9b0d6a629163a6dd3111b20930e15fcc13 Mon Sep 17 00:00:00 2001 From: Barbara Eckl Date: Tue, 17 Apr 2018 09:47:50 +0200 Subject: [PATCH 06/51] Add implementation of multi-edge-networks for author and artifact networks Now, it is possible to construct networks which include more than one artifact and author relation. To provide this functionality, the allowed number of the 'author.relation' and the 'artifact.relation' is changed to Inf instead of 1. A further edge attribute 'relation' is added to describe the relation type. The functions 'get.artifact.network' and 'get.author.network' are changed to handle more than one relation. Therefore, the functions 'get.bipartite.network' and 'get.multi.network' are adjusted, too. 
The functions use the 'add.edges.for.bipartite.relations' function which can handle more than one relation now. To merge the networks of all relations, we add the functions 'merge.network.data' and 'merge.networks'. The function 'construct.network.from.list' is split into the two functions 'construct.edge.list.from.key.value.list' and 'construct.network.from.edge.list'. Solves the second part of #98. Signed-off-by: Barbara Eckl Signed-off-by: Claus Hunsen --- tests/test-networks-artifact.R | 1 + tests/test-networks-author.R | 24 +- tests/test-networks-bipartite.R | 476 ++++++++++++++++++-------------- tests/test-networks-multi.R | 3 +- util-conf.R | 7 +- util-networks.R | 421 ++++++++++++++++++++-------- 6 files changed, 592 insertions(+), 340 deletions(-) diff --git a/tests/test-networks-artifact.R b/tests/test-networks-artifact.R index 3e16f47b..5ada97aa 100644 --- a/tests/test-networks-artifact.R +++ b/tests/test-networks-artifact.R @@ -57,6 +57,7 @@ test_that("Network construction of the undirected artifact-cochange network", { artifact.type = c("Feature", "Feature"), artifact = c("Base_Feature", "foo"), weight = c(1, 1), + relation = c("cochange", "cochange"), type = TYPE.EDGES.INTRA) ## build expected network diff --git a/tests/test-networks-author.R b/tests/test-networks-author.R index 95e319a9..65d8431e 100644 --- a/tests/test-networks-author.R +++ b/tests/test-networks-author.R @@ -222,7 +222,8 @@ test_that("Network construction of the undirected author-cochange network", { artifact.type = c("Feature", "Feature", "Feature", "Feature", "Feature", "Feature", "Feature", "Feature"), artifact = c("A", "A", "Base_Feature", "Base_Feature", "Base_Feature", "Base_Feature", "Base_Feature", "Base_Feature"), weight = c(1, 1, 1, 1, 1, 1, 1, 1), - type = TYPE.EDGES.INTRA + type = TYPE.EDGES.INTRA, + relation = c("cochange", "cochange", "cochange", "cochange", "cochange", "cochange", "cochange", "cochange") ) ## build expected network @@ -262,7 +263,8 @@ test_that("Network 
construction of the directed author-cochange network", { artifact.type = c("Feature", "Feature", "Feature", "Feature"), artifact = c("A", "Base_Feature", "Base_Feature", "Base_Feature"), weight = c(1, 1, 1, 1), - type = TYPE.EDGES.INTRA + type = TYPE.EDGES.INTRA, + relation = c("cochange", "cochange", "cochange", "cochange") ) ## build expected network @@ -302,7 +304,8 @@ test_that("Network construction of the undirected simplified author-cochange net artifact.type = I(list(c("Feature", "Feature"), c("Feature", "Feature"), c("Feature", "Feature"), c("Feature", "Feature"))), artifact = I(list(c("A", "A"), c("Base_Feature", "Base_Feature"), c("Base_Feature", "Base_Feature"), c("Base_Feature", "Base_Feature"))), weight = c(2, 2, 2, 2), - type = TYPE.EDGES.INTRA + type = TYPE.EDGES.INTRA, + relation = c("cochange", "cochange", "cochange", "cochange") ) ## build expected network @@ -399,7 +402,17 @@ test_that("Network construction of the undirected author-issue network with all 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ), - type = TYPE.EDGES.INTRA) + type = TYPE.EDGES.INTRA, + relation = c("issue", "issue", "issue", "issue", "issue", "issue", "issue", "issue", "issue", + "issue", "issue", "issue", "issue", "issue", "issue", "issue", "issue", "issue", + "issue", "issue", "issue", "issue", "issue", "issue", "issue", "issue", "issue", + "issue", "issue", "issue", "issue", "issue", "issue", "issue", "issue", "issue", + "issue", "issue", "issue", "issue", "issue", "issue", "issue", "issue", "issue", + "issue", "issue", "issue", "issue", "issue", "issue", "issue", "issue", "issue", + "issue", "issue", "issue", "issue", "issue", "issue", "issue", "issue", "issue", + "issue", "issue", "issue", "issue", "issue", "issue", "issue", "issue", "issue", + "issue", "issue", "issue", "issue") + ) network.expected = igraph::graph.data.frame(edges, directed = FALSE, vertices = vertices) @@ -442,7 +455,8 @@ 
test_that("Network construction of the undirected author-issue network with just "commented", "commented", "commented", "commented", "commented", "commented", "commented", "commented", "commented", "commented", "commented", "commented" ), weight = c(1), - type = TYPE.EDGES.INTRA) + type = TYPE.EDGES.INTRA, + relation = c("issue")) network.expected = igraph::graph.data.frame(edges, directed = FALSE, vertices = vertices) diff --git a/tests/test-networks-bipartite.R b/tests/test-networks-bipartite.R index 263303a5..85344a4c 100644 --- a/tests/test-networks-bipartite.R +++ b/tests/test-networks-bipartite.R @@ -46,30 +46,36 @@ test_that("Construction of the bipartite network for the feature artifact with a ## build network network.built = network.builder$get.bipartite.network() - authors = c("Björn", "Karl", "Olaf", "Thomas") - author.net = create.empty.network(directed = FALSE) + - igraph::vertex(authors, name = authors, type = TYPE.AUTHOR) - - artifact.vertices = c("A", "Base_Feature", "foo") - artifact.net = create.empty.network(directed = FALSE) + - igraph::vertices(artifact.vertices, name = artifact.vertices, type = TYPE.ARTIFACT, artifact.type = "feature") - - network.expected = igraph::disjoint_union(author.net, artifact.net) - - vertex.sequence.for.edges = c(1, 5, 2, 6, 3, 5, 3, 6, 4, 6, 4, 7) - - extra.edge.attributes = list(date = get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 16:06:10", "2016-07-12 16:00:45", - "2016-07-12 16:05:41", "2016-07-12 16:06:32", "2016-07-12 16:06:32")), - hash = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "1143db502761379c2bfcecc2007fc34282e7ee61", - "5a5ec9675e98187e1e92561e1888aa6f04faa338", "3a0ed78458b3976243db6829f63eba3eead26774", - "0a1a5c523d835459c42f33e863623138555e2526", "0a1a5c523d835459c42f33e863623138555e2526"), - file = c("test.c", "test3.c", "test.c", "test2.c", "test2.c", "test2.c"), - artifact.type = c("Feature", "Feature", "Feature", "Feature", "Feature", "Feature"), - artifact = c("A", 
"Base_Feature", "A", "Base_Feature", "Base_Feature", "foo"), - weight = c(1, 1, 1, 1, 1, 1), - type = TYPE.EDGES.INTER) - - network.expected = igraph::add_edges(network.expected, vertex.sequence.for.edges, attr = extra.edge.attributes) + ## construct expected network: + ## 1) construct expected vertices + authors = data.frame( + name = c("Björn", "Karl", "Olaf", "Thomas"), + type = TYPE.AUTHOR + ) + artifacts = data.frame( + name = c("A", "Base_Feature", "foo"), + type = TYPE.ARTIFACT, + artifact.type = "feature" + ) + vertices = plyr::rbind.fill(authors, artifacts) + ## 2) construct expected edge attributes + network.expected.data = data.frame( + from = c("Björn", "Karl", "Olaf", "Olaf", "Thomas", "Thomas"), + to = c("A", "Base_Feature", "A", "Base_Feature", "Base_Feature", "foo"), + date = get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 16:06:10", "2016-07-12 16:00:45", + "2016-07-12 16:05:41", "2016-07-12 16:06:32", "2016-07-12 16:06:32")), + hash = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "1143db502761379c2bfcecc2007fc34282e7ee61", + "5a5ec9675e98187e1e92561e1888aa6f04faa338", "3a0ed78458b3976243db6829f63eba3eead26774", + "0a1a5c523d835459c42f33e863623138555e2526", "0a1a5c523d835459c42f33e863623138555e2526"), + file = c("test.c", "test3.c", "test.c", "test2.c", "test2.c", "test2.c"), + artifact.type = c("Feature", "Feature", "Feature", "Feature", "Feature", "Feature"), + artifact = c("A", "Base_Feature", "A", "Base_Feature", "Base_Feature", "foo"), + type = TYPE.EDGES.INTER, + weight = 1, + relation = "cochange" + ) + ## 3) construct expected network + network.expected = igraph::graph.data.frame(network.expected.data, directed = net.conf$get.value("author.directed"), vertices = vertices) expect_true(igraph::identical_graphs(network.built, network.expected)) }) @@ -91,29 +97,35 @@ test_that("Construction of the bipartite network for the file artifact with auth ## build network network.built = network.builder$get.bipartite.network() - authors = 
c("Björn", "Olaf", "Thomas") - author.net = create.empty.network(directed = FALSE) + - igraph::vertex(authors, name = authors, type = TYPE.AUTHOR) - - artifact.vertices = c("test.c", "test2.c") - artifact.net = create.empty.network(directed = FALSE) + - igraph::vertices(artifact.vertices, name = artifact.vertices, type = TYPE.ARTIFACT, artifact.type = "file") - - network.expected = igraph::disjoint_union(author.net, artifact.net) - - vertex.sequence.for.edges = c(1, 4, 2, 4, 2, 5, 3, 5) - - extra.edge.attributes = list(date = get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 16:00:45", - "2016-07-12 16:05:41", "2016-07-12 16:06:32")), - hash = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338", - "3a0ed78458b3976243db6829f63eba3eead26774", "0a1a5c523d835459c42f33e863623138555e2526"), - file = c("test.c", "test.c", "test2.c", "test2.c"), - artifact.type = c("File", "File", "File", "File"), - artifact = c("test.c", "test.c", "test2.c", "test2.c"), - weight = c(1, 1, 1, 1), - type = TYPE.EDGES.INTER) - - network.expected = igraph::add_edges(network.expected, vertex.sequence.for.edges, attr = extra.edge.attributes) + ## construct expected network: + ## 1) construct expected vertices + authors = data.frame( + name = c("Björn", "Olaf", "Thomas"), + type = TYPE.AUTHOR + ) + artifacts = data.frame( + name = c("test.c", "test2.c"), + type = TYPE.ARTIFACT, + artifact.type = "file" + ) + vertices = plyr::rbind.fill(authors, artifacts) + ## 2) construct expected edge attributes + network.expected.data = data.frame( + from = c("Björn", "Olaf", "Olaf", "Thomas"), + to = c("test.c", "test.c", "test2.c", "test2.c"), + date = get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 16:00:45", + "2016-07-12 16:05:41", "2016-07-12 16:06:32")), + hash = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338", + "3a0ed78458b3976243db6829f63eba3eead26774", "0a1a5c523d835459c42f33e863623138555e2526"), + file 
= c("test.c", "test.c", "test2.c", "test2.c"), + artifact.type = c("File", "File", "File", "File"), + artifact = c("test.c", "test.c", "test2.c", "test2.c"), + type = TYPE.EDGES.INTER, + weight = c(1, 1, 1, 1), + relation = c("cochange", "cochange", "cochange", "cochange") + ) + ## 3) construct expected network + network.expected = igraph::graph.data.frame(network.expected.data, directed = net.conf$get.value("author.directed"), vertices = vertices) expect_true(igraph::identical_graphs(network.built, network.expected)) }) @@ -135,29 +147,34 @@ test_that("Construction of the bipartite network for the function artifact with ## build network network.built = network.builder$get.bipartite.network() - authors = c("Björn", "Olaf", "Thomas") - author.net = create.empty.network(directed = FALSE) + - igraph::vertex(authors, name = authors, type = TYPE.AUTHOR) - - artifact.vertices = c("File_Level") - artifact.net = create.empty.network(directed = FALSE) + - igraph::vertices(artifact.vertices, name = artifact.vertices, type = TYPE.ARTIFACT, artifact.type = "function") - - network.expected = igraph::disjoint_union(author.net, artifact.net) - - vertex.sequence.for.edges = c(1, 4, 2, 4, 2, 4, 3, 4) - - extra.edge.attributes = list(date = get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 16:00:45", - "2016-07-12 16:05:41", "2016-07-12 16:06:32")), - hash = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338", - "3a0ed78458b3976243db6829f63eba3eead26774", "0a1a5c523d835459c42f33e863623138555e2526"), - file = c("test.c", "test.c", "test2.c", "test2.c"), - artifact.type = c("Function", "Function", "Function", "Function"), - artifact = c("File_Level", "File_Level", "File_Level", "File_Level"), - weight = c(1, 1, 1, 1), - type = TYPE.EDGES.INTER) - - network.expected = igraph::add_edges(network.expected, vertex.sequence.for.edges, attr = extra.edge.attributes) + ## construct expected network: + ## 1) construct expected vertices + authors = 
data.frame( + name = c("Björn", "Olaf", "Thomas"), + type = TYPE.AUTHOR + ) + artifacts = data.frame( + name = c("File_Level"), + type = TYPE.ARTIFACT, + artifact.type = "function" + ) + vertices = plyr::rbind.fill(authors, artifacts) + ## 2) construct expected edge attributes + network.expected.data = data.frame( + from = c("Björn", "Olaf", "Olaf", "Thomas"), + to = c("File_Level", "File_Level", "File_Level", "File_Level"), + date = get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 16:00:45", + "2016-07-12 16:05:41", "2016-07-12 16:06:32")), + hash = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338", + "3a0ed78458b3976243db6829f63eba3eead26774", "0a1a5c523d835459c42f33e863623138555e2526"), + file = c("test.c", "test.c", "test2.c", "test2.c"), + artifact.type = c("Function", "Function", "Function", "Function"), + artifact = c("File_Level", "File_Level", "File_Level", "File_Level"), + type = TYPE.EDGES.INTER, + weight = c(1, 1, 1, 1), + relation = c("cochange", "cochange", "cochange", "cochange")) + ## 3) construct expected network + network.expected = igraph::graph.data.frame(network.expected.data, directed = net.conf$get.value("author.directed"), vertices = vertices) expect_true(igraph::identical_graphs(network.built, network.expected)) }) @@ -178,29 +195,34 @@ test_that("Construction of the bipartite network for the featureexpression artif ## build network network.built = network.builder$get.bipartite.network() - authors = c("Björn", "Olaf") - author.net = create.empty.network(directed = FALSE) + - igraph::vertex(authors, name = authors, type = TYPE.AUTHOR) - - artifact.vertices = c("defined(A)", "Base_Feature") - artifact.net = create.empty.network(directed = FALSE) + - igraph::vertices(artifact.vertices, name = artifact.vertices, type = TYPE.ARTIFACT, artifact.type = "featureexpression") - - network.expected = igraph::disjoint_union(author.net, artifact.net) - - vertex.sequence.for.edges = c(1, 3, 2, 3, 2, 4) - - 
extra.edge.attributes = list(date = get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 16:00:45", - "2016-07-12 16:05:41")), - hash = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338", - "3a0ed78458b3976243db6829f63eba3eead26774"), - file = c("test.c", "test.c", "test2.c"), - artifact.type = c("FeatureExpression", "FeatureExpression", "FeatureExpression"), - artifact = c("defined(A)", "defined(A)", "Base_Feature"), - weight = c(1, 1, 1), - type = TYPE.EDGES.INTER) - - network.expected = igraph::add_edges(network.expected, vertex.sequence.for.edges, attr = extra.edge.attributes) + ## construct expected network: + ## 1) construct expected vertices + authors = data.frame( + name = c("Björn", "Olaf"), + type = TYPE.AUTHOR + ) + artifacts = data.frame( + name = c("defined(A)", "Base_Feature"), + type = TYPE.ARTIFACT, + artifact.type = "featureexpression" + ) + vertices = plyr::rbind.fill(authors, artifacts) + ## 2) construct expected edge attributes + network.expected.data = data.frame( + from = c("Björn", "Olaf", "Olaf"), + to = c("defined(A)", "defined(A)", "Base_Feature"), + date = get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 16:00:45", + "2016-07-12 16:05:41")), + hash = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338", + "3a0ed78458b3976243db6829f63eba3eead26774"), + file = c("test.c", "test.c", "test2.c"), + artifact.type = c("FeatureExpression", "FeatureExpression", "FeatureExpression"), + artifact = c("defined(A)", "defined(A)", "Base_Feature"), + type = TYPE.EDGES.INTER, + weight = c(1, 1, 1), + relation = c("cochange", "cochange", "cochange")) + ## 3) construct expected network + network.expected = igraph::graph.data.frame(network.expected.data, directed = net.conf$get.value("author.directed"), vertices = vertices) expect_true(igraph::identical_graphs(network.built, network.expected)) }) @@ -221,33 +243,41 @@ test_that("Construction of the bipartite network for 
the feature artifact with a ## build network network.built = network.builder$get.bipartite.network() - authors = c("Björn", "Karl", "Max", "Olaf", "Thomas") - author.net = create.empty.network(directed = FALSE) + - igraph::vertex(authors, name = authors, type = TYPE.AUTHOR) - - artifact.vertices = c("", "", "", "") - artifact.net = create.empty.network(directed = FALSE) + - igraph::vertices(artifact.vertices, name = artifact.vertices, type = TYPE.ARTIFACT, artifact.type = "issue.id") - - network.expected = igraph::disjoint_union(author.net, artifact.net) - - vertex.sequence.for.edges = c(1, 6, 1, 7, 1, 6, 1, 6, 1, 6, 1, 6, 1, 8, 1, 8, 2, 9, 3, 8, 4, 7, 4, 6, 4, 6, 5, 7, 5, 7) - - extra.edge.attributes = list(date = get.date.from.string(c("2016-07-12 16:05:47", "2016-07-14 17:42:52", "2016-08-31 18:21:48", - "2016-10-13 15:33:56", "2016-12-06 14:03:42", "2016-12-07 15:53:02", - "2017-02-20 22:25:41", "2017-03-02 17:30:10", "2013-05-05 23:28:57", - "2017-05-23 12:32:39", "2016-07-27 22:25:25", "2016-10-05 01:07:46", - "2016-12-07 15:37:21", "2016-07-14 02:03:14", "2016-07-15 08:37:57")), - issue.id = c("", "", "", "", "", - "", "", "", "", "", - "", "", "", "", ""), - event.name = c("commented", "commented", "commented", "commented", "commented", - "commented", "commented", "commented", "commented", "commented", - "commented", "commented", "commented", "commented", "commented"), - weight = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), - type = TYPE.EDGES.INTER) - - network.expected = igraph::add_edges(network.expected, vertex.sequence.for.edges, attr = extra.edge.attributes) + ## construct expected network: + ## 1) construct expected vertices + authors = data.frame( + name =c("Björn", "Karl", "Max", "Olaf", "Thomas"), + type = TYPE.AUTHOR + ) + artifacts = data.frame( + name = c("", "", "", ""), + type = TYPE.ARTIFACT, + artifact.type = "issue" + ) + vertices = plyr::rbind.fill(authors, artifacts) + ## 2) construct expected edge attributes + network.expected.data = 
data.frame( + from = c("Björn", "Björn", "Björn", "Björn", "Björn", "Björn", "Björn", "Björn", "Karl", "Max", "Olaf", "Olaf", "Olaf", + "Thomas", "Thomas"), + to = c("", "", "", "", "", + "", "", "", "", "", + "", "", "", "", ""), + date = get.date.from.string(c("2016-07-12 16:05:47", "2016-07-14 17:42:52", "2016-08-31 18:21:48", + "2016-10-13 15:33:56", "2016-12-06 14:03:42", "2016-12-07 15:53:02", + "2017-02-20 22:25:41", "2017-03-02 17:30:10", "2013-05-05 23:28:57", + "2017-05-23 12:32:39", "2016-07-27 22:25:25", "2016-10-05 01:07:46", + "2016-12-07 15:37:21", "2016-07-14 02:03:14", "2016-07-15 08:37:57")), + issue.id = c("", "", "", "", "", + "", "", "", "", "", + "", "", "", "", ""), + event.name = c("commented", "commented", "commented", "commented", "commented", + "commented", "commented", "commented", "commented", "commented", + "commented", "commented", "commented", "commented", "commented"), + type = TYPE.EDGES.INTER, + weight = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), + relation = "issue") + ## 3) construct expected network + network.expected = igraph::graph.data.frame(network.expected.data, directed = net.conf$get.value("author.directed"), vertices = vertices) expect_true(igraph::identical_graphs(network.built, network.expected)) }) @@ -269,30 +299,35 @@ test_that("Construction of the directed bipartite network for the feature artifa ## build network network.built = network.builder$get.bipartite.network() - authors = c("Björn", "Karl", "Olaf", "Thomas") - author.net = create.empty.network(directed = TRUE) + - igraph::vertex(authors, name = authors, type = TYPE.AUTHOR) - - artifact.vertices = c("A", "Base_Feature", "foo") - artifact.net = create.empty.network(directed = TRUE) + - igraph::vertices(artifact.vertices, name = artifact.vertices, type = TYPE.ARTIFACT, artifact.type = "feature") - - network.expected = igraph::disjoint_union(author.net, artifact.net) - - vertex.sequence.for.edges = c(1, 5, 2, 6, 3, 5, 3, 6, 4, 6, 4, 7) - - 
extra.edge.attributes = list(date = get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 16:06:10", "2016-07-12 16:00:45", - "2016-07-12 16:05:41", "2016-07-12 16:06:32", "2016-07-12 16:06:32")), - hash = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "1143db502761379c2bfcecc2007fc34282e7ee61", - "5a5ec9675e98187e1e92561e1888aa6f04faa338", "3a0ed78458b3976243db6829f63eba3eead26774", - "0a1a5c523d835459c42f33e863623138555e2526", "0a1a5c523d835459c42f33e863623138555e2526"), - file = c("test.c", "test3.c", "test.c", "test2.c", "test2.c", "test2.c"), - artifact.type = c("Feature", "Feature", "Feature", "Feature", "Feature", "Feature"), - artifact = c("A", "Base_Feature", "A", "Base_Feature", "Base_Feature", "foo"), - weight = c(1, 1, 1, 1, 1, 1), - type = TYPE.EDGES.INTER) - - network.expected = igraph::add_edges(network.expected, vertex.sequence.for.edges, attr = extra.edge.attributes) + ## construct expected network: + ## 1) construct expected vertices + authors = data.frame( + name = c("Björn", "Karl", "Olaf", "Thomas"), + type = TYPE.AUTHOR + ) + artifacts = data.frame( + name = c("A", "Base_Feature", "foo"), + type = TYPE.ARTIFACT, + artifact.type = "feature" + ) + vertices = plyr::rbind.fill(authors, artifacts) + ## 2) construct expected edge attributes + network.expected.data = data.frame( + from = c("Björn", "Karl", "Olaf", "Olaf","Thomas", "Thomas"), + to = c("A", "Base_Feature", "A", "Base_Feature", "Base_Feature", "foo"), + date = get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 16:06:10", "2016-07-12 16:00:45", + "2016-07-12 16:05:41", "2016-07-12 16:06:32", "2016-07-12 16:06:32")), + hash = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "1143db502761379c2bfcecc2007fc34282e7ee61", + "5a5ec9675e98187e1e92561e1888aa6f04faa338", "3a0ed78458b3976243db6829f63eba3eead26774", + "0a1a5c523d835459c42f33e863623138555e2526", "0a1a5c523d835459c42f33e863623138555e2526"), + file = c("test.c", "test3.c", "test.c", "test2.c", "test2.c", "test2.c"), + 
artifact.type = c("Feature", "Feature", "Feature", "Feature", "Feature", "Feature"), + artifact = c("A", "Base_Feature", "A", "Base_Feature", "Base_Feature", "foo"), + type = TYPE.EDGES.INTER, + weight = c(1, 1, 1, 1, 1, 1), + relation = c("cochange", "cochange", "cochange", "cochange", "cochange", "cochange")) + ## 3) construct expected network + network.expected = igraph::graph.data.frame(network.expected.data, directed = net.conf$get.value("author.directed"), vertices = vertices) expect_true(igraph::identical_graphs(network.built, network.expected)) }) @@ -314,29 +349,34 @@ test_that("Construction of the directed bipartite network for the file artifact ## build network network.built = network.builder$get.bipartite.network() - authors = c("Björn", "Olaf", "Thomas") - author.net = create.empty.network(directed = TRUE) + - igraph::vertex(authors, name = authors, type = TYPE.AUTHOR) - - artifact.vertices = c("test.c", "test2.c") - artifact.net = create.empty.network(directed = TRUE) + - igraph::vertices(artifact.vertices, name = artifact.vertices, type = TYPE.ARTIFACT, artifact.type = "file") - - network.expected = igraph::disjoint_union(author.net, artifact.net) - - vertex.sequence.for.edges = c(1, 4, 2, 4, 2, 5, 3, 5) - - extra.edge.attributes = list(date = get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 16:00:45", - "2016-07-12 16:05:41", "2016-07-12 16:06:32")), - hash = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338", - "3a0ed78458b3976243db6829f63eba3eead26774", "0a1a5c523d835459c42f33e863623138555e2526"), - file = c("test.c", "test.c", "test2.c", "test2.c"), - artifact.type = c("File", "File", "File", "File"), - artifact = c("test.c", "test.c", "test2.c", "test2.c"), - weight = c(1, 1, 1, 1), - type = TYPE.EDGES.INTER) - - network.expected = igraph::add_edges(network.expected, vertex.sequence.for.edges, attr = extra.edge.attributes) + ## construct expected network: + ## 1) construct expected vertices + 
authors = data.frame( + name = c("Björn", "Olaf", "Thomas"), + type = TYPE.AUTHOR + ) + artifacts = data.frame( + name = c("test.c", "test2.c"), + type = TYPE.ARTIFACT, + artifact.type = "file" + ) + vertices = plyr::rbind.fill(authors, artifacts) + ## 2) construct expected edge attributes + network.expected.data = data.frame( + from = c("Björn", "Olaf", "Olaf", "Thomas"), + to = c("test.c", "test.c", "test2.c", "test2.c"), + date = get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 16:00:45", + "2016-07-12 16:05:41", "2016-07-12 16:06:32")), + hash = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338", + "3a0ed78458b3976243db6829f63eba3eead26774", "0a1a5c523d835459c42f33e863623138555e2526"), + file = c("test.c", "test.c", "test2.c", "test2.c"), + artifact.type = c("File", "File", "File", "File"), + artifact = c("test.c", "test.c", "test2.c", "test2.c"), + type = TYPE.EDGES.INTER, + weight = c(1, 1, 1, 1), + relation = c("cochange", "cochange", "cochange", "cochange")) + ## 3) construct expected network + network.expected = igraph::graph.data.frame(network.expected.data, directed = net.conf$get.value("author.directed"), vertices = vertices) expect_true(igraph::identical_graphs(network.built, network.expected)) }) @@ -359,29 +399,34 @@ test_that("Construction of the directed bipartite network for the function artif ## build network network.built = network.builder$get.bipartite.network() - authors = c("Björn", "Olaf", "Thomas") - author.net = create.empty.network(directed = TRUE) + - igraph::vertex(authors, name = authors, type = TYPE.AUTHOR) - - artifact.vertices = c("File_Level") - artifact.net = create.empty.network(directed = TRUE) + - igraph::vertices(artifact.vertices, name = artifact.vertices, type = TYPE.ARTIFACT, artifact.type = "function") - - network.expected = igraph::disjoint_union(author.net, artifact.net) - - vertex.sequence.for.edges = c(1, 4, 2, 4, 2, 4, 3, 4) - - extra.edge.attributes = list(date = 
get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 16:00:45", - "2016-07-12 16:05:41", "2016-07-12 16:06:32")), - hash = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338", - "3a0ed78458b3976243db6829f63eba3eead26774", "0a1a5c523d835459c42f33e863623138555e2526"), - file = c("test.c", "test.c", "test2.c", "test2.c"), - artifact.type = c("Function", "Function", "Function", "Function"), - artifact = c("File_Level", "File_Level", "File_Level", "File_Level"), - weight = c(1, 1, 1, 1), - type = TYPE.EDGES.INTER) - - network.expected = igraph::add_edges(network.expected, vertex.sequence.for.edges, attr = extra.edge.attributes) + ## construct expected network: + ## 1) construct expected vertices + authors = data.frame( + name = c("Björn", "Olaf", "Thomas"), + type = TYPE.AUTHOR + ) + artifacts = data.frame( + name = c("File_Level"), + type = TYPE.ARTIFACT, + artifact.type = "function" + ) + vertices = plyr::rbind.fill(authors, artifacts) + ## 2) construct expected edge attributes + network.expected.data = data.frame( + from = c("Björn", "Olaf", "Olaf", "Thomas"), + to = c("File_Level", "File_Level", "File_Level", "File_Level"), + date = get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 16:00:45", + "2016-07-12 16:05:41", "2016-07-12 16:06:32")), + hash = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338", + "3a0ed78458b3976243db6829f63eba3eead26774", "0a1a5c523d835459c42f33e863623138555e2526"), + file = c("test.c", "test.c", "test2.c", "test2.c"), + artifact.type = c("Function", "Function", "Function", "Function"), + artifact = c("File_Level", "File_Level", "File_Level", "File_Level"), + type = TYPE.EDGES.INTER, + weight = c(1, 1, 1, 1), + relation = c("cochange", "cochange", "cochange", "cochange")) + ## 3) construct expected network + network.expected = igraph::graph.data.frame(network.expected.data, directed = net.conf$get.value("author.directed"), vertices = vertices) 
expect_true(igraph::identical_graphs(network.built, network.expected)) }) @@ -403,29 +448,34 @@ test_that("Construction of the directed bipartite network for the featureexpress ## build network network.built = network.builder$get.bipartite.network() - authors = c("Björn", "Olaf") - author.net = create.empty.network(directed = TRUE) + - igraph::vertex(authors, name = authors, type = TYPE.AUTHOR) - - artifact.vertices = c("defined(A)", "Base_Feature") - artifact.net = create.empty.network(directed = TRUE) + - igraph::vertices(artifact.vertices, name = artifact.vertices, type = TYPE.ARTIFACT, artifact.type = "featureexpression") - - network.expected = igraph::disjoint_union(author.net, artifact.net) - - vertex.sequence.for.edges = c(1, 3, 2, 3, 2, 4) - - extra.edge.attributes = list(date = get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 16:00:45", - "2016-07-12 16:05:41")), - hash = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338", - "3a0ed78458b3976243db6829f63eba3eead26774"), - file = c("test.c", "test.c", "test2.c"), - artifact.type = c("FeatureExpression", "FeatureExpression", "FeatureExpression"), - artifact = c("defined(A)", "defined(A)", "Base_Feature"), - weight = c(1, 1, 1), - type = TYPE.EDGES.INTER) - - network.expected = igraph::add_edges(network.expected, vertex.sequence.for.edges, attr = extra.edge.attributes) + ## construct expected network: + ## 1) construct expected vertices + authors = data.frame( + name = c("Björn", "Olaf"), + type = TYPE.AUTHOR + ) + artifacts = data.frame( + name = c("defined(A)", "Base_Feature"), + type = TYPE.ARTIFACT, + artifact.type = "featureexpression" + ) + vertices = plyr::rbind.fill(authors, artifacts) + ## 2) construct expected edge attributes + network.expected.data = data.frame( + from = c("Björn", "Olaf", "Olaf"), + to = c("defined(A)", "defined(A)", "Base_Feature"), + date = get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 16:00:45", + "2016-07-12 
16:05:41")), + hash = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338", + "3a0ed78458b3976243db6829f63eba3eead26774"), + file = c("test.c", "test.c", "test2.c"), + artifact.type = c("FeatureExpression", "FeatureExpression", "FeatureExpression"), + artifact = c("defined(A)", "defined(A)", "Base_Feature"), + type = TYPE.EDGES.INTER, + weight = c(1, 1, 1), + relation = c("cochange", "cochange", "cochange")) + ## 3) construct expected network + network.expected = igraph::graph.data.frame(network.expected.data, directed = net.conf$get.value("author.directed"), vertices = vertices) expect_true(igraph::identical_graphs(network.built, network.expected)) }) diff --git a/tests/test-networks-multi.R b/tests/test-networks-multi.R index a9384302..0eea7fca 100644 --- a/tests/test-networks-multi.R +++ b/tests/test-networks-multi.R @@ -83,7 +83,8 @@ test_that("Construction of the multi network for the feature artifact with autho "Base_Feature", "Base_Feature", "foo", "A", "A", "Base_Feature", "Base_Feature", "Base_Feature", "foo"), weight = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), - type = c(rep(TYPE.EDGES.INTRA, 10), rep(TYPE.EDGES.INTER, 6))) + type = c(rep(TYPE.EDGES.INTRA, 10), rep(TYPE.EDGES.INTER, 6)), + relation =c(rep("cochange", 16))) network.expected = igraph::graph.data.frame(edges, directed = FALSE, vertices = vertices) diff --git a/util-conf.R b/util-conf.R index 6275dabe..bd22c4f0 100644 --- a/util-conf.R +++ b/util-conf.R @@ -16,6 +16,7 @@ ## Copyright 2017 by Christian Hechtl ## Copyright 2017 by Felix Prasse ## Copyright 2017-2018 by Thomas Bock +## Copyright 2018 by Barbara Eckl ## All Rights Reserved. @@ -89,7 +90,7 @@ Conf = R6::R6Class("Conf", #' - allowed, and #' - allowed.number. 
check.value = function(value, name) { - browser(expr = name == "revisions") + # browser(expr = name == "revisions") if (!exists(name, where = private[["attributes"]])) { result = c(existing = FALSE) } else { @@ -652,7 +653,7 @@ NetworkConf = R6::R6Class("NetworkConf", inherit = Conf, default = "mail", type = "character", allowed = c("mail", "cochange", "issue"), - allowed.number = 1 + allowed.number = Inf ), author.directed = list( default = FALSE, @@ -677,7 +678,7 @@ NetworkConf = R6::R6Class("NetworkConf", inherit = Conf, default = "cochange", type = "character", allowed = c("cochange", "callgraph", "mail", "issue"), - allowed.number = 1 + allowed.number = Inf ), artifact.directed = list( default = FALSE, diff --git a/util-networks.R b/util-networks.R index 2948a9f4..8d75a757 100644 --- a/util-networks.R +++ b/util-networks.R @@ -15,6 +15,7 @@ ## Copyright 2017 by Raphael Nömmer ## Copyright 2017 by Christian Hechtl ## Copyright 2017-2018 by Thomas Bock +## Copyright 2018 by Barbara Eckl ## All Rights Reserved. 
@@ -39,6 +40,11 @@ TYPE.ARTIFACT = "Artifact" TYPE.EDGES.INTRA = "Unipartite" TYPE.EDGES.INTER = "Bipartite" +## intra edge types +RELATION.EDGES.INTRA.MAIL = "mail" +RELATION.EDGES.INTRA.ISSUE = "issue" +RELATION.EDGES.INTRA.COCHANGE = "cochange" + ## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / ## Edge-attribute handling during simplification --------------------------- @@ -48,6 +54,7 @@ EDGE.ATTR.HANDLING = list( ## network-analytic data weight = "sum", type = "first", + relation = "concat", ## commit data changed.files = "sum", @@ -121,11 +128,20 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", } ## construct network based on artifact2author data - author.net = construct.network.from.list( + author.net.data = construct.edge.list.from.key.value.list( private$proj.data$get.artifact2author(), network.conf = private$network.conf, directed = private$network.conf$get.value("author.directed") ) + author.net = construct.network.from.edge.list( + author.net.data[["vertices"]], + author.net.data[["edges"]], + network.conf = private$network.conf, + directed = private$network.conf$get.value("author.directed") + ) + + igraph::E(author.net)$type = TYPE.EDGES.INTRA + igraph::E(author.net)$relation = RELATION.EDGES.INTRA.COCHANGE ## store network private$authors.network.cochange = author.net @@ -148,11 +164,19 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", return(private$authors.network.mail) } - author.relation = construct.network.from.list( + author.relation.data = construct.edge.list.from.key.value.list( private$proj.data$get.thread2author(), network.conf = private$network.conf, directed = private$network.conf$get.value("author.directed") ) + author.relation = construct.network.from.edge.list( + author.relation.data[["vertices"]], + author.relation.data[["edges"]], + network.conf = private$network.conf, + directed = private$network.conf$get.value("author.directed") + ) + igraph::E(author.relation)$type = TYPE.EDGES.INTRA + 
igraph::E(author.relation)$relation = RELATION.EDGES.INTRA.MAIL ## store network private$authors.network.mail = author.relation @@ -170,11 +194,20 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", return(private$authors.network.issue) } - author.relation = construct.network.from.list( + author.net.data = construct.edge.list.from.key.value.list( private$proj.data$get.issue2author(), network.conf = private$network.conf, directed = private$network.conf$get.value("author.directed") ) + author.relation = construct.network.from.edge.list( + author.net.data[["vertices"]], + author.net.data[["edges"]], + network.conf = private$network.conf, + directed = private$network.conf$get.value("author.directed") + ) + + igraph::E(author.relation)$type = TYPE.EDGES.INTRA + igraph::E(author.relation)$relation = RELATION.EDGES.INTRA.ISSUE private$authors.network.issue = author.relation logging::logdebug("get.author.network.issue: finished.") @@ -198,11 +231,19 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", return(private$artifacts.network.cochange) } - artifacts.net = construct.network.from.list( + artifacts.net.data = construct.edge.list.from.key.value.list( private$proj.data$get.commit2artifact(), network.conf = private$network.conf, directed = FALSE ) + artifacts.net = construct.network.from.edge.list( + artifacts.net.data[["vertices"]], + artifacts.net.data[["edges"]], + network.conf = private$network.conf, + directed = FALSE + ) + + igraph::E(artifacts.net)$relation = RELATION.EDGES.INTRA.COCHANGE ## store network private$artifacts.network.cochange = artifacts.net @@ -280,6 +321,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", artifacts.net = igraph::set.vertex.attribute(artifacts.net, "id", value = names) ## store network + igraph::E(artifacts.net)$relation = RELATION.EDGES.INTRA.COCHANGE private$artifacts.network.callgraph = artifacts.net logging::logdebug("get.artifact.network.callgraph: finished.") @@ -311,6 +353,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", 
artifacts = names(private$proj.data$get.thread2author()) # thread IDs artifacts.net = create.empty.network(directed = directed) + igraph::vertices(artifacts, type = TYPE.ARTIFACT) + igraph::E(artifacts.net)$relation = RELATION.EDGES.INTRA.MAIL ## store network private$artifacts.network.mail = artifacts.net @@ -344,6 +387,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", artifacts = names(private$proj.data$get.issue2author()) # thread IDs artifacts.net = create.empty.network(directed = directed) + igraph::vertices(artifacts, type = TYPE.ARTIFACT) + igraph::E(artifacts.net)$relation = RELATION.EDGES.INTRA.ISSUE ## store network private$artifacts.network.issue = artifacts.net @@ -352,44 +396,52 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", return(artifacts.net) }, - ## * * bipartite relation ------------------------------------------ + ## * * bipartite relations ------------------------------------------ - #' Get the key-value data for the bipartite relation, - #' which is implied by the "artifact.relation" from the network configuration. + #' Get the key-value data for the bipartite relations, + #' which are implied by the "artifact.relation" from the network configuration. 
#' - #' @return the data for the bipartite relation, with the attribute - #' 'artifact.type' denoting the artifact type for the relation - get.bipartite.relation = function() { - logging::logdebug("get.bipartite.relation: starting.") - - relation = private$network.conf$get.variable("artifact.relation") - logging::logdebug("Using bipartite relation '%s'.", relation) - - switch( - relation, - cochange = { - bip.relation = private$proj.data$get.author2artifact() - artifact.type =private$proj.data$get.project.conf.entry("artifact") - }, - callgraph = { - bip.relation = private$proj.data$get.author2artifact() - artifact.type =private$proj.data$get.project.conf.entry("artifact") - }, - mail = { - bip.relation = private$proj.data$get.author2thread() - artifact.type = "thread" - }, - issue = { - bip.relation = private$proj.data$get.author2issue() - artifact.type = "issue.id" - } - ) - - ## set artifact.type attribute - attr(bip.relation, "artifact.type") = artifact.type + #' @return the list of data for the bipartite relations, with the attributes + #' 'artifact.type' denoting the artifact type for the relations + #' and 'relation' denoting the respective network-configuration entry + get.bipartite.relations = function() { + logging::logdebug("get.bipartite.relations: starting.") + + relations = private$network.conf$get.variable("artifact.relation") + logging::logdebug("Using bipartite relations '%s'.", relations) + + bip.relations = lapply(relations, function(relation) { + ## get the actual relation and the artifact type + switch( + relation, + cochange = { + bip.relation = private$proj.data$get.author2artifact() + artifact.type =private$proj.data$get.project.conf.entry("artifact") + }, + callgraph = { + bip.relation = private$proj.data$get.author2artifact() + artifact.type =private$proj.data$get.project.conf.entry("artifact") + }, + mail = { + bip.relation = private$proj.data$get.author2thread() + artifact.type = "thread" + }, + issue = { + bip.relation = 
private$proj.data$get.author2issue() + artifact.type = "issue" + } + ) + + ## set artifact.type and relation attributes + attr(bip.relation, "artifact.type") = artifact.type + attr(bip.relation, "relation") = relation + + return (bip.relation) + }) + names(bip.relations) = relations - logging::logdebug("get.bipartite.relation: finished.") - return(bip.relation) + logging::logdebug("get.bipartite.relations: finished.") + return(bip.relations) } ), @@ -497,14 +549,19 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", logging::loginfo("Constructing author network.") ## construct network - relation = private$network.conf$get.value("author.relation") - net = switch( - relation, - cochange = private$get.author.network.cochange(), - mail = private$get.author.network.mail(), - issue = private$get.author.network.issue(), - stop(sprintf("The author relation '%s' does not exist.", relation)) - ) + relations = private$network.conf$get.value("author.relation") + networks = lapply(relations, function(relation) { + network = switch( + relation, + cochange = private$get.author.network.cochange(), + mail = private$get.author.network.mail(), + issue = private$get.author.network.issue(), + stop(sprintf("The author relation '%s' does not exist.", relation)) + ## TODO get edge and vertex data + ) + return(network) + }) + net = merge.networks(networks) ## add all missing authors to the network if wanted if (private$network.conf$get.value("author.all.authors")) { @@ -517,7 +574,10 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", if (private$network.conf$get.value("author.only.committers")) { ## authors-artifact relation authors.from.net = igraph::get.vertex.attribute(net, "name") - authors.from.artifacts = names(private$get.bipartite.relation()) + authors.from.artifacts = lapply(private$get.bipartite.relations(), function(bipartite.relation) { + return(names(bipartite.relation)) + }) + authors.from.artifacts = unlist(authors.from.artifacts) if (!is.null(authors.from.artifacts)) { net =
igraph::delete.vertices(net, setdiff(authors.from.net, authors.from.artifacts)) } @@ -542,16 +602,19 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", logging::loginfo("Constructing artifact network.") ## construct network - relation = private$network.conf$get.value("artifact.relation") - - net = switch( - relation, - cochange = private$get.artifact.network.cochange(), - callgraph = private$get.artifact.network.callgraph(), - mail = private$get.artifact.network.mail(), - issue = private$get.artifact.network.issue(), - stop(sprintf("The artifact relation '%s' does not exist.", relation)) - ) + relations = private$network.conf$get.value("artifact.relation") + networks = lapply(relations, function(relation){ + network = switch( + relation, + cochange = private$get.artifact.network.cochange(), + callgraph = private$get.artifact.network.callgraph(), + mail = private$get.artifact.network.mail(), + issue = private$get.artifact.network.issue(), + stop(sprintf("The artifact relation '%s' does not exist.", relation)) + ) + return(network) + }) + net = merge.networks(networks) ## set vertex and edge attributes for identifaction igraph::V(net)$type = TYPE.ARTIFACT @@ -570,44 +633,60 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", #' @return the bipartite network get.bipartite.network = function() { ## get data by the chosen relation - net.to.net = private$get.bipartite.relation() - artifact.type = attr(net.to.net, "artifact.type") + bipartite.relation.data = private$get.bipartite.relations() + directed = private$network.conf$get.value("author.directed") - ## extract vertices for author network - if (private$network.conf$get.value("author.all.authors")) { - authors = private$proj.data$get.authors()[[ "author.name" ]] - } else { - authors = names(net.to.net) - } + vertex.data = lapply(bipartite.relation.data, function(net.to.net) { + artifact.type = attr(net.to.net, "artifact.type") - ## construct networks from vertices: - directed = 
private$network.conf$get.value("author.directed") - ## 1) author network - authors.net = create.empty.network(directed = directed) + - igraph::vertices(authors, name = authors, type = TYPE.AUTHOR) - ## 2) artifact network - artifact.vertices = unique(unlist(lapply(net.to.net, function(df) { - return(df[["data.vertices"]]) - }))) - artifact.net = create.empty.network(directed = directed) + - igraph::vertices(artifact.vertices, name = artifact.vertices, type = TYPE.ARTIFACT, artifact.type = artifact.type) - ## 3) combine both networks and bipartite relation - u = combine.networks(authors.net, artifact.net, net.to.net, network.conf = private$network.conf) + ## extract vertices for author network + if (private$network.conf$get.value("author.all.authors")) { + author.vertices = private$proj.data$get.authors()[[ "author.name" ]] + } else { + author.vertices = names(net.to.net) + } + + artifact.vertices = unique(unlist(lapply(net.to.net, function(df) { + return(df[["data.vertices"]]) + }))) + + vertices = data.frame( + name = c(author.vertices, artifact.vertices), + type = c(rep(TYPE.AUTHOR, length(author.vertices)), rep(TYPE.ARTIFACT, length(artifact.vertices))), + artifact.type = c(rep(NA, length(author.vertices)), rep(artifact.type, length(artifact.vertices))) + ) + return(vertices) + }) + + ## Merge network data + vertex.data = merge.network.data(vertex.data = vertex.data, edge.data = NULL)[["vertices"]] + vertex.network = igraph::graph.data.frame( + d = create.empty.edge.list(), + vertices = vertex.data, + directed = directed + ) + + ## Add edges to the vertex network + network = add.edges.for.bipartite.relation( + vertex.network, + bipartite.relations = bipartite.relation.data, + private$network.conf + ) ## remove vertices that are not committers if wanted if (private$network.conf$get.value("author.only.committers")) { committers = unique(private$proj.data$get.commits()[["author.name"]]) - authors = igraph::get.vertex.attribute(u, "name", igraph::V(u)[ type == 
TYPE.AUTHOR ]) + authors = igraph::get.vertex.attribute(network, "name", igraph::V(network)[ type == TYPE.AUTHOR ]) authors.to.remove = setdiff(authors, committers) - u = igraph::delete.vertices(u, authors.to.remove) + network = igraph::delete.vertices(network, authors.to.remove) } ## add range attribute for later analysis (if available) if ("RangeData" %in% class(private$proj.data)) { - attr(u, "range") = private$proj.data$get.range() + attr(network, "range") = private$proj.data$get.range() } - return(u) + return(network) }, #' Get all networks as list. @@ -618,7 +697,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", logging::loginfo("Constructing all networks.") ## author-artifact relation - authors.to.artifacts = private$get.bipartite.relation() + authors.to.artifacts = private$get.bipartite.relations() ## bipartite network bipartite.net = self$get.bipartite.network() ## author relation @@ -653,15 +732,27 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", ## configured by 'author.only.committers', for example), thus, we need to remove any authors ## from the author--artifact relation that are superfluous authors.from.net = igraph::get.vertex.attribute(authors.net, "name") - authors.from.artifacts = names(authors.to.artifacts) - authors.to.artifacts = authors.to.artifacts[ intersect(authors.from.net, authors.from.artifacts) ] + + ## save relation and intersect the author vertices from the author network and the + ## bipartite networks + authors.to.artifacts = mapply(function(a2a.rel, relation.type) { + authors.from.artifacts = names(a2a.rel) + a2a = a2a.rel[ intersect(authors.from.net, authors.from.artifacts) ] + attr(a2a, "relation") = relation.type + return(a2a) + }, authors.to.artifacts, names(authors.to.artifacts), SIMPLIFY = FALSE) + ## unify vertices with artifacts from bipartite and artifact relation: ## when the source for artifacts is different for the bipartite relation and the artifact relation (e.g., ## "cochange" and "callgraph"), then the vertex 
names need to be unified so that all vertices are ## represented properly and, more important here, all bipartite relations can be added - artifacts.all = unique(plyr::rbind.fill(authors.to.artifacts)[[ "data.vertices" ]]) + artifacts = lapply(authors.to.artifacts, function(a2a.rel) { + return(unique(plyr::rbind.fill(a2a.rel)[[ "data.vertices" ]])) + }) + artifacts.all <- unlist(artifacts) + artifacts.from.net = igraph::get.vertex.attribute(artifacts.net, "name") - browser(expr = length(setdiff(artifacts.all, artifacts.from.net)) > 0) + # browser(expr = length(setdiff(artifacts.all, artifacts.from.net)) > 0) artifacts.net = artifacts.net + igraph::vertices(setdiff(artifacts.all, artifacts.from.net), type = TYPE.ARTIFACT) ## check directedness and adapt artifact network if needed @@ -696,10 +787,12 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", ## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / ## Network and edge construction ------------------------------------------- -#' Construct a dependency network from the given list of lists. +#' Construct a edge list from the given list of lists. #' For example for a list of authors per thread, where all authors are connected if they are #' in the same thread (sublist). #' +#' Important: The input needs to be compatible with the function \code{get.key.to.value.from.df}. +#' #' If directed the order of things in the sublist is respected and the 'edge.attr's hold the #' vector of possible edge attributes in the given list. 
#' @@ -707,10 +800,10 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", #' @param network.conf the network configuration #' @param directed whether or not the network should be directed #' -#' @return the built network -construct.network.from.list = function(list, network.conf, directed = FALSE) { +#' @return the built edge list +construct.edge.list.from.key.value.list = function(list, network.conf, directed = FALSE) { logging::loginfo("Create edges.") - logging::logdebug("construct.network.from.list: starting.") + logging::logdebug("construct.edge.list.from.key.value.list: starting.") # initialize an edge list to fill and the set of nodes nodes.processed = c() @@ -833,10 +926,33 @@ construct.network.from.list = function(list, network.conf, directed = FALSE) { } + logging::logdebug("construct.edge.list.from.key.value.list: finished.") + + return(list( + vertices = nodes.processed, + edges = edge.list + )) +} + +#' Construct a network from the given lists of vertices and edges. +#' For example for a list of authors per thread, where all authors are connected if they are +#' in the same thread (sublist). +#' +#' If directed the order of things in the sublist is respected and the 'edge.attr's hold the +#' vector of possible edge attributes in the given list. 
+#' +#' @param vertices data frame with vertex data +#' @param edge.list list of edges +#' @param network.conf the network configuration +#' @param directed whether or not the network should be directed +#' +#' @return the built network +construct.network.from.edge.list = function(vertices, edge.list, network.conf, directed = TRUE) { + logging::logdebug("construct.network.from.edge.list: starting.") logging::loginfo("Construct network from edges.") ## get unique list of vertices to produce - nodes.processed = unique(nodes.processed) + nodes.processed = unique(vertices) # if we do not have nodes AND the edge.list is empty, return rightaway if (length(nodes.processed) == 0) { @@ -858,10 +974,11 @@ construct.network.from.list = function(list, network.conf, directed = FALSE) { net = igraph::set.edge.attribute(net, "weight", value = 1) # transform multiple edges to edge weights - if (network.conf$get.value("simplify")) + if (network.conf$get.value("simplify")) { net = simplify.network(net) + } - logging::logdebug("construct.network.from.list: finished.") + logging::logdebug("construct.network.from.edge.list: finished.") return(net) } @@ -898,41 +1015,101 @@ combine.networks = function(net1, net2, net1.to.net2, network.conf) { return(u) } +#' Merges a list vertex data frame and merges a list of edge +#' data frames +#' +#' @param vertex.data the list of vertex data frames +#' @param edge.data the list of edge data frames +#' +#' @return list containing one edge data frame and one vertex data frame +merge.network.data = function(vertex.data, edge.data) { + logging::logdebug("merge.network.data: starting.") + + vertices = plyr::rbind.fill(vertex.data) + + ## select unique vertices + vertices = unique.data.frame(vertices) + edges = plyr::rbind.fill(edge.data) + + logging::logdebug("merge.network.data: finished.") + return(list( + vertices = vertices, + edges = edges + )) +} + +#' Merges a list of network to one big network +#' +#' @param networks the list of networks +#' 
+#' @return the built one network +merge.networks = function(networks) { + logging::logdebug("merge.networks: starting.") + + ## list with all vertex data frames + vertex.data = lapply(networks, function(network) { + return(igraph::as_data_frame(network, what = "vertices")) + }) + + ## list of all edge data frames + edge.data = lapply(networks, function(network) { + return(igraph::as_data_frame(network, what = "edges")) + }) + + ## merge all edge and vertex data frames + new.network.data = merge.network.data(vertex.data, edge.data) + + ## build whole network form edge and vertex data frame + whole.network = igraph::graph.data.frame( + new.network.data[["edges"]], + vertices = new.network.data[["vertices"]], + directed = igraph::is.directed(networks[[1]]) + ) + + logging::logdebug("merge.networks: finished.") + + return(whole.network) +} + #' Add vertex relations to the given network. #' #' @param net the network -#' @param net1.to.net2 the vertex relations to add to the given network +#' @param bipartite.relations the list of the vertex relations to add to the given network for +#' all configured relations #' @param network.conf the network configuration #' #' @return the adjusted network -add.edges.for.bipartite.relation = function(net, net1.to.net2, network.conf) { +add.edges.for.bipartite.relation = function(net, bipartite.relations, network.conf) { - ## construct edges (i.e., a vertex sequence with c(source, target, source, target, ...)) - vertex.sequence.for.edges = parallel::mcmapply(function(d, a.df) { - a = a.df[["data.vertices"]] - new.edges = lapply(a, function(vert) { - igraph::V(net)[d, vert] # get two vertices from source network: c(author, artifact) - }) - return(new.edges) - }, names(net1.to.net2), net1.to.net2) + ## iterate about all bipartite.relations depending on the relation type + for (net1.to.net2 in bipartite.relations) { - ## get extra edge attributes - extra.edge.attributes.df = parallel::mclapply(net1.to.net2, function(a.df) { - cols.which 
= network.conf$get.value("edge.attributes") %in% colnames(a.df) - return(a.df[, network.conf$get.value("edge.attributes")[cols.which], drop = FALSE]) - }) - extra.edge.attributes.df = plyr::rbind.fill(extra.edge.attributes.df) - extra.edge.attributes.df["weight"] = 1 # add weight + ## construct edges (i.e., a vertex sequence with c(source, target, source, target, ...)) + vertex.sequence.for.edges = parallel::mcmapply(function(d, a.df) { + a = a.df[["data.vertices"]] + new.edges = lapply(a, function(vert) { + igraph::V(net)[d, vert] # get two vertices from source network: c(author, artifact) + }) + return(new.edges) + }, names(net1.to.net2), net1.to.net2) - extra.edge.attributes = as.list(extra.edge.attributes.df) + ## get extra edge attributes + extra.edge.attributes.df = parallel::mclapply(net1.to.net2, function(a.df) { + cols.which = network.conf$get.value("edge.attributes") %in% colnames(a.df) + return(a.df[, network.conf$get.value("edge.attributes")[cols.which], drop = FALSE]) + }) + extra.edge.attributes.df = plyr::rbind.fill(extra.edge.attributes.df) + extra.edge.attributes.df["type"] = TYPE.EDGES.INTER # add egde type + extra.edge.attributes.df["weight"] = 1 # add weight + extra.edge.attributes.df["relation"] = attr(net1.to.net2, "relation") # add relation type - ## set edge type - extra.edge.attributes = c(extra.edge.attributes, list(type = TYPE.EDGES.INTER)) + extra.edge.attributes = as.list(extra.edge.attributes.df) - ## add the vertex sequences as edges to the network - new.net = igraph::add_edges(net, unlist(vertex.sequence.for.edges), attr = extra.edge.attributes) + ## add the vertex sequences as edges to the network + net = igraph::add_edges(net, unlist(vertex.sequence.for.edges), attr = extra.edge.attributes) + } - return(new.net) + return(net) } #' Create an empty network that does not break the algorithms. 
@@ -952,6 +1129,14 @@ create.empty.network = function(directed = TRUE) { return(net) } +#' Create an empty data frame that does not break the algorithms and +#' represents an edge list. +#' +#' @return the new empty data frame as edge list +create.empty.edge.list = function() { + return(data.frame(from = character(0),to = character(0))) +} + ## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / ## Network simplification -------------------------------------------------- From b55d3e84a5f9b122dacd0ee52784d930f22d1f4b Mon Sep 17 00:00:00 2001 From: Barbara Eckl Date: Tue, 17 Apr 2018 11:13:45 +0200 Subject: [PATCH 07/51] Adapt plot functions to multi-edge networks Now, the colors of the edges depend on the edge attribute 'relation'. The colors of the edges are no longer saved in an edge attribute, because that edge attribute has never been used. Signed-off-by: Barbara Eckl --- util-plot.R | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/util-plot.R b/util-plot.R index c4c16c64..5c82c79a 100644 --- a/util-plot.R +++ b/util-plot.R @@ -12,6 +12,7 @@ ## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. ## ## Copyright 2017-2018 by Claus Hunsen +## Copyright 2018 by Barbara Eckl ## All Rights Reserved.
@@ -41,18 +42,28 @@ names(PLOT.COLORS.BY.TYPE.VERTEX) = c(TYPE.AUTHOR, TYPE.ARTIFACT) PLOT.COLORS.BY.TYPE.EDGE = c("#999999", "#14CC3B") names(PLOT.COLORS.BY.TYPE.EDGE) = c(TYPE.EDGES.INTRA, TYPE.EDGES.INTER) +PLOT.COLORS.BY.RELATION.EDGE = c("#142ACC", "#0BE5E0", "#960BE5") +names(PLOT.COLORS.BY.RELATION.EDGE) = c(RELATION.EDGES.INTRA.COCHANGE, RELATION.EDGES.INTRA.ISSUE, + RELATION.EDGES.INTRA.MAIL) + + ## colors for vertices and edges (grayscale) PLOT.COLORS.BY.TYPE.VERTEX.GRAY = c("gray40", "gray30") names(PLOT.COLORS.BY.TYPE.VERTEX.GRAY) = c(TYPE.AUTHOR, TYPE.ARTIFACT) PLOT.COLORS.BY.TYPE.EDGE.GRAY = c("gray60", "gray40") names(PLOT.COLORS.BY.TYPE.EDGE.GRAY) = c(TYPE.EDGES.INTRA, TYPE.EDGES.INTER) +PLOT.COLORS.BY.RELATION.EDGE.GRAY = c("gray60", "gray62", "gray64") +names(PLOT.COLORS.BY.RELATION.EDGE.GRAY) = c(RELATION.EDGES.INTRA.COCHANGE, RELATION.EDGES.INTRA.ISSUE, + RELATION.EDGES.INTRA.MAIL) ## shapes of vertices and edges PLOT.SHAPE.VERTEX = c(16, 15) # (authors, artifacts) names(PLOT.SHAPE.VERTEX) = c(TYPE.AUTHOR, TYPE.ARTIFACT) PLOT.SHAPE.EDGE = c("dashed", "solid") # (unipartite, bipartite) names(PLOT.SHAPE.EDGE) = c(TYPE.EDGES.INTRA, TYPE.EDGES.INTER) - +PLOT.NAMES.BY.RELATION.EDGE = c("mail", "issue", "cochange") +names(PLOT.NAMES.BY.RELATION.EDGE) = c(RELATION.EDGES.INTRA.MAIL, RELATION.EDGES.INTRA.ISSUE, + RELATION.EDGES.INTRA.COCHANGE) ## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / ## Plot functions ---------------------------------------------------------- @@ -126,12 +137,14 @@ plot.get.plot.for.network = function(network, labels = TRUE, grayscale = FALSE) if (grayscale) { colors.vertex = PLOT.COLORS.BY.TYPE.VERTEX.GRAY colors.edge = PLOT.COLORS.BY.TYPE.EDGE.GRAY + colors.edge.relation = PLOT.COLORS.BY.RELATION.EDGE.GRAY colors.vertex.label = "white" } ## set colors for colored else { colors.vertex = PLOT.COLORS.BY.TYPE.VERTEX colors.edge = PLOT.COLORS.BY.TYPE.EDGE + colors.edge.relation = 
PLOT.COLORS.BY.RELATION.EDGE colors.vertex.label = "black" } @@ -147,7 +160,7 @@ plot.get.plot.for.network = function(network, labels = TRUE, grayscale = FALSE) names(PLOT.VERTEX.TYPES) = c(TYPE.AUTHOR, TYPE.ARTIFACT) ## fix the type attributes (add new ones, also named) - network = plot.fix.type.attributes(network, colors.vertex = colors.vertex, colors.edge = colors.edge) + network = plot.fix.type.attributes(network) ## set network layout if (!("layout" %in% igraph::list.graph.attributes(network))) { @@ -161,7 +174,7 @@ plot.get.plot.for.network = function(network, labels = TRUE, grayscale = FALSE) if (igraph::ecount(network) > 0) { p = p + ggraph::geom_edge_fan( - mapping = ggplot2::aes(colour = edge.type, linetype = edge.type), + mapping = ggplot2::aes(colour = relation, linetype = edge.type), end_cap = ggraph::circle(PLOT.VERTEX.SIZE + 3, "pt"), start_cap = ggraph::circle(PLOT.VERTEX.SIZE + 3, "pt"), arrow = if (igraph::is.directed(network)) { @@ -184,8 +197,8 @@ plot.get.plot.for.network = function(network, labels = TRUE, grayscale = FALSE) ggplot2::scale_color_manual("Vertices", values = colors.vertex, labels = PLOT.VERTEX.TYPES) + ## scale edges (colors and styles) - ggraph::scale_edge_linetype_manual("Relations", values = PLOT.SHAPE.EDGE) + - ggraph::scale_edge_colour_manual("Relations", values = colors.edge) + + ggraph::scale_edge_linetype_manual("Relation Types", values = PLOT.SHAPE.EDGE) + + ggraph::scale_edge_colour_manual("Relations", values = colors.edge.relation) + ## theme ggplot2::theme_light() + @@ -236,19 +249,16 @@ plot.get.plot.for.network = function(network, labels = TRUE, grayscale = FALSE) #' [default: PLOT.COLORS.BY.TYPE.VERTEX] #' @param colors.edge a vector of length 2, the entries named with the values of 'TYPE.EDGES.INTER' and 'TYPE.EDGES.INTRA' #' [default: PLOT.COLORS.BY.TYPE.EDGE] +#' @param colors.edge.relation a vector of length 3, the entries named with 'RELATION.EDGES.INTRA.MAIL', 'RELATION.EDGES.INTRA.ISSUE' +#' and 
'RELATION.COLORS.BY.RELATION.EDGE' +#' [default: PLOT.COLORS.BY.RELATION.EDGE] #' #' @return the old network with the new and changed vertex and edge attributes -plot.fix.type.attributes = function(network, colors.vertex = PLOT.COLORS.BY.TYPE.VERTEX, colors.edge = PLOT.COLORS.BY.TYPE.EDGE) { +plot.fix.type.attributes = function(network) { ## copy type attribute to vertex.type and edge.type network = igraph::set.vertex.attribute(network, "vertex.type", value = igraph::get.vertex.attribute(network, "type")) network = igraph::set.edge.attribute(network, "edge.type", value = igraph::get.edge.attribute(network, "type")) - ## add edge and vertex colors - network = igraph::set.vertex.attribute(network, "vertex.color", - value = colors.vertex[ igraph::get.vertex.attribute(network, "vertex.type") ]) - network = igraph::set.edge.attribute(network, "edge.color", - value = colors.edge[ igraph::get.edge.attribute(network, "edge.type") ]) - ## adjust 'type' attribute for vertices for bipartite plotting (we need Booleans there) types = igraph::get.vertex.attribute(network, "type") network = igraph::remove.vertex.attribute(network, "type") From f190ca130a15a82e5eed836e9ffc53b8a34aac20 Mon Sep 17 00:00:00 2001 From: Barbara Eckl Date: Tue, 17 Apr 2018 14:57:10 +0200 Subject: [PATCH 08/51] Use color palette 'viridis' for plotting for better flexibility To get rid of the global variables in the plot module, we now use the color palette 'viridis' to colorize vertices and edges. The setting of the 'relation' attribute is changed accordingly. 
Signed-off-by: Claus Hunsen Signed-off-by: Barbara Eckl --- util-networks.R | 21 +++++------ util-plot.R | 94 ++++++++++++++----------------------------------- 2 files changed, 35 insertions(+), 80 deletions(-) diff --git a/util-networks.R b/util-networks.R index 8d75a757..6b94628a 100644 --- a/util-networks.R +++ b/util-networks.R @@ -40,11 +40,6 @@ TYPE.ARTIFACT = "Artifact" TYPE.EDGES.INTRA = "Unipartite" TYPE.EDGES.INTER = "Bipartite" -## intra edge types -RELATION.EDGES.INTRA.MAIL = "mail" -RELATION.EDGES.INTRA.ISSUE = "issue" -RELATION.EDGES.INTRA.COCHANGE = "cochange" - ## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / ## Edge-attribute handling during simplification --------------------------- @@ -141,7 +136,6 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", ) igraph::E(author.net)$type = TYPE.EDGES.INTRA - igraph::E(author.net)$relation = RELATION.EDGES.INTRA.COCHANGE ## store network private$authors.network.cochange = author.net @@ -176,7 +170,6 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", directed = private$network.conf$get.value("author.directed") ) igraph::E(author.relation)$type = TYPE.EDGES.INTRA - igraph::E(author.relation)$relation = RELATION.EDGES.INTRA.MAIL ## store network private$authors.network.mail = author.relation @@ -207,7 +200,6 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", ) igraph::E(author.relation)$type = TYPE.EDGES.INTRA - igraph::E(author.relation)$relation = RELATION.EDGES.INTRA.ISSUE private$authors.network.issue = author.relation logging::logdebug("get.author.network.issue: finished.") @@ -243,8 +235,6 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", directed = FALSE ) - igraph::E(artifacts.net)$relation = RELATION.EDGES.INTRA.COCHANGE - ## store network private$artifacts.network.cochange = artifacts.net logging::logdebug("get.artifact.network.cochange: finished.") @@ -321,7 +311,6 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", artifacts.net = igraph::set.vertex.attribute(artifacts.net, 
"id", value = names) ## store network - igraph::E(artifacts.net)$relation = RELATION.EDGES.INTRA.COCHANGE private$artifacts.network.callgraph = artifacts.net logging::logdebug("get.artifact.network.callgraph: finished.") @@ -353,7 +342,6 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", artifacts = names(private$proj.data$get.thread2author()) # thread IDs artifacts.net = create.empty.network(directed = directed) + igraph::vertices(artifacts, type = TYPE.ARTIFACT) - igraph::E(artifacts.net)$relation = RELATION.EDGES.INTRA.MAIL ## store network private$artifacts.network.mail = artifacts.net @@ -387,7 +375,6 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", artifacts = names(private$proj.data$get.issue2author()) # thread IDs artifacts.net = create.empty.network(directed = directed) + igraph::vertices(artifacts, type = TYPE.ARTIFACT) - igraph::E(artifacts.net)$relation = RELATION.EDGES.INTRA.ISSUE ## store network private$artifacts.network.issue = artifacts.net @@ -559,6 +546,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", stop(sprintf("The author relation '%s' does not exist.", rel)) ## TODO get edge and vertex data ) + igraph::E(network)$relation = relation return(network) }) net = merge.networks(networks) @@ -612,6 +600,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", issue = private$get.artifact.network.issue(), stop(sprintf("The artifact relation '%s' does not exist.", relation)) ) + igraph::E(network)$relation = relation return(network) }) net = merge.networks(networks) @@ -1143,6 +1132,9 @@ create.empty.edge.list = function() { #' Simplify a given network. #' +#' Important notice: This function only correctly works for networks with only one artifact and one +#' author relation. +#' #' @param network the given network #' #' @return the simplified network @@ -1159,6 +1151,9 @@ simplify.network = function(network) { #' Simplify a list of networks. 
#' +#' Important notice: This function only correctly works for networks with only one artifact and one +#' author relation. +#' #' @param networks the list of networks #' #' @return the simplified networks diff --git a/util-plot.R b/util-plot.R index 5c82c79a..66105f8c 100644 --- a/util-plot.R +++ b/util-plot.R @@ -36,34 +36,9 @@ PLOT.VERTEX.TYPE.ARTIFACT = TYPE.ARTIFACT # "Artifact" PLOT.VERTEX.SIZE = 10 PLOT.VERTEX.SIZE.LEGEND = PLOT.VERTEX.SIZE / 2 -## colors for vertices and edges (colored) -PLOT.COLORS.BY.TYPE.VERTEX = c("#00AEFF", "#FF8B00") -names(PLOT.COLORS.BY.TYPE.VERTEX) = c(TYPE.AUTHOR, TYPE.ARTIFACT) -PLOT.COLORS.BY.TYPE.EDGE = c("#999999", "#14CC3B") -names(PLOT.COLORS.BY.TYPE.EDGE) = c(TYPE.EDGES.INTRA, TYPE.EDGES.INTER) +## vertex-label color +PLOT.VERTEX.LABEL.COLOR = "gray55" -PLOT.COLORS.BY.RELATION.EDGE = c("#142ACC", "#0BE5E0", "#960BE5") -names(PLOT.COLORS.BY.RELATION.EDGE) = c(RELATION.EDGES.INTRA.COCHANGE, RELATION.EDGES.INTRA.ISSUE, - RELATION.EDGES.INTRA.MAIL) - - -## colors for vertices and edges (grayscale) -PLOT.COLORS.BY.TYPE.VERTEX.GRAY = c("gray40", "gray30") -names(PLOT.COLORS.BY.TYPE.VERTEX.GRAY) = c(TYPE.AUTHOR, TYPE.ARTIFACT) -PLOT.COLORS.BY.TYPE.EDGE.GRAY = c("gray60", "gray40") -names(PLOT.COLORS.BY.TYPE.EDGE.GRAY) = c(TYPE.EDGES.INTRA, TYPE.EDGES.INTER) -PLOT.COLORS.BY.RELATION.EDGE.GRAY = c("gray60", "gray62", "gray64") -names(PLOT.COLORS.BY.RELATION.EDGE.GRAY) = c(RELATION.EDGES.INTRA.COCHANGE, RELATION.EDGES.INTRA.ISSUE, - RELATION.EDGES.INTRA.MAIL) - -## shapes of vertices and edges -PLOT.SHAPE.VERTEX = c(16, 15) # (authors, artifacts) -names(PLOT.SHAPE.VERTEX) = c(TYPE.AUTHOR, TYPE.ARTIFACT) -PLOT.SHAPE.EDGE = c("dashed", "solid") # (unipartite, bipartite) -names(PLOT.SHAPE.EDGE) = c(TYPE.EDGES.INTRA, TYPE.EDGES.INTER) -PLOT.NAMES.BY.RELATION.EDGE = c("mail", "issue", "cochange") -names(PLOT.NAMES.BY.RELATION.EDGE) = c(RELATION.EDGES.INTRA.MAIL, RELATION.EDGES.INTRA.ISSUE, - RELATION.EDGES.INTRA.COCHANGE) ## / / / / / / 
/ / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / ## Plot functions ---------------------------------------------------------- @@ -81,14 +56,12 @@ names(PLOT.NAMES.BY.RELATION.EDGE) = c(RELATION.EDGES.INTRA.MAIL, RELATION.EDGES #' #' @param network the network to plot and print #' @param labels logical indicating whether vertex lables should be plotted [default: TRUE] -#' @param grayscale logical indicating whether the plot is to be in grayscale, by default, it is colored -#' [default: FALSE] #' #' @return the network (invisibly) #' #' @aliases plot.print.network -plot.network = function(network, labels = TRUE, grayscale = FALSE) { - plot.print.network(network, labels = labels, grayscale = grayscale) +plot.network = function(network, labels = TRUE) { + plot.print.network(network, labels = labels) } #' Construct a ggplot2/ggraph plot object for the given network and print it directly. @@ -104,14 +77,12 @@ plot.network = function(network, labels = TRUE, grayscale = FALSE) { #' #' @param network the network to plot and print #' @param labels logical indicating whether vertex lables should be plotted [default: TRUE] -#' @param grayscale logical indicating whether the plot is to be in grayscale, by default, it is colored -#' [default: FALSE] #' #' @return the network (invisibly) #' #' @aliases plot.network -plot.print.network = function(network, labels = TRUE, grayscale = FALSE) { - p = plot.get.plot.for.network(network, labels = labels, grayscale = grayscale) +plot.print.network = function(network, labels = TRUE) { + p = plot.get.plot.for.network(network, labels = labels) print(p) } @@ -128,26 +99,9 @@ plot.print.network = function(network, labels = TRUE, grayscale = FALSE) { #' #' @param network the network to plot #' @param labels logical indicating whether vertex lables should be plotted [default: TRUE] -#' @param grayscale logical indicating whether the plot is to be in grayscale, by default, it is colored -#' [default: FALSE] #' #' @return a 
ggplot2/ggraph plot object -plot.get.plot.for.network = function(network, labels = TRUE, grayscale = FALSE) { - ## set colors for grayscale - if (grayscale) { - colors.vertex = PLOT.COLORS.BY.TYPE.VERTEX.GRAY - colors.edge = PLOT.COLORS.BY.TYPE.EDGE.GRAY - colors.edge.relation = PLOT.COLORS.BY.RELATION.EDGE.GRAY - colors.vertex.label = "white" - } - ## set colors for colored - else { - colors.vertex = PLOT.COLORS.BY.TYPE.VERTEX - colors.edge = PLOT.COLORS.BY.TYPE.EDGE - colors.edge.relation = PLOT.COLORS.BY.RELATION.EDGE - colors.vertex.label = "black" - } - +plot.get.plot.for.network = function(network, labels = TRUE) { ## check if network is empty if (igraph::vcount(network) == 0) { network = create.empty.network(directed = igraph::is.directed(network)) + @@ -175,6 +129,7 @@ plot.get.plot.for.network = function(network, labels = TRUE, grayscale = FALSE) p = p + ggraph::geom_edge_fan( mapping = ggplot2::aes(colour = relation, linetype = edge.type), + width = 0.5, end_cap = ggraph::circle(PLOT.VERTEX.SIZE + 3, "pt"), start_cap = ggraph::circle(PLOT.VERTEX.SIZE + 3, "pt"), arrow = if (igraph::is.directed(network)) { @@ -190,15 +145,29 @@ plot.get.plot.for.network = function(network, labels = TRUE, grayscale = FALSE) ## plot vertices ggraph::geom_node_point(ggplot2::aes(color = vertex.type, shape = vertex.type), size = PLOT.VERTEX.SIZE) + - ggraph::geom_node_text(ggplot2::aes(label = if (labels) name else c("")), size = 3.5, color = colors.vertex.label) + + ggraph::geom_node_text(ggplot2::aes(label = if (labels) name else c("")), size = 3.5, color = PLOT.VERTEX.LABEL.COLOR) + + ## scale vertices (colors and styles) - ggplot2::scale_shape_manual("Vertices", values = PLOT.SHAPE.VERTEX, labels = PLOT.VERTEX.TYPES) + - ggplot2::scale_color_manual("Vertices", values = colors.vertex, labels = PLOT.VERTEX.TYPES) + + ggplot2::scale_shape_discrete(name = "Vertices", solid = TRUE) + + viridis::scale_color_viridis(name = "Vertices", option = "plasma", discrete = TRUE, + end = 
0.8, begin = 0.40) + ## scale edges (colors and styles) - ggraph::scale_edge_linetype_manual("Relation Types", values = PLOT.SHAPE.EDGE) + - ggraph::scale_edge_colour_manual("Relations", values = colors.edge.relation) + + ggraph::scale_edge_linetype(name = "Relations") + + ggplot2::discrete_scale(name = "Relations", "edge_colour", "viridis", + viridis::viridis_pal(option = "viridis", end = 0.8, begin = 0.25)) + + ## BROKEN RIGHT NOW due to bug in scale_edge_colour_viridis(): + # ggraph::scale_edge_colour_viridis(name = "Relations", option = "magma", discrete = TRUE, + # end = 0.85, begin = 0, direction = 1) + + + # ## scale vertices (colors and styles) + # ggplot2::scale_shape_manual("Vertices", values = PLOT.SHAPE.VERTEX, labels = PLOT.VERTEX.TYPES) + + # ggplot2::scale_color_manual("Vertices", values = colors.vertex, labels = PLOT.VERTEX.TYPES) + + # + # ## scale edges (colors and styles) + # ggraph::scale_edge_linetype_manual("Relation Types", values = PLOT.SHAPE.EDGE) + + # ggraph::scale_edge_colour_manual("Relations", values = colors.edge.relation) + ## theme ggplot2::theme_light() + @@ -239,19 +208,10 @@ plot.get.plot.for.network = function(network, labels = TRUE, grayscale = FALSE) #' - TYPE.ARTIFACT = TRUE. #' #' Furthermore, the following attributes are added to either vertices or edges: -#' - vertex.color = a color code (hex or else) for coloring the vertices (see 'colors.vertex' parameter), -#' - edge.color = a color code (hex or else) for coloring the edges (see 'colors.edge' parameter), #' - vertex.type = a copy of the old vertex attribute 'type', and #' - edge.type = a copy of the old edge attribute 'type'. 
#' #' @param network the igraph object to augment -#' @param colors.vertex a vector of length 2, the entries named with the values of 'TYPE.AUTHOR' and 'TYPE.ARTIFACT' -#' [default: PLOT.COLORS.BY.TYPE.VERTEX] -#' @param colors.edge a vector of length 2, the entries named with the values of 'TYPE.EDGES.INTER' and 'TYPE.EDGES.INTRA' -#' [default: PLOT.COLORS.BY.TYPE.EDGE] -#' @param colors.edge.relation a vector of length 3, the entries named with 'RELATION.EDGES.INTRA.MAIL', 'RELATION.EDGES.INTRA.ISSUE' -#' and 'RELATION.COLORS.BY.RELATION.EDGE' -#' [default: PLOT.COLORS.BY.RELATION.EDGE] #' #' @return the old network with the new and changed vertex and edge attributes plot.fix.type.attributes = function(network) { From 7c628fb93eb21f280c7d9da66680f817e107fa24 Mon Sep 17 00:00:00 2001 From: Barbara Eckl Date: Tue, 17 Apr 2018 18:28:21 +0200 Subject: [PATCH 09/51] Introduce the vertex attribute 'kind' Now, the different kinds of artifacts (Mail, Issue, File, Function, Feature, Featureexpression, Author) can be plotted with different colors. For this functionality, a new vertex attribute 'kind' is necessary, which includes the information of the attribute 'artifact.type' or 'Author' (for author vertices). The new vertex attribute is respected as vertex color during plotting. 
Signed-off-by: Barbara Eckl Signed-off-by: Claus Hunsen --- util-networks.R | 32 +++++++++++++++++++++++++------- util-plot.R | 6 +++--- util-read.R | 6 ++++++ 3 files changed, 34 insertions(+), 10 deletions(-) diff --git a/util-networks.R b/util-networks.R index 6b94628a..d38dcff6 100644 --- a/util-networks.R +++ b/util-networks.R @@ -403,11 +403,11 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", relation, cochange = { bip.relation = private$proj.data$get.author2artifact() - artifact.type =private$proj.data$get.project.conf.entry("artifact") + artifact.type = private$proj.data$get.project.conf.entry("artifact") }, callgraph = { bip.relation = private$proj.data$get.author2artifact() - artifact.type =private$proj.data$get.project.conf.entry("artifact") + artifact.type = private$proj.data$get.project.conf.entry("artifact") }, mail = { bip.relation = private$proj.data$get.author2thread() @@ -547,6 +547,9 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", ## TODO get edge and vertex data ) igraph::E(network)$relation = relation + + # the kind is in this case the same as the type attribute + igraph::V(network)$kind = TYPE.AUTHOR return(network) }) net = merge.networks(networks) @@ -591,7 +594,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", ## construct network relations = private$network.conf$get.value("artifact.relation") - networks = lapply(relations, function(relation){ + networks = lapply(relations, function(relation) { network = switch( relation, cochange = private$get.artifact.network.cochange(), @@ -601,6 +604,13 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", stop(sprintf("The artifact relation '%s' does not exist.", relation)) ) igraph::E(network)$relation = relation + + ## for cochange, we use the exact artifact type as vertex attribute 'kind' + if (relation == "cochange") { + igraph::V(network)$kind = private$proj.data$get.project.conf.entry("artifact") + } else { + igraph::V(network)$kind = relation + } return(network) }) net = 
merge.networks(networks) @@ -642,7 +652,8 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", vertices = data.frame( name = c(author.vertices, artifact.vertices), type = c(rep(TYPE.AUTHOR, length(author.vertices)), rep(TYPE.ARTIFACT, length(artifact.vertices))), - artifact.type = c(rep(NA, length(author.vertices)), rep(artifact.type, length(artifact.vertices))) + artifact.type = c(rep(NA, length(author.vertices)), rep(artifact.type, length(artifact.vertices))), + kind = c(rep(TYPE.AUTHOR, length(author.vertices)), rep(artifact.type , length(artifact.vertices))) ) return(vertices) }) @@ -714,6 +725,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", networks = self$get.networks() authors.to.artifacts = networks[["authors.to.artifacts"]] authors.net = networks[["authors.net"]] + igraph::V(authors.net)$kind = TYPE.AUTHOR artifacts.net = networks[["artifacts.net"]] ## remove authors from author-artifact relation, if needed: @@ -736,13 +748,19 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", ## "cochange" and "callgraph"), then the vertex names need to be unified so that all vertices are ## represented properly and, more important here, all bipartite relations can be added artifacts = lapply(authors.to.artifacts, function(a2a.rel) { - return(unique(plyr::rbind.fill(a2a.rel)[[ "data.vertices" ]])) + artifact.list = plyr::rbind.fill(a2a.rel)[ c("data.vertices", "artifact.type") ] + artifact.list = unique(artifact.list) + return(artifact.list) }) - artifacts.all <- unlist(artifacts) + artifacts.all = plyr::rbind.fill(artifacts) artifacts.from.net = igraph::get.vertex.attribute(artifacts.net, "name") # browser(expr = length(setdiff(artifacts.all, artifacts.from.net)) > 0) - artifacts.net = artifacts.net + igraph::vertices(setdiff(artifacts.all, artifacts.from.net), type = TYPE.ARTIFACT) + artifacts.to.add = setdiff(artifacts.all[["data.vertices"]], artifacts.from.net) + artifacts.to.add.kind = artifacts.all[ artifacts.all[["data.vertices"]] %in% artifacts.to.add, 
"artifact.type" ] + artifacts.net = artifacts.net + igraph::vertices(artifacts.to.add, type = TYPE.ARTIFACT, + artifact.type = artifacts.to.add.kind, + kind = artifacts.to.add.kind) ## check directedness and adapt artifact network if needed if (igraph::is.directed(authors.net) && !igraph::is.directed(artifacts.net)) { diff --git a/util-plot.R b/util-plot.R index 66105f8c..b9b77432 100644 --- a/util-plot.R +++ b/util-plot.R @@ -37,7 +37,7 @@ PLOT.VERTEX.SIZE = 10 PLOT.VERTEX.SIZE.LEGEND = PLOT.VERTEX.SIZE / 2 ## vertex-label color -PLOT.VERTEX.LABEL.COLOR = "gray55" +PLOT.VERTEX.LABEL.COLOR = "gray60" ## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / @@ -144,14 +144,14 @@ plot.get.plot.for.network = function(network, labels = TRUE) { p = p + ## plot vertices - ggraph::geom_node_point(ggplot2::aes(color = vertex.type, shape = vertex.type), size = PLOT.VERTEX.SIZE) + + ggraph::geom_node_point(ggplot2::aes(color = kind, shape = vertex.type), size = PLOT.VERTEX.SIZE) + ggraph::geom_node_text(ggplot2::aes(label = if (labels) name else c("")), size = 3.5, color = PLOT.VERTEX.LABEL.COLOR) + ## scale vertices (colors and styles) ggplot2::scale_shape_discrete(name = "Vertices", solid = TRUE) + viridis::scale_color_viridis(name = "Vertices", option = "plasma", discrete = TRUE, - end = 0.8, begin = 0.40) + + end = 0.8, begin = 0.05) + ## scale edges (colors and styles) ggraph::scale_edge_linetype(name = "Relations") + diff --git a/util-read.R b/util-read.R index 4e62df73..29082793 100644 --- a/util-read.R +++ b/util-read.R @@ -216,6 +216,9 @@ read.mails = function(data.path) { ## set pattern for thread ID for better recognition mail.data[["thread"]] = sprintf("", mail.data[["thread"]]) + ## set proper artifact type for proper vertex attribute 'artifact.type' + mail.data["artifact.type"] = "Mail" + ## remove mails without a proper date as they mess up directed mail-based networks ## this basically only applies for project-level analysis empty.dates = 
which(mail.data[["date"]] == "" | is.na(mail.data[["date"]])) @@ -381,6 +384,9 @@ read.issues = function(data.path) { ## set pattern for issue ID for better recognition issue.data[["issue.id"]] = sprintf("", issue.data[["issue.id"]]) + ## set proper artifact type for proper vertex attribute 'artifact.type' + issue.data["artifact.type"] = "Issue" + ## convert 'is.pull.request' column to logicals issue.data[["is.pull.request"]] = as.logical(issue.data[["is.pull.request"]]) From 021ac8b88e9a181364a51e89807df55cb741ed44 Mon Sep 17 00:00:00 2001 From: Barbara Eckl Date: Wed, 18 Apr 2018 09:47:45 +0200 Subject: [PATCH 10/51] Change the 'simplify.network' function to handle multi-edge networks We would respect the edge attribute 'relation' while simplifying a network, therefore, the new algorithm for simplification is now the following: First, the networks are split depending on the relation type. Then, they are simplified and, in a last step, the networks are merged to one simplified network which have parallel edges if these have different relation types. Also, this works on networks without the 'relation' attribute! Signed-off-by: Barbara Eckl Signed-off-by: Claus Hunsen --- util-networks.R | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/util-networks.R b/util-networks.R index d38dcff6..ce37b21f 100644 --- a/util-networks.R +++ b/util-networks.R @@ -49,7 +49,7 @@ EDGE.ATTR.HANDLING = list( ## network-analytic data weight = "sum", type = "first", - relation = "concat", + relation = "first", ## commit data changed.files = "sum", @@ -128,6 +128,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", network.conf = private$network.conf, directed = private$network.conf$get.value("author.directed") ) + author.net = construct.network.from.edge.list( author.net.data[["vertices"]], author.net.data[["edges"]], @@ -1150,9 +1151,6 @@ create.empty.edge.list = function() { #' Simplify a given network. 
#' -#' Important notice: This function only correctly works for networks with only one artifact and one -#' author relation. -#' #' @param network the given network #' #' @return the simplified network @@ -1160,8 +1158,29 @@ simplify.network = function(network) { logging::logdebug("simplify.network: starting.") logging::loginfo("Simplifying network.") - ## simplify networks (contract edges and remove loops) - network = igraph::simplify(network, edge.attr.comb = EDGE.ATTR.HANDLING, remove.loops = TRUE) + ## data frame of the network + edge.data = igraph::as_data_frame(network, what = "edges") + vertex.data = igraph::as_data_frame(network, what = "vertices") + + ## select edges of one relation, build the network and simplify this network + if ("relation" %in% colnames(edge.data)) { + networks = lapply(unique(edge.data[["relation"]]), + function(relation) { + network.data = edge.data[edge.data[["relation"]] == relation, ] + net = igraph::graph_from_data_frame(d=network.data, + vertices = vertex.data, + directed = igraph::is.directed(network)) + + ## simplify networks (contract edges and remove loops) + net = igraph::simplify(net, edge.attr.comb = EDGE.ATTR.HANDLING, remove.loops = TRUE) + return(net) + }) + + ## merge the simplified networks + network = merge.networks(networks) + } else { + network = igraph::simplify(network, edge.attr.comb = EDGE.ATTR.HANDLING, remove.loops = TRUE) + } logging::logdebug("simplify.network: finished.") return(network) From 784c417c50eb1de5d0143908a390ead6ba22dbbf Mon Sep 17 00:00:00 2001 From: Barbara Eckl Date: Wed, 18 Apr 2018 15:08:04 +0200 Subject: [PATCH 11/51] Include 'relation' and 'kind' in the expected data of the test suite Signed-off-by: Barbara Eckl --- tests/test-data-cut.R | 3 +- tests/test-networks-artifact.R | 6 ++-- tests/test-networks-author.R | 47 ++++++++++++------------ tests/test-networks-bipartite.R | 64 +++++++++++++++++++++++---------- tests/test-networks-cut.R | 3 +- tests/test-networks-multi.R | 4 ++- 
tests/test-read.R | 40 ++++++++++++++++----- 7 files changed, 111 insertions(+), 56 deletions(-) diff --git a/tests/test-data-cut.R b/tests/test-data-cut.R index b65d7d00..cfcfd244 100644 --- a/tests/test-data-cut.R +++ b/tests/test-data-cut.R @@ -68,7 +68,8 @@ test_that("Cut commit and mail data to same date range.", { date = get.date.from.string("2016-07-12 16:04:40"), date.offset = as.integer(c(100)), subject = c("Re: Fw: busybox 2 tab"), - thread = sprintf("", c(9))) + thread = sprintf("", c(9)), + artifact.type = "Mail") commit.data = x.data$get.data.cut.to.same.date(data.sources = data.sources)$get.commits() rownames(commit.data) = 1:nrow(commit.data) diff --git a/tests/test-networks-artifact.R b/tests/test-networks-artifact.R index 5ada97aa..e2fdfe4f 100644 --- a/tests/test-networks-artifact.R +++ b/tests/test-networks-artifact.R @@ -47,7 +47,10 @@ test_that("Network construction of the undirected artifact-cochange network", { network.built = network.builder$get.artifact.network() ## vertex attributes - vertices = c("Base_Feature", "foo", "A") + vertices = data.frame(name=c("Base_Feature", "foo", "A"), + id=c("Base_Feature", "foo", "A"), + kind="feature", + type = TYPE.ARTIFACT) data = data.frame(from = c("Base_Feature", "Base_Feature"), to = c("foo", "foo"), @@ -63,7 +66,6 @@ test_that("Network construction of the undirected artifact-cochange network", { ## build expected network network.expected = igraph::graph.data.frame(data, directed = FALSE, vertices = vertices) network.expected = igraph::set.vertex.attribute(network.expected, "id", value = igraph::get.vertex.attribute(network.expected, "name")) - igraph::V(network.expected)$type = TYPE.ARTIFACT expect_true(igraph::identical_graphs(network.built, network.expected)) }) diff --git a/tests/test-networks-author.R b/tests/test-networks-author.R index 65d8431e..df9b8c5e 100644 --- a/tests/test-networks-author.R +++ b/tests/test-networks-author.R @@ -208,7 +208,10 @@ test_that("Network construction of the 
undirected author-cochange network", { network.built = network.builder$get.author.network() ## vertex attributes - authors = c("Björn", "Olaf", "Karl", "Thomas") + authors = data.frame(name= c("Björn", "Olaf", "Karl", "Thomas"), + id = c("Björn", "Olaf", "Karl", "Thomas"), + kind= TYPE.AUTHOR, + type = TYPE.AUTHOR) ## edge attributes data = data.frame(comb.1. = c("Björn", "Björn", "Olaf", "Olaf", "Olaf", "Olaf", "Karl", "Karl"), @@ -229,7 +232,6 @@ test_that("Network construction of the undirected author-cochange network", { ## build expected network network.expected = igraph::graph.data.frame(data, directed = FALSE, vertices = authors) network.expected = igraph::set.vertex.attribute(network.expected, "id", value = igraph::get.vertex.attribute(network.expected, "name")) - igraph::V(network.expected)$type = TYPE.AUTHOR expect_true(igraph::identical_graphs(network.built, network.expected)) }) @@ -250,7 +252,10 @@ test_that("Network construction of the directed author-cochange network", { network.built = network.builder$get.author.network() ## vertex attributes - authors = c("Björn", "Olaf", "Karl", "Thomas") + authors = data.frame(name = c("Björn", "Olaf", "Karl", "Thomas"), + id = c("Björn", "Olaf", "Karl", "Thomas"), + kind = TYPE.AUTHOR, + type = TYPE.AUTHOR) ## edge attributes data = data.frame(from = c("Olaf", "Karl", "Thomas", "Thomas"), @@ -270,7 +275,7 @@ test_that("Network construction of the directed author-cochange network", { ## build expected network network.expected = igraph::graph.data.frame(data, directed = TRUE, vertices = authors) network.expected = igraph::set.vertex.attribute(network.expected, "id", value = igraph::get.vertex.attribute(network.expected, "name")) - igraph::V(network.expected)$type = TYPE.AUTHOR + expect_true(igraph::identical_graphs(network.built, network.expected)) }) @@ -291,7 +296,10 @@ test_that("Network construction of the undirected simplified author-cochange net network.built = network.builder$get.author.network() ## vertex 
attributes - authors = c("Björn", "Olaf", "Karl", "Thomas") + authors = data.frame(name = c("Björn", "Olaf", "Karl", "Thomas"), + id = c("Björn", "Olaf", "Karl", "Thomas"), + kind = TYPE.AUTHOR, + type = TYPE.AUTHOR) ## edge attributes data = data.frame(from = c("Björn", "Olaf", "Olaf", "Karl"), @@ -311,7 +319,6 @@ test_that("Network construction of the undirected simplified author-cochange net ## build expected network network.expected = igraph::graph.data.frame(data, directed = FALSE, vertices = authors) network.expected = igraph::set.vertex.attribute(network.expected, "id", value = igraph::get.vertex.attribute(network.expected, "name")) - igraph::V(network.expected)$type = TYPE.AUTHOR expect_true(igraph::identical_graphs(network.built, network.expected)) }) @@ -335,6 +342,7 @@ test_that("Network construction of the undirected author-issue network with all vertices = data.frame(name = c("Karl", "Olaf", "Thomas", "udo", "Björn", "Max"), id = c("Karl", "Olaf", "Thomas", "udo", "Björn", "Max"), + kind = TYPE.AUTHOR, type = TYPE.AUTHOR) edges = data.frame(from = c("Karl", "Karl", "Karl", "Karl", "Karl", "Karl", "Karl", "Karl", "Karl", "Karl", "Karl", @@ -372,6 +380,7 @@ test_that("Network construction of the undirected author-issue network with all "2016-10-13 15:33:56", "2016-12-06 14:03:42", "2016-12-07 15:37:02", "2016-12-07 15:37:02", "2016-12-07 15:37:21", "2016-12-07 15:53:02", "2016-12-07 15:53:02", "2017-02-20 22:25:41", "2017-03-02 17:30:10", "2017-05-23 12:32:21", "2017-05-23 12:32:21", "2017-05-23 12:32:39" )), + artifact.type = "Issue", issue.id = c( "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", @@ -397,21 +406,9 @@ test_that("Network construction of the undirected author-issue network with all "closed", "commented", "created", "renamed", "commented", "commented", "commented", "commented", "commented", "merged", "closed", "commented", "commented", "created", "commented", "commented", "merged", "closed", "commented" ), - 
weight = c( 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1 ), + weight = 1, type = TYPE.EDGES.INTRA, - relation = c("issue", "issue", "issue", "issue", "issue", "issue", "issue", "issue", "issue", - "issue", "issue", "issue", "issue", "issue", "issue", "issue", "issue", "issue", - "issue", "issue", "issue", "issue", "issue", "issue", "issue", "issue", "issue", - "issue", "issue", "issue", "issue", "issue", "issue", "issue", "issue", "issue", - "issue", "issue", "issue", "issue", "issue", "issue", "issue", "issue", "issue", - "issue", "issue", "issue", "issue", "issue", "issue", "issue", "issue", "issue", - "issue", "issue", "issue", "issue", "issue", "issue", "issue", "issue", "issue", - "issue", "issue", "issue", "issue", "issue", "issue", "issue", "issue", "issue", - "issue", "issue", "issue", "issue") + relation = "issue" ) network.expected = igraph::graph.data.frame(edges, directed = FALSE, vertices = vertices) @@ -436,6 +433,7 @@ test_that("Network construction of the undirected author-issue network with just vertices = data.frame(name = c("Karl", "Thomas", "Björn", "Olaf", "Max"), id = c("Karl", "Thomas", "Björn", "Olaf", "Max"), + kind = TYPE.AUTHOR, type = TYPE.AUTHOR) edges = data.frame(from = c( "Thomas", "Thomas", "Thomas", "Thomas", "Thomas", "Thomas", "Björn", "Björn", "Björn", @@ -448,15 +446,14 @@ test_that("Network construction of the undirected author-issue network with just "2016-08-31 18:21:48", "2016-10-05 01:07:46", "2016-10-13 15:33:56", "2016-12-06 14:03:42", "2016-12-07 15:37:21", "2016-12-07 15:53:02", "2017-02-20 22:25:41", "2017-03-02 17:30:10", "2017-05-23 12:32:39" )), + artifact.type = "Issue", issue.id = c( "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "" ), - event.name = c( "commented", "commented", "commented", "commented", 
"commented", "commented", - "commented", "commented", "commented", "commented", "commented", "commented", - "commented", "commented", "commented", "commented", "commented", "commented" ), - weight = c(1), + event.name = "commented", + weight = 1, type = TYPE.EDGES.INTRA, - relation = c("issue")) + relation = "issue") network.expected = igraph::graph.data.frame(edges, directed = FALSE, vertices = vertices) diff --git a/tests/test-networks-bipartite.R b/tests/test-networks-bipartite.R index 85344a4c..48afadb9 100644 --- a/tests/test-networks-bipartite.R +++ b/tests/test-networks-bipartite.R @@ -50,12 +50,15 @@ test_that("Construction of the bipartite network for the feature artifact with a ## 1) construct expected vertices authors = data.frame( name = c("Björn", "Karl", "Olaf", "Thomas"), - type = TYPE.AUTHOR + type = TYPE.AUTHOR, + artifact.type = NA, + kind = TYPE.AUTHOR ) artifacts = data.frame( name = c("A", "Base_Feature", "foo"), type = TYPE.ARTIFACT, - artifact.type = "feature" + artifact.type = "feature", + kind = "feature" ) vertices = plyr::rbind.fill(authors, artifacts) ## 2) construct expected edge attributes @@ -101,12 +104,15 @@ test_that("Construction of the bipartite network for the file artifact with auth ## 1) construct expected vertices authors = data.frame( name = c("Björn", "Olaf", "Thomas"), - type = TYPE.AUTHOR + type = TYPE.AUTHOR, + artifact.type = NA, + kind = TYPE.AUTHOR ) artifacts = data.frame( name = c("test.c", "test2.c"), type = TYPE.ARTIFACT, - artifact.type = "file" + artifact.type = "file", + kind = "file" ) vertices = plyr::rbind.fill(authors, artifacts) ## 2) construct expected edge attributes @@ -151,12 +157,15 @@ test_that("Construction of the bipartite network for the function artifact with ## 1) construct expected vertices authors = data.frame( name = c("Björn", "Olaf", "Thomas"), - type = TYPE.AUTHOR + type = TYPE.AUTHOR, + artifact.type = NA, + kind = TYPE.AUTHOR ) artifacts = data.frame( name = c("File_Level"), type = 
TYPE.ARTIFACT, - artifact.type = "function" + artifact.type = "function", + kind ="function" ) vertices = plyr::rbind.fill(authors, artifacts) ## 2) construct expected edge attributes @@ -199,12 +208,15 @@ test_that("Construction of the bipartite network for the featureexpression artif ## 1) construct expected vertices authors = data.frame( name = c("Björn", "Olaf"), - type = TYPE.AUTHOR + type = TYPE.AUTHOR, + artifact.type = NA, + kind = TYPE.AUTHOR ) artifacts = data.frame( name = c("defined(A)", "Base_Feature"), type = TYPE.ARTIFACT, - artifact.type = "featureexpression" + artifact.type = "featureexpression", + kind = "featureexpression" ) vertices = plyr::rbind.fill(authors, artifacts) ## 2) construct expected edge attributes @@ -247,12 +259,15 @@ test_that("Construction of the bipartite network for the feature artifact with a ## 1) construct expected vertices authors = data.frame( name =c("Björn", "Karl", "Max", "Olaf", "Thomas"), - type = TYPE.AUTHOR + type = TYPE.AUTHOR, + artifact.type = NA, + kind = TYPE.AUTHOR ) artifacts = data.frame( name = c("", "", "", ""), type = TYPE.ARTIFACT, - artifact.type = "issue" + artifact.type = "issue", + kind = "issue" ) vertices = plyr::rbind.fill(authors, artifacts) ## 2) construct expected edge attributes @@ -267,6 +282,7 @@ test_that("Construction of the bipartite network for the feature artifact with a "2017-02-20 22:25:41", "2017-03-02 17:30:10", "2013-05-05 23:28:57", "2017-05-23 12:32:39", "2016-07-27 22:25:25", "2016-10-05 01:07:46", "2016-12-07 15:37:21", "2016-07-14 02:03:14", "2016-07-15 08:37:57")), + artifact.type = "Issue", issue.id = c("", "", "", "", "", "", "", "", "", "", "", "", "", "", ""), @@ -303,12 +319,15 @@ test_that("Construction of the directed bipartite network for the feature artifa ## 1) construct expected vertices authors = data.frame( name = c("Björn", "Karl", "Olaf", "Thomas"), - type = TYPE.AUTHOR + type = TYPE.AUTHOR, + artifact.type = NA, + kind = TYPE.AUTHOR ) artifacts = data.frame( 
name = c("A", "Base_Feature", "foo"), type = TYPE.ARTIFACT, - artifact.type = "feature" + artifact.type = "feature", + kind = "feature" ) vertices = plyr::rbind.fill(authors, artifacts) ## 2) construct expected edge attributes @@ -353,12 +372,15 @@ test_that("Construction of the directed bipartite network for the file artifact ## 1) construct expected vertices authors = data.frame( name = c("Björn", "Olaf", "Thomas"), - type = TYPE.AUTHOR + type = TYPE.AUTHOR, + artifact.type = NA, + kind = TYPE.AUTHOR ) artifacts = data.frame( name = c("test.c", "test2.c"), type = TYPE.ARTIFACT, - artifact.type = "file" + artifact.type = "file", + kind ="file" ) vertices = plyr::rbind.fill(authors, artifacts) ## 2) construct expected edge attributes @@ -403,12 +425,15 @@ test_that("Construction of the directed bipartite network for the function artif ## 1) construct expected vertices authors = data.frame( name = c("Björn", "Olaf", "Thomas"), - type = TYPE.AUTHOR + type = TYPE.AUTHOR, + artifact.type = NA, + kind = TYPE.AUTHOR ) artifacts = data.frame( name = c("File_Level"), type = TYPE.ARTIFACT, - artifact.type = "function" + artifact.type = "function", + kind = "function" ) vertices = plyr::rbind.fill(authors, artifacts) ## 2) construct expected edge attributes @@ -452,12 +477,15 @@ test_that("Construction of the directed bipartite network for the featureexpress ## 1) construct expected vertices authors = data.frame( name = c("Björn", "Olaf"), - type = TYPE.AUTHOR + type = TYPE.AUTHOR, + artifact.type = NA, + kind = TYPE.AUTHOR ) artifacts = data.frame( name = c("defined(A)", "Base_Feature"), type = TYPE.ARTIFACT, - artifact.type = "featureexpression" + artifact.type = "featureexpression", + kind = "featureexpression" ) vertices = plyr::rbind.fill(authors, artifacts) ## 2) construct expected edge attributes diff --git a/tests/test-networks-cut.R b/tests/test-networks-cut.R index 0b162061..491d2f2b 100644 --- a/tests/test-networks-cut.R +++ b/tests/test-networks-cut.R @@ -69,7 
+69,8 @@ test_that("Cut commit and mail data to same date range.", { date = get.date.from.string(c("2016-07-12 16:04:40")), date.offset = as.integer(c(100)), subject = c("Re: Fw: busybox 2 tab"), - thread = sprintf("", c(9))) + thread = sprintf("", c(9)), + artifact.type = "Mail") commit.data = x$get.project.data()$get.commits() rownames(commit.data) = 1:nrow(commit.data) diff --git a/tests/test-networks-multi.R b/tests/test-networks-multi.R index 0eea7fca..e076b311 100644 --- a/tests/test-networks-multi.R +++ b/tests/test-networks-multi.R @@ -51,7 +51,9 @@ test_that("Construction of the multi network for the feature artifact with autho "Base_Feature", "foo", "A"), id = c("Björn", "Olaf", "Karl", "Thomas", "Base_Feature", "foo", "A"), - type = c(rep(TYPE.AUTHOR, 4), rep(TYPE.ARTIFACT, 3))) + kind = c(rep(TYPE.AUTHOR, 4), rep("feature", 3)), + type = c(rep(TYPE.AUTHOR, 4), rep(TYPE.ARTIFACT, 3)) + ) row.names(vertices) = c("Björn", "Olaf", "Karl", "Thomas", "Base_Feature", "foo", "A") diff --git a/tests/test-read.R b/tests/test-read.R index 5e5eab24..d9217f1d 100644 --- a/tests/test-read.R +++ b/tests/test-read.R @@ -172,7 +172,8 @@ test_that("Read the mail data.", { "=?KOI8-R?Q?=EF=D4=D7=C5=D4:_Some_patches?= 2", "Re: busybox 1", "=?KOI8-R?Q?=EF=D4=D7=C5=D4:_Some_patches?= tab", "Re: Fw: busybox 2 tab", "Re: Fw: busybox 10"), - thread = sprintf("", c(1, 2, 3, 4, 5, 6, 6, 6, 6, 6, 6, 7, 8, 8, 8, 9, 9)) + thread = sprintf("", c(1, 2, 3, 4, 5, 6, 6, 6, 6, 6, 6, 7, 8, 8, 8, 9, 9)), + artifact.type = "Mail" ) ## delete the line with the empty date mail.data.expected = mail.data.expected[-13, ] @@ -239,21 +240,44 @@ test_that("Read and parse the issue data.", { ## build the expected data.frame issue.data.expected = data.frame(issue.id = sprintf("", rep(c(2, 48, 51, 48, 2, 48, 51, 57), c(6, 2, 5, 5, 1, 3, 8, 6))), issue.state = rep(c("CLOSED", "OPEN", "CLOSED", "OPEN", "CLOSED", "OPEN", "CLOSED", "CLOSED"), c(6, 2, 5, 5, 1, 3, 8, 6)), - creation.date = 
get.date.from.string(rep(c("2013-04-21 23:52:09", "2016-04-17 02:06:38", "2016-07-12 15:59:25", "2016-04-17 02:06:38", "2013-04-21 23:52:09", "2016-04-17 02:06:38", "2016-07-12 15:59:25", "2016-12-07 15:53:02"), c(6, 2, 5, 5, 1, 3, 8, 6))), - closing.date = get.date.from.string(rep(c("2013-05-25 20:02:08", NA, "2016-12-07 15:37:02", NA, "2014-05-25 20:02:08", NA, "2016-12-07 15:37:02", "2017-05-23 12:32:21"), c(6, 2, 5, 5, 1, 3, 8, 6))), + creation.date = get.date.from.string(rep(c("2013-04-21 23:52:09", "2016-04-17 02:06:38", "2016-07-12 15:59:25", "2016-04-17 02:06:38", + "2013-04-21 23:52:09", "2016-04-17 02:06:38", "2016-07-12 15:59:25", "2016-12-07 15:53:02"), + c(6, 2, 5, 5, 1, 3, 8, 6))), + closing.date = get.date.from.string(rep(c("2013-05-25 20:02:08", NA, "2016-12-07 15:37:02", NA, "2014-05-25 20:02:08", NA, "2016-12-07 15:37:02", + "2017-05-23 12:32:21"), c(6, 2, 5, 5, 1, 3, 8, 6))), is.pull.request = rep(c(TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, TRUE), c(6, 2, 5, 5, 1, 3, 8, 6)), - author.name = c("Karl", "Karl", "Karl", "Olaf", "Olaf", "Karl", "udo", "udo", "Thomas", "Thomas", "Björn", "Björn", "Björn", "Thomas", "Björn", "Björn", "Björn", "Thomas", "Thomas", "Björn", "Björn", "Olaf", "Björn", "Olaf", "Björn", "Björn", "Olaf", "Olaf", "Olaf", "Björn", "Björn", "Björn", "Björn", "Max", "Max", "Max"), - author.email = c("karl@example.org", "karl@example.org", "karl@example.org", "olaf@example.org", "olaf@example.org", "karl@example.org", "udo@example.org", "udo@example.org", "thomas@example.org", "thomas@example.org", "bjoern@example.org", "bjoern@example.org", "bjoern@example.org", "thomas@example.org", "bjoern@example.org", "bjoern@example.org", "bjoern@example.org", "thomas@example.org", "thomas@example.org", "bjoern@example.org", "bjoern@example.org", "olaf@example.org", "bjoern@example.org", "olaf@example.org", "bjoern@example.org", "bjoern@example.org", "olaf@example.org", "olaf@example.org", "olaf@example.org", "bjoern@example.org", 
"bjoern@example.org", "bjoern@example.org", "bjoern@example.org", "max@example.org", "max@example.org", "max@example.org"), - date = get.date.from.string(c("2013-04-21 23:52:09", "2013-05-05 23:28:57", "2013-05-05 23:28:57", "2013-05-25 20:02:08", "2013-05-25 20:02:08", "2013-06-01 22:37:03", "2016-04-17 02:07:37", "2016-04-17 02:07:37", "2016-07-12 15:59:25", "2016-07-12 15:59:25", "2016-07-12 15:59:25", "2016-07-12 16:03:23", "2016-07-12 16:05:47", "2016-07-14 02:03:14", "2016-07-14 17:42:52", "2016-07-15 08:37:57", "2016-07-15 08:37:57", "2016-07-15 08:37:57", "2016-07-19 10:47:25", "2016-07-27 22:25:25", "2016-07-27 22:25:25", "2016-07-27 22:25:25", "2016-08-31 18:21:48", "2016-10-05 01:07:46", "2016-10-13 15:33:56", "2016-12-06 14:03:42", "2016-12-07 15:37:02", "2016-12-07 15:37:02", "2016-12-07 15:37:21", "2016-12-07 15:53:02", "2016-12-07 15:53:02", "2017-02-20 22:25:41", "2017-03-02 17:30:10", "2017-05-23 12:32:21", "2017-05-23 12:32:21", "2017-05-23 12:32:39")), + author.name = c("Karl", "Karl", "Karl", "Olaf", "Olaf", "Karl", "udo", "udo", "Thomas", "Thomas", "Björn", "Björn", "Björn", "Thomas", + "Björn", "Björn", "Björn", "Thomas", "Thomas", "Björn", "Björn", "Olaf", "Björn", "Olaf", "Björn", "Björn", "Olaf", "Olaf", + "Olaf", "Björn", "Björn", "Björn", "Björn", "Max", "Max", "Max"), + author.email = c("karl@example.org", "karl@example.org", "karl@example.org", "olaf@example.org", "olaf@example.org", "karl@example.org", + "udo@example.org", "udo@example.org", "thomas@example.org", "thomas@example.org", "bjoern@example.org", "bjoern@example.org", + "bjoern@example.org", "thomas@example.org", "bjoern@example.org", "bjoern@example.org", "bjoern@example.org", "thomas@example.org", + "thomas@example.org", "bjoern@example.org", "bjoern@example.org", "olaf@example.org", "bjoern@example.org", "olaf@example.org", + "bjoern@example.org", "bjoern@example.org", "olaf@example.org", "olaf@example.org", "olaf@example.org", "bjoern@example.org", + "bjoern@example.org", 
"bjoern@example.org", "bjoern@example.org", "max@example.org", "max@example.org", "max@example.org"), + date = get.date.from.string(c("2013-04-21 23:52:09", "2013-05-05 23:28:57", "2013-05-05 23:28:57", "2013-05-25 20:02:08", "2013-05-25 20:02:08", + "2013-06-01 22:37:03", "2016-04-17 02:07:37", "2016-04-17 02:07:37", "2016-07-12 15:59:25", "2016-07-12 15:59:25", + "2016-07-12 15:59:25", "2016-07-12 16:03:23", "2016-07-12 16:05:47", "2016-07-14 02:03:14", "2016-07-14 17:42:52", + "2016-07-15 08:37:57", "2016-07-15 08:37:57", "2016-07-15 08:37:57", "2016-07-19 10:47:25", "2016-07-27 22:25:25", + "2016-07-27 22:25:25", "2016-07-27 22:25:25", "2016-08-31 18:21:48", "2016-10-05 01:07:46", "2016-10-13 15:33:56", + "2016-12-06 14:03:42", "2016-12-07 15:37:02", "2016-12-07 15:37:02", "2016-12-07 15:37:21", "2016-12-07 15:53:02", + "2016-12-07 15:53:02", "2017-02-20 22:25:41", "2017-03-02 17:30:10", "2017-05-23 12:32:21", "2017-05-23 12:32:21", + "2017-05-23 12:32:39")), ref.name = c(rep("", 6), rep("Karl", 2), rep("Björn", 2), rep("", 5), rep("Thomas", 2), rep("", 2), rep("udo", 2), rep("", 15)), - event.name = c("created", "commented", "referenced", "merged", "closed", "head_ref_deleted", "mentioned", "subscribed", "mentioned", "subscribed", "created", "renamed", "commented", "commented", "commented", "mentioned", "subscribed", "commented", "referenced", "mentioned", "subscribed", "commented", "commented", "commented", "commented", "commented", "merged", "closed", "commented", "commented", "created", "commented", "commented", "merged", "closed", "commented")) + event.name = c("created", "commented", "referenced", "merged", "closed", "head_ref_deleted", "mentioned", "subscribed", "mentioned", "subscribed", + "created", "renamed", "commented", "commented", "commented", "mentioned", "subscribed", "commented", "referenced", "mentioned", + "subscribed", "commented", "commented", "commented", "commented", "commented", "merged", "closed", "commented", "commented", "created", + 
"commented", "commented", "merged", "closed", "commented"), + artifact.type = "Issue" + ) ## calculate event IDs issue.data.expected[["event.id"]] = sapply( paste(issue.data.expected[["issue.id"]], issue.data.expected[["author.name"]], issue.data.expected[["date"]], sep = "_"), digest::sha1 ) ## set row names as integers - attr(issue.data.expected, "row.names") = as.integer(c(1, 2, 3, 4, 5, 6, 8, 9, 18, 19, 20, 21, 22, 10, 11, 12, 13, 14, 7, 15, 16, 17, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36)) + attr(issue.data.expected, "row.names") = as.integer(c(1, 2, 3, 4, 5, 6, 8, 9, 18, 19, 20, 21, 22, 10, 11, 12, 13, 14, 7, 15, 16, 17, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, + 35, 36)) ## check the results expect_identical(issue.data.read, issue.data.expected, info = "Issue data.") From 7ad49c4ad937c9a6c7398a45179e25d5d5c03faa Mon Sep 17 00:00:00 2001 From: Barbara Eckl Date: Thu, 19 Apr 2018 17:59:15 +0200 Subject: [PATCH 12/51] Remove vertex attribute 'id' and add vertex attribute 'artifact.type' Remove vertex attribute 'id' of artifact networks with relation 'mail' and add vertex attribute 'artifact.type' in artifact networks of relations 'issues', 'mail' and 'callgraph'. Then these changes of the vertex attributes in artifact networks are included in the test suite. 
Signed-off-by: Barbara Eckl --- tests/test-networks-artifact.R | 9 ++++----- tests/test-networks-author.R | 9 --------- tests/test-networks-multi.R | 5 ++--- util-networks.R | 8 ++++++-- 4 files changed, 12 insertions(+), 19 deletions(-) diff --git a/tests/test-networks-artifact.R b/tests/test-networks-artifact.R index e2fdfe4f..53cac3ae 100644 --- a/tests/test-networks-artifact.R +++ b/tests/test-networks-artifact.R @@ -47,10 +47,10 @@ test_that("Network construction of the undirected artifact-cochange network", { network.built = network.builder$get.artifact.network() ## vertex attributes - vertices = data.frame(name=c("Base_Feature", "foo", "A"), - id=c("Base_Feature", "foo", "A"), - kind="feature", - type = TYPE.ARTIFACT) + vertices = data.frame(name = c("Base_Feature", "foo", "A"), + artifact.type = "feature", + kind = "feature", + type = TYPE.ARTIFACT) data = data.frame(from = c("Base_Feature", "Base_Feature"), to = c("foo", "foo"), @@ -65,7 +65,6 @@ test_that("Network construction of the undirected artifact-cochange network", { ## build expected network network.expected = igraph::graph.data.frame(data, directed = FALSE, vertices = vertices) - network.expected = igraph::set.vertex.attribute(network.expected, "id", value = igraph::get.vertex.attribute(network.expected, "name")) expect_true(igraph::identical_graphs(network.built, network.expected)) }) diff --git a/tests/test-networks-author.R b/tests/test-networks-author.R index df9b8c5e..a2ed86fa 100644 --- a/tests/test-networks-author.R +++ b/tests/test-networks-author.R @@ -209,7 +209,6 @@ test_that("Network construction of the undirected author-cochange network", { ## vertex attributes authors = data.frame(name= c("Björn", "Olaf", "Karl", "Thomas"), - id = c("Björn", "Olaf", "Karl", "Thomas"), kind= TYPE.AUTHOR, type = TYPE.AUTHOR) @@ -231,7 +230,6 @@ test_that("Network construction of the undirected author-cochange network", { ## build expected network network.expected = igraph::graph.data.frame(data, 
directed = FALSE, vertices = authors) - network.expected = igraph::set.vertex.attribute(network.expected, "id", value = igraph::get.vertex.attribute(network.expected, "name")) expect_true(igraph::identical_graphs(network.built, network.expected)) }) @@ -253,7 +251,6 @@ test_that("Network construction of the directed author-cochange network", { ## vertex attributes authors = data.frame(name = c("Björn", "Olaf", "Karl", "Thomas"), - id = c("Björn", "Olaf", "Karl", "Thomas"), kind = TYPE.AUTHOR, type = TYPE.AUTHOR) @@ -274,8 +271,6 @@ test_that("Network construction of the directed author-cochange network", { ## build expected network network.expected = igraph::graph.data.frame(data, directed = TRUE, vertices = authors) - network.expected = igraph::set.vertex.attribute(network.expected, "id", value = igraph::get.vertex.attribute(network.expected, "name")) - expect_true(igraph::identical_graphs(network.built, network.expected)) }) @@ -297,7 +292,6 @@ test_that("Network construction of the undirected simplified author-cochange net ## vertex attributes authors = data.frame(name = c("Björn", "Olaf", "Karl", "Thomas"), - id = c("Björn", "Olaf", "Karl", "Thomas"), kind = TYPE.AUTHOR, type = TYPE.AUTHOR) @@ -318,7 +312,6 @@ test_that("Network construction of the undirected simplified author-cochange net ## build expected network network.expected = igraph::graph.data.frame(data, directed = FALSE, vertices = authors) - network.expected = igraph::set.vertex.attribute(network.expected, "id", value = igraph::get.vertex.attribute(network.expected, "name")) expect_true(igraph::identical_graphs(network.built, network.expected)) }) @@ -341,7 +334,6 @@ test_that("Network construction of the undirected author-issue network with all network.built = network.builder$get.author.network() vertices = data.frame(name = c("Karl", "Olaf", "Thomas", "udo", "Björn", "Max"), - id = c("Karl", "Olaf", "Thomas", "udo", "Björn", "Max"), kind = TYPE.AUTHOR, type = TYPE.AUTHOR) @@ -432,7 +424,6 @@ 
test_that("Network construction of the undirected author-issue network with just network.built = network.builder$get.author.network() vertices = data.frame(name = c("Karl", "Thomas", "Björn", "Olaf", "Max"), - id = c("Karl", "Thomas", "Björn", "Olaf", "Max"), kind = TYPE.AUTHOR, type = TYPE.AUTHOR) diff --git a/tests/test-networks-multi.R b/tests/test-networks-multi.R index e076b311..b60072ac 100644 --- a/tests/test-networks-multi.R +++ b/tests/test-networks-multi.R @@ -49,10 +49,9 @@ test_that("Construction of the multi network for the feature artifact with autho ## build expected network vertices = data.frame(name = c("Björn", "Olaf", "Karl", "Thomas", "Base_Feature", "foo", "A"), - id = c("Björn", "Olaf", "Karl", "Thomas", - "Base_Feature", "foo", "A"), kind = c(rep(TYPE.AUTHOR, 4), rep("feature", 3)), - type = c(rep(TYPE.AUTHOR, 4), rep(TYPE.ARTIFACT, 3)) + type = c(rep(TYPE.AUTHOR, 4), rep(TYPE.ARTIFACT, 3)), + artifact.type = c(rep(NA, 4), rep("feature", 3)) ) row.names(vertices) = c("Björn", "Olaf", "Karl", "Thomas", "Base_Feature", "foo", "A") diff --git a/util-networks.R b/util-networks.R index ce37b21f..4f098c52 100644 --- a/util-networks.R +++ b/util-networks.R @@ -236,6 +236,8 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", directed = FALSE ) + artifacts.net = igraph::set.vertex.attribute(artifacts.net, "artifact.type", value = ARTIFACT) + ## store network private$artifacts.network.cochange = artifacts.net logging::logdebug("get.artifact.network.cochange: finished.") @@ -309,7 +311,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", } ## (3) set processed names inside graph object artifacts.net = igraph::set.vertex.attribute(artifacts.net, "name", value = names) - artifacts.net = igraph::set.vertex.attribute(artifacts.net, "id", value = names) + artifacts.net = igraph::set.vertex.attribute(artifacts.net, "artifact.type", value = ARTIFACT) ## store network private$artifacts.network.callgraph = artifacts.net @@ -344,6 +346,7 @@ NetworkBuilder = 
R6::R6Class("NetworkBuilder", artifacts.net = create.empty.network(directed = directed) + igraph::vertices(artifacts, type = TYPE.ARTIFACT) + artifacts.net = igraph::set.vertex.attribute(artifacts.net, "artifact.type", value = "thread") ## store network private$artifacts.network.mail = artifacts.net logging::logdebug("get.artifact.network.mail: finished.") @@ -377,6 +380,8 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", artifacts.net = create.empty.network(directed = directed) + igraph::vertices(artifacts, type = TYPE.ARTIFACT) + artifacts.net = igraph::set.vertex.attribute(artifacts.net, "artifact.type", value = "issue") + ## store network private$artifacts.network.issue = artifacts.net logging::logdebug("get.artifact.network.issue: finished.") @@ -978,7 +983,6 @@ construct.network.from.edge.list = function(vertices, edge.list, network.conf, d net = igraph::graph.data.frame(edge.list, directed = directed, vertices = nodes.processed) } - net = igraph::set.vertex.attribute(net, "id", value = igraph::get.vertex.attribute(net, "name")) net = igraph::set.edge.attribute(net, "weight", value = 1) # transform multiple edges to edge weights From be6ee8cd48dc7692e02b7f1c512870591300fa8a Mon Sep 17 00:00:00 2001 From: Barbara Eckl Date: Thu, 19 Apr 2018 18:12:53 +0200 Subject: [PATCH 13/51] Add tests for networks with multiple relations Signed-off-by: Barbara Eckl --- tests/test-networks-multi-relation.R | 269 +++++++++++++++++++++++++++ 1 file changed, 269 insertions(+) create mode 100644 tests/test-networks-multi-relation.R diff --git a/tests/test-networks-multi-relation.R b/tests/test-networks-multi-relation.R new file mode 100644 index 00000000..73a14802 --- /dev/null +++ b/tests/test-networks-multi-relation.R @@ -0,0 +1,269 @@ +## This file is part of codeface-extraction-r, which is free software: you +## can redistribute it and/or modify it under the terms of the GNU General +## Public License as published by the Free Software Foundation, version 2. 
+## +## This program is distributed in the hope that it will be useful, +## but WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +## GNU General Public License for more details. +## +## You should have received a copy of the GNU General Public License along +## with this program; if not, write to the Free Software Foundation, Inc., +## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +## +## Copyright 2018 by Barbara Eckl +## All Rights Reserved. + + +context("Network-building functionality.") + +## +## Context +## + +CF.DATA = file.path(".", "codeface-data") +CF.SELECTION.PROCESS = "testing" +CASESTUDY = "test" +ARTIFACT = "feature" # function, feature, file, featureexpression + +## use only when debugging this file independently +if (!dir.exists(CF.DATA)) CF.DATA = file.path(".", "tests", "codeface-data") + +test_that("Network construction of the undirected author network with relation = c('cochange', 'mail')", { + + ## configurations + proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, ARTIFACT) + proj.conf$update.value("artifact.filter.base", FALSE) + net.conf = NetworkConf$new() + net.conf$update.values(updated.values = list(author.relation = c("cochange", "mail"))) + + ## construct objects + proj.data = ProjectData$new(project.conf = proj.conf) + network.builder = NetworkBuilder$new(project.data = proj.data, network.conf = net.conf) + + ## build network + network.built = network.builder$get.author.network() + + ## vertex attributes + authors = data.frame(name = c("Björn", "Olaf", "Karl", "Thomas", "udo", "Fritz fritz@example.org", "georg", "Hans"), + kind = TYPE.AUTHOR, + type = TYPE.AUTHOR) + + ## edge attributes + data = data.frame(comb.1. = c("Björn", "Björn", "Olaf", "Olaf", "Olaf", "Olaf", "Karl", "Karl", "Björn", "Björn", "Olaf", "Olaf"), + comb.2. 
= c("Olaf", "Olaf", "Karl", "Karl", "Thomas", "Thomas", "Thomas", "Thomas", "Olaf", "Olaf", "Thomas", "Thomas"), + date = get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 16:00:45", "2016-07-12 16:05:41", "2016-07-12 16:06:10", + "2016-07-12 16:05:41", "2016-07-12 16:06:32", "2016-07-12 16:06:10", "2016-07-12 16:06:32", + "2016-07-12 15:58:40", "2016-07-12 15:58:50", "2016-07-12 16:04:40", "2016-07-12 16:05:37")), + hash = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338", "3a0ed78458b3976243db6829f63eba3eead26774", + "1143db502761379c2bfcecc2007fc34282e7ee61", "3a0ed78458b3976243db6829f63eba3eead26774", "0a1a5c523d835459c42f33e863623138555e2526", + "1143db502761379c2bfcecc2007fc34282e7ee61", "0a1a5c523d835459c42f33e863623138555e2526", rep(NA, 4)), + file = c("test.c", "test.c", "test2.c", "test3.c", "test2.c", "test2.c", "test3.c", "test2.c", rep(NA, 4)), + artifact.type = c("Feature", "Feature", "Feature", "Feature", "Feature", "Feature", "Feature", "Feature", rep("Mail", 4)), + artifact = c("A", "A", "Base_Feature", "Base_Feature", "Base_Feature", "Base_Feature", "Base_Feature", "Base_Feature", rep(NA, 4)), + weight = 1, + type = TYPE.EDGES.INTRA, + relation = c("cochange", "cochange", "cochange", "cochange", "cochange", "cochange", "cochange", "cochange", rep("mail", 4)), + message.id = c(NA, NA, NA, NA, NA, NA, NA, NA, "<4cbaa9ef0802201124v37f1eec8g89a412dfbfc8383a@mail.gmail.com>", + "<6784529b0802032245r5164f984l342f0f0dc94aa420@mail.gmail.com>", + "<65a1sf31sagd684dfv31@mail.gmail.com>", "<9b06e8d20801220234h659c18a3g95c12ac38248c7e0@mail.gmail.com>"), + thread = c(NA, NA, NA, NA, NA, NA, NA, NA, "", "", "", "") + ) + + ## build expected network + network.expected = igraph::graph.data.frame(data, directed = FALSE, vertices = authors) + + expect_true(igraph::identical_graphs(network.built, network.expected)) +}) + + +test_that("Construction of the bipartite network for the feature artifact with author.relation 
= c('cochange', 'issue') and artifact. + relation = c('issue', 'mail').", { + + ## configurations + proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, ARTIFACT) + proj.conf$update.value("artifact.filter.base", FALSE) + net.conf = NetworkConf$new() + net.conf$update.values(updated.values = list(author.relation = c("cochange", "issue"), artifact.relation = c("issue", "mail"))) + + ## construct objects + proj.data = ProjectData$new(project.conf = proj.conf) + network.builder = NetworkBuilder$new(project.data = proj.data, network.conf = net.conf) + + ## build network + network.built = network.builder$get.bipartite.network() + + ## construct expected network: + ## 1) construct expected vertices + authors1 = data.frame( + name = c("Björn", "Karl", "Max", "Olaf", "Thomas"), + type = TYPE.AUTHOR, + artifact.type = NA, + kind = TYPE.AUTHOR + ) + authors2 = data.frame( + name = c("Fritz fritz@example.org", "georg", "Hans", "udo"), + type = TYPE.AUTHOR, + artifact.type = NA, + kind = TYPE.AUTHOR + ) + issues = data.frame( + name = c("", "", "", ""), + type = TYPE.ARTIFACT, + artifact.type = "issue", + kind = "issue" + ) + threads = data.frame( + name = c("", "", "", + "", "", "", "", "", ""), + type = TYPE.ARTIFACT, + artifact.type = "thread", + kind = "thread" + ) + vertices = plyr::rbind.fill(authors1, issues, authors2, threads) + ## 2) construct expected edge attributes + network.expected.data = data.frame( + from = c("Björn", "Björn", "Björn", "Björn", "Björn", "Björn", "Björn", "Björn", "Karl", "Max", + "Olaf", "Olaf", "Olaf", "Thomas", "Thomas", "Björn", "Björn", "Björn", "Fritz fritz@example.org", + "georg", "Hans", "Hans", "Hans", "Hans", "Hans", "Hans", "Hans", "Olaf", "Olaf", "Thomas", "udo"), + to = c("", "", "", "", "", "", "", "", + "", "", "", "", "", "", "", + "", + "", "", "", "", "", "", "", "", + "", "", "", "", "", "", ""), + date = get.date.from.string(c("2016-07-12 16:05:47", "2016-07-14 17:42:52", "2016-08-31 18:21:48", + "2016-10-13 
15:33:56", "2016-12-06 14:03:42", "2016-12-07 15:53:02", + "2017-02-20 22:25:41", "2017-03-02 17:30:10", "2013-05-05 23:28:57", + "2017-05-23 12:32:39", "2016-07-27 22:25:25", "2016-10-05 01:07:46", + "2016-12-07 15:37:21", "2016-07-14 02:03:14", "2016-07-15 08:37:57", + "2004-10-09 18:38:13", "2005-02-09 18:49:49", "2016-07-12 15:58:40", + "2010-07-12 11:05:35", "2010-07-12 12:05:34", "2010-07-12 12:05:40", + "2010-07-12 12:05:41", "2010-07-12 12:05:42", "2010-07-12 12:05:43", + "2010-07-12 12:05:44", "2010-07-12 12:05:45", "2010-07-12 12:05:46", + "2016-07-12 15:58:50", "2016-07-12 16:05:37", "2016-07-12 16:04:40", + "2010-07-12 10:05:36")), + artifact.type = c(rep("Issue", 15), rep("Mail", 16)), + issue.id = c("", "", "", "", "", + "", "", "", "", "", + "", "", "", "", "", rep(NA,16)), + event.name = c("commented", "commented", "commented", "commented", "commented", + "commented", "commented", "commented", "commented", "commented", + "commented", "commented", "commented", "commented", "commented", rep(NA, 16)), + type = TYPE.EDGES.INTER, + weight = 1, + relation = c(rep("issue", 15), rep("mail", 16)), + message.id = c(rep(NA, 15), + "", "<1107974989.17910.6.camel@jmcmullan>", + "<4cbaa9ef0802201124v37f1eec8g89a412dfbfc8383a@mail.gmail.com>", "", + "", "", + "","", + "", "", "", + "", "<6784529b0802032245r5164f984l342f0f0dc94aa420@mail.gmail.com>", + "<9b06e8d20801220234h659c18a3g95c12ac38248c7e0@mail.gmail.com>", "<65a1sf31sagd684dfv31@mail.gmail.com>", + ""), + thread = c(rep(NA, 15), + "", "", "", "", "", "", "", "", + "", "", "", "", "", "", "", "") + ) + ## 3) construct expected network + network.expected = igraph::graph.data.frame(network.expected.data, directed = net.conf$get.value("author.directed"), vertices = vertices) + + expect_true(igraph::identical_graphs(network.built, network.expected)) +}) + + +test_that("Construction of the multi network for the feature artifact with author.relation = c('cochange', 'mail') and artifact. 
+ relation = c('cochange', 'issue').", { + + ## configurations + proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, ARTIFACT) + proj.conf$update.value("artifact.filter.base", FALSE) + net.conf = NetworkConf$new() + net.conf$update.values(updated.values = list(author.relation = c("cochange", "mail"), artifact.relation = c("cochange", "issue"))) + + ## construct objects + proj.data = ProjectData$new(project.conf = proj.conf) + network.builder = NetworkBuilder$new(project.data = proj.data, network.conf = net.conf) + + ## build network + network.built = network.builder$get.multi.network() + + ## build expected network + vertices = data.frame( + name = c("Björn", "Olaf", "Karl", "Thomas", "udo", "Fritz fritz@example.org", "georg", "Hans", + "Base_Feature", "foo", "A", "", "", "", ""), + kind = c(rep(TYPE.AUTHOR, 8), rep("feature", 3), rep("issue", 4)), + type = c(rep(TYPE.AUTHOR, 8), rep(TYPE.ARTIFACT, 7)), + artifact.type = c(rep(NA, 8), rep("feature", 3), rep("issue", 4)) + ) + row.names(vertices) = c("Björn", "Olaf", "Karl", "Thomas", "udo", "Fritz fritz@example.org", "georg", "Hans", + "Base_Feature", "foo", "A", "", "", "", "") + + edges = data.frame(from = c("Björn", "Björn", "Olaf", "Olaf", "Olaf", "Olaf", "Karl", "Karl", "Björn", "Björn", + "Olaf", "Olaf", "Base_Feature", "Base_Feature", "Björn", "Olaf", "Olaf", "Karl", "Thomas", + "Thomas", rep("Björn",8), rep("Olaf",3), "Karl", "Thomas", "Thomas"), + to = c("Olaf", "Olaf", "Karl", "Karl", "Thomas", "Thomas", "Thomas", "Thomas", "Olaf", "Olaf", + "Thomas", "Thomas","foo", "foo", "A", "A", "Base_Feature", "Base_Feature", "Base_Feature", "foo", + "", "", rep("", 4), rep("", 2), "", + rep("", 2), "", rep("", 2)), + date = get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 16:00:45", "2016-07-12 16:05:41", ## author cochange + "2016-07-12 16:06:10", "2016-07-12 16:05:41", "2016-07-12 16:06:32", + "2016-07-12 16:06:10", "2016-07-12 16:06:32", + "2016-07-12 15:58:40", "2016-07-12 15:58:50", 
"2016-07-12 16:04:40", ## author mail + "2016-07-12 16:05:37", + "2016-07-12 16:06:32", "2016-07-12 16:06:32", "2016-07-12 15:58:59", ## bipartite cochange + "2016-07-12 16:00:45", "2016-07-12 16:05:41", "2016-07-12 16:06:10", + "2016-07-12 16:06:32", "2016-07-12 16:06:32", + "2016-07-12 16:05:47", "2016-07-14 17:42:52", "2016-08-31 18:21:48", ## bipartite issue + "2016-10-13 15:33:56", "2016-12-06 14:03:42", "2016-12-07 15:53:02", + "2017-02-20 22:25:41", "2017-03-02 17:30:10", "2016-07-27 22:25:25", + "2016-10-05 01:07:46", "2016-12-07 15:37:21", "2013-05-05 23:28:57", + "2016-07-14 02:03:14", "2016-07-15 08:37:57")), + hash = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338", + "3a0ed78458b3976243db6829f63eba3eead26774", "1143db502761379c2bfcecc2007fc34282e7ee61", + "3a0ed78458b3976243db6829f63eba3eead26774", "0a1a5c523d835459c42f33e863623138555e2526", + "1143db502761379c2bfcecc2007fc34282e7ee61", "0a1a5c523d835459c42f33e863623138555e2526", + NA, NA, NA, NA, + "0a1a5c523d835459c42f33e863623138555e2526", "0a1a5c523d835459c42f33e863623138555e2526", + "72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338", + "3a0ed78458b3976243db6829f63eba3eead26774", "1143db502761379c2bfcecc2007fc34282e7ee61", + "0a1a5c523d835459c42f33e863623138555e2526", "0a1a5c523d835459c42f33e863623138555e2526", + rep(NA, 14)), + file = c("test.c", "test.c", "test2.c", "test3.c", "test2.c", "test2.c", "test3.c", "test2.c", + NA, NA, NA, NA, + "test2.c", "test2.c", "test.c", "test.c", "test2.c", "test3.c", "test2.c", "test2.c", + rep(NA, 14)), + artifact.type = c(rep("Feature",8), rep("Mail", 4), rep("Feature",8), rep("Issue", 14)), + artifact = c("A", "A", "Base_Feature", "Base_Feature", "Base_Feature", "Base_Feature", "Base_Feature", "Base_Feature", + rep(NA, 4), + "Base_Feature", "foo", "A", "A", "Base_Feature", "Base_Feature", "Base_Feature", "foo", + rep(NA, 14)), + weight = 1, + type = c(rep(TYPE.EDGES.INTRA, 14), 
rep(TYPE.EDGES.INTER, 20)), + relation = c(rep("cochange", 8), rep("mail", 4), rep("cochange", 8), rep("issue", 14)), + message.id = c(rep(NA, 8), + "<4cbaa9ef0802201124v37f1eec8g89a412dfbfc8383a@mail.gmail.com>", + "<6784529b0802032245r5164f984l342f0f0dc94aa420@mail.gmail.com>", + "<65a1sf31sagd684dfv31@mail.gmail.com>", + "<9b06e8d20801220234h659c18a3g95c12ac38248c7e0@mail.gmail.com>", + rep(NA, 22)), + thread = c(rep(NA, 8), "", "", "", "", rep(NA, 22)), + issue.id = c(rep(NA, 20), "", "","", "", "", + "", "", "", "", "", "", + "", "", ""), + event.name = c(rep(NA, 20), rep("commented", 14)) + ) + + network.expected = igraph::graph.data.frame(edges, directed = FALSE, vertices = vertices) + + expected.edges = igraph::as_data_frame(network.expected, what = "edges") + expected.vertices = igraph::as_data_frame(network.expected, what = "vertices") + + built.edges = igraph::as_data_frame(network.built, what = "edges") + built.vertices = igraph::as_data_frame(network.built, what = "vertices") + + expect_identical(expected.edges, built.edges, info = "Multi network edges") + expect_identical(expected.vertices, built.vertices, info = "Multi network vertices") + ## TODO as soon as the bug in igraph is fixed switch to the expect_true function below + # expect_true(igraph::identical_graphs(network.expected, network.built)) +}) From 2941c22ab7df4452d7ff058cd74ffaf17efeb3c5 Mon Sep 17 00:00:00 2001 From: Barbara Eckl Date: Fri, 20 Apr 2018 08:45:59 +0200 Subject: [PATCH 14/51] Set Copyright in test suite Signed-off-by: Barbara Eckl --- tests/test-data-cut.R | 1 + tests/test-networks-artifact.R | 1 + tests/test-networks-author.R | 1 + tests/test-networks-bipartite.R | 1 + tests/test-networks-multi.R | 1 + 5 files changed, 5 insertions(+) diff --git a/tests/test-data-cut.R b/tests/test-data-cut.R index cfcfd244..65acfc5d 100644 --- a/tests/test-data-cut.R +++ b/tests/test-data-cut.R @@ -14,6 +14,7 @@ ## Copyright 2017 by Christian Hechtl ## Copyright 2017 by Felix Prasse ## 
Copyright 2018 by Claus Hunsen +## Copyright 2018 by Barbara Eckl ## All Rights Reserved. diff --git a/tests/test-networks-artifact.R b/tests/test-networks-artifact.R index 53cac3ae..f7080584 100644 --- a/tests/test-networks-artifact.R +++ b/tests/test-networks-artifact.R @@ -13,6 +13,7 @@ ## ## Copyright 2017-2018 by Christian Hechtl ## Copyright 2017 by Claus Hunsen +## Copyright 2018 by Barbara Eckl ## All Rights Reserved. diff --git a/tests/test-networks-author.R b/tests/test-networks-author.R index a2ed86fa..bdde3a75 100644 --- a/tests/test-networks-author.R +++ b/tests/test-networks-author.R @@ -14,6 +14,7 @@ ## Copyright 2017 by Claus Hunsen ## Copyright 2017 by Christian Hechtl ## Copyright 2017 by Felix Prasse +## Copyright 2018 by Barbara Eckl ## All Rights Reserved. diff --git a/tests/test-networks-bipartite.R b/tests/test-networks-bipartite.R index 48afadb9..10079997 100644 --- a/tests/test-networks-bipartite.R +++ b/tests/test-networks-bipartite.R @@ -13,6 +13,7 @@ ## ## Copyright 2017-2018 by Christian Hechtl ## Copyright 2017-2018 by Claus Hunsen +## Copyright 2018 by Barbara Eckl ## All Rights Reserved. diff --git a/tests/test-networks-multi.R b/tests/test-networks-multi.R index b60072ac..f631700c 100644 --- a/tests/test-networks-multi.R +++ b/tests/test-networks-multi.R @@ -13,6 +13,7 @@ ## ## Copyright 2018 by Christian Hechtl ## Copyright 2018 by Claus Hunsen +## Copyright 2018 by Barbara Eckl ## All Rights Reserved. From cd4645f46cf5299ddc8076090c1731aff38833ab Mon Sep 17 00:00:00 2001 From: Barbara Eckl Date: Fri, 20 Apr 2018 09:15:46 +0200 Subject: [PATCH 15/51] Add description of edge and vertex attributes in 'README' Signed-off-by: Barbara Eckl --- README.md | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index fa2bfc7f..b8cef20c 100644 --- a/README.md +++ b/README.md @@ -214,7 +214,7 @@ Updates to the parameters can be done by calling `NetworkConf$update.variables(. 
**Note**: Default values are shown in *italics*. - `author.relation` - * The relation among authors, encoded as edges in an author network + * The relation(s) among authors, encoded as edges in an author network * **Note**: The author--artifact relation in bipartite and multi networks is configured by `artifact.relation`! * possible values: [*`"mail"`*, `"cochange"`, `"issue"`] - `author.directed` @@ -228,7 +228,7 @@ Updates to the parameters can be done by calling `NetworkConf$update.variables(. * Remove all authors from an author network (including bipartite and multi networks) who are not present in an author network constructed with `artifact.relation` as relation, i.e., all authors that have no biparite relations in a bipartite/multi network are removed. * [`TRUE`, *`FALSE`*] - `artifact.relation` - * The relation among artifacts, encoded as edges in an artifact network + * The relation(s) among artifacts, encoded as edges in an artifact network * **Note**: This relation configures also the author--artifact relation in bipartite and multi networks! * possible values: [*`"cochange"`*, `"callgraph"`, `"mail"`, `"issue"`] - `artifact.directed` @@ -239,6 +239,7 @@ Updates to the parameters can be done by calling `NetworkConf$update.variables(. * The list of edge-attribute names and information * a subset of the following as a single vector: - timestamp information: *`"date"`*, `"date.offset"` + - relation information: *`"relation"`* (possible values: [`"mail"`,`"issue"`,`"cochange"`, `"callgraph"`]) - author information: `"author.name"`, `"author.email"` - committer information: `"committer.date"`, `"committer.name"`, `"committer.email"` - e-mail information: *`"message.id"`*, *`"thread"`*, `"subject"` @@ -248,6 +249,11 @@ Updates to the parameters can be done by calling `NetworkConf$update.variables(. * **Note**: `"date"` is always included as this information is needed for several parts of the library, e.g., time-based splitting. 
* **Note**: For each type of network that can be built, only the applicable part of the given vector of names is respected. * **Note**: For the edge attributes `"pasta"` and `"synchronicity"`, the project configuration's parameters `pasta` and `synchronicity` need to be set to `TRUE`, respectively (see below). +- `vertex.attributes` + - *`"name"`* + - *`"type"`*: [`Author`, `Artifact`] + - `"artifact.type"`: [`file`, `feature`, `function`,`mail`, `issue`,`featureexpression`] + - *`"kind"`*: [`author`,`file`, `feature`, `function`,`mail`, `issue`,`featureexpression`] - `simplify` * Perform edge contraction to retrieve a simplified network * [`TRUE`, *`FALSE`*] From 3e286acaafa94369ae49759bd5aeea60cf6a62ba Mon Sep 17 00:00:00 2001 From: Barbara Eckl Date: Mon, 23 Apr 2018 14:15:40 +0200 Subject: [PATCH 16/51] Bug fix: Set attribute 'artifact.type' correctly Signed-off-by: Barbara Eckl --- util-networks.R | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/util-networks.R b/util-networks.R index 4f098c52..2e2a0b83 100644 --- a/util-networks.R +++ b/util-networks.R @@ -236,7 +236,8 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", directed = FALSE ) - artifacts.net = igraph::set.vertex.attribute(artifacts.net, "artifact.type", value = ARTIFACT) + artifacts.net = igraph::set.vertex.attribute(artifacts.net, "artifact.type", + value = private$proj.data$get.project.conf.entry("artifact")) ## store network private$artifacts.network.cochange = artifacts.net @@ -311,7 +312,8 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", } ## (3) set processed names inside graph object artifacts.net = igraph::set.vertex.attribute(artifacts.net, "name", value = names) - artifacts.net = igraph::set.vertex.attribute(artifacts.net, "artifact.type", value = ARTIFACT) + artifacts.net = igraph::set.vertex.attribute(artifacts.net, "artifact.type", + value = private$proj.data$get.project.conf.entry("artifact")) ## store network private$artifacts.network.callgraph = artifacts.net 
From c2e92c71f1fccb985b78b508da498384d5793180 Mon Sep 17 00:00:00 2001 From: Barbara Eckl Date: Mon, 23 Apr 2018 18:36:45 +0200 Subject: [PATCH 17/51] Bug fix: Plotting multi networks The colors and line types of edges were sometimes wrong. Deleting loops from the network before plotting solves the problem. Signed-off-by: Claus Hunsen Signed-off-by: Barbara Eckl --- util-plot.R | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/util-plot.R b/util-plot.R index b9b77432..24b607c0 100644 --- a/util-plot.R +++ b/util-plot.R @@ -53,6 +53,7 @@ PLOT.VERTEX.LABEL.COLOR = "gray60" #' #' Note: The names for the vertex types are taken from the variables \code{PLOT.VERTEX.TYPE.AUTHOR} and #' \code{PLOT.VERTEX.TYPE.ARTIFACT}. The defaults are \code{"Developer"} and \code{TYPE.ARTIFACT}, respectively. +#' All loops are deleted before plotting the network. #' #' @param network the network to plot and print #' @param labels logical indicating whether vertex lables should be plotted [default: TRUE] @@ -74,6 +75,7 @@ plot.network = function(network, labels = TRUE) { #' #' Note: The names for the vertex types are taken from the variables \code{PLOT.VERTEX.TYPE.AUTHOR} and #' \code{PLOT.VERTEX.TYPE.ARTIFACT}. The defaults are \code{"Developer"} and \code{TYPE.ARTIFACT}, respectively. +#' All loops are deleted before plotting the network. #' #' @param network the network to plot and print #' @param labels logical indicating whether vertex lables should be plotted [default: TRUE] @@ -96,6 +98,7 @@ plot.print.network = function(network, labels = TRUE) { #' #' Note: The names for the vertex types are taken from the variables \code{PLOT.VERTEX.TYPE.AUTHOR} and #' \code{PLOT.VERTEX.TYPE.ARTIFACT}. The defaults are \code{"Developer"} and \code{TYPE.ARTIFACT}, respectively. +#' All loops are deleted before plotting the network.
#' #' @param network the network to plot #' @param labels logical indicating whether vertex lables should be plotted [default: TRUE] @@ -113,6 +116,9 @@ plot.get.plot.for.network = function(network, labels = TRUE) { PLOT.VERTEX.TYPES = c(PLOT.VERTEX.TYPE.AUTHOR, PLOT.VERTEX.TYPE.ARTIFACT) names(PLOT.VERTEX.TYPES) = c(TYPE.AUTHOR, TYPE.ARTIFACT) + ## remove loops because of weird behavior when plotting + network = igraph::delete.edges(network, igraph::E(network)[igraph::is.loop(network)]) + ## fix the type attributes (add new ones, also named) network = plot.fix.type.attributes(network) From e95afd01f11355839777c57b9649829d34fa50d7 Mon Sep 17 00:00:00 2001 From: Barbara Eckl Date: Fri, 20 Apr 2018 11:12:27 +0200 Subject: [PATCH 18/51] Update changelog Signed-off-by: Barbara Eckl --- NEWS.md | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/NEWS.md b/NEWS.md index bb3bd807..9a5d4087 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,47 @@ # codeface-extraction-r – Changelog +## unversioned + +### Add relations to authors and artifacts (2f1b4d9b0d6a629163a6dd3111b20930e15fcc13) +- add for new relation types for each edge +- accept vector with more than one relation for `author.relation` and `artifact.relation` in util-conf.R + +### Changes in util-networks (#98, 2f1b4d9b0d6a629163a6dd3111b20930e15fcc13) + +#### Build Networks (2f1b4d9b0d6a629163a6dd3111b20930e15fcc13) +- edit function `get.author.network` to handle more than one relation +- edit function `get.artifact.network` to handle more than one artifact relation +- add loop for handle more than one relation type and merge the resulting vertex lists and edge lists in `get.bipartite.network` +- add loops for different relations for `authors.to.artifacts` in `get.multi.network`, add information about the relation + and merge vertex sets +- add new vertex attribute `kind`, which include `artifact.type` and the `type` of author vertices 
+(7c628fb93eb21f280c7d9da66680f817e107fa24, 7ad49c4ad937c9a6c7398a45179e25d5d5c03faa) +- remove vertex attribute `id` in artifact vertices (7ad49c4ad937c9a6c7398a45179e25d5d5c03faa) + +#### Network and Edge Construction (2f1b4d9b0d6a629163a6dd3111b20930e15fcc13) +- function `construct.network.from.list` split in two functions `construct.edge.list.from.key.value.list` and `construct.network.from.edge.list` +- add function `merge.network.data` und `merge.networks` +- add loop for relations in function `add.edges.for.bipartite.relation` and set the edge attribute `relation` +- add function `create.empty.edge.list` +- add loop for relations in `get.bipartite.relation` and save the type of the relation in an attribute `relation` + +#### Simplify (021ac8b88e9a181364a51e89807df55cb741ed44) +- iterate over the different types of relations and simplfy the subnetworks relating to the `relation` attribute +- the `EDGE.ATTR.HANDLING` of the attribute `relation` is "first" + +### Changes in test suite (784c417c50eb1de5d0143908a390ead6ba22dbbf, 7ad49c4ad937c9a6c7398a45179e25d5d5c03faa, be6ee8cd48dc7692e02b7f1c512870591300fa8a) +- add relation attribute `relation` to result data frames +- remove vertex attribute `id` +- write new tests for networks with more than one relation type + +### Changes in util-plot (b55d3e84a5f9b122dacd0ee52784d930f22d1f4b, f190ca130a15a82e5eed836e9ffc53b8a34aac20) +- colors of the edges depending on the relation type +- line shape depend on the edge type (inter or intra) +- change colors of edges +- remove colors from `plot.fix.type.attributes` +- use palette 'viridis' +- different colors for artifacts + ## unversioned From ef094eb7e94691e2a29a8e0f598e514947ebd6b8 Mon Sep 17 00:00:00 2001 From: Barbara Eckl Date: Thu, 26 Apr 2018 13:34:51 +0200 Subject: [PATCH 19/51] Minor fixes from review in PR #115 Adding some more comments. Include a new section in 'README.md' for attributes. 
Signed-off-by: Barbara Eckl --- NEWS.md | 8 +- README.md | 20 +++- tests/test-networks-author.R | 6 +- tests/test-networks-multi-relation.R | 161 ++++++++++++++++----------- tests/test-networks-multi.R | 2 +- util-networks.R | 47 ++++---- util-plot.R | 14 +-- 7 files changed, 146 insertions(+), 112 deletions(-) diff --git a/NEWS.md b/NEWS.md index 9a5d4087..6e1ccda8 100644 --- a/NEWS.md +++ b/NEWS.md @@ -11,8 +11,8 @@ #### Build Networks (2f1b4d9b0d6a629163a6dd3111b20930e15fcc13) - edit function `get.author.network` to handle more than one relation - edit function `get.artifact.network` to handle more than one artifact relation -- add loop for handle more than one relation type and merge the resulting vertex lists and edge lists in `get.bipartite.network` -- add loops for different relations for `authors.to.artifacts` in `get.multi.network`, add information about the relation +- handle more than one relation type and merge the resulting vertex lists and edge lists in `get.bipartite.network` +- enable for different relations for `authors.to.artifacts` in `get.multi.network`, add information about the relation and merge vertex sets - add new vertex attribute `kind`, which include `artifact.type` and the `type` of author vertices (7c628fb93eb21f280c7d9da66680f817e107fa24, 7ad49c4ad937c9a6c7398a45179e25d5d5c03faa) @@ -21,9 +21,9 @@ #### Network and Edge Construction (2f1b4d9b0d6a629163a6dd3111b20930e15fcc13) - function `construct.network.from.list` split in two functions `construct.edge.list.from.key.value.list` and `construct.network.from.edge.list` - add function `merge.network.data` und `merge.networks` -- add loop for relations in function `add.edges.for.bipartite.relation` and set the edge attribute `relation` +- handle more than one relation in function `add.edges.for.bipartite.relation` and set the edge attribute `relation` - add function `create.empty.edge.list` -- add loop for relations in `get.bipartite.relation` and save the type of the relation in an 
attribute `relation` +- enable for different relations in `get.bipartite.relation` and save the type of the relation in an attribute `relation` #### Simplify (021ac8b88e9a181364a51e89807df55cb741ed44) - iterate over the different types of relations and simplfy the subnetworks relating to the `relation` attribute diff --git a/README.md b/README.md index b8cef20c..b56c847a 100644 --- a/README.md +++ b/README.md @@ -43,6 +43,7 @@ When selecting a version to work with, you should consider the following points: - `ggraph`: For plotting of networks (needs `udunits2` system library, e.g., `libudunits2-dev` on Ubuntu!) - `markovchain`: For core/peripheral transition probabilities - `lubridate`: For convenient date conversion and parsing +- `viridis`: For plotting of networks with nice colours ## How-To @@ -239,7 +240,6 @@ Updates to the parameters can be done by calling `NetworkConf$update.variables(. * The list of edge-attribute names and information * a subset of the following as a single vector: - timestamp information: *`"date"`*, `"date.offset"` - - relation information: *`"relation"`* (possible values: [`"mail"`,`"issue"`,`"cochange"`, `"callgraph"`]) - author information: `"author.name"`, `"author.email"` - committer information: `"committer.date"`, `"committer.name"`, `"committer.email"` - e-mail information: *`"message.id"`*, *`"thread"`*, `"subject"` @@ -249,11 +249,6 @@ Updates to the parameters can be done by calling `NetworkConf$update.variables(. * **Note**: `"date"` is always included as this information is needed for several parts of the library, e.g., time-based splitting. * **Note**: For each type of network that can be built, only the applicable part of the given vector of names is respected. * **Note**: For the edge attributes `"pasta"` and `"synchronicity"`, the project configuration's parameters `pasta` and `synchronicity` need to be set to `TRUE`, respectively (see below). 
-- `vertex.attributes` - - *`"name"`* - - *`"type"`*: [`Author`, `Artifact`] - - `"artifact.type"`: [`file`, `feature`, `function`,`mail`, `issue`,`featureexpression`] - - *`"kind"`*: [`author`,`file`, `feature`, `function`,`mail`, `issue`,`featureexpression`] - `simplify` * Perform edge contraction to retrieve a simplified network * [`TRUE`, *`FALSE`*] @@ -273,6 +268,19 @@ You can also update the `NetworkConf` object at any time by calling `NetworkBuil For more examples, please look in the file `showcase.R`. +### Network properties +- mandatory vertex attributes (`vertex.attributes`) + * *`"name"`* + * *`"type"`*: [`Author`, `Artifact`] + * `"artifact.type"`: [`file`, `feature`, `function`,`mail`, `issue`,`featureexpression`] + * *`"kind"`*: [`author`,`file`, `feature`, `function`,`mail`, `issue`,`featureexpression`] + +- mandatory edge attributes + * *`"type"`*: [`Unipartite`, `Bipartite`] + * *`"relation"`*: [`mail`, `cochange`, `issue`, `callgraph`] + * *`"date"`* + + ## File/Module overview - `util-init.R` diff --git a/tests/test-networks-author.R b/tests/test-networks-author.R index bdde3a75..9e564dde 100644 --- a/tests/test-networks-author.R +++ b/tests/test-networks-author.R @@ -209,9 +209,9 @@ test_that("Network construction of the undirected author-cochange network", { network.built = network.builder$get.author.network() ## vertex attributes - authors = data.frame(name= c("Björn", "Olaf", "Karl", "Thomas"), - kind= TYPE.AUTHOR, - type = TYPE.AUTHOR) + authors = data.frame(name = c("Björn", "Olaf", "Karl", "Thomas"), + kind = TYPE.AUTHOR, + type = TYPE.AUTHOR) ## edge attributes data = data.frame(comb.1. 
= c("Björn", "Björn", "Olaf", "Olaf", "Olaf", "Olaf", "Karl", "Karl"), diff --git a/tests/test-networks-multi-relation.R b/tests/test-networks-multi-relation.R index 73a14802..b8cc3647 100644 --- a/tests/test-networks-multi-relation.R +++ b/tests/test-networks-multi-relation.R @@ -50,24 +50,36 @@ test_that("Network construction of the undirected author network with relation = type = TYPE.AUTHOR) ## edge attributes - data = data.frame(comb.1. = c("Björn", "Björn", "Olaf", "Olaf", "Olaf", "Olaf", "Karl", "Karl", "Björn", "Björn", "Olaf", "Olaf"), - comb.2. = c("Olaf", "Olaf", "Karl", "Karl", "Thomas", "Thomas", "Thomas", "Thomas", "Olaf", "Olaf", "Thomas", "Thomas"), - date = get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 16:00:45", "2016-07-12 16:05:41", "2016-07-12 16:06:10", - "2016-07-12 16:05:41", "2016-07-12 16:06:32", "2016-07-12 16:06:10", "2016-07-12 16:06:32", - "2016-07-12 15:58:40", "2016-07-12 15:58:50", "2016-07-12 16:04:40", "2016-07-12 16:05:37")), - hash = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338", "3a0ed78458b3976243db6829f63eba3eead26774", - "1143db502761379c2bfcecc2007fc34282e7ee61", "3a0ed78458b3976243db6829f63eba3eead26774", "0a1a5c523d835459c42f33e863623138555e2526", - "1143db502761379c2bfcecc2007fc34282e7ee61", "0a1a5c523d835459c42f33e863623138555e2526", rep(NA, 4)), - file = c("test.c", "test.c", "test2.c", "test3.c", "test2.c", "test2.c", "test3.c", "test2.c", rep(NA, 4)), - artifact.type = c("Feature", "Feature", "Feature", "Feature", "Feature", "Feature", "Feature", "Feature", rep("Mail", 4)), - artifact = c("A", "A", "Base_Feature", "Base_Feature", "Base_Feature", "Base_Feature", "Base_Feature", "Base_Feature", rep(NA, 4)), + data = data.frame(comb.1. = c("Björn", "Björn", "Olaf", "Olaf", "Olaf", "Olaf", "Karl", "Karl", # cochange + "Björn", "Björn", "Olaf", "Olaf"), # mail + comb.2. 
= c("Olaf", "Olaf", "Karl", "Karl", "Thomas", "Thomas", "Thomas", "Thomas", # cochange + "Olaf", "Olaf", "Thomas", "Thomas"), # mail + date = get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 16:00:45", "2016-07-12 16:05:41", # cochange + "2016-07-12 16:06:10", "2016-07-12 16:05:41", "2016-07-12 16:06:32", + "2016-07-12 16:06:10", "2016-07-12 16:06:32", + "2016-07-12 15:58:40", "2016-07-12 15:58:50", "2016-07-12 16:04:40", # mail + "2016-07-12 16:05:37")), + hash = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338", + "3a0ed78458b3976243db6829f63eba3eead26774", "1143db502761379c2bfcecc2007fc34282e7ee61", + "3a0ed78458b3976243db6829f63eba3eead26774", "0a1a5c523d835459c42f33e863623138555e2526", + "1143db502761379c2bfcecc2007fc34282e7ee61", "0a1a5c523d835459c42f33e863623138555e2526", + rep(NA, 4)), + file = c("test.c", "test.c", "test2.c", "test3.c", "test2.c", "test2.c", "test3.c", "test2.c", + rep(NA, 4)), + artifact.type = c("Feature", "Feature", "Feature", "Feature", "Feature", "Feature", "Feature", "Feature", #cochange + rep("Mail", 4)), #mail + artifact = c("A", "A", "Base_Feature", "Base_Feature", "Base_Feature", "Base_Feature", "Base_Feature", "Base_Feature", + rep(NA, 4)), weight = 1, type = TYPE.EDGES.INTRA, - relation = c("cochange", "cochange", "cochange", "cochange", "cochange", "cochange", "cochange", "cochange", rep("mail", 4)), - message.id = c(NA, NA, NA, NA, NA, NA, NA, NA, "<4cbaa9ef0802201124v37f1eec8g89a412dfbfc8383a@mail.gmail.com>", + relation = c("cochange", "cochange", "cochange", "cochange", "cochange", "cochange", "cochange", "cochange", + rep("mail", 4)), + message.id = c(NA, NA, NA, NA, NA, NA, NA, NA, + "<4cbaa9ef0802201124v37f1eec8g89a412dfbfc8383a@mail.gmail.com>", "<6784529b0802032245r5164f984l342f0f0dc94aa420@mail.gmail.com>", "<65a1sf31sagd684dfv31@mail.gmail.com>", "<9b06e8d20801220234h659c18a3g95c12ac38248c7e0@mail.gmail.com>"), - thread = c(NA, NA, NA, NA, NA, NA, NA, NA, "", "", "", 
"") + thread = c(NA, NA, NA, NA, NA, NA, NA, NA, + "", "", "", "") ) ## build expected network @@ -95,13 +107,13 @@ test_that("Construction of the bipartite network for the feature artifact with a ## construct expected network: ## 1) construct expected vertices - authors1 = data.frame( + authors1 = data.frame( # author -- issue name = c("Björn", "Karl", "Max", "Olaf", "Thomas"), type = TYPE.AUTHOR, artifact.type = NA, kind = TYPE.AUTHOR ) - authors2 = data.frame( + authors2 = data.frame( # author --mail name = c("Fritz fritz@example.org", "georg", "Hans", "udo"), type = TYPE.AUTHOR, artifact.type = NA, @@ -123,20 +135,22 @@ test_that("Construction of the bipartite network for the feature artifact with a vertices = plyr::rbind.fill(authors1, issues, authors2, threads) ## 2) construct expected edge attributes network.expected.data = data.frame( - from = c("Björn", "Björn", "Björn", "Björn", "Björn", "Björn", "Björn", "Björn", "Karl", "Max", - "Olaf", "Olaf", "Olaf", "Thomas", "Thomas", "Björn", "Björn", "Björn", "Fritz fritz@example.org", - "georg", "Hans", "Hans", "Hans", "Hans", "Hans", "Hans", "Hans", "Olaf", "Olaf", "Thomas", "udo"), - to = c("", "", "", "", "", "", "", "", - "", "", "", "", "", "", "", - "", - "", "", "", "", "", "", "", "", - "", "", "", "", "", "", ""), - date = get.date.from.string(c("2016-07-12 16:05:47", "2016-07-14 17:42:52", "2016-08-31 18:21:48", + from = c("Björn", "Björn", "Björn", "Björn", "Björn", "Björn", "Björn", "Björn", "Karl", "Max", # issue + "Olaf", "Olaf", "Olaf", "Thomas", "Thomas", + "Björn", "Björn", "Björn", "Fritz fritz@example.org", "georg", "Hans", "Hans", "Hans", # mail + "Hans", "Hans", "Hans", "Hans", "Olaf", "Olaf", "Thomas", "udo"), + to = c("", "", "", "", "", "", + "", "", "", "", "", "", + "", "", "", "", + "", "", "", "", "", "", + "", "", "", "", "", "", + "", "", ""), + date = get.date.from.string(c("2016-07-12 16:05:47", "2016-07-14 17:42:52", "2016-08-31 18:21:48", # issue "2016-10-13 15:33:56", 
"2016-12-06 14:03:42", "2016-12-07 15:53:02", "2017-02-20 22:25:41", "2017-03-02 17:30:10", "2013-05-05 23:28:57", "2017-05-23 12:32:39", "2016-07-27 22:25:25", "2016-10-05 01:07:46", "2016-12-07 15:37:21", "2016-07-14 02:03:14", "2016-07-15 08:37:57", - "2004-10-09 18:38:13", "2005-02-09 18:49:49", "2016-07-12 15:58:40", + "2004-10-09 18:38:13", "2005-02-09 18:49:49", "2016-07-12 15:58:40", # mail "2010-07-12 11:05:35", "2010-07-12 12:05:34", "2010-07-12 12:05:40", "2010-07-12 12:05:41", "2010-07-12 12:05:42", "2010-07-12 12:05:43", "2010-07-12 12:05:44", "2010-07-12 12:05:45", "2010-07-12 12:05:46", @@ -145,25 +159,28 @@ test_that("Construction of the bipartite network for the feature artifact with a artifact.type = c(rep("Issue", 15), rep("Mail", 16)), issue.id = c("", "", "", "", "", "", "", "", "", "", - "", "", "", "", "", rep(NA,16)), + "", "", "", "", "", + rep(NA,16)), event.name = c("commented", "commented", "commented", "commented", "commented", "commented", "commented", "commented", "commented", "commented", - "commented", "commented", "commented", "commented", "commented", rep(NA, 16)), + "commented", "commented", "commented", "commented", "commented", + rep(NA, 16)), type = TYPE.EDGES.INTER, weight = 1, relation = c(rep("issue", 15), rep("mail", 16)), message.id = c(rep(NA, 15), "", "<1107974989.17910.6.camel@jmcmullan>", - "<4cbaa9ef0802201124v37f1eec8g89a412dfbfc8383a@mail.gmail.com>", "", - "", "", - "","", + "<4cbaa9ef0802201124v37f1eec8g89a412dfbfc8383a@mail.gmail.com>", + "", "", + "", "","", "", "", "", "", "<6784529b0802032245r5164f984l342f0f0dc94aa420@mail.gmail.com>", "<9b06e8d20801220234h659c18a3g95c12ac38248c7e0@mail.gmail.com>", "<65a1sf31sagd684dfv31@mail.gmail.com>", ""), thread = c(rep(NA, 15), - "", "", "", "", "", "", "", "", - "", "", "", "", "", "", "", "") + "", "", "", "", "", "", + "", "", "", "", "", "", + "", "", "", "") ) ## 3) construct expected network network.expected = igraph::graph.data.frame(network.expected.data, 
directed = net.conf$get.value("author.directed"), vertices = vertices) @@ -199,56 +216,74 @@ test_that("Construction of the multi network for the feature artifact with autho row.names(vertices) = c("Björn", "Olaf", "Karl", "Thomas", "udo", "Fritz fritz@example.org", "georg", "Hans", "Base_Feature", "foo", "A", "", "", "", "") - edges = data.frame(from = c("Björn", "Björn", "Olaf", "Olaf", "Olaf", "Olaf", "Karl", "Karl", "Björn", "Björn", - "Olaf", "Olaf", "Base_Feature", "Base_Feature", "Björn", "Olaf", "Olaf", "Karl", "Thomas", - "Thomas", rep("Björn",8), rep("Olaf",3), "Karl", "Thomas", "Thomas"), - to = c("Olaf", "Olaf", "Karl", "Karl", "Thomas", "Thomas", "Thomas", "Thomas", "Olaf", "Olaf", - "Thomas", "Thomas","foo", "foo", "A", "A", "Base_Feature", "Base_Feature", "Base_Feature", "foo", + edges = data.frame(from = c("Björn", "Björn", "Olaf", "Olaf", "Olaf", "Olaf", "Karl", "Karl", # author cochange + "Björn", "Björn", "Olaf", "Olaf", # author mail + "Base_Feature", "Base_Feature", # artifact cochange + "Björn", "Olaf", "Olaf", "Karl", "Thomas", "Thomas", # bipartite cochange + rep("Björn",8), rep("Olaf",3), "Karl", "Thomas", "Thomas"), # bipartite issue + to = c("Olaf", "Olaf", "Karl", "Karl", "Thomas", "Thomas", "Thomas", "Thomas", # author cochange + "Olaf", "Olaf", "Thomas", "Thomas", # author mail + "foo", "foo", # artifact cochange + "A", "A", "Base_Feature", "Base_Feature", "Base_Feature", "foo", # bipartite cochange "", "", rep("", 4), rep("", 2), "", - rep("", 2), "", rep("", 2)), - date = get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 16:00:45", "2016-07-12 16:05:41", ## author cochange + rep("", 2), "", rep("", 2)), # bipartite issue + date = get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 16:00:45", "2016-07-12 16:05:41", # author cochange "2016-07-12 16:06:10", "2016-07-12 16:05:41", "2016-07-12 16:06:32", "2016-07-12 16:06:10", "2016-07-12 16:06:32", - "2016-07-12 15:58:40", "2016-07-12 15:58:50", "2016-07-12 16:04:40", ## 
author mail + "2016-07-12 15:58:40", "2016-07-12 15:58:50", "2016-07-12 16:04:40", "2016-07-12 16:05:37", - "2016-07-12 16:06:32", "2016-07-12 16:06:32", "2016-07-12 15:58:59", ## bipartite cochange - "2016-07-12 16:00:45", "2016-07-12 16:05:41", "2016-07-12 16:06:10", - "2016-07-12 16:06:32", "2016-07-12 16:06:32", - "2016-07-12 16:05:47", "2016-07-14 17:42:52", "2016-08-31 18:21:48", ## bipartite issue + "2016-07-12 16:06:32", "2016-07-12 16:06:32", # artifact cochange + "2016-07-12 15:58:59", "2016-07-12 16:00:45", "2016-07-12 16:05:41", # bipartite cochange + "2016-07-12 16:06:10", "2016-07-12 16:06:32", "2016-07-12 16:06:32", + "2016-07-12 16:05:47", "2016-07-14 17:42:52", "2016-08-31 18:21:48", # bipartite issue "2016-10-13 15:33:56", "2016-12-06 14:03:42", "2016-12-07 15:53:02", "2017-02-20 22:25:41", "2017-03-02 17:30:10", "2016-07-27 22:25:25", "2016-10-05 01:07:46", "2016-12-07 15:37:21", "2013-05-05 23:28:57", "2016-07-14 02:03:14", "2016-07-15 08:37:57")), - hash = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338", - "3a0ed78458b3976243db6829f63eba3eead26774", "1143db502761379c2bfcecc2007fc34282e7ee61", - "3a0ed78458b3976243db6829f63eba3eead26774", "0a1a5c523d835459c42f33e863623138555e2526", - "1143db502761379c2bfcecc2007fc34282e7ee61", "0a1a5c523d835459c42f33e863623138555e2526", - NA, NA, NA, NA, - "0a1a5c523d835459c42f33e863623138555e2526", "0a1a5c523d835459c42f33e863623138555e2526", - "72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338", - "3a0ed78458b3976243db6829f63eba3eead26774", "1143db502761379c2bfcecc2007fc34282e7ee61", - "0a1a5c523d835459c42f33e863623138555e2526", "0a1a5c523d835459c42f33e863623138555e2526", - rep(NA, 14)), - file = c("test.c", "test.c", "test2.c", "test3.c", "test2.c", "test2.c", "test3.c", "test2.c", + hash = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338", # author cochange + 
"3a0ed78458b3976243db6829f63eba3eead26774", "1143db502761379c2bfcecc2007fc34282e7ee61", + "3a0ed78458b3976243db6829f63eba3eead26774", "0a1a5c523d835459c42f33e863623138555e2526", + "1143db502761379c2bfcecc2007fc34282e7ee61", "0a1a5c523d835459c42f33e863623138555e2526", + NA, NA, NA, NA, # author mail + "0a1a5c523d835459c42f33e863623138555e2526", "0a1a5c523d835459c42f33e863623138555e2526", # artifact cochange + "72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338", # bipartite cochange + "3a0ed78458b3976243db6829f63eba3eead26774", "1143db502761379c2bfcecc2007fc34282e7ee61", + "0a1a5c523d835459c42f33e863623138555e2526", "0a1a5c523d835459c42f33e863623138555e2526", + rep(NA, 14)), # bipartite issue + file = c("test.c", "test.c", "test2.c", "test3.c", "test2.c", "test2.c", "test3.c", "test2.c", # author cochange NA, NA, NA, NA, - "test2.c", "test2.c", "test.c", "test.c", "test2.c", "test3.c", "test2.c", "test2.c", + "test2.c", "test2.c", # artifact cochange + "test.c", "test.c", "test2.c", "test3.c", "test2.c", "test2.c", # bipartite cochange rep(NA, 14)), - artifact.type = c(rep("Feature",8), rep("Mail", 4), rep("Feature",8), rep("Issue", 14)), - artifact = c("A", "A", "Base_Feature", "Base_Feature", "Base_Feature", "Base_Feature", "Base_Feature", "Base_Feature", + artifact.type = c(rep("Feature", 8), + rep("Mail", 4), + rep("Feature", 2), + rep("Feature", 6), + rep("Issue", 14)), + artifact = c("A", "A", "Base_Feature", "Base_Feature", "Base_Feature", "Base_Feature", "Base_Feature", # author cochange + "Base_Feature", rep(NA, 4), - "Base_Feature", "foo", "A", "A", "Base_Feature", "Base_Feature", "Base_Feature", "foo", + "Base_Feature", "foo", # bipartite cochange + "A", "A", "Base_Feature", "Base_Feature", "Base_Feature", "foo", # bipartite cochange rep(NA, 14)), weight = 1, type = c(rep(TYPE.EDGES.INTRA, 14), rep(TYPE.EDGES.INTER, 20)), - relation = c(rep("cochange", 8), rep("mail", 4), rep("cochange", 8), rep("issue", 14)), + relation 
= c(rep("cochange", 8), + rep("mail", 4), + rep("cochange", 2), + rep("cochange", 6), + rep("issue", 14)), message.id = c(rep(NA, 8), "<4cbaa9ef0802201124v37f1eec8g89a412dfbfc8383a@mail.gmail.com>", "<6784529b0802032245r5164f984l342f0f0dc94aa420@mail.gmail.com>", "<65a1sf31sagd684dfv31@mail.gmail.com>", "<9b06e8d20801220234h659c18a3g95c12ac38248c7e0@mail.gmail.com>", rep(NA, 22)), - thread = c(rep(NA, 8), "", "", "", "", rep(NA, 22)), - issue.id = c(rep(NA, 20), "", "","", "", "", + thread = c(rep(NA, 8), + "", "", "", "", + rep(NA, 22)), + issue.id = c(rep(NA, 20), + "", "","", "", "", "", "", "", "", "", "", "", "", ""), event.name = c(rep(NA, 20), rep("commented", 14)) diff --git a/tests/test-networks-multi.R b/tests/test-networks-multi.R index f631700c..f16d6845 100644 --- a/tests/test-networks-multi.R +++ b/tests/test-networks-multi.R @@ -86,7 +86,7 @@ test_that("Construction of the multi network for the feature artifact with autho "foo"), weight = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), type = c(rep(TYPE.EDGES.INTRA, 10), rep(TYPE.EDGES.INTER, 6)), - relation =c(rep("cochange", 16))) + relation = c(rep("cochange", 16))) network.expected = igraph::graph.data.frame(edges, directed = FALSE, vertices = vertices) diff --git a/util-networks.R b/util-networks.R index 2e2a0b83..a7a62a7d 100644 --- a/util-networks.R +++ b/util-networks.R @@ -159,24 +159,24 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", return(private$authors.network.mail) } - author.relation.data = construct.edge.list.from.key.value.list( + author.net.data = construct.edge.list.from.key.value.list( private$proj.data$get.thread2author(), network.conf = private$network.conf, directed = private$network.conf$get.value("author.directed") ) - author.relation = construct.network.from.edge.list( - author.relation.data[["vertices"]], - author.relation.data[["edges"]], + author.net = construct.network.from.edge.list( + author.net.data[["vertices"]], + author.net.data[["edges"]], network.conf = 
private$network.conf, directed = private$network.conf$get.value("author.directed") ) - igraph::E(author.relation)$type = TYPE.EDGES.INTRA + igraph::E(author.net)$type = TYPE.EDGES.INTRA ## store network - private$authors.network.mail = author.relation + private$authors.network.mail = author.net logging::logdebug("get.author.network.mail: finished.") - return(author.relation) + return(author.net) }, ##get the issue based author relation as network @@ -193,19 +193,19 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", network.conf = private$network.conf, directed = private$network.conf$get.value("author.directed") ) - author.relation = construct.network.from.edge.list( + author.net = construct.network.from.edge.list( author.net.data[["vertices"]], author.net.data[["edges"]], network.conf = private$network.conf, directed = private$network.conf$get.value("author.directed") ) - igraph::E(author.relation)$type = TYPE.EDGES.INTRA + igraph::E(author.net)$type = TYPE.EDGES.INTRA - private$authors.network.issue = author.relation + private$authors.network.issue = author.net logging::logdebug("get.author.network.issue: finished.") - return(author.relation) + return(author.net) }, ## * * artifact networks ------------------------------------------- @@ -653,10 +653,12 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", author.vertices = names(net.to.net) } + ## select all artifact vertices artifact.vertices = unique(unlist(lapply(net.to.net, function(df) { return(df[["data.vertices"]]) }))) + ## join author and artifact vertices to the list of all vertices in the network vertices = data.frame( name = c(author.vertices, artifact.vertices), type = c(rep(TYPE.AUTHOR, length(author.vertices)), rep(TYPE.ARTIFACT, length(artifact.vertices))), @@ -666,7 +668,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", return(vertices) }) - ## Merge network data + ## Merge network data and construct a vertex-only network for now vertex.data = merge.network.data(vertex.data = vertex.data, edge.data = 
NULL)[["vertices"]] vertex.network = igraph::graph.data.frame( d = create.empty.edge.list(), @@ -741,7 +743,6 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", ## configured by 'author.only.committers', for example), thus, we need to remove any authors ## from the author--artifact relation that are superfluous authors.from.net = igraph::get.vertex.attribute(authors.net, "name") - ## save relation and intersect the author vertices from the author network and the ## bipartite networks authors.to.artifacts = mapply(function(a2a.rel, relation.type) { @@ -1052,7 +1053,7 @@ merge.network.data = function(vertex.data, edge.data) { )) } -#' Merges a list of network to one big network +#' Merges a list of networks to one big network #' #' @param networks the list of networks #' @@ -1148,7 +1149,7 @@ create.empty.network = function(directed = TRUE) { #' #' @return the new empty data frame as edge list create.empty.edge.list = function() { - return(data.frame(from = character(0),to = character(0))) + return(data.frame(from = character(0), to = character(0))) } @@ -1164,21 +1165,22 @@ simplify.network = function(network) { logging::logdebug("simplify.network: starting.") logging::loginfo("Simplifying network.") - ## data frame of the network - edge.data = igraph::as_data_frame(network, what = "edges") - vertex.data = igraph::as_data_frame(network, what = "vertices") + if (length(unique(igraph::get.edge.attribute(network, "relation"))) > 1) { + ## data frame of the network + edge.data = igraph::as_data_frame(network, what = "edges") + vertex.data = igraph::as_data_frame(network, what = "vertices") - ## select edges of one relation, build the network and simplify this network - if ("relation" %in% colnames(edge.data)) { + ## select edges of one relation, build the network and simplify this network networks = lapply(unique(edge.data[["relation"]]), function(relation) { network.data = edge.data[edge.data[["relation"]] == relation, ] - net = 
igraph::graph_from_data_frame(d=network.data, + net = igraph::graph_from_data_frame(d = network.data, vertices = vertex.data, directed = igraph::is.directed(network)) ## simplify networks (contract edges and remove loops) net = igraph::simplify(net, edge.attr.comb = EDGE.ATTR.HANDLING, remove.loops = TRUE) + ## TODO perform simplification on edge list? return(net) }) @@ -1194,9 +1196,6 @@ simplify.network = function(network) { #' Simplify a list of networks. #' -#' Important notice: This function only correctly works for networks with only one artifact and one -#' author relation. -#' #' @param networks the list of networks #' #' @return the simplified networks diff --git a/util-plot.R b/util-plot.R index 24b607c0..df69980b 100644 --- a/util-plot.R +++ b/util-plot.R @@ -53,7 +53,7 @@ PLOT.VERTEX.LABEL.COLOR = "gray60" #' #' Note: The names for the vertex types are taken from the variables \code{PLOT.VERTEX.TYPE.AUTHOR} and #' \code{PLOT.VERTEX.TYPE.ARTIFACT}. The defaults are \code{"Developer"} and \code{TYPE.ARTIFACT}, respectively. -#' All loops are deleted before plotting the network. +#' All loops are deleted for plotting the network. #' #' @param network the network to plot and print #' @param labels logical indicating whether vertex lables should be plotted [default: TRUE] @@ -75,7 +75,7 @@ plot.network = function(network, labels = TRUE) { #' #' Note: The names for the vertex types are taken from the variables \code{PLOT.VERTEX.TYPE.AUTHOR} and #' \code{PLOT.VERTEX.TYPE.ARTIFACT}. The defaults are \code{"Developer"} and \code{TYPE.ARTIFACT}, respectively. -#' All loops are deleted before plotting the network. +#' All loops are deleted for plotting the network. 
#' #' @param network the network to plot and print #' @param labels logical indicating whether vertex lables should be plotted [default: TRUE] @@ -98,7 +98,7 @@ plot.print.network = function(network, labels = TRUE) { #' #' Note: The names for the vertex types are taken from the variables \code{PLOT.VERTEX.TYPE.AUTHOR} and #' \code{PLOT.VERTEX.TYPE.ARTIFACT}. The defaults are \code{"Developer"} and \code{TYPE.ARTIFACT}, respectively. -#' All loops are deleted before plotting the network. +#' All loops are deleted for plotting the network. #' #' @param network the network to plot #' @param labels logical indicating whether vertex lables should be plotted [default: TRUE] @@ -167,14 +167,6 @@ plot.get.plot.for.network = function(network, labels = TRUE) { # ggraph::scale_edge_colour_viridis(name = "Relations", option = "magma", discrete = TRUE, # end = 0.85, begin = 0, direction = 1) + - # ## scale vertices (colors and styles) - # ggplot2::scale_shape_manual("Vertices", values = PLOT.SHAPE.VERTEX, labels = PLOT.VERTEX.TYPES) + - # ggplot2::scale_color_manual("Vertices", values = colors.vertex, labels = PLOT.VERTEX.TYPES) + - # - # ## scale edges (colors and styles) - # ggraph::scale_edge_linetype_manual("Relation Types", values = PLOT.SHAPE.EDGE) + - # ggraph::scale_edge_colour_manual("Relations", values = colors.edge.relation) + - ## theme ggplot2::theme_light() + ggplot2::guides( From d791df8e2c41314f86c36b3af566141e7713f46c Mon Sep 17 00:00:00 2001 From: Barbara Eckl Date: Thu, 26 Apr 2018 17:05:35 +0200 Subject: [PATCH 20/51] Change plot function: edge width depends on edge weight Signed-off-by: Barbara Eckl --- util-plot.R | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/util-plot.R b/util-plot.R index df69980b..e301b104 100644 --- a/util-plot.R +++ b/util-plot.R @@ -134,8 +134,7 @@ plot.get.plot.for.network = function(network, labels = TRUE) { if (igraph::ecount(network) > 0) { p = p + ggraph::geom_edge_fan( - mapping = ggplot2::aes(colour = 
relation, linetype = edge.type), - width = 0.5, + mapping = ggplot2::aes(colour = relation, linetype = edge.type, width = 0.3 + 0.5 * log(weight)), end_cap = ggraph::circle(PLOT.VERTEX.SIZE + 3, "pt"), start_cap = ggraph::circle(PLOT.VERTEX.SIZE + 3, "pt"), arrow = if (igraph::is.directed(network)) { From 86f39c588c29a9f39f3857ac1d2de1eabac2df14 Mon Sep 17 00:00:00 2001 From: Claus Hunsen Date: Thu, 26 Apr 2018 18:27:42 +0200 Subject: [PATCH 21/51] Remove unneeded browser() statements Signed-off-by: Claus Hunsen --- util-conf.R | 2 -- util-networks.R | 1 - 2 files changed, 3 deletions(-) diff --git a/util-conf.R b/util-conf.R index bd22c4f0..4e8ed2d9 100644 --- a/util-conf.R +++ b/util-conf.R @@ -90,7 +90,6 @@ Conf = R6::R6Class("Conf", #' - allowed, and #' - allowed.number. check.value = function(value, name) { - # browser(expr = name == "revisions") if (!exists(name, where = private[["attributes"]])) { result = c(existing = FALSE) } else { @@ -810,7 +809,6 @@ get.configuration.string = function(conf, title = deparse(substitute(conf))) { if (len == 1) { field = paste0(" = ", paste(struct, collapse = ", "), "\n") } else if (len > 7) { - # browser() entries = paste0(indentation, indentation.item, indentation.item, struct) field = paste0( " = [\n", diff --git a/util-networks.R b/util-networks.R index a7a62a7d..447d6cf6 100644 --- a/util-networks.R +++ b/util-networks.R @@ -764,7 +764,6 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", artifacts.all = plyr::rbind.fill(artifacts) artifacts.from.net = igraph::get.vertex.attribute(artifacts.net, "name") - # browser(expr = length(setdiff(artifacts.all, artifacts.from.net)) > 0) artifacts.to.add = setdiff(artifacts.all[["data.vertices"]], artifacts.from.net) artifacts.to.add.kind = artifacts.all[ artifacts.all[["data.vertices"]] %in% artifacts.to.add, "artifact.type" ] artifacts.net = artifacts.net + igraph::vertices(artifacts.to.add, type = TYPE.ARTIFACT, From 84516fcbae9dad37db6721c06345dc549e121c20 Mon Sep 17 00:00:00 
2001 From: Claus Hunsen Date: Thu, 26 Apr 2018 19:07:45 +0200 Subject: [PATCH 22/51] Streamline and improve vertex and edge attributes In this patch, we improve the recently introduced vertex and edge attributes. Details are listed below. Changes regarding vertex attributes: - The attribute 'type' is either TYPE.AUTHOR or TYPE.ARTIFACT. - The attribute 'kind' now contains solely artifact types (such as "Feature" or "Function") and "Author" as values. - The attribute 'artifact.type' is removed completely as it has been partly redundant to the 'kind' attribute. Changes regarding edge attributes: - The attribute 'type' is either TYPE.EDGES.INTRA or TYPE.EDGES.INTER. - The attribute 'artifact.type' is sourced solely from the column 'artifact.type', present in all data sources. - The attribute 'relation' is filled by the NetworkConf values for 'author.relation' or 'artifact.relation', respectively. Additionally, some TODO items are added or adapted. Signed-off-by: Claus Hunsen Signed-off-by: Barbara Eckl --- NEWS.md | 2 +- README.md | 20 ++-- tests/test-networks-artifact.R | 25 ++--- tests/test-networks-author.R | 55 ++++++----- tests/test-networks-bipartite.R | 130 ++++++++++++------------ tests/test-networks-multi-relation.R | 44 ++++----- tests/test-networks-multi.R | 67 ++++++------- util-conf.R | 16 ++- util-networks.R | 142 ++++++++++++++++++--------- 9 files changed, 282 insertions(+), 219 deletions(-) diff --git a/NEWS.md b/NEWS.md index 6e1ccda8..74333b3d 100644 --- a/NEWS.md +++ b/NEWS.md @@ -14,7 +14,7 @@ - handle more than one relation type and merge the resulting vertex lists and edge lists in `get.bipartite.network` - enable for different relations for `authors.to.artifacts` in `get.multi.network`, add information about the relation and merge vertex sets -- add new vertex attribute `kind`, which include `artifact.type` and the `type` of author vertices +- add new vertex attribute `kind` (7c628fb93eb21f280c7d9da66680f817e107fa24, 
7ad49c4ad937c9a6c7398a45179e25d5d5c03faa) - remove vertex attribute `id` in artifact vertices (7ad49c4ad937c9a6c7398a45179e25d5d5c03faa) diff --git a/README.md b/README.md index b56c847a..086b68f0 100644 --- a/README.md +++ b/README.md @@ -240,13 +240,14 @@ Updates to the parameters can be done by calling `NetworkConf$update.variables(. * The list of edge-attribute names and information * a subset of the following as a single vector: - timestamp information: *`"date"`*, `"date.offset"` + - general information: *`"artifact.type"`* - author information: `"author.name"`, `"author.email"` - committer information: `"committer.date"`, `"committer.name"`, `"committer.email"` - e-mail information: *`"message.id"`*, *`"thread"`*, `"subject"` - - commit information: *`"hash"`*, *`"file"`*, *`"artifact.type"`*, *`"artifact"`*, `"changed.files"`, `"added.lines"`, `"deleted.lines"`, `"diff.size"`, `"artifact.diff.size"`, `"synchronicity"` + - commit information: *`"hash"`*, *`"file"`*, *`"artifact"`*, `"changed.files"`, `"added.lines"`, `"deleted.lines"`, `"diff.size"`, `"artifact.diff.size"`, `"synchronicity"` - PaStA information: `"pasta"`, - issue information: *`"issue.id"`*, *`"event.name"`*, `"issue.state"`, `"creation.date"`, `"closing.date"`, `"is.pull.request"` - * **Note**: `"date"` is always included as this information is needed for several parts of the library, e.g., time-based splitting. + * **Note**: `"date"` and `"artifact.type"` are always included as this information is needed for several parts of the library, e.g., time-based splitting. * **Note**: For each type of network that can be built, only the applicable part of the given vector of names is respected. * **Note**: For the edge attributes `"pasta"` and `"synchronicity"`, the project configuration's parameters `pasta` and `synchronicity` need to be set to `TRUE`, respectively (see below). - `simplify` @@ -269,15 +270,16 @@ For more examples, please look in the file `showcase.R`. 
### Network properties -- mandatory vertex attributes (`vertex.attributes`) - * *`"name"`* - * *`"type"`*: [`Author`, `Artifact`] - * `"artifact.type"`: [`file`, `feature`, `function`,`mail`, `issue`,`featureexpression`] - * *`"kind"`*: [`author`,`file`, `feature`, `function`,`mail`, `issue`,`featureexpression`] -- mandatory edge attributes +- Mandatory vertex attributes + * *`"type"`*: [`"Author"`, `"Artifact"`] + * *`"kind"`*: [`"Author"`,`"File"`, `"Feature"`, `"Function"`, `"Mail"`, `"Issue"`,`"FeatureExpression"`] + * *`"name"`* + +- Mandatory edge attributes * *`"type"`*: [`Unipartite`, `Bipartite`] - * *`"relation"`*: [`mail`, `cochange`, `issue`, `callgraph`] + * *`"artifact.type"`*: [`"Author"`,`"File"`, `"Feature"`, `"Function"`, `"Mail"`, `"Issue"`,`"FeatureExpression"`] + * *`"relation"`*: [`mail`, `cochange`, `issue`, `callgraph`] (from `artifact.relation` and `author.relation` attributes in the `NetworkConf` class) * *`"date"`* diff --git a/tests/test-networks-artifact.R b/tests/test-networks-artifact.R index f7080584..d2f1fa5a 100644 --- a/tests/test-networks-artifact.R +++ b/tests/test-networks-artifact.R @@ -49,20 +49,21 @@ test_that("Network construction of the undirected artifact-cochange network", { ## vertex attributes vertices = data.frame(name = c("Base_Feature", "foo", "A"), - artifact.type = "feature", - kind = "feature", + kind = "Feature", type = TYPE.ARTIFACT) - data = data.frame(from = c("Base_Feature", "Base_Feature"), - to = c("foo", "foo"), - date = get.date.from.string(c("2016-07-12 16:06:32", "2016-07-12 16:06:32")), - hash = c("0a1a5c523d835459c42f33e863623138555e2526", "0a1a5c523d835459c42f33e863623138555e2526"), - file = c("test2.c", "test2.c"), - artifact.type = c("Feature", "Feature"), - artifact = c("Base_Feature", "foo"), - weight = c(1, 1), - relation = c("cochange", "cochange"), - type = TYPE.EDGES.INTRA) + data = data.frame( + from = c("Base_Feature", "Base_Feature"), + to = c("foo", "foo"), + date = 
get.date.from.string(c("2016-07-12 16:06:32", "2016-07-12 16:06:32")), + artifact.type = c("Feature", "Feature"), + hash = c("0a1a5c523d835459c42f33e863623138555e2526", "0a1a5c523d835459c42f33e863623138555e2526"), + file = c("test2.c", "test2.c"), + artifact = c("Base_Feature", "foo"), + weight = 1, + type = TYPE.EDGES.INTRA, + relation = "cochange" + ) ## build expected network network.expected = igraph::graph.data.frame(data, directed = FALSE, vertices = vertices) diff --git a/tests/test-networks-author.R b/tests/test-networks-author.R index 9e564dde..90441657 100644 --- a/tests/test-networks-author.R +++ b/tests/test-networks-author.R @@ -216,17 +216,20 @@ test_that("Network construction of the undirected author-cochange network", { ## edge attributes data = data.frame(comb.1. = c("Björn", "Björn", "Olaf", "Olaf", "Olaf", "Olaf", "Karl", "Karl"), comb.2. = c("Olaf", "Olaf", "Karl", "Karl", "Thomas", "Thomas", "Thomas", "Thomas"), - date = get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 16:00:45", "2016-07-12 16:05:41", "2016-07-12 16:06:10", - "2016-07-12 16:05:41", "2016-07-12 16:06:32", "2016-07-12 16:06:10", "2016-07-12 16:06:32")), - hash = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338", "3a0ed78458b3976243db6829f63eba3eead26774", - "1143db502761379c2bfcecc2007fc34282e7ee61", "3a0ed78458b3976243db6829f63eba3eead26774", "0a1a5c523d835459c42f33e863623138555e2526", + date = get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 16:00:45", "2016-07-12 16:05:41", + "2016-07-12 16:06:10", "2016-07-12 16:05:41", "2016-07-12 16:06:32", + "2016-07-12 16:06:10", "2016-07-12 16:06:32")), + artifact.type = "Feature", + hash = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338", + "3a0ed78458b3976243db6829f63eba3eead26774", "1143db502761379c2bfcecc2007fc34282e7ee61", + "3a0ed78458b3976243db6829f63eba3eead26774", "0a1a5c523d835459c42f33e863623138555e2526", 
"1143db502761379c2bfcecc2007fc34282e7ee61", "0a1a5c523d835459c42f33e863623138555e2526"), file = c("test.c", "test.c", "test2.c", "test3.c", "test2.c", "test2.c", "test3.c", "test2.c"), - artifact.type = c("Feature", "Feature", "Feature", "Feature", "Feature", "Feature", "Feature", "Feature"), - artifact = c("A", "A", "Base_Feature", "Base_Feature", "Base_Feature", "Base_Feature", "Base_Feature", "Base_Feature"), - weight = c(1, 1, 1, 1, 1, 1, 1, 1), + artifact = c("A", "A", "Base_Feature", "Base_Feature", "Base_Feature", "Base_Feature", + "Base_Feature", "Base_Feature"), + weight = 1, type = TYPE.EDGES.INTRA, - relation = c("cochange", "cochange", "cochange", "cochange", "cochange", "cochange", "cochange", "cochange") + relation = "cochange" ) ## build expected network @@ -260,14 +263,14 @@ test_that("Network construction of the directed author-cochange network", { to = c("Björn", "Olaf", "Olaf", "Karl"), date = get.date.from.string(c("2016-07-12 16:00:45", "2016-07-12 16:06:10", "2016-07-12 16:06:32", "2016-07-12 16:06:32")), + artifact.type = "Feature", hash = c("5a5ec9675e98187e1e92561e1888aa6f04faa338", "1143db502761379c2bfcecc2007fc34282e7ee61", "0a1a5c523d835459c42f33e863623138555e2526", "0a1a5c523d835459c42f33e863623138555e2526"), file = c("test.c", "test3.c", "test2.c", "test2.c"), - artifact.type = c("Feature", "Feature", "Feature", "Feature"), artifact = c("A", "Base_Feature", "Base_Feature", "Base_Feature"), - weight = c(1, 1, 1, 1), + weight = 1, type = TYPE.EDGES.INTRA, - relation = c("cochange", "cochange", "cochange", "cochange") + relation = "cochange" ) ## build expected network @@ -297,18 +300,24 @@ test_that("Network construction of the undirected simplified author-cochange net type = TYPE.AUTHOR) ## edge attributes - data = data.frame(from = c("Björn", "Olaf", "Olaf", "Karl"), - to = c("Olaf", "Karl", "Thomas", "Thomas"), - date = I(list(c(1468339139, 1468339245), c(1468339541, 1468339570), c(1468339541, 1468339592), c(1468339570, 1468339592))), 
- hash = I(list(c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338"), c("3a0ed78458b3976243db6829f63eba3eead26774", - "1143db502761379c2bfcecc2007fc34282e7ee61"), c("3a0ed78458b3976243db6829f63eba3eead26774", "0a1a5c523d835459c42f33e863623138555e2526"), - c("1143db502761379c2bfcecc2007fc34282e7ee61", "0a1a5c523d835459c42f33e863623138555e2526"))), - file = I(list(c("test.c", "test.c"), c("test2.c", "test3.c"), c("test2.c", "test2.c"), c("test3.c", "test2.c"))), - artifact.type = I(list(c("Feature", "Feature"), c("Feature", "Feature"), c("Feature", "Feature"), c("Feature", "Feature"))), - artifact = I(list(c("A", "A"), c("Base_Feature", "Base_Feature"), c("Base_Feature", "Base_Feature"), c("Base_Feature", "Base_Feature"))), - weight = c(2, 2, 2, 2), - type = TYPE.EDGES.INTRA, - relation = c("cochange", "cochange", "cochange", "cochange") + data = data.frame( + from = c("Björn", "Olaf", "Olaf", "Karl"), + to = c("Olaf", "Karl", "Thomas", "Thomas"), + date = I(list(c(1468339139, 1468339245), c(1468339541, 1468339570), c(1468339541, 1468339592), + c(1468339570, 1468339592))), + artifact.type = I(list(c("Feature", "Feature"), c("Feature", "Feature"), c("Feature", "Feature"), + c("Feature", "Feature"))), + hash = I(list( + c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338"), + c("3a0ed78458b3976243db6829f63eba3eead26774", "1143db502761379c2bfcecc2007fc34282e7ee61"), + c("3a0ed78458b3976243db6829f63eba3eead26774", "0a1a5c523d835459c42f33e863623138555e2526"), + c("1143db502761379c2bfcecc2007fc34282e7ee61", "0a1a5c523d835459c42f33e863623138555e2526"))), + file = I(list(c("test.c", "test.c"), c("test2.c", "test3.c"), c("test2.c", "test2.c"), c("test3.c", "test2.c"))), + artifact = I(list(c("A", "A"), c("Base_Feature", "Base_Feature"), c("Base_Feature", "Base_Feature"), + c("Base_Feature", "Base_Feature"))), + weight = 2, + type = TYPE.EDGES.INTRA, + relation = "cochange" ) ## build expected network 
diff --git a/tests/test-networks-bipartite.R b/tests/test-networks-bipartite.R index 10079997..30d17840 100644 --- a/tests/test-networks-bipartite.R +++ b/tests/test-networks-bipartite.R @@ -52,14 +52,12 @@ test_that("Construction of the bipartite network for the feature artifact with a authors = data.frame( name = c("Björn", "Karl", "Olaf", "Thomas"), type = TYPE.AUTHOR, - artifact.type = NA, kind = TYPE.AUTHOR ) artifacts = data.frame( name = c("A", "Base_Feature", "foo"), type = TYPE.ARTIFACT, - artifact.type = "feature", - kind = "feature" + kind = "Feature" ) vertices = plyr::rbind.fill(authors, artifacts) ## 2) construct expected edge attributes @@ -68,18 +66,19 @@ test_that("Construction of the bipartite network for the feature artifact with a to = c("A", "Base_Feature", "A", "Base_Feature", "Base_Feature", "foo"), date = get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 16:06:10", "2016-07-12 16:00:45", "2016-07-12 16:05:41", "2016-07-12 16:06:32", "2016-07-12 16:06:32")), + artifact.type = c("Feature", "Feature", "Feature", "Feature", "Feature", "Feature"), hash = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "1143db502761379c2bfcecc2007fc34282e7ee61", "5a5ec9675e98187e1e92561e1888aa6f04faa338", "3a0ed78458b3976243db6829f63eba3eead26774", "0a1a5c523d835459c42f33e863623138555e2526", "0a1a5c523d835459c42f33e863623138555e2526"), file = c("test.c", "test3.c", "test.c", "test2.c", "test2.c", "test2.c"), - artifact.type = c("Feature", "Feature", "Feature", "Feature", "Feature", "Feature"), artifact = c("A", "Base_Feature", "A", "Base_Feature", "Base_Feature", "foo"), - type = TYPE.EDGES.INTER, weight = 1, + type = TYPE.EDGES.INTER, relation = "cochange" ) ## 3) construct expected network - network.expected = igraph::graph.data.frame(network.expected.data, directed = net.conf$get.value("author.directed"), vertices = vertices) + network.expected = igraph::graph.data.frame(network.expected.data, vertices = vertices, + directed = 
net.conf$get.value("author.directed")) expect_true(igraph::identical_graphs(network.built, network.expected)) }) @@ -106,14 +105,12 @@ test_that("Construction of the bipartite network for the file artifact with auth authors = data.frame( name = c("Björn", "Olaf", "Thomas"), type = TYPE.AUTHOR, - artifact.type = NA, kind = TYPE.AUTHOR ) artifacts = data.frame( name = c("test.c", "test2.c"), type = TYPE.ARTIFACT, - artifact.type = "file", - kind = "file" + kind = "File" ) vertices = plyr::rbind.fill(authors, artifacts) ## 2) construct expected edge attributes @@ -122,20 +119,21 @@ test_that("Construction of the bipartite network for the file artifact with auth to = c("test.c", "test.c", "test2.c", "test2.c"), date = get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 16:00:45", "2016-07-12 16:05:41", "2016-07-12 16:06:32")), + artifact.type = c("File", "File", "File", "File"), hash = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338", "3a0ed78458b3976243db6829f63eba3eead26774", "0a1a5c523d835459c42f33e863623138555e2526"), file = c("test.c", "test.c", "test2.c", "test2.c"), - artifact.type = c("File", "File", "File", "File"), artifact = c("test.c", "test.c", "test2.c", "test2.c"), + weight = 1, type = TYPE.EDGES.INTER, - weight = c(1, 1, 1, 1), - relation = c("cochange", "cochange", "cochange", "cochange") + relation = "cochange" ) ## 3) construct expected network - network.expected = igraph::graph.data.frame(network.expected.data, directed = net.conf$get.value("author.directed"), vertices = vertices) + network.expected = igraph::graph.data.frame(network.expected.data, vertices = vertices, + directed = net.conf$get.value("author.directed")) expect_true(igraph::identical_graphs(network.built, network.expected)) - }) +}) test_that("Construction of the bipartite network for the function artifact with author.relation = 'cochange' and artifact. 
@@ -159,14 +157,12 @@ test_that("Construction of the bipartite network for the function artifact with authors = data.frame( name = c("Björn", "Olaf", "Thomas"), type = TYPE.AUTHOR, - artifact.type = NA, kind = TYPE.AUTHOR ) artifacts = data.frame( name = c("File_Level"), type = TYPE.ARTIFACT, - artifact.type = "function", - kind ="function" + kind ="Function" ) vertices = plyr::rbind.fill(authors, artifacts) ## 2) construct expected edge attributes @@ -175,19 +171,20 @@ test_that("Construction of the bipartite network for the function artifact with to = c("File_Level", "File_Level", "File_Level", "File_Level"), date = get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 16:00:45", "2016-07-12 16:05:41", "2016-07-12 16:06:32")), + artifact.type = c("Function", "Function", "Function", "Function"), hash = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338", "3a0ed78458b3976243db6829f63eba3eead26774", "0a1a5c523d835459c42f33e863623138555e2526"), file = c("test.c", "test.c", "test2.c", "test2.c"), - artifact.type = c("Function", "Function", "Function", "Function"), artifact = c("File_Level", "File_Level", "File_Level", "File_Level"), + weight = 1, type = TYPE.EDGES.INTER, - weight = c(1, 1, 1, 1), - relation = c("cochange", "cochange", "cochange", "cochange")) + relation = "cochange" + ) ## 3) construct expected network network.expected = igraph::graph.data.frame(network.expected.data, directed = net.conf$get.value("author.directed"), vertices = vertices) expect_true(igraph::identical_graphs(network.built, network.expected)) - }) +}) test_that("Construction of the bipartite network for the featureexpression artifact with author.relation = 'cochange' and artifact. 
relation = 'cochange'.", { @@ -210,14 +207,12 @@ test_that("Construction of the bipartite network for the featureexpression artif authors = data.frame( name = c("Björn", "Olaf"), type = TYPE.AUTHOR, - artifact.type = NA, kind = TYPE.AUTHOR ) artifacts = data.frame( name = c("defined(A)", "Base_Feature"), type = TYPE.ARTIFACT, - artifact.type = "featureexpression", - kind = "featureexpression" + kind = "FeatureExpression" ) vertices = plyr::rbind.fill(authors, artifacts) ## 2) construct expected edge attributes @@ -226,19 +221,21 @@ test_that("Construction of the bipartite network for the featureexpression artif to = c("defined(A)", "defined(A)", "Base_Feature"), date = get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 16:00:45", "2016-07-12 16:05:41")), + artifact.type = c("FeatureExpression", "FeatureExpression", "FeatureExpression"), hash = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338", "3a0ed78458b3976243db6829f63eba3eead26774"), file = c("test.c", "test.c", "test2.c"), - artifact.type = c("FeatureExpression", "FeatureExpression", "FeatureExpression"), artifact = c("defined(A)", "defined(A)", "Base_Feature"), + weight = 1, type = TYPE.EDGES.INTER, - weight = c(1, 1, 1), - relation = c("cochange", "cochange", "cochange")) + relation = "cochange" + ) ## 3) construct expected network - network.expected = igraph::graph.data.frame(network.expected.data, directed = net.conf$get.value("author.directed"), vertices = vertices) + network.expected = igraph::graph.data.frame(network.expected.data, vertices = vertices, + directed = net.conf$get.value("author.directed")) expect_true(igraph::identical_graphs(network.built, network.expected)) - }) +}) test_that("Construction of the bipartite network for the feature artifact with author.relation = 'cochange' and artifact. 
relation = 'issue'.", { @@ -261,14 +258,12 @@ test_that("Construction of the bipartite network for the feature artifact with a authors = data.frame( name =c("Björn", "Karl", "Max", "Olaf", "Thomas"), type = TYPE.AUTHOR, - artifact.type = NA, kind = TYPE.AUTHOR ) artifacts = data.frame( name = c("", "", "", ""), type = TYPE.ARTIFACT, - artifact.type = "issue", - kind = "issue" + kind = "Issue" ) vertices = plyr::rbind.fill(authors, artifacts) ## 2) construct expected edge attributes @@ -290,14 +285,15 @@ test_that("Construction of the bipartite network for the feature artifact with a event.name = c("commented", "commented", "commented", "commented", "commented", "commented", "commented", "commented", "commented", "commented", "commented", "commented", "commented", "commented", "commented"), + weight = 1, type = TYPE.EDGES.INTER, - weight = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), - relation = "issue") + relation = "issue" + ) ## 3) construct expected network network.expected = igraph::graph.data.frame(network.expected.data, directed = net.conf$get.value("author.directed"), vertices = vertices) expect_true(igraph::identical_graphs(network.built, network.expected)) - }) +}) test_that("Construction of the directed bipartite network for the feature artifact with author.relation = 'cochange' and artifact. 
relation = 'cochange'.", { @@ -321,14 +317,12 @@ test_that("Construction of the directed bipartite network for the feature artifa authors = data.frame( name = c("Björn", "Karl", "Olaf", "Thomas"), type = TYPE.AUTHOR, - artifact.type = NA, kind = TYPE.AUTHOR ) artifacts = data.frame( name = c("A", "Base_Feature", "foo"), type = TYPE.ARTIFACT, - artifact.type = "feature", - kind = "feature" + kind = "Feature" ) vertices = plyr::rbind.fill(authors, artifacts) ## 2) construct expected edge attributes @@ -337,20 +331,22 @@ test_that("Construction of the directed bipartite network for the feature artifa to = c("A", "Base_Feature", "A", "Base_Feature", "Base_Feature", "foo"), date = get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 16:06:10", "2016-07-12 16:00:45", "2016-07-12 16:05:41", "2016-07-12 16:06:32", "2016-07-12 16:06:32")), + artifact.type = c("Feature", "Feature", "Feature", "Feature", "Feature", "Feature"), hash = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "1143db502761379c2bfcecc2007fc34282e7ee61", "5a5ec9675e98187e1e92561e1888aa6f04faa338", "3a0ed78458b3976243db6829f63eba3eead26774", "0a1a5c523d835459c42f33e863623138555e2526", "0a1a5c523d835459c42f33e863623138555e2526"), file = c("test.c", "test3.c", "test.c", "test2.c", "test2.c", "test2.c"), - artifact.type = c("Feature", "Feature", "Feature", "Feature", "Feature", "Feature"), artifact = c("A", "Base_Feature", "A", "Base_Feature", "Base_Feature", "foo"), + weight = 1, type = TYPE.EDGES.INTER, - weight = c(1, 1, 1, 1, 1, 1), - relation = c("cochange", "cochange", "cochange", "cochange", "cochange", "cochange")) + relation = "cochange" + ) ## 3) construct expected network - network.expected = igraph::graph.data.frame(network.expected.data, directed = net.conf$get.value("author.directed"), vertices = vertices) + network.expected = igraph::graph.data.frame(network.expected.data, vertices = vertices, + directed = net.conf$get.value("author.directed")) 
expect_true(igraph::identical_graphs(network.built, network.expected)) - }) +}) test_that("Construction of the directed bipartite network for the file artifact with author.relation = 'cochange' and artifact. relation = 'cochange'.", { @@ -374,14 +370,12 @@ test_that("Construction of the directed bipartite network for the file artifact authors = data.frame( name = c("Björn", "Olaf", "Thomas"), type = TYPE.AUTHOR, - artifact.type = NA, kind = TYPE.AUTHOR ) artifacts = data.frame( name = c("test.c", "test2.c"), type = TYPE.ARTIFACT, - artifact.type = "file", - kind ="file" + kind = "File" ) vertices = plyr::rbind.fill(authors, artifacts) ## 2) construct expected edge attributes @@ -390,19 +384,21 @@ test_that("Construction of the directed bipartite network for the file artifact to = c("test.c", "test.c", "test2.c", "test2.c"), date = get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 16:00:45", "2016-07-12 16:05:41", "2016-07-12 16:06:32")), + artifact.type = c("File", "File", "File", "File"), hash = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338", "3a0ed78458b3976243db6829f63eba3eead26774", "0a1a5c523d835459c42f33e863623138555e2526"), file = c("test.c", "test.c", "test2.c", "test2.c"), - artifact.type = c("File", "File", "File", "File"), artifact = c("test.c", "test.c", "test2.c", "test2.c"), + weight = 1, type = TYPE.EDGES.INTER, - weight = c(1, 1, 1, 1), - relation = c("cochange", "cochange", "cochange", "cochange")) + relation = "cochange" + ) ## 3) construct expected network - network.expected = igraph::graph.data.frame(network.expected.data, directed = net.conf$get.value("author.directed"), vertices = vertices) + network.expected = igraph::graph.data.frame(network.expected.data, vertices = vertices, + directed = net.conf$get.value("author.directed")) expect_true(igraph::identical_graphs(network.built, network.expected)) - }) +}) test_that("Construction of the directed bipartite network for the function artifact 
with author.relation = 'cochange' and artifact. @@ -427,14 +423,12 @@ test_that("Construction of the directed bipartite network for the function artif authors = data.frame( name = c("Björn", "Olaf", "Thomas"), type = TYPE.AUTHOR, - artifact.type = NA, kind = TYPE.AUTHOR ) artifacts = data.frame( name = c("File_Level"), type = TYPE.ARTIFACT, - artifact.type = "function", - kind = "function" + kind = "Function" ) vertices = plyr::rbind.fill(authors, artifacts) ## 2) construct expected edge attributes @@ -443,19 +437,21 @@ test_that("Construction of the directed bipartite network for the function artif to = c("File_Level", "File_Level", "File_Level", "File_Level"), date = get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 16:00:45", "2016-07-12 16:05:41", "2016-07-12 16:06:32")), + artifact.type = c("Function", "Function", "Function", "Function"), hash = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338", "3a0ed78458b3976243db6829f63eba3eead26774", "0a1a5c523d835459c42f33e863623138555e2526"), file = c("test.c", "test.c", "test2.c", "test2.c"), - artifact.type = c("Function", "Function", "Function", "Function"), artifact = c("File_Level", "File_Level", "File_Level", "File_Level"), + weight = 1, type = TYPE.EDGES.INTER, - weight = c(1, 1, 1, 1), - relation = c("cochange", "cochange", "cochange", "cochange")) + relation = "cochange" + ) ## 3) construct expected network - network.expected = igraph::graph.data.frame(network.expected.data, directed = net.conf$get.value("author.directed"), vertices = vertices) + network.expected = igraph::graph.data.frame(network.expected.data, vertices = vertices, + directed = net.conf$get.value("author.directed")) expect_true(igraph::identical_graphs(network.built, network.expected)) - }) +}) test_that("Construction of the directed bipartite network for the featureexpression artifact with author.relation = 'cochange' and artifact. 
relation = 'cochange'.", { @@ -479,14 +475,12 @@ test_that("Construction of the directed bipartite network for the featureexpress authors = data.frame( name = c("Björn", "Olaf"), type = TYPE.AUTHOR, - artifact.type = NA, kind = TYPE.AUTHOR ) artifacts = data.frame( name = c("defined(A)", "Base_Feature"), type = TYPE.ARTIFACT, - artifact.type = "featureexpression", - kind = "featureexpression" + kind = "FeatureExpression" ) vertices = plyr::rbind.fill(authors, artifacts) ## 2) construct expected edge attributes @@ -495,16 +489,18 @@ test_that("Construction of the directed bipartite network for the featureexpress to = c("defined(A)", "defined(A)", "Base_Feature"), date = get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 16:00:45", "2016-07-12 16:05:41")), + artifact.type = c("FeatureExpression", "FeatureExpression", "FeatureExpression"), hash = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338", "3a0ed78458b3976243db6829f63eba3eead26774"), file = c("test.c", "test.c", "test2.c"), - artifact.type = c("FeatureExpression", "FeatureExpression", "FeatureExpression"), artifact = c("defined(A)", "defined(A)", "Base_Feature"), + weight = 1, type = TYPE.EDGES.INTER, - weight = c(1, 1, 1), - relation = c("cochange", "cochange", "cochange")) + relation = "cochange" + ) ## 3) construct expected network - network.expected = igraph::graph.data.frame(network.expected.data, directed = net.conf$get.value("author.directed"), vertices = vertices) + network.expected = igraph::graph.data.frame(network.expected.data, vertices = vertices, + directed = net.conf$get.value("author.directed")) expect_true(igraph::identical_graphs(network.built, network.expected)) - }) +}) diff --git a/tests/test-networks-multi-relation.R b/tests/test-networks-multi-relation.R index b8cc3647..348b1424 100644 --- a/tests/test-networks-multi-relation.R +++ b/tests/test-networks-multi-relation.R @@ -59,6 +59,8 @@ test_that("Network construction of the undirected author 
network with relation = "2016-07-12 16:06:10", "2016-07-12 16:06:32", "2016-07-12 15:58:40", "2016-07-12 15:58:50", "2016-07-12 16:04:40", # mail "2016-07-12 16:05:37")), + artifact.type = c(rep("Feature", 8), # cochange + rep("Mail", 4)), # mail hash = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338", "3a0ed78458b3976243db6829f63eba3eead26774", "1143db502761379c2bfcecc2007fc34282e7ee61", "3a0ed78458b3976243db6829f63eba3eead26774", "0a1a5c523d835459c42f33e863623138555e2526", @@ -66,13 +68,11 @@ test_that("Network construction of the undirected author network with relation = rep(NA, 4)), file = c("test.c", "test.c", "test2.c", "test3.c", "test2.c", "test2.c", "test3.c", "test2.c", rep(NA, 4)), - artifact.type = c("Feature", "Feature", "Feature", "Feature", "Feature", "Feature", "Feature", "Feature", #cochange - rep("Mail", 4)), #mail artifact = c("A", "A", "Base_Feature", "Base_Feature", "Base_Feature", "Base_Feature", "Base_Feature", "Base_Feature", rep(NA, 4)), weight = 1, type = TYPE.EDGES.INTRA, - relation = c("cochange", "cochange", "cochange", "cochange", "cochange", "cochange", "cochange", "cochange", + relation = c(rep("cochange", 8), rep("mail", 4)), message.id = c(NA, NA, NA, NA, NA, NA, NA, NA, "<4cbaa9ef0802201124v37f1eec8g89a412dfbfc8383a@mail.gmail.com>", @@ -83,7 +83,8 @@ test_that("Network construction of the undirected author network with relation = ) ## build expected network - network.expected = igraph::graph.data.frame(data, directed = FALSE, vertices = authors) + network.expected = igraph::graph.data.frame(data, vertices = authors, + directed = net.conf$get.value("author.directed")) expect_true(igraph::identical_graphs(network.built, network.expected)) }) @@ -110,27 +111,23 @@ test_that("Construction of the bipartite network for the feature artifact with a authors1 = data.frame( # author -- issue name = c("Björn", "Karl", "Max", "Olaf", "Thomas"), type = TYPE.AUTHOR, - artifact.type = NA, kind = 
TYPE.AUTHOR ) authors2 = data.frame( # author --mail name = c("Fritz fritz@example.org", "georg", "Hans", "udo"), type = TYPE.AUTHOR, - artifact.type = NA, kind = TYPE.AUTHOR ) issues = data.frame( name = c("", "", "", ""), type = TYPE.ARTIFACT, - artifact.type = "issue", - kind = "issue" + kind = "Issue" ) threads = data.frame( name = c("", "", "", "", "", "", "", "", ""), type = TYPE.ARTIFACT, - artifact.type = "thread", - kind = "thread" + kind = "Mail" ) vertices = plyr::rbind.fill(authors1, issues, authors2, threads) ## 2) construct expected edge attributes @@ -161,12 +158,10 @@ test_that("Construction of the bipartite network for the feature artifact with a "", "", "", "", "", "", "", "", "", "", rep(NA,16)), - event.name = c("commented", "commented", "commented", "commented", "commented", - "commented", "commented", "commented", "commented", "commented", - "commented", "commented", "commented", "commented", "commented", + event.name = c(rep("commented", 15), rep(NA, 16)), - type = TYPE.EDGES.INTER, weight = 1, + type = TYPE.EDGES.INTER, relation = c(rep("issue", 15), rep("mail", 16)), message.id = c(rep(NA, 15), "", "<1107974989.17910.6.camel@jmcmullan>", @@ -183,7 +178,8 @@ test_that("Construction of the bipartite network for the feature artifact with a "", "", "", "") ) ## 3) construct expected network - network.expected = igraph::graph.data.frame(network.expected.data, directed = net.conf$get.value("author.directed"), vertices = vertices) + network.expected = igraph::graph.data.frame(network.expected.data, vertices = vertices, + directed = net.conf$get.value("author.directed")) expect_true(igraph::identical_graphs(network.built, network.expected)) }) @@ -209,9 +205,8 @@ test_that("Construction of the multi network for the feature artifact with autho vertices = data.frame( name = c("Björn", "Olaf", "Karl", "Thomas", "udo", "Fritz fritz@example.org", "georg", "Hans", "Base_Feature", "foo", "A", "", "", "", ""), - kind = c(rep(TYPE.AUTHOR, 8), rep("feature", 
3), rep("issue", 4)), - type = c(rep(TYPE.AUTHOR, 8), rep(TYPE.ARTIFACT, 7)), - artifact.type = c(rep(NA, 8), rep("feature", 3), rep("issue", 4)) + kind = c(rep(TYPE.AUTHOR, 8), rep("Feature", 3), rep("Issue", 4)), + type = c(rep(TYPE.AUTHOR, 8), rep(TYPE.ARTIFACT, 7)) ) row.names(vertices) = c("Björn", "Olaf", "Karl", "Thomas", "udo", "Fritz fritz@example.org", "georg", "Hans", "Base_Feature", "foo", "A", "", "", "", "") @@ -240,6 +235,11 @@ test_that("Construction of the multi network for the feature artifact with autho "2017-02-20 22:25:41", "2017-03-02 17:30:10", "2016-07-27 22:25:25", "2016-10-05 01:07:46", "2016-12-07 15:37:21", "2013-05-05 23:28:57", "2016-07-14 02:03:14", "2016-07-15 08:37:57")), + artifact.type = c(rep("Feature", 8), + rep("Mail", 4), + rep("Feature", 2), + rep("Feature", 6), + rep("Issue", 14)), hash = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338", # author cochange "3a0ed78458b3976243db6829f63eba3eead26774", "1143db502761379c2bfcecc2007fc34282e7ee61", "3a0ed78458b3976243db6829f63eba3eead26774", "0a1a5c523d835459c42f33e863623138555e2526", @@ -255,11 +255,6 @@ test_that("Construction of the multi network for the feature artifact with autho "test2.c", "test2.c", # artifact cochange "test.c", "test.c", "test2.c", "test3.c", "test2.c", "test2.c", # bipartite cochange rep(NA, 14)), - artifact.type = c(rep("Feature", 8), - rep("Mail", 4), - rep("Feature", 2), - rep("Feature", 6), - rep("Issue", 14)), artifact = c("A", "A", "Base_Feature", "Base_Feature", "Base_Feature", "Base_Feature", "Base_Feature", # author cochange "Base_Feature", rep(NA, 4), @@ -289,7 +284,8 @@ test_that("Construction of the multi network for the feature artifact with autho event.name = c(rep(NA, 20), rep("commented", 14)) ) - network.expected = igraph::graph.data.frame(edges, directed = FALSE, vertices = vertices) + network.expected = igraph::graph.data.frame(edges, vertices = vertices, + directed = 
net.conf$get.value("author.directed")) expected.edges = igraph::as_data_frame(network.expected, what = "edges") expected.vertices = igraph::as_data_frame(network.expected, what = "vertices") diff --git a/tests/test-networks-multi.R b/tests/test-networks-multi.R index f16d6845..fcdcd9df 100644 --- a/tests/test-networks-multi.R +++ b/tests/test-networks-multi.R @@ -50,43 +50,44 @@ test_that("Construction of the multi network for the feature artifact with autho ## build expected network vertices = data.frame(name = c("Björn", "Olaf", "Karl", "Thomas", "Base_Feature", "foo", "A"), - kind = c(rep(TYPE.AUTHOR, 4), rep("feature", 3)), - type = c(rep(TYPE.AUTHOR, 4), rep(TYPE.ARTIFACT, 3)), - artifact.type = c(rep(NA, 4), rep("feature", 3)) + kind = c(rep(TYPE.AUTHOR, 4), rep("Feature", 3)), + type = c(rep(TYPE.AUTHOR, 4), rep(TYPE.ARTIFACT, 3)) ) row.names(vertices) = c("Björn", "Olaf", "Karl", "Thomas", "Base_Feature", "foo", "A") - edges = data.frame(from= c("Björn", "Björn", "Olaf", "Olaf", "Olaf", "Olaf", "Karl", "Karl", - "Base_Feature", "Base_Feature", "Björn", "Olaf", "Olaf", "Karl", "Thomas", - "Thomas"), - to = c("Olaf", "Olaf", "Karl", "Karl", "Thomas", "Thomas", "Thomas", "Thomas", "foo", - "foo", "A", "A", "Base_Feature", "Base_Feature", "Base_Feature", "foo"), - date = get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 16:00:45", "2016-07-12 16:05:41", - "2016-07-12 16:06:10", "2016-07-12 16:05:41", "2016-07-12 16:06:32", - "2016-07-12 16:06:10", "2016-07-12 16:06:32", "2016-07-12 16:06:32", - "2016-07-12 16:06:32", "2016-07-12 15:58:59", "2016-07-12 16:00:45", - "2016-07-12 16:05:41", "2016-07-12 16:06:10", "2016-07-12 16:06:32", - "2016-07-12 16:06:32")), - hash = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338", - "3a0ed78458b3976243db6829f63eba3eead26774", "1143db502761379c2bfcecc2007fc34282e7ee61", - "3a0ed78458b3976243db6829f63eba3eead26774", "0a1a5c523d835459c42f33e863623138555e2526", - 
"1143db502761379c2bfcecc2007fc34282e7ee61", "0a1a5c523d835459c42f33e863623138555e2526", - "0a1a5c523d835459c42f33e863623138555e2526", "0a1a5c523d835459c42f33e863623138555e2526", - "72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338", - "3a0ed78458b3976243db6829f63eba3eead26774", "1143db502761379c2bfcecc2007fc34282e7ee61", - "0a1a5c523d835459c42f33e863623138555e2526", "0a1a5c523d835459c42f33e863623138555e2526"), - file = c("test.c", "test.c", "test2.c", "test3.c", "test2.c", "test2.c", "test3.c", "test2.c", - "test2.c", "test2.c", "test.c", "test.c", "test2.c", "test3.c", "test2.c", "test2.c"), - artifact.type = c("Feature", "Feature", "Feature", "Feature", "Feature", "Feature", "Feature", - "Feature", "Feature", "Feature", "Feature", "Feature", "Feature", "Feature", - "Feature", "Feature"), - artifact = c("A", "A", "Base_Feature", "Base_Feature", "Base_Feature", "Base_Feature", "Base_Feature", - "Base_Feature", "Base_Feature", "foo", "A", "A", "Base_Feature", "Base_Feature", "Base_Feature", - "foo"), - weight = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), - type = c(rep(TYPE.EDGES.INTRA, 10), rep(TYPE.EDGES.INTER, 6)), - relation = c(rep("cochange", 16))) + edges = data.frame( + from = c("Björn", "Björn", "Olaf", "Olaf", "Olaf", "Olaf", "Karl", "Karl", + "Base_Feature", "Base_Feature", "Björn", "Olaf", "Olaf", "Karl", "Thomas", + "Thomas"), + to = c("Olaf", "Olaf", "Karl", "Karl", "Thomas", "Thomas", "Thomas", "Thomas", "foo", + "foo", "A", "A", "Base_Feature", "Base_Feature", "Base_Feature", "foo"), + date = get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 16:00:45", "2016-07-12 16:05:41", + "2016-07-12 16:06:10", "2016-07-12 16:05:41", "2016-07-12 16:06:32", + "2016-07-12 16:06:10", "2016-07-12 16:06:32", "2016-07-12 16:06:32", + "2016-07-12 16:06:32", "2016-07-12 15:58:59", "2016-07-12 16:00:45", + "2016-07-12 16:05:41", "2016-07-12 16:06:10", "2016-07-12 16:06:32", + "2016-07-12 16:06:32")), + artifact.type = 
c("Feature", "Feature", "Feature", "Feature", "Feature", "Feature", "Feature", + "Feature", "Feature", "Feature", "Feature", "Feature", "Feature", "Feature", + "Feature", "Feature"), + hash = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338", + "3a0ed78458b3976243db6829f63eba3eead26774", "1143db502761379c2bfcecc2007fc34282e7ee61", + "3a0ed78458b3976243db6829f63eba3eead26774", "0a1a5c523d835459c42f33e863623138555e2526", + "1143db502761379c2bfcecc2007fc34282e7ee61", "0a1a5c523d835459c42f33e863623138555e2526", + "0a1a5c523d835459c42f33e863623138555e2526", "0a1a5c523d835459c42f33e863623138555e2526", + "72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338", + "3a0ed78458b3976243db6829f63eba3eead26774", "1143db502761379c2bfcecc2007fc34282e7ee61", + "0a1a5c523d835459c42f33e863623138555e2526", "0a1a5c523d835459c42f33e863623138555e2526"), + file = c("test.c", "test.c", "test2.c", "test3.c", "test2.c", "test2.c", "test3.c", "test2.c", + "test2.c", "test2.c", "test.c", "test.c", "test2.c", "test3.c", "test2.c", "test2.c"), + artifact = c("A", "A", "Base_Feature", "Base_Feature", "Base_Feature", "Base_Feature", "Base_Feature", + "Base_Feature", "Base_Feature", "foo", "A", "A", "Base_Feature", "Base_Feature", "Base_Feature", + "foo"), + weight = 1, + type = c(rep(TYPE.EDGES.INTRA, 10), rep(TYPE.EDGES.INTER, 6)), + relation = "cochange" + ) network.expected = igraph::graph.data.frame(edges, directed = FALSE, vertices = vertices) diff --git a/util-conf.R b/util-conf.R index 4e8ed2d9..f614791e 100644 --- a/util-conf.R +++ b/util-conf.R @@ -687,15 +687,17 @@ NetworkConf = R6::R6Class("NetworkConf", inherit = Conf, ), edge.attributes = list( default = c( - "date", # general + "date", "artifact.type", # general "message.id", "thread", # mail data - "hash", "file", "artifact.type", "artifact", # commit data + "hash", "file", "artifact", # commit data "issue.id", "event.name" # issue data ), type = "character", 
allowed = c( # the date "date", "date.offset", + # the artifact type indicating the edge + "artifact.type", # author information "author.name", "author.email", # committer information @@ -703,7 +705,7 @@ NetworkConf = R6::R6Class("NetworkConf", inherit = Conf, # e-mail information "message.id", "thread", "subject", ## commit information - "hash", "file", "artifact.type", "artifact", "changed.files", "added.lines", + "hash", "file", "artifact", "changed.files", "added.lines", "deleted.lines", "diff.size", "artifact.diff.size", "synchronicity", # pasta information "pasta", @@ -752,9 +754,13 @@ NetworkConf = R6::R6Class("NetworkConf", inherit = Conf, update.values = function(updated.values = list(), stop.on.error = FALSE) { super$update.values(updated.values = updated.values, stop.on.error = stop.on.error) - ## 1) "date" always as edge attribute + ## 1) "date" and "artifact.type" always as edge attribute name = "edge.attributes" - private[["attributes"]][[name]][["value"]] = unique(c(self$get.value(name), "date")) + private[["attributes"]][[name]][["value"]] = unique(c( + "date", + "artifact.type", + self$get.value(name) + )) ## return invisible invisible() diff --git a/util-networks.R b/util-networks.R index 447d6cf6..d8f64fb6 100644 --- a/util-networks.R +++ b/util-networks.R @@ -122,13 +122,14 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", return(private$authors.network.cochange) } - ## construct network based on artifact2author data + ## construct edge list based on artifact2author data author.net.data = construct.edge.list.from.key.value.list( private$proj.data$get.artifact2author(), network.conf = private$network.conf, directed = private$network.conf$get.value("author.directed") ) + ## construct network from obtained data author.net = construct.network.from.edge.list( author.net.data[["vertices"]], author.net.data[["edges"]], @@ -136,8 +137,6 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", directed = private$network.conf$get.value("author.directed") ) - 
igraph::E(author.net)$type = TYPE.EDGES.INTRA - ## store network private$authors.network.cochange = author.net logging::logdebug("get.author.network.cochange: finished.") @@ -159,18 +158,21 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", return(private$authors.network.mail) } + + ## construct edge list based on thread2author data author.net.data = construct.edge.list.from.key.value.list( private$proj.data$get.thread2author(), network.conf = private$network.conf, directed = private$network.conf$get.value("author.directed") ) + + ## construct network from obtained data author.net = construct.network.from.edge.list( author.net.data[["vertices"]], author.net.data[["edges"]], network.conf = private$network.conf, directed = private$network.conf$get.value("author.directed") ) - igraph::E(author.net)$type = TYPE.EDGES.INTRA ## store network private$authors.network.mail = author.net @@ -188,11 +190,14 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", return(private$authors.network.issue) } + ## construct edge list based on issue2author data author.net.data = construct.edge.list.from.key.value.list( private$proj.data$get.issue2author(), network.conf = private$network.conf, directed = private$network.conf$get.value("author.directed") ) + + ## construct network from obtained data author.net = construct.network.from.edge.list( author.net.data[["vertices"]], author.net.data[["edges"]], @@ -200,8 +205,6 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", directed = private$network.conf$get.value("author.directed") ) - igraph::E(author.net)$type = TYPE.EDGES.INTRA - private$authors.network.issue = author.net logging::logdebug("get.author.network.issue: finished.") @@ -224,11 +227,15 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", return(private$artifacts.network.cochange) } + ## construct edge list based on commit2artifact data + artifacts.net.data.raw = private$proj.data$get.commit2artifact() artifacts.net.data = construct.edge.list.from.key.value.list( - 
private$proj.data$get.commit2artifact(), + artifacts.net.data.raw, network.conf = private$network.conf, directed = FALSE ) + + ## construct network from obtained data artifacts.net = construct.network.from.edge.list( artifacts.net.data[["vertices"]], artifacts.net.data[["edges"]], @@ -236,8 +243,12 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", directed = FALSE ) - artifacts.net = igraph::set.vertex.attribute(artifacts.net, "artifact.type", - value = private$proj.data$get.project.conf.entry("artifact")) + ## set network attribute 'vertex.kind' to ensure the correct setting of the + ## vertex attribute 'kind' later + artifacts.net = igraph::set.graph.attribute( + artifacts.net, "vertex.kind", + value = private$proj.data$get.project.conf.entry("artifact.codeface") + ) ## store network private$artifacts.network.cochange = artifacts.net @@ -278,6 +289,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", ## read network from disk artifacts.net = igraph::read.graph(file, format = "pajek") + # set vertex labels properly (copy "id" attribute to "name" attribute) artifacts.net = igraph::set.vertex.attribute( artifacts.net, @@ -312,8 +324,19 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", } ## (3) set processed names inside graph object artifacts.net = igraph::set.vertex.attribute(artifacts.net, "name", value = names) - artifacts.net = igraph::set.vertex.attribute(artifacts.net, "artifact.type", - value = private$proj.data$get.project.conf.entry("artifact")) + + ## set edge attribute 'artifact.type' as the raw data do not contain this! 
+ artifacts.net = igraph::set.edge.attribute( + artifacts.net, "artifact.type", + value = private$proj.data$get.project.conf.entry("artifact.codeface") + ) + + ## set network attribute 'vertex.kind' to ensure the correct setting of the + ## vertex attribute 'kind' later + artifacts.net = igraph::set.graph.attribute( + artifacts.net, "vertex.kind", + value = private$proj.data$get.project.conf.entry("artifact.codeface") + ) ## store network private$artifacts.network.callgraph = artifacts.net @@ -344,11 +367,19 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", ## construct empty network directed = private$network.conf$get.value("artifact.directed") - artifacts = names(private$proj.data$get.thread2author()) # thread IDs - artifacts.net = create.empty.network(directed = directed) + - igraph::vertices(artifacts, type = TYPE.ARTIFACT) + artifacts.net.data.raw = private$proj.data$get.thread2author() + artifacts = names(artifacts.net.data.raw) # thread IDs + artifacts.net = create.empty.network(directed = directed) + igraph::vertices(artifacts) + + ## set network attribute 'vertex.kind' to ensure the correct setting of the + ## vertex attribute 'kind' later + if (length(artifacts.net.data.raw) > 0) { + artifacts.net = igraph::set.graph.attribute( + artifacts.net, "vertex.kind", + value = unique(artifacts.net.data.raw[[1]][["artifact.type"]]) + ) + } - artifacts.net = igraph::set.vertex.attribute(artifacts.net, "artifact.type", value = "thread") ## store network private$artifacts.network.mail = artifacts.net logging::logdebug("get.artifact.network.mail: finished.") @@ -378,11 +409,18 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", ## construct empty network directed = private$network.conf$get.value("artifact.directed") - artifacts = names(private$proj.data$get.issue2author()) # thread IDs - artifacts.net = create.empty.network(directed = directed) + - igraph::vertices(artifacts, type = TYPE.ARTIFACT) - - artifacts.net = igraph::set.vertex.attribute(artifacts.net, 
"artifact.type", value = "issue") + artifacts.net.data.raw = private$proj.data$get.issue2author() + artifacts = names(artifacts.net.data.raw) # thread IDs + artifacts.net = create.empty.network(directed = directed) + igraph::vertices(artifacts) + + ## set network attribute 'vertex.kind' to ensure the correct setting of the + ## vertex attribute 'kind' later + if (length(artifacts.net.data.raw) > 0) { + artifacts.net = igraph::set.graph.attribute( + artifacts.net, "vertex.kind", + value = unique(artifacts.net.data.raw[[1]][["artifact.type"]]) + ) + } ## store network private$artifacts.network.issue = artifacts.net @@ -411,24 +449,28 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", relation, cochange = { bip.relation = private$proj.data$get.author2artifact() - artifact.type = private$proj.data$get.project.conf.entry("artifact") }, callgraph = { bip.relation = private$proj.data$get.author2artifact() - artifact.type = private$proj.data$get.project.conf.entry("artifact") }, mail = { bip.relation = private$proj.data$get.author2thread() - artifact.type = "thread" }, issue = { bip.relation = private$proj.data$get.author2issue() - artifact.type = "issue" } ) - ## set artifact.type and relation attributes - attr(bip.relation, "artifact.type") = artifact.type + ## set vertex attribute 'kind' on all edges, corresponding to relation + ## FIXME remove? 
+ if (length(bip.relation) > 0) { + vertex.kind = unique(bip.relation[[1]][["artifact.type"]]) + } else { + vertex.kind = NA + } + + ## set vertex.kind and relation attributes + attr(bip.relation, "vertex.kind") = vertex.kind attr(bip.relation, "relation") = relation return (bip.relation) @@ -552,12 +594,13 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", mail = private$get.author.network.mail(), issue = private$get.author.network.issue(), stop(sprintf("The author relation '%s' does not exist.", rel)) - ## TODO get edge and vertex data + ## TODO construct edge lists here and merge those (inline the private methods) ) + + ## set edge attributes on all edges + igraph::E(network)$type = TYPE.EDGES.INTRA igraph::E(network)$relation = relation - # the kind is in this case the same as the type attribute - igraph::V(network)$kind = TYPE.AUTHOR return(network) }) net = merge.networks(networks) @@ -582,9 +625,9 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", } } - ## set vertex and edge attributes for identifaction + ## set vertex attributes for identifaction + igraph::V(net)$kind = TYPE.AUTHOR igraph::V(net)$type = TYPE.AUTHOR - igraph::E(net)$type = TYPE.EDGES.INTRA ## add range attribute for later analysis (if available) if ("RangeData" %in% class(private$proj.data)) { @@ -611,21 +654,21 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", issue = private$get.artifact.network.issue(), stop(sprintf("The artifact relation '%s' does not exist.", relation)) ) + + ## set edge attributes on all edges + igraph::E(network)$type = TYPE.EDGES.INTRA igraph::E(network)$relation = relation - ## for cochange, we use the exact artifact type as vertex attribute 'kind' - if (relation == "cochange") { - igraph::V(network)$kind = private$proj.data$get.project.conf.entry("artifact") - } else { - igraph::V(network)$kind = relation - } + ## set vertex attribute 'kind' on all edges, corresponding to relation + kind = unique(igraph::get.graph.attribute(network, "vertex.kind")) + network = 
igraph::set.vertex.attribute(network, "kind", value = kind) + return(network) }) net = merge.networks(networks) ## set vertex and edge attributes for identifaction igraph::V(net)$type = TYPE.ARTIFACT - igraph::E(net)$type = TYPE.EDGES.INTRA ## add range attribute for later analysis (if available) if ("RangeData" %in% class(private$proj.data)) { @@ -644,7 +687,6 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", directed = private$network.conf$get.value("author.directed") vertex.data = lapply(bipartite.relation.data, function(net.to.net) { - artifact.type = attr(net.to.net, "artifact.type") ## extract vertices for author network if (private$network.conf$get.value("author.all.authors")) { @@ -658,12 +700,20 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", return(df[["data.vertices"]]) }))) + ## get vertex.kind from bipartite relation + vertex.kind = attr(net.to.net, "vertex.kind") + ## join author and artifact vertices to the list of all vertices in the network vertices = data.frame( name = c(author.vertices, artifact.vertices), - type = c(rep(TYPE.AUTHOR, length(author.vertices)), rep(TYPE.ARTIFACT, length(artifact.vertices))), - artifact.type = c(rep(NA, length(author.vertices)), rep(artifact.type, length(artifact.vertices))), - kind = c(rep(TYPE.AUTHOR, length(author.vertices)), rep(artifact.type , length(artifact.vertices))) + type = c( + rep(TYPE.AUTHOR, length(author.vertices)), + rep(TYPE.ARTIFACT, length(artifact.vertices)) + ), + kind = c( + rep(TYPE.AUTHOR, length(author.vertices)), + rep(vertex.kind , length(artifact.vertices)) + ) ) return(vertices) }) @@ -765,9 +815,10 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", artifacts.from.net = igraph::get.vertex.attribute(artifacts.net, "name") artifacts.to.add = setdiff(artifacts.all[["data.vertices"]], artifacts.from.net) - artifacts.to.add.kind = artifacts.all[ artifacts.all[["data.vertices"]] %in% artifacts.to.add, "artifact.type" ] + artifacts.to.add.kind = artifacts.all[ + 
artifacts.all[["data.vertices"]] %in% artifacts.to.add, "artifact.type" + ] artifacts.net = artifacts.net + igraph::vertices(artifacts.to.add, type = TYPE.ARTIFACT, - artifact.type = artifacts.to.add.kind, kind = artifacts.to.add.kind) ## check directedness and adapt artifact network if needed @@ -784,6 +835,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", gc() ## combine the networks + ## TODO use merge.networks and add.edges.for.bipartite.relations here (remove combine.networks then!) u = combine.networks(authors.net, artifacts.net, authors.to.artifacts, network.conf = private$network.conf) @@ -1113,8 +1165,8 @@ add.edges.for.bipartite.relation = function(net, bipartite.relations, network.co return(a.df[, network.conf$get.value("edge.attributes")[cols.which], drop = FALSE]) }) extra.edge.attributes.df = plyr::rbind.fill(extra.edge.attributes.df) - extra.edge.attributes.df["type"] = TYPE.EDGES.INTER # add egde type extra.edge.attributes.df["weight"] = 1 # add weight + extra.edge.attributes.df["type"] = TYPE.EDGES.INTER # add egde type extra.edge.attributes.df["relation"] = attr(net1.to.net2, "relation") # add relation type extra.edge.attributes = as.list(extra.edge.attributes.df) From 4dead555ecb3873ff713273845fa3caeca933c4a Mon Sep 17 00:00:00 2001 From: Claus Hunsen Date: Thu, 26 Apr 2018 19:41:00 +0200 Subject: [PATCH 23/51] Adjust resolution of vertex attribute 'kind' To reliably resolve the correct vertex attribute 'kind' for the current artifact relation, we now use a mapping method. This way, we are open to rename any 'kind' value in later steps. 
Signed-off-by: Claus Hunsen --- util-networks.R | 49 +++++++++++++++++++++++++------------------------ 1 file changed, 25 insertions(+), 24 deletions(-) diff --git a/util-networks.R b/util-networks.R index d8f64fb6..8ee51cc9 100644 --- a/util-networks.R +++ b/util-networks.R @@ -87,8 +87,21 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", artifacts.network.mail = NULL, # igraph artifacts.network.issue = NULL, # igraph - ## * * data cutting --------------------------------------------- + ## * * relation-to-vertex-kind mapping ----------------------------- + get.vertex.kind.for.relation = function(relation) { + + vertex.kind = switch(relation, + cochange = private$proj.data$get.project.conf.entry("artifact.codeface"), + callgraph = private$proj.data$get.project.conf.entry("artifact.codeface"), + mail = "Mail", + issue = "Issue" + ) + + return(vertex.kind) + }, + + ## * * data cutting ------------------------------------------------ #' Cut the data sources of the data object to the same date ranges. 
cut.data.to.same.timestamps = function() { @@ -247,7 +260,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", ## vertex attribute 'kind' later artifacts.net = igraph::set.graph.attribute( artifacts.net, "vertex.kind", - value = private$proj.data$get.project.conf.entry("artifact.codeface") + value = private$get.vertex.kind.for.relation("cochange") ) ## store network @@ -335,7 +348,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", ## vertex attribute 'kind' later artifacts.net = igraph::set.graph.attribute( artifacts.net, "vertex.kind", - value = private$proj.data$get.project.conf.entry("artifact.codeface") + value = private$get.vertex.kind.for.relation("callgraph") ) ## store network @@ -373,12 +386,10 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", ## set network attribute 'vertex.kind' to ensure the correct setting of the ## vertex attribute 'kind' later - if (length(artifacts.net.data.raw) > 0) { - artifacts.net = igraph::set.graph.attribute( - artifacts.net, "vertex.kind", - value = unique(artifacts.net.data.raw[[1]][["artifact.type"]]) - ) - } + artifacts.net = igraph::set.graph.attribute( + artifacts.net, "vertex.kind", + value = private$get.vertex.kind.for.relation("mail") + ) ## store network private$artifacts.network.mail = artifacts.net @@ -415,12 +426,10 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", ## set network attribute 'vertex.kind' to ensure the correct setting of the ## vertex attribute 'kind' later - if (length(artifacts.net.data.raw) > 0) { - artifacts.net = igraph::set.graph.attribute( - artifacts.net, "vertex.kind", - value = unique(artifacts.net.data.raw[[1]][["artifact.type"]]) - ) - } + artifacts.net = igraph::set.graph.attribute( + artifacts.net, "vertex.kind", + value = private$get.vertex.kind.for.relation("issue") + ) ## store network private$artifacts.network.issue = artifacts.net @@ -461,16 +470,8 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", } ) - ## set vertex attribute 'kind' on all edges, corresponding to relation - ## 
FIXME remove? - if (length(bip.relation) > 0) { - vertex.kind = unique(bip.relation[[1]][["artifact.type"]]) - } else { - vertex.kind = NA - } - ## set vertex.kind and relation attributes - attr(bip.relation, "vertex.kind") = vertex.kind + attr(bip.relation, "vertex.kind") = private$get.vertex.kind.for.relation(relation) attr(bip.relation, "relation") = relation return (bip.relation) From a9605c6d56de4f69123e969844c281c226d652d3 Mon Sep 17 00:00:00 2001 From: Claus Hunsen Date: Thu, 26 Apr 2018 19:55:06 +0200 Subject: [PATCH 24/51] Rename artifact-vertex kind for mail relation to 'MailThread' As the vertices that we produce in the artifact relation 'mail' represent mail threads, the corresponding vertex kind is renamed to 'MailThread'. Signed-off-by: Claus Hunsen --- tests/test-networks-multi-relation.R | 2 +- util-networks.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test-networks-multi-relation.R b/tests/test-networks-multi-relation.R index 348b1424..1648048d 100644 --- a/tests/test-networks-multi-relation.R +++ b/tests/test-networks-multi-relation.R @@ -127,7 +127,7 @@ test_that("Construction of the bipartite network for the feature artifact with a name = c("", "", "", "", "", "", "", "", ""), type = TYPE.ARTIFACT, - kind = "Mail" + kind = "MailThread" ) vertices = plyr::rbind.fill(authors1, issues, authors2, threads) ## 2) construct expected edge attributes diff --git a/util-networks.R b/util-networks.R index 8ee51cc9..578bcb6c 100644 --- a/util-networks.R +++ b/util-networks.R @@ -94,7 +94,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", vertex.kind = switch(relation, cochange = private$proj.data$get.project.conf.entry("artifact.codeface"), callgraph = private$proj.data$get.project.conf.entry("artifact.codeface"), - mail = "Mail", + mail = "MailThread", issue = "Issue" ) From 78c7c8777ea30f3ae871fc30186901636e77629a Mon Sep 17 00:00:00 2001 From: Claus Hunsen Date: Thu, 26 Apr 2018 19:58:19 +0200 Subject: [PATCH 25/51] Rename 
artifact type for issue relation to 'IssueComment' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As the edges that we produce for the relation 'issue' are initiated by an issue comment – and not the complete issue–, the corresponding artifact type is renamed to 'IssueComment'. Signed-off-by: Claus Hunsen --- tests/test-networks-author.R | 4 ++-- tests/test-networks-bipartite.R | 2 +- tests/test-networks-multi-relation.R | 4 ++-- tests/test-read.R | 2 +- util-read.R | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/test-networks-author.R b/tests/test-networks-author.R index 90441657..e2cc50e1 100644 --- a/tests/test-networks-author.R +++ b/tests/test-networks-author.R @@ -382,7 +382,7 @@ test_that("Network construction of the undirected author-issue network with all "2016-10-13 15:33:56", "2016-12-06 14:03:42", "2016-12-07 15:37:02", "2016-12-07 15:37:02", "2016-12-07 15:37:21", "2016-12-07 15:53:02", "2016-12-07 15:53:02", "2017-02-20 22:25:41", "2017-03-02 17:30:10", "2017-05-23 12:32:21", "2017-05-23 12:32:21", "2017-05-23 12:32:39" )), - artifact.type = "Issue", + artifact.type = "IssueComment", issue.id = c( "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", @@ -447,7 +447,7 @@ test_that("Network construction of the undirected author-issue network with just "2016-08-31 18:21:48", "2016-10-05 01:07:46", "2016-10-13 15:33:56", "2016-12-06 14:03:42", "2016-12-07 15:37:21", "2016-12-07 15:53:02", "2017-02-20 22:25:41", "2017-03-02 17:30:10", "2017-05-23 12:32:39" )), - artifact.type = "Issue", + artifact.type = "IssueComment", issue.id = c( "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "" ), diff --git a/tests/test-networks-bipartite.R b/tests/test-networks-bipartite.R index 30d17840..413f8a53 100644 --- a/tests/test-networks-bipartite.R +++ b/tests/test-networks-bipartite.R @@ -278,7 +278,7 @@ test_that("Construction of the bipartite network for 
the feature artifact with a "2017-02-20 22:25:41", "2017-03-02 17:30:10", "2013-05-05 23:28:57", "2017-05-23 12:32:39", "2016-07-27 22:25:25", "2016-10-05 01:07:46", "2016-12-07 15:37:21", "2016-07-14 02:03:14", "2016-07-15 08:37:57")), - artifact.type = "Issue", + artifact.type = "IssueComment", issue.id = c("", "", "", "", "", "", "", "", "", "", "", "", "", "", ""), diff --git a/tests/test-networks-multi-relation.R b/tests/test-networks-multi-relation.R index 1648048d..9abf6edb 100644 --- a/tests/test-networks-multi-relation.R +++ b/tests/test-networks-multi-relation.R @@ -153,7 +153,7 @@ test_that("Construction of the bipartite network for the feature artifact with a "2010-07-12 12:05:44", "2010-07-12 12:05:45", "2010-07-12 12:05:46", "2016-07-12 15:58:50", "2016-07-12 16:05:37", "2016-07-12 16:04:40", "2010-07-12 10:05:36")), - artifact.type = c(rep("Issue", 15), rep("Mail", 16)), + artifact.type = c(rep("IssueComment", 15), rep("Mail", 16)), issue.id = c("", "", "", "", "", "", "", "", "", "", "", "", "", "", "", @@ -239,7 +239,7 @@ test_that("Construction of the multi network for the feature artifact with autho rep("Mail", 4), rep("Feature", 2), rep("Feature", 6), - rep("Issue", 14)), + rep("IssueComment", 14)), hash = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338", # author cochange "3a0ed78458b3976243db6829f63eba3eead26774", "1143db502761379c2bfcecc2007fc34282e7ee61", "3a0ed78458b3976243db6829f63eba3eead26774", "0a1a5c523d835459c42f33e863623138555e2526", diff --git a/tests/test-read.R b/tests/test-read.R index d9217f1d..ea3b3d16 100644 --- a/tests/test-read.R +++ b/tests/test-read.R @@ -268,7 +268,7 @@ test_that("Read and parse the issue data.", { "created", "renamed", "commented", "commented", "commented", "mentioned", "subscribed", "commented", "referenced", "mentioned", "subscribed", "commented", "commented", "commented", "commented", "commented", "merged", "closed", "commented", "commented", "created", 
"commented", "commented", "merged", "closed", "commented"), - artifact.type = "Issue" + artifact.type = "IssueComment" ) ## calculate event IDs issue.data.expected[["event.id"]] = sapply( diff --git a/util-read.R b/util-read.R index 29082793..e363a745 100644 --- a/util-read.R +++ b/util-read.R @@ -385,7 +385,7 @@ read.issues = function(data.path) { issue.data[["issue.id"]] = sprintf("", issue.data[["issue.id"]]) ## set proper artifact type for proper vertex attribute 'artifact.type' - issue.data["artifact.type"] = "Issue" + issue.data["artifact.type"] = "IssueComment" ## convert 'is.pull.request' column to logicals issue.data[["is.pull.request"]] = as.logical(issue.data[["is.pull.request"]]) From 26f0c59073a104d025a5e6b8f735b77acf0bd414 Mon Sep 17 00:00:00 2001 From: Claus Hunsen Date: Thu, 26 Apr 2018 20:00:17 +0200 Subject: [PATCH 26/51] Fix plot legends For consistent and comprehensible legends for network plots, the legend keys for shapes/linetypes and colors are properly distinguished now. Additionally, the size of the vertex colors are reduced in size to properly match the one of the vertex shapes. 
Signed-off-by: Claus Hunsen --- util-plot.R | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/util-plot.R b/util-plot.R index e301b104..095cbb60 100644 --- a/util-plot.R +++ b/util-plot.R @@ -154,12 +154,12 @@ plot.get.plot.for.network = function(network, labels = TRUE) { ## scale vertices (colors and styles) - ggplot2::scale_shape_discrete(name = "Vertices", solid = TRUE) + + ggplot2::scale_shape_discrete(name = "Vertex Types", solid = TRUE) + viridis::scale_color_viridis(name = "Vertices", option = "plasma", discrete = TRUE, end = 0.8, begin = 0.05) + ## scale edges (colors and styles) - ggraph::scale_edge_linetype(name = "Relations") + + ggraph::scale_edge_linetype(name = "Relation Types") + ggplot2::discrete_scale(name = "Relations", "edge_colour", "viridis", viridis::viridis_pal(option = "viridis", end = 0.8, begin = 0.25)) + ## BROKEN RIGHT NOW due to bug in scale_edge_colour_viridis(): @@ -170,7 +170,8 @@ plot.get.plot.for.network = function(network, labels = TRUE) { ggplot2::theme_light() + ggplot2::guides( ## reduce size of symbols in legend - shape = ggplot2::guide_legend(override.aes = list(size = PLOT.VERTEX.SIZE.LEGEND)) + shape = ggplot2::guide_legend(override.aes = list(size = PLOT.VERTEX.SIZE.LEGEND)), + color = ggplot2::guide_legend(override.aes = list(size = PLOT.VERTEX.SIZE.LEGEND)) ) + ggplot2::theme( legend.position = "bottom", From 0817dd82fbefe189651f690ef8a6d781cc671f14 Mon Sep 17 00:00:00 2001 From: Thomas Bock Date: Fri, 27 Apr 2018 12:47:39 +0200 Subject: [PATCH 27/51] Add additional parameters to network simplification functions Add the parameters 'remove.multiple' and 'remove.loops' to the functions 'simplify.network' and 'simplify.networks' and pass them to the 'igraph::simplify' function. 
Signed-off-by: Thomas Bock --- util-networks.R | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/util-networks.R b/util-networks.R index 578bcb6c..9edc25f0 100644 --- a/util-networks.R +++ b/util-networks.R @@ -1211,9 +1211,11 @@ create.empty.edge.list = function() { #' Simplify a given network. #' #' @param network the given network +#' @param remove.multiple whether to contract multiple edges between the same pair of nodes [default: TRUE] +#' @param remove.loops whether to remove loops [default: TRUE] #' #' @return the simplified network -simplify.network = function(network) { +simplify.network = function(network, remove.multiple = TRUE, remove.loops = TRUE) { logging::logdebug("simplify.network: starting.") logging::loginfo("Simplifying network.") @@ -1231,7 +1233,9 @@ simplify.network = function(network) { directed = igraph::is.directed(network)) ## simplify networks (contract edges and remove loops) - net = igraph::simplify(net, edge.attr.comb = EDGE.ATTR.HANDLING, remove.loops = TRUE) + net = igraph::simplify(net, edge.attr.comb = EDGE.ATTR.HANDLING, + remove.multiple = remove.multiple, + remove.loops = remove.loops) ## TODO perform simplification on edge list? return(net) }) @@ -1239,7 +1243,8 @@ simplify.network = function(network) { ## merge the simplified networks network = merge.networks(networks) } else { - network = igraph::simplify(network, edge.attr.comb = EDGE.ATTR.HANDLING, remove.loops = TRUE) + network = igraph::simplify(network, edge.attr.comb = EDGE.ATTR.HANDLING, + remove.multiple = remove.multiple, remove.loops = remove.loops) } logging::logdebug("simplify.network: finished.") @@ -1249,16 +1254,19 @@ simplify.network = function(network) { #' Simplify a list of networks. 
#' #' @param networks the list of networks +#' @param remove.multiple whether to contract multiple edges between the same pair of nodes [default: TRUE] +#' @param remove.loops whether to remove loops [default: TRUE] #' #' @return the simplified networks -simplify.networks = function(networks) { +simplify.networks = function(networks, remove.multiple = TRUE, remove.loops = TRUE) { logging::logdebug("simplify.networks: starting.") logging::loginfo( "Simplifying networks (names = [%s]).", paste(names(networks), collapse = ", ") ) - nets = parallel::mclapply(networks, simplify.network) + nets = parallel::mclapply(networks, simplify.network, remove.multiple = remove.multiple, + remove.loops = remove.loops) logging::logdebug("simplify.networks: finished.") return(nets) From 74bd0909854dec2f584c9812e6396a1d92d0c886 Mon Sep 17 00:00:00 2001 From: Barbara Eckl Date: Fri, 27 Apr 2018 13:06:08 +0200 Subject: [PATCH 28/51] Rename artifact type for issue relation to 'IssueEvent' As the edges that we produce for the relation 'issue' are not only initiated by an issue comment, but also by other events, for example opening or closing the issue, the corresponding artifact type is renamed to 'IssueEvent'. 
Signed-off-by: Barbara Eckl --- tests/test-networks-author.R | 4 ++-- tests/test-networks-bipartite.R | 2 +- tests/test-networks-multi-relation.R | 4 ++-- tests/test-read.R | 2 +- util-read.R | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/test-networks-author.R b/tests/test-networks-author.R index e2cc50e1..54732c7a 100644 --- a/tests/test-networks-author.R +++ b/tests/test-networks-author.R @@ -382,7 +382,7 @@ test_that("Network construction of the undirected author-issue network with all "2016-10-13 15:33:56", "2016-12-06 14:03:42", "2016-12-07 15:37:02", "2016-12-07 15:37:02", "2016-12-07 15:37:21", "2016-12-07 15:53:02", "2016-12-07 15:53:02", "2017-02-20 22:25:41", "2017-03-02 17:30:10", "2017-05-23 12:32:21", "2017-05-23 12:32:21", "2017-05-23 12:32:39" )), - artifact.type = "IssueComment", + artifact.type = "IssueEvent", issue.id = c( "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", @@ -447,7 +447,7 @@ test_that("Network construction of the undirected author-issue network with just "2016-08-31 18:21:48", "2016-10-05 01:07:46", "2016-10-13 15:33:56", "2016-12-06 14:03:42", "2016-12-07 15:37:21", "2016-12-07 15:53:02", "2017-02-20 22:25:41", "2017-03-02 17:30:10", "2017-05-23 12:32:39" )), - artifact.type = "IssueComment", + artifact.type = "IssueEvent", issue.id = c( "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "" ), diff --git a/tests/test-networks-bipartite.R b/tests/test-networks-bipartite.R index 413f8a53..d72c498e 100644 --- a/tests/test-networks-bipartite.R +++ b/tests/test-networks-bipartite.R @@ -278,7 +278,7 @@ test_that("Construction of the bipartite network for the feature artifact with a "2017-02-20 22:25:41", "2017-03-02 17:30:10", "2013-05-05 23:28:57", "2017-05-23 12:32:39", "2016-07-27 22:25:25", "2016-10-05 01:07:46", "2016-12-07 15:37:21", "2016-07-14 02:03:14", "2016-07-15 08:37:57")), - artifact.type = "IssueComment", + artifact.type = "IssueEvent", issue.id 
= c("", "", "", "", "", "", "", "", "", "", "", "", "", "", ""), diff --git a/tests/test-networks-multi-relation.R b/tests/test-networks-multi-relation.R index 9abf6edb..06ea1327 100644 --- a/tests/test-networks-multi-relation.R +++ b/tests/test-networks-multi-relation.R @@ -153,7 +153,7 @@ test_that("Construction of the bipartite network for the feature artifact with a "2010-07-12 12:05:44", "2010-07-12 12:05:45", "2010-07-12 12:05:46", "2016-07-12 15:58:50", "2016-07-12 16:05:37", "2016-07-12 16:04:40", "2010-07-12 10:05:36")), - artifact.type = c(rep("IssueComment", 15), rep("Mail", 16)), + artifact.type = c(rep("IssueEvent", 15), rep("Mail", 16)), issue.id = c("", "", "", "", "", "", "", "", "", "", "", "", "", "", "", @@ -239,7 +239,7 @@ test_that("Construction of the multi network for the feature artifact with autho rep("Mail", 4), rep("Feature", 2), rep("Feature", 6), - rep("IssueComment", 14)), + rep("IssueEvent", 14)), hash = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338", # author cochange "3a0ed78458b3976243db6829f63eba3eead26774", "1143db502761379c2bfcecc2007fc34282e7ee61", "3a0ed78458b3976243db6829f63eba3eead26774", "0a1a5c523d835459c42f33e863623138555e2526", diff --git a/tests/test-read.R b/tests/test-read.R index ea3b3d16..ed0c8f2b 100644 --- a/tests/test-read.R +++ b/tests/test-read.R @@ -268,7 +268,7 @@ test_that("Read and parse the issue data.", { "created", "renamed", "commented", "commented", "commented", "mentioned", "subscribed", "commented", "referenced", "mentioned", "subscribed", "commented", "commented", "commented", "commented", "commented", "merged", "closed", "commented", "commented", "created", "commented", "commented", "merged", "closed", "commented"), - artifact.type = "IssueComment" + artifact.type = "IssueEvent" ) ## calculate event IDs issue.data.expected[["event.id"]] = sapply( diff --git a/util-read.R b/util-read.R index e363a745..996e08fd 100644 --- a/util-read.R +++ b/util-read.R @@ 
-385,7 +385,7 @@ read.issues = function(data.path) { issue.data[["issue.id"]] = sprintf("", issue.data[["issue.id"]]) ## set proper artifact type for proper vertex attribute 'artifact.type' - issue.data["artifact.type"] = "IssueComment" + issue.data["artifact.type"] = "IssueEvent" ## convert 'is.pull.request' column to logicals issue.data[["is.pull.request"]] = as.logical(issue.data[["is.pull.request"]]) From 8833ae893202ac8293a116f56e2a07a5ca7c1267 Mon Sep 17 00:00:00 2001 From: Barbara Eckl Date: Mon, 30 Apr 2018 10:40:19 +0200 Subject: [PATCH 29/51] Minor fixes from review in PR #115 Signed-off-by: Barbara Eckl --- README.md | 6 ++++-- util-networks.R | 18 +++++++++++------- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 086b68f0..a8456e34 100644 --- a/README.md +++ b/README.md @@ -273,12 +273,14 @@ For more examples, please look in the file `showcase.R`. - Mandatory vertex attributes * *`"type"`*: [`"Author"`, `"Artifact"`] - * *`"kind"`*: [`"Author"`,`"File"`, `"Feature"`, `"Function"`, `"Mail"`, `"Issue"`,`"FeatureExpression"`] + * *`"kind"`*: [`"Author"`,`"File"`, `"Feature"`, `"Function"`, `"MailThread"`, + `"Issue"`,`"FeatureExpression"`] * *`"name"`* - Mandatory edge attributes * *`"type"`*: [`Unipartite`, `Bipartite`] - * *`"artifact.type"`*: [`"Author"`,`"File"`, `"Feature"`, `"Function"`, `"Mail"`, `"Issue"`,`"FeatureExpression"`] + * *`"artifact.type"`*: [`"File"`, `"Feature"`, `"Function"`, `"Mail"`, + `"IssueEvent"`,`"FeatureExpression"`] * *`"relation"`*: [`mail`, `cochange`, `issue`, `callgraph`] (from `artifact.relation` and `author.relation` attributes in the `NetworkConf` class) * *`"date"`* diff --git a/util-networks.R b/util-networks.R index 9edc25f0..077ad248 100644 --- a/util-networks.R +++ b/util-networks.R @@ -89,6 +89,10 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", ## * * relation-to-vertex-kind mapping ----------------------------- + #' Determine which vertex kind should be chose for the 
vertex depending on the relation + #' between the vertices. + #' + #' @return the vertex kind to be used get.vertex.kind.for.relation = function(relation) { vertex.kind = switch(relation, @@ -443,8 +447,8 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", #' Get the key-value data for the bipartite relations, #' which are implied by the "artifact.relation" from the network configuration. #' - #' @return the list of data for the bipartite relations, with the attributes - #' 'artifact.type' denoting the artifact type for the relations + #' @return the list of data for the bipartite relations, each with the attributes + #' 'vertex.kind' denoting the artifact type for the relations #' and 'relation' denoting the respective network-configuration entry get.bipartite.relations = function() { logging::logdebug("get.bipartite.relations: starting.") @@ -540,7 +544,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", #' #' @param network.conf the new network configuration. #' @param reset.environment parameter to determine whether the environment - #' has to be reset or not + #' has to be reset or not [default: FALSE] set.network.conf = function(network.conf, reset.environment = FALSE) { private$network.conf = network.conf @@ -866,7 +870,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", #' #' @param list the list of lists with data #' @param network.conf the network configuration -#' @param directed whether or not the network should be directed +#' @param directed whether or not the network should be directed [default: FALSE] #' #' @return the built edge list construct.edge.list.from.key.value.list = function(list, network.conf, directed = FALSE) { @@ -1012,10 +1016,10 @@ construct.edge.list.from.key.value.list = function(list, network.conf, directed #' @param vertices data frame with vertex data #' @param edge.list list of edges #' @param network.conf the network configuration -#' @param directed whether or not the network should be directed +#' @param directed whether or 
not the network should be directed [default: FALSE] #' #' @return the built network -construct.network.from.edge.list = function(vertices, edge.list, network.conf, directed = TRUE) { +construct.network.from.edge.list = function(vertices, edge.list, network.conf, directed = FALSE) { logging::logdebug("construct.network.from.edge.list: starting.") logging::loginfo("Construct network from edges.") @@ -1181,7 +1185,7 @@ add.edges.for.bipartite.relation = function(net, bipartite.relations, network.co #' Create an empty network that does not break the algorithms. #' -#' @param directed whether or not the network should be directed +#' @param directed whether or not the network should be directed [default: TRUE] #' #' @return the new empty network create.empty.network = function(directed = TRUE) { From 9e68293913bc5ac64a99514fbd0b9b492383fd47 Mon Sep 17 00:00:00 2001 From: Barbara Eckl Date: Mon, 30 Apr 2018 14:14:16 +0200 Subject: [PATCH 30/51] Add comment Signed-off-by: Barbara Eckl --- util-networks.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/util-networks.R b/util-networks.R index 077ad248..c5fedb21 100644 --- a/util-networks.R +++ b/util-networks.R @@ -92,6 +92,8 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", #' Determine which vertex kind should be chose for the vertex depending on the relation #' between the vertices. 
#' + #' @param relation the given relation + #' #' @return the vertex kind to be used get.vertex.kind.for.relation = function(relation) { @@ -496,7 +498,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", #' given data object and the network configuration #' #' @param project.data the given data object - #' @param network.conf the network configuration + #' @param network.conf the network configuration initialize = function(project.data, network.conf) { ## check arguments From b5e03acfbbf995b6f4f5b679b86da0ace6e5f56e Mon Sep 17 00:00:00 2001 From: Claus Hunsen Date: Mon, 30 Apr 2018 16:04:53 +0200 Subject: [PATCH 31/51] Simplify resolution of vertex kind for artifact networks As discussed regarding PR #115 and commit 4dead555ecb3873ff713273845fa3caeca933c4a, the resolution of the vertex kind for artifact networks is now solved through using the available relation variable. This enables us to remove the graph attributes we added earlier. Thanks to @bockthom for pointing this out. Signed-off-by: Claus Hunsen --- util-networks.R | 32 ++------------------------------ 1 file changed, 2 insertions(+), 30 deletions(-) diff --git a/util-networks.R b/util-networks.R index c5fedb21..d7923c3c 100644 --- a/util-networks.R +++ b/util-networks.R @@ -262,13 +262,6 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", directed = FALSE ) - ## set network attribute 'vertex.kind' to ensure the correct setting of the - ## vertex attribute 'kind' later - artifacts.net = igraph::set.graph.attribute( - artifacts.net, "vertex.kind", - value = private$get.vertex.kind.for.relation("cochange") - ) - ## store network private$artifacts.network.cochange = artifacts.net logging::logdebug("get.artifact.network.cochange: finished.") @@ -350,13 +343,6 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", value = private$proj.data$get.project.conf.entry("artifact.codeface") ) - ## set network attribute 'vertex.kind' to ensure the correct setting of the - ## vertex attribute 'kind' later - artifacts.net = 
igraph::set.graph.attribute( - artifacts.net, "vertex.kind", - value = private$get.vertex.kind.for.relation("callgraph") - ) - ## store network private$artifacts.network.callgraph = artifacts.net logging::logdebug("get.artifact.network.callgraph: finished.") @@ -390,13 +376,6 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", artifacts = names(artifacts.net.data.raw) # thread IDs artifacts.net = create.empty.network(directed = directed) + igraph::vertices(artifacts) - ## set network attribute 'vertex.kind' to ensure the correct setting of the - ## vertex attribute 'kind' later - artifacts.net = igraph::set.graph.attribute( - artifacts.net, "vertex.kind", - value = private$get.vertex.kind.for.relation("mail") - ) - ## store network private$artifacts.network.mail = artifacts.net logging::logdebug("get.artifact.network.mail: finished.") @@ -430,13 +409,6 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", artifacts = names(artifacts.net.data.raw) # thread IDs artifacts.net = create.empty.network(directed = directed) + igraph::vertices(artifacts) - ## set network attribute 'vertex.kind' to ensure the correct setting of the - ## vertex attribute 'kind' later - artifacts.net = igraph::set.graph.attribute( - artifacts.net, "vertex.kind", - value = private$get.vertex.kind.for.relation("issue") - ) - ## store network private$artifacts.network.issue = artifacts.net logging::logdebug("get.artifact.network.issue: finished.") @@ -667,8 +639,8 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", igraph::E(network)$relation = relation ## set vertex attribute 'kind' on all edges, corresponding to relation - kind = unique(igraph::get.graph.attribute(network, "vertex.kind")) - network = igraph::set.vertex.attribute(network, "kind", value = kind) + vertex.kind = private$get.vertex.kind.for.relation(relation) + network = igraph::set.vertex.attribute(network, "kind", value = vertex.kind) return(network) }) From bc3c8c143ee61184c0d2f98bffb0d397e04625bc Mon Sep 17 00:00:00 2001 From: Claus 
Hunsen Date: Mon, 30 Apr 2018 16:20:41 +0200 Subject: [PATCH 32/51] Adjust return value of 'construct.edge.list.from.key.value.list' To enhance intercompatibility with internal functionality from the package 'igraph', especially with the function 'igraph::as.data.frame(..., what = "both")', the function 'construct.edge.list.from.key.value.list' now returns a data.frame for the vertices, not a plain vector. Signed-off-by: Claus Hunsen --- util-networks.R | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/util-networks.R b/util-networks.R index d7923c3c..bab3f274 100644 --- a/util-networks.R +++ b/util-networks.R @@ -846,7 +846,8 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", #' @param network.conf the network configuration #' @param directed whether or not the network should be directed [default: FALSE] #' -#' @return the built edge list +#' @return a list of two data.frames named 'vertices' and 'edges' (compatible with return value +#' of \code{igraph::as.data.frame}) construct.edge.list.from.key.value.list = function(list, network.conf, directed = FALSE) { logging::loginfo("Create edges.") logging::logdebug("construct.edge.list.from.key.value.list: starting.") @@ -975,7 +976,9 @@ construct.edge.list.from.key.value.list = function(list, network.conf, directed logging::logdebug("construct.edge.list.from.key.value.list: finished.") return(list( - vertices = nodes.processed, + vertices = data.frame( + name = nodes.processed + ), edges = edge.list )) } From 992ddf8d582a7a023f000b4fc57f9ff85a7f38f6 Mon Sep 17 00:00:00 2001 From: Christian Hechtl Date: Wed, 18 Apr 2018 12:24:34 +0200 Subject: [PATCH 33/51] Adapt pasta read method to also read lines without mapping Add revision set id to pasta items Signed-off-by: Christian Hechtl --- util-read.R | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/util-read.R b/util-read.R index 996e08fd..536e6eeb 100644 --- a/util-read.R +++ b/util-read.R @@ -303,7 +303,7 @@ 
read.pasta = function(data.path) { KEY.SEPERATOR = " " ## get file name of pasta data - filepath = file.path(data.path, "similar-mailbox") + filepath = file.path(data.path, "mbox-result") ## read data from disk [can be empty] lines = suppressWarnings(try(readLines(filepath), silent = TRUE)) @@ -315,13 +315,15 @@ read.pasta = function(data.path) { return(data.frame()) } + revision.set.id = 1 + result.list = parallel::mclapply(lines, function(line) { #line = lines[i] if ( nchar(line) == 0 ) { return(NULL) } - if (!grepl(SEPERATOR, line)) { + if(!grepl("<", line)) { logging::logwarn("Faulty line: %s", line) return(NULL) } @@ -330,16 +332,23 @@ read.pasta = function(data.path) { # 2) split keys # 3) split values # 4) insert all key-value pairs by iteration (works also if there is only one key) - line.split = unlist(strsplit(line, SEPERATOR)) - keys = line.split[1] - values = line.split[2] - keys.split = unlist(strsplit(keys, KEY.SEPERATOR)) - values.split = unlist(strsplit(values, KEY.SEPERATOR)) + if (grepl(SEPERATOR, line)) { + line.split = unlist(strsplit(line, SEPERATOR)) + keys = line.split[1] + values = line.split[2] + keys.split = unlist(strsplit(keys, KEY.SEPERATOR)) + values.split = unlist(strsplit(values, KEY.SEPERATOR)) + } else { + keys.split = unlist(strsplit(line, KEY.SEPERATOR)) + values.split = NA + } # Transform data to data.frame #df = data.frame(message.id = keys.split, commit.hash = values.split) df = merge(keys.split, values.split) colnames(df) = c("message.id", "commit.hash") + df$revision.set.id.id = revision.set.id + revision.set.id <<- revision.set.id + 1 return(df) }) result.df = plyr::rbind.fill(result.list) From 8139f34fd809d6750064514a549024df4cbf5863 Mon Sep 17 00:00:00 2001 From: Christian Hechtl Date: Wed, 18 Apr 2018 12:26:01 +0200 Subject: [PATCH 34/51] Add method to classify developers with hierarchy Signed-off-by: Christian Hechtl --- util-core-peripheral.R | 33 +++++++++++++++++++++++++++++++-- util-networks-metrics.R | 4 +++- 2 
files changed, 34 insertions(+), 3 deletions(-) diff --git a/util-core-peripheral.R b/util-core-peripheral.R index 3eb71d2a..92aa491e 100644 --- a/util-core-peripheral.R +++ b/util-core-peripheral.R @@ -56,7 +56,8 @@ LONGTERM.CORE.THRESHOLD = 0.5 ## For count-based network metrics, the raw data has to be given. For network-based metrics, ## the network has to be given. get.author.class.by.type = function(network = NULL, data = NULL, - type = c("network.degree", "network.eigen", "commit.count", "loc.count")) { + type = c("network.degree", "network.eigen", "network.hierarchy", + "commit.count", "loc.count")) { logging::logdebug("get.author.class.by.type: starting.") type = match.arg(type) @@ -69,6 +70,7 @@ get.author.class.by.type = function(network = NULL, data = NULL, result = switch(type, "network.degree" = get.author.class.network.degree(network = network), "network.eigen" = get.author.class.network.eigen(network = network), + "network.hierarchy" = get.author.class.network.hierarchy(network = network), "commit.count" = get.author.class.commit.count(range.data = data), "loc.count" = get.author.class.loc.count(range.data = data)) @@ -82,7 +84,8 @@ get.author.class.by.type = function(network = NULL, data = NULL, ## The data can either be given as list of raw range data (for the count-based metrics) ## or as list of networks for the network-based metrics). 
get.author.class.overview = function(network.list = NULL, range.data.list = NULL, - type = c("network.degree", "network.eigen", "commit.count", "loc.count")) { + type = c("network.degree", "network.eigen", "network.hierarchy", + "commit.count", "loc.count")) { logging::logdebug("get.author.class.overview: starting.") type = match.arg(type) @@ -393,6 +396,32 @@ get.author.class.network.degree = function(network = NULL, result.limit = NULL) return(res) } +get.author.class.network.hierarchy = function(network = NULL, result.limit = NULL) { + + logging::logdebug("get.author.class.network.hierarchy: starting.") + + if(is.null(network)) { + logging::logerror("For the network-based hierarchy-centrality analysis, the network is needed.") + stop("The network has to be given for this analysis.") + } else if(igraph::vcount(network) == 0) { + logging::logwarn("The given network is empty. Returning empty classification...") + ## return an empty classification + return(list("core" = data.frame("author.name" = character(0), "centrality" = numeric(0)), + "peripheral" = data.frame("author.name" = character(0), "centrality" = numeric(0)))) + } + + hierarchy.base.df = metrics.hierarchy(network) + hierarchy.calculated = hierarchy.base.df$deg/hierarchy.base.df$cc + + hierarchy.df = data.frame(author.name = row.names(hierarchy.base.df), hierarchy = hierarchy.calculated) + + ## Get the author classification based on the centrality + res = get.author.class(hierarchy.df, "hierarchy", result.limit = result.limit) + + logging::logdebug("get.author.class.network.hierarchy: finished.") + return(res) +} + ## * Eigenvector-based classification -------------------------------------- ## Classify the authors of the specified version range into core and peripheral diff --git a/util-networks-metrics.R b/util-networks-metrics.R index 4d1ec5d3..9bcde008 100644 --- a/util-networks-metrics.R +++ b/util-networks-metrics.R @@ -196,6 +196,8 @@ metrics.hierarchy = function(network) { cluster.coeff = 
igraph::transitivity(network, type = "local", vids = NULL) degrees.without.cluster.coeff = subset(degrees, !(is.nan(cluster.coeff) | cluster.coeff == 0)) cluster.coeff = subset(cluster.coeff, !(is.nan(cluster.coeff) | cluster.coeff == 0)) - return(data.frame(log.deg = log(degrees.without.cluster.coeff), log.cc = log(cluster.coeff))) + return(data.frame(deg = degrees.without.cluster.coeff, cc = cluster.coeff, + log.deg = log(degrees.without.cluster.coeff), log.cc = log(cluster.coeff))) } + From c0277c36e4ff45cfbb421317a42b6ea8736afe53 Mon Sep 17 00:00:00 2001 From: Christian Hechtl Date: Wed, 18 Apr 2018 12:35:14 +0200 Subject: [PATCH 35/51] Fix bug that eigenvector centrality does not consider directedness Fix bug that throws an error when the core classification is empty Signed-off-by: Christian Hechtl --- util-core-peripheral.R | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/util-core-peripheral.R b/util-core-peripheral.R index 92aa491e..d87ea8f3 100644 --- a/util-core-peripheral.R +++ b/util-core-peripheral.R @@ -443,7 +443,7 @@ get.author.class.network.eigen = function(network = NULL, range.data = NULL, res } ## Get eigenvectors for all authors - centrality.vec = sort(igraph::eigen_centrality(network)$vector, decreasing= TRUE) + centrality.vec = sort(igraph::eigen_centrality(network, directed = T)$vector, decreasing= TRUE) ## In case no collaboration occured, all centrality values are set to 0 if (igraph::ecount(network) == 0) { @@ -1020,10 +1020,15 @@ get.author.class = function(author.data.frame, calc.base.name, result.limit = NU ## (1) check which positions are over the threshold ## (2) check which positions are equal to the threshold (use all.equal due to rounding errors) author.cumsum = cumsum(author.data[[calc.base.name]]) - author.class.threshold.idx = min(which( + buffer.value = which( author.cumsum > author.class.threshold | sapply(author.cumsum, function(x) isTRUE(all.equal(x, author.class.threshold))) - )) + ) + if 
(is.infinite(min(buffer.value))) { + author.class.threshold.idx = 0 + } else { + author.class.threshold.idx = min(buffer.value) + } ## classify authors according to threshold core.classification = rep(FALSE, nrow(author.data)) From 70d9b8bd4cb16636086ca7ab90e817b89844f172 Mon Sep 17 00:00:00 2001 From: Christian Hechtl Date: Wed, 18 Apr 2018 12:44:04 +0200 Subject: [PATCH 36/51] Add pasta data to commits if configured Change logging to improve performance Signed-off-by: Christian Hechtl --- util-data.R | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/util-data.R b/util-data.R index b36cd2e9..aacfd323 100644 --- a/util-data.R +++ b/util-data.R @@ -411,6 +411,12 @@ ProjectData = R6::R6Class("ProjectData", self$get.data.path(), private$project.conf$get.value("artifact") ) + + ## add pasta data if wanted + if(private$project.conf$get.value("pasta")) { + logging::loginfo("Adding pasta data.") + private$commits = private$add.pasta.data(private$commits) + } } private$extract.timestamps(source = "commits") @@ -508,7 +514,8 @@ ProjectData = R6::R6Class("ProjectData", private$mails = read.mails(self$get.data.path()) ## add pasta data if wanted - if (private$project.conf$get.value("pasta")) { + if(private$project.conf$get.value("pasta")) { + logging::loginfo("Adding pasta data.") private$mails = private$add.pasta.data(private$mails) } } @@ -605,7 +612,6 @@ ProjectData = R6::R6Class("ProjectData", #' #' @return the selected pasta data get.pasta.items = function(message.id = NULL, commit.hash = NULL) { - logging::loginfo("Getting pasta items started.") #if neither message.id nor commit.hash are specified break the code if (is.null(message.id) && is.null(commit.hash)) { logging::logwarn("Neither message.id nor commit.hash specified.") @@ -619,9 +625,13 @@ ProjectData = R6::R6Class("ProjectData", ## else gather all message.ids which contain the given commit.hash and return them if (!is.null(message.id)) { result = 
private$pasta[private$pasta[["message.id"]] == message.id, "commit.hash"] + if (!(length(result) == 0) && is.na(result)) { + result = "" + } return(result) } else { result = private$pasta[private$pasta[["commit.hash"]] == commit.hash, "message.id"] + result = result[!is.na(result)] return(result) } }, From 1400cc2f3617e993d77fd115c0bd4ec92cb6e75f Mon Sep 17 00:00:00 2001 From: Christian Hechtl Date: Wed, 18 Apr 2018 12:53:01 +0200 Subject: [PATCH 37/51] Fix typo in revision.set.id Signed-off-by: Christian Hechtl --- util-read.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/util-read.R b/util-read.R index 536e6eeb..51849c8d 100644 --- a/util-read.R +++ b/util-read.R @@ -347,7 +347,7 @@ read.pasta = function(data.path) { #df = data.frame(message.id = keys.split, commit.hash = values.split) df = merge(keys.split, values.split) colnames(df) = c("message.id", "commit.hash") - df$revision.set.id.id = revision.set.id + df$revision.set.id = revision.set.id revision.set.id <<- revision.set.id + 1 return(df) }) From ebf4958ae9c89162c9952e644b89e94c36918aa9 Mon Sep 17 00:00:00 2001 From: Christian Hechtl Date: Wed, 18 Apr 2018 12:56:00 +0200 Subject: [PATCH 38/51] Adapt pasta reading test Signed-off-by: Christian Hechtl --- .../test_pasta/{similar-mailbox => mbox-result} | 2 +- tests/test-read.R | 16 +++++++++------- 2 files changed, 10 insertions(+), 8 deletions(-) rename tests/codeface-data/results/testing/test_pasta/{similar-mailbox => mbox-result} (82%) diff --git a/tests/codeface-data/results/testing/test_pasta/similar-mailbox b/tests/codeface-data/results/testing/test_pasta/mbox-result similarity index 82% rename from tests/codeface-data/results/testing/test_pasta/similar-mailbox rename to tests/codeface-data/results/testing/test_pasta/mbox-result index ffba64c4..2ace66fa 100644 --- a/tests/codeface-data/results/testing/test_pasta/similar-mailbox +++ b/tests/codeface-data/results/testing/test_pasta/mbox-result @@ -1,5 +1,5 @@ => 
72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0 => 5a5ec9675e98187e1e92561e1888aa6f04faa338 - => 3a0ed78458b3976243db6829f63eba3eead26774 + => 1143db502761379c2bfcecc2007fc34282e7ee61 => 0a1a5c523d835459c42f33e863623138555e2526 72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0 diff --git a/tests/test-read.R b/tests/test-read.R index ed0c8f2b..a289c98b 100644 --- a/tests/test-read.R +++ b/tests/test-read.R @@ -39,6 +39,7 @@ test_that("Read the raw commit data with the feature artifact.", { ## read the actual data commit.data.read = read.commits(proj.conf$get.value("datapath"), proj.conf$get.value("artifact")) + ## build the expected data.frame commit.data.expected = data.frame(commit.id = sprintf("", c(32712, 32712, 32713, 32713, 32710, 32710, 32714, 32711, 32711)), date = get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 15:58:59", "2016-07-12 16:00:45", @@ -215,14 +216,15 @@ test_that("Read and parse the pasta data.", { pasta.data.read = read.pasta(proj.conf$get.value("datapath.pasta")) ## build the expected data.frame - pasta.data.expected = data.frame(message.id = c("", "", - "", "", - "", "", "", + pasta.data.expected = data.frame(message.id=c("","", + "","", + "","","", ""), - commit.hash = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338", - "3a0ed78458b3976243db6829f63eba3eead26774", "1143db502761379c2bfcecc2007fc34282e7ee61", - "1143db502761379c2bfcecc2007fc34282e7ee61", "1143db502761379c2bfcecc2007fc34282e7ee61", - "0a1a5c523d835459c42f33e863623138555e2526", "72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0")) + commit.hash=c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0","5a5ec9675e98187e1e92561e1888aa6f04faa338", + NA,"1143db502761379c2bfcecc2007fc34282e7ee61", + "1143db502761379c2bfcecc2007fc34282e7ee61","1143db502761379c2bfcecc2007fc34282e7ee61", + "0a1a5c523d835459c42f33e863623138555e2526", "72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0"), + revision.set.id = c(1,2,3,4,4,4,5,5)) ## check the results expect_identical(pasta.data.read, 
pasta.data.expected, info = "PaStA data.") From ab00c962e164428df2d59de7292eed3c3b1352aa Mon Sep 17 00:00:00 2001 From: Christian Hechtl Date: Thu, 26 Apr 2018 12:45:15 +0200 Subject: [PATCH 39/51] Verify the given ProjectConf object in the setter Signed-off-by: Christian Hechtl --- util-data.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/util-data.R b/util-data.R index aacfd323..ebce7965 100644 --- a/util-data.R +++ b/util-data.R @@ -295,7 +295,7 @@ ProjectData = R6::R6Class("ProjectData", #' @param reset.environment parameter to determine whether the environment #' has to be reset or not set.project.conf = function(project.conf, reset.environment = FALSE) { - private$project.conf = project.conf + private$project.conf = verify.argument.for.parameter(project.conf, "ProjectConf", class(self)[1]) if (reset.environment) { self$reset.environment() From 00df306a3e6dbdeb81ddc116e88a4854b07afe72 Mon Sep 17 00:00:00 2001 From: Christian Hechtl Date: Thu, 26 Apr 2018 12:46:58 +0200 Subject: [PATCH 40/51] Add equals method to the ProjectData class Therefore, add a method that returns the names of the cached data objects in order not to have to compare all data sources but only the cached ones. This fixes #116. Signed-off-by: Christian Hechtl --- util-data.R | 68 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/util-data.R b/util-data.R index ebce7965..91d2f301 100644 --- a/util-data.R +++ b/util-data.R @@ -636,6 +636,74 @@ ProjectData = R6::R6Class("ProjectData", } }, + #' Get the names of all data sources that are currently cached in the + #' ProjectData object.
+ #' + #' @return a vector containing all the names + get.cached.data.sources = function() { + result = c() + + if (!is.null(private$data.timestamps)) { + result = c(result, "data.timestamps") + } + if (!is.null(private$commits)) { + result = c(result, "commits") + } + if (!is.null(private$mails)) { + result = c(result, "mails") + } + if (!is.null(private$authors)) { + result = c(result, "authors") + } + if (!is.null(private$issues)) { + result = c(result, "issues") + } + if (!is.null(private$synchronicity)) { + result = c(result, "synchronicity") + } + if (!is.null(private$pasta)) { + result = c(result, "pasta") + } + + return(result) + }, + + #' Compares two ProjectData objects by first comparing the names of the + #' cached data sources of the two. + #' Then it compares the ProjectConf objects and in the end the cached data + #' sources. + #' + #' @param other.data.object the object to compare + #' + #' @return TRUE is the objects are equal and FALSE otherwise + equals = function(other.data.object) { + if (!(class(other.data.object)[1] == "ProjectData")) { + logging::logerror("You can only compare a ProjectData object against another one.") + return() + } + + self.data.sources = self$get.cached.data.sources() + other.data.sources = other.data.object$get.cached.data.sources() + + if (!identical(self.data.sources, other.data.sources)) { + return(FALSE) + } + + if (!identical(self$get.project.conf()$get.conf.as.string(), + other.data.object$get.project.conf()$get.conf.as.string())) { + return(FALSE) + } + + for (source in self.data.sources) { + function.call = paste("get.", source, "()", sep = "") + if (!identical(self[[function.call]], other.data.object[[function.call]])) { + return(FALSE) + } + } + + return(TRUE) + }, + ## * * data cutting ------------------------------------------------ #' Get the timestamps (earliest and latest date of activity) of the specified From 13a52121c55804c0cb737f426e6adc3fa88631e8 Mon Sep 17 00:00:00 2001 From: Christian Hechtl Date: 
Thu, 26 Apr 2018 13:16:56 +0200 Subject: [PATCH 41/51] Add test for the equals method of ProjectData Signed-off-by: Christian Hechtl --- tests/test-data.R | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 tests/test-data.R diff --git a/tests/test-data.R b/tests/test-data.R new file mode 100644 index 00000000..3b6b710b --- /dev/null +++ b/tests/test-data.R @@ -0,0 +1,46 @@ +## This file is part of codeface-extraction-r, which is free software: you +## can redistribute it and/or modify it under the terms of the GNU General +## Public License as published by the Free Software Foundation, version 2. +## +## This program is distributed in the hope that it will be useful, +## but WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +## GNU General Public License for more details. +## +## You should have received a copy of the GNU General Public License along +## with this program; if not, write to the Free Software Foundation, Inc., +## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +## +## Copyright 2018 by Christian Hechtl +## Copyright 2018 by Claus Hunsen +## All Rights Reserved. 
+ + +context("Tests for ProjectData functionalities.") + +## +## Context +## + +CF.DATA = file.path(".", "codeface-data") +CF.SELECTION.PROCESS = "testing" +CASESTUDY = "test" +ARTIFACT = "feature" + +## use only when debugging this file independently +if (!dir.exists(CF.DATA)) CF.DATA = file.path(".", "tests", "codeface-data") + +test_that("Compare two ProjectData objects", { + + ##initialize a ProjectData object with the ProjectConf and clone it into another one + proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, ARTIFACT) + proj.data.one = ProjectData$new(project.conf = proj.conf) + proj.data.two = proj.data.one$clone() + + expect_true(proj.data.one$equals(proj.data.two), info = "Two identical ProjectData objects.") + + ##change the second data object + proj.data.two$get.pasta() + + expect_false(proj.data.one$equals(proj.data.two), "Two not identical ProjectData objects.") +}) From df5ed1b7e23494089cb5ab75cf1a50850ecffb6b Mon Sep 17 00:00:00 2001 From: Thomas Bock Date: Fri, 4 May 2018 14:26:15 +0200 Subject: [PATCH 42/51] Add 'viridis' package also to install.R As with the changes in PR #115 the 'viridis' package was added as needed package, also update the install.R. In addition, fix the misspelling of a variable name in install.R. Signed-off-by: Thomas Bock --- install.R | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/install.R b/install.R index 2ba574bb..07d65b49 100644 --- a/install.R +++ b/install.R @@ -22,7 +22,7 @@ ## to our needs. 
-pacakges = c( +packages = c( "yaml", "R6", "igraph", @@ -34,7 +34,8 @@ pacakges = c( "ggplot2", "ggraph", "markovchain", - "lubridate" + "lubridate", + "viridis" ) @@ -47,7 +48,7 @@ filter.installed.packages = function(packageList) { } -p = filter.installed.packages(pacakges) +p = filter.installed.packages(packages) if (length(p) > 0) { print(sprintf("Installing package '%s'.", p)) install.packages(p, dependencies = TRUE, verbose = FALSE, quiet = FALSE) From 9a5a2c129176c353ddef79d3333c00d00e7f36e6 Mon Sep 17 00:00:00 2001 From: Christian Hechtl Date: Mon, 7 May 2018 22:39:20 +0200 Subject: [PATCH 43/51] Fix some minor documentation and coding style issues Signed-off-by: Christian Hechtl --- tests/test-read.R | 21 ++++++++++----------- util-core-peripheral.R | 6 ++++-- util-data.R | 31 ++++++++++++++++++++++--------- util-networks-metrics.R | 4 +++- util-read.R | 7 ++++--- 5 files changed, 43 insertions(+), 26 deletions(-) diff --git a/tests/test-read.R b/tests/test-read.R index a289c98b..3cb0c915 100644 --- a/tests/test-read.R +++ b/tests/test-read.R @@ -11,7 +11,7 @@ ## with this program; if not, write to the Free Software Foundation, Inc., ## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. ## -## Copyright 2017 by Christian Hechtl +## Copyright 2017-2018 by Christian Hechtl ## Copyright 2017 by Felix Prasse ## Copyright 2018 by Claus Hunsen ## All Rights Reserved. 
@@ -39,7 +39,6 @@ test_that("Read the raw commit data with the feature artifact.", { ## read the actual data commit.data.read = read.commits(proj.conf$get.value("datapath"), proj.conf$get.value("artifact")) - ## build the expected data.frame commit.data.expected = data.frame(commit.id = sprintf("", c(32712, 32712, 32713, 32713, 32710, 32710, 32714, 32711, 32711)), date = get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 15:58:59", "2016-07-12 16:00:45", @@ -216,15 +215,15 @@ test_that("Read and parse the pasta data.", { pasta.data.read = read.pasta(proj.conf$get.value("datapath.pasta")) ## build the expected data.frame - pasta.data.expected = data.frame(message.id=c("","", - "","", - "","","", - ""), - commit.hash=c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0","5a5ec9675e98187e1e92561e1888aa6f04faa338", - NA,"1143db502761379c2bfcecc2007fc34282e7ee61", - "1143db502761379c2bfcecc2007fc34282e7ee61","1143db502761379c2bfcecc2007fc34282e7ee61", - "0a1a5c523d835459c42f33e863623138555e2526", "72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0"), - revision.set.id = c(1,2,3,4,4,4,5,5)) + pasta.data.expected = data.frame(message.id= c("","", + "","", + "","","", + ""), + commit.hash= c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0","5a5ec9675e98187e1e92561e1888aa6f04faa338", + NA,"1143db502761379c2bfcecc2007fc34282e7ee61", + "1143db502761379c2bfcecc2007fc34282e7ee61","1143db502761379c2bfcecc2007fc34282e7ee61", + "0a1a5c523d835459c42f33e863623138555e2526", "72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0"), + revision.set.id = c(1, 2, 3, 4, 4, 4, 5, 5)) ## check the results expect_identical(pasta.data.read, pasta.data.expected, info = "PaStA data.") diff --git a/util-core-peripheral.R b/util-core-peripheral.R index d87ea8f3..e8e0f859 100644 --- a/util-core-peripheral.R +++ b/util-core-peripheral.R @@ -16,6 +16,7 @@ ## Copyright 2017 by Sofie Kemper ## Copyright 2017 by Claus Hunsen ## Copyright 2017 by Felix Prasse +## Copyright 2018 by Christian Hechtl ## All Rights Reserved. 
## ## This file is derived from following Codeface script: @@ -411,7 +412,7 @@ get.author.class.network.hierarchy = function(network = NULL, result.limit = NUL } hierarchy.base.df = metrics.hierarchy(network) - hierarchy.calculated = hierarchy.base.df$deg/hierarchy.base.df$cc + hierarchy.calculated = hierarchy.base.df$deg / hierarchy.base.df$cc hierarchy.df = data.frame(author.name = row.names(hierarchy.base.df), hierarchy = hierarchy.calculated) @@ -443,7 +444,8 @@ get.author.class.network.eigen = function(network = NULL, range.data = NULL, res } ## Get eigenvectors for all authors - centrality.vec = sort(igraph::eigen_centrality(network, directed = T)$vector, decreasing= TRUE) + ## The method ignores the directed parameter if the given network is undirected + centrality.vec = sort(igraph::eigen_centrality(network, directed = TRUE)$vector, decreasing= TRUE) ## In case no collaboration occured, all centrality values are set to 0 if (igraph::ecount(network) == 0) { diff --git a/util-data.R b/util-data.R index 91d2f301..dd044068 100644 --- a/util-data.R +++ b/util-data.R @@ -14,7 +14,7 @@ ## Copyright 2016-2018 by Claus Hunsen ## Copyright 2017-2018 by Thomas Bock ## Copyright 2017 by Raphael Nömmer -## Copyright 2017 by Christian Hechtl +## Copyright 2017-2018 by Christian Hechtl ## Copyright 2017 by Felix Prasse ## Copyright 2017 by Ferdinand Frank ## All Rights Reserved. @@ -606,6 +606,7 @@ ProjectData = R6::R6Class("ProjectData", #' Get single pasta items. #' For a given 'message.id', the associated 'commit.hash' is returned. #' For a given 'commit.hash', the associated 'message.id' or IDs are returned. + #' If there is no assigned 'commit.hash' for the given 'message.id', we return "". 
#' #' @param message.id the message ID to get the corresponding commit hash #' @param commit.hash the commit hash to get the corresponding message ID @@ -637,27 +638,36 @@ ProjectData = R6::R6Class("ProjectData", }, #' Get the names of all data sources that are currently cached in the - #' ProjectData object. + #' ProjectData object. The possible data sources are: + #' 'data.timestamps', 'commits', 'mails', 'issues', 'authors', + #' 'synchronicity', and 'pasta'. #' #' @return a vector containing all the names get.cached.data.sources = function() { result = c() + ## timestamp data if (!is.null(private$data.timestamps)) { result = c(result, "data.timestamps") } + + ## main data sources if (!is.null(private$commits)) { result = c(result, "commits") } if (!is.null(private$mails)) { result = c(result, "mails") } - if (!is.null(private$authors)) { - result = c(result, "authors") - } if (!is.null(private$issues)) { result = c(result, "issues") } + + ## author data + if (!is.null(private$authors)) { + result = c(result, "authors") + } + + ## additional data sources if (!is.null(private$synchronicity)) { result = c(result, "synchronicity") } @@ -670,12 +680,12 @@ ProjectData = R6::R6Class("ProjectData", #' Compares two ProjectData objects by first comparing the names of the #' cached data sources of the two. - #' Then it compares the ProjectConf objects and in the end the cached data + #' Then, it compares the ProjectConf objects and, in the end, the cached data #' sources. 
#' - #' @param other.data.object the object to compare + #' @param other.data.object the object with which to compare #' - #' @return TRUE is the objects are equal and FALSE otherwise + #' @return \code{TRUE} if the objects are equal and \code{FALSE} otherwise equals = function(other.data.object) { if (!(class(other.data.object)[1] == "ProjectData")) { logging::logerror("You can only compare a ProjectData object against another one.") @@ -685,15 +695,18 @@ ProjectData = R6::R6Class("ProjectData", self.data.sources = self$get.cached.data.sources() other.data.sources = other.data.object$get.cached.data.sources() + ## compare the list of cached data sources if (!identical(self.data.sources, other.data.sources)) { return(FALSE) } + ## compare the two ProjectConf objects if (!identical(self$get.project.conf()$get.conf.as.string(), - other.data.object$get.project.conf()$get.conf.as.string())) { + other.data.object$get.project.conf()$get.conf.as.string())) { return(FALSE) } + ## compare the cached data sources for (source in self.data.sources) { function.call = paste("get.", source, "()", sep = "") if (!identical(self[[function.call]], other.data.object[[function.call]])) { diff --git a/util-networks-metrics.R b/util-networks-metrics.R index 9bcde008..5a35c8c2 100644 --- a/util-networks-metrics.R +++ b/util-networks-metrics.R @@ -14,6 +14,7 @@ ## Copyright 2015 by Thomas Bock ## Copyright 2017 by Raphael Nömmer ## Copyright 2017-2018 by Claus Hunsen +## Copyright 2017-2018 by Christian Hechtl ## All Rights Reserved. @@ -190,7 +191,8 @@ metrics.scale.freeness = function(network) { #' @param network the network to be examined #' #' @return A dataframe containing the logarithm of the node degree and the logarithm -#' of the local clustering coefficient for each node. +#' of the local clustering coefficient for each node as well as the non-logarithmic values +#' for these. 
metrics.hierarchy = function(network) { degrees = igraph::degree(network, mode = "total") cluster.coeff = igraph::transitivity(network, type = "local", vids = NULL) diff --git a/util-read.R b/util-read.R index 51849c8d..fd179974 100644 --- a/util-read.R +++ b/util-read.R @@ -13,7 +13,7 @@ ## ## Copyright 2016-2018 by Claus Hunsen ## Copyright 2017 by Raphael Nömmer -## Copyright 2017 by Christian Hechtl +## Copyright 2017-2018 by Christian Hechtl ## Copyright 2017 by Felix Prasse ## Copyright 2017 by Thomas Bock ## All Rights Reserved. @@ -292,7 +292,8 @@ read.authors = function(data.path) { #' Read and parse the pasta data from the 'similar-mailbox' file. #' The form in the file is : ... => commit.hash commit.hash2 .... -#' The parsed form is a data frame with message IDs as keys and commit hashes as values. +#' The parsed form is a data frame with message IDs as keys, commit hashes as values, and a revision set id. +#' If the message ID does not get mapped to a commit hash, the value for the commit hash is \code{NA}. 
#' #' @param data.path the path to the pasta data #' @@ -323,7 +324,7 @@ read.pasta = function(data.path) { return(NULL) } - if(!grepl("<", line)) { + if (!grepl("<", line)) { logging::logwarn("Faulty line: %s", line) return(NULL) } From 6de0830e57f19f3bebf45f6ee1c7ef9537c60350 Mon Sep 17 00:00:00 2001 From: Christian Hechtl Date: Wed, 9 May 2018 10:14:58 +0200 Subject: [PATCH 44/51] Add minor code improvements This patch contains the following changes: Return `FALSE` when no second `ProjectData` object is given to the equals method. Add parentheses to the function calls in the equals method of `ProjectData`. Change the revision set id in the pasta reading method from a global parameter to a `seq_along` list and iterate with `mcmapply`. Signed-off-by: Christian Hechtl --- util-data.R | 6 +++--- util-read.R | 7 ++----- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/util-data.R b/util-data.R index dd044068..6d6dd315 100644 --- a/util-data.R +++ b/util-data.R @@ -689,7 +689,7 @@ ProjectData = R6::R6Class("ProjectData", equals = function(other.data.object) { if (!(class(other.data.object)[1] == "ProjectData")) { logging::logerror("You can only compare a ProjectData object against another one.") - return() + return(FALSE) } self.data.sources = self$get.cached.data.sources() @@ -708,8 +708,8 @@ ProjectData = R6::R6Class("ProjectData", ## compare the cached data sources for (source in self.data.sources) { - function.call = paste("get.", source, "()", sep = "") - if (!identical(self[[function.call]], other.data.object[[function.call]])) { + function.call = paste0("get.", source, "()") + if (!identical(self[[function.call]](), other.data.object[[function.call]]())) { return(FALSE) } } diff --git a/util-read.R b/util-read.R index fd179974..86aec724 100644 --- a/util-read.R +++ b/util-read.R @@ -316,9 +316,7 @@ read.pasta = function(data.path) { return(data.frame()) } - revision.set.id = 1 - - result.list
= parallel::mcmapply(lines, seq_along(lines), SIMPLIFY = FALSE, FUN = function(line, line.id) { #line = lines[i] if ( nchar(line) == 0 ) { return(NULL) @@ -348,8 +346,7 @@ read.pasta = function(data.path) { #df = data.frame(message.id = keys.split, commit.hash = values.split) df = merge(keys.split, values.split) colnames(df) = c("message.id", "commit.hash") - df$revision.set.id = revision.set.id - revision.set.id <<- revision.set.id + 1 + df$revision.set.id = line.id return(df) }) result.df = plyr::rbind.fill(result.list) From 08f716f5fe0b6c34df5ddf34ddbf6864656ab207 Mon Sep 17 00:00:00 2001 From: Christian Hechtl Date: Fri, 11 May 2018 10:42:09 +0200 Subject: [PATCH 45/51] Add minor improvements to code and documentation Change the revision.set.id from an integer to a string of the form Signed-off-by: Christian Hechtl --- tests/test-read.R | 16 +++++++++------- util-data.R | 4 ++-- util-read.R | 5 ++--- 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/tests/test-read.R b/tests/test-read.R index 3cb0c915..49e7f81e 100644 --- a/tests/test-read.R +++ b/tests/test-read.R @@ -215,15 +215,17 @@ test_that("Read and parse the pasta data.", { pasta.data.read = read.pasta(proj.conf$get.value("datapath.pasta")) ## build the expected data.frame - pasta.data.expected = data.frame(message.id= c("","", - "","", - "","","", + pasta.data.expected = data.frame(message.id = c("", "", + "", "", + "", "", "", ""), - commit.hash= c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0","5a5ec9675e98187e1e92561e1888aa6f04faa338", - NA,"1143db502761379c2bfcecc2007fc34282e7ee61", - "1143db502761379c2bfcecc2007fc34282e7ee61","1143db502761379c2bfcecc2007fc34282e7ee61", + commit.hash = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338", + NA, "1143db502761379c2bfcecc2007fc34282e7ee61", + "1143db502761379c2bfcecc2007fc34282e7ee61", "1143db502761379c2bfcecc2007fc34282e7ee61", "0a1a5c523d835459c42f33e863623138555e2526", 
"72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0"), - revision.set.id = c(1, 2, 3, 4, 4, 4, 5, 5)) + revision.set.id = c("", "", "", + "", "", "", + "", "")) ## check the results expect_identical(pasta.data.read, pasta.data.expected, info = "PaStA data.") diff --git a/util-data.R b/util-data.R index 6d6dd315..3b84e825 100644 --- a/util-data.R +++ b/util-data.R @@ -606,7 +606,7 @@ ProjectData = R6::R6Class("ProjectData", #' Get single pasta items. #' For a given 'message.id', the associated 'commit.hash' is returned. #' For a given 'commit.hash', the associated 'message.id' or IDs are returned. - #' If there is no assigned 'commit.hash' for the given 'message.id', we return "". + #' If there is no assigned 'commit.hash' for the given 'message.id', "" is returned. #' #' @param message.id the message ID to get the corresponding commit hash #' @param commit.hash the commit hash to get the corresponding message ID @@ -708,7 +708,7 @@ ProjectData = R6::R6Class("ProjectData", ## compare the cached data sources for (source in self.data.sources) { - function.call = paste0("get.", source, "()") + function.call = paste0("get.", source) if (!identical(self[[function.call]](), other.data.object[[function.call]]())) { return(FALSE) } diff --git a/util-read.R b/util-read.R index 86aec724..1f05d8f3 100644 --- a/util-read.R +++ b/util-read.R @@ -290,7 +290,7 @@ read.authors = function(data.path) { ## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / ## PaStA data -------------------------------------------------------------- -#' Read and parse the pasta data from the 'similar-mailbox' file. +#' Read and parse the pasta data from the 'mbox-result' file. #' The form in the file is : ... => commit.hash commit.hash2 .... #' The parsed form is a data frame with message IDs as keys, commit hashes as values, and a revision set id. #' If the message ID does not get mapped to a commit hash, the value for the commit hash is \code{NA}. 
@@ -343,10 +343,9 @@ read.pasta = function(data.path) { } # Transform data to data.frame - #df = data.frame(message.id = keys.split, commit.hash = values.split) df = merge(keys.split, values.split) colnames(df) = c("message.id", "commit.hash") - df$revision.set.id = line.id + df["revision.set.id"] = sprintf("", line.id) return(df) }) result.df = plyr::rbind.fill(result.list) From bfdabdd68a5d84c50cdb64534f57750ced76a78d Mon Sep 17 00:00:00 2001 From: Christian Hechtl Date: Fri, 11 May 2018 13:06:35 +0200 Subject: [PATCH 46/51] Remove the data timestamps from the data sources for the equals method This is because the timestamps are implicitly tested anyway, and they cause problems because their getter reads all data sources that are not yet cached, which changes the data object that is tested. Signed-off-by: Christian Hechtl --- tests/test-data.R | 47 +++++++++++++++++++++++++++++++++++++++++++++++ util-data.R | 10 +++------- 2 files changed, 50 insertions(+), 7 deletions(-) diff --git a/tests/test-data.R b/tests/test-data.R index 3b6b710b..bd38c8d6 100644 --- a/tests/test-data.R +++ b/tests/test-data.R @@ -39,8 +39,55 @@ test_that("Compare two ProjectData objects", { expect_true(proj.data.one$equals(proj.data.two), info = "Two identical ProjectData objects.") + ## Always change one data source in the one object, test for inequality, change it in the + ## second object, as well, and test for equality.
+ ##change the second data object + proj.data.one$get.commits() + + expect_false(proj.data.one$equals(proj.data.two), "Two not identical ProjectData objects.") + + proj.data.two$get.commits() + + expect_true(proj.data.one$equals(proj.data.two), "Two identical ProjectData objects.") + proj.data.two$get.pasta() expect_false(proj.data.one$equals(proj.data.two), "Two not identical ProjectData objects.") + + proj.data.one$get.pasta() + + expect_true(proj.data.one$equals(proj.data.two), "Two identical ProjectData objects.") + + proj.data.two$get.mails() + + expect_false(proj.data.one$equals(proj.data.two), "Two not identical ProjectData objects.") + + proj.data.one$get.mails() + + expect_true(proj.data.one$equals(proj.data.two), "Two identical ProjectData objects.") + + proj.data.one$get.issues() + + expect_false(proj.data.one$equals(proj.data.two), "Two not identical ProjectData objects.") + + proj.data.two$get.issues() + + expect_true(proj.data.one$equals(proj.data.two), "Two identical ProjectData objects.") + + proj.data.two$get.authors() + + expect_false(proj.data.one$equals(proj.data.two), "Two not identical ProjectData objects.") + + proj.data.one$get.authors() + + expect_true(proj.data.one$equals(proj.data.two), "Two identical ProjectData objects.") + + proj.data.one$get.synchronicity() + + expect_false(proj.data.one$equals(proj.data.two), "Two not identical ProjectData objects.") + + proj.data.two$get.synchronicity() + + expect_true(proj.data.one$equals(proj.data.two), "Two identical ProjectData objects.") }) diff --git a/util-data.R b/util-data.R index 3b84e825..611375dd 100644 --- a/util-data.R +++ b/util-data.R @@ -639,18 +639,14 @@ ProjectData = R6::R6Class("ProjectData", #' Get the names of all data sources that are currently cached in the #' ProjectData object. The possible data sources are: - #' 'data.timestamps', 'commits', 'mails', 'issues', 'authors', - #' 'synchronicity', and 'pasta'. 
+ #' 'commits', 'mails', 'issues', 'authors', synchronicity', and 'pasta'. + #' 'data.timestamps' are tested implicitly every time as they only contain + #' the earliest and latest date of one data source. #' #' @return a vector containing all the names get.cached.data.sources = function() { result = c() - ## timestamp data - if (!is.null(private$data.timestamps)) { - result = c(result, "data.timestamps") - } - ## main data sources if (!is.null(private$commits)) { result = c(result, "commits") From 5f2f9250f33d936385c4a5d02ed93d2d6b5ac225 Mon Sep 17 00:00:00 2001 From: Christian Hechtl Date: Fri, 11 May 2018 13:30:25 +0200 Subject: [PATCH 47/51] Add changes of PR #124 to the Changelog file Signed-off-by: Christian Hechtl --- NEWS.md | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/NEWS.md b/NEWS.md index 74333b3d..2cd0e481 100644 --- a/NEWS.md +++ b/NEWS.md @@ -47,7 +47,21 @@ ### Added - Possibility to contract imperfect ranges in the end (#104, 8ebcf20b0aba0cb82dcd7e1d1b95e261a866d04e) +- Equals method for the `ProjectData` objects (#116, 00df306a3e6dbdeb81ddc116e88a4854b07afe72) +- Hierarchy network metric as classification metric in the core-peripheral module (8139f34fd809d6750064514a549024df4cbf5863) +### Changed/Improved +- The reading method for the pasta data now also reads lines without the `message.id` being mapped to a `commit.hash` +- The pasta data get a `revision.set.id` now to show what mails are concerned with the same patch +- Pasta data are now added to the commit data if it is configured (70d9b8bd4cb16636086ca7ab90e817b89844f172) + +### Fixed +- Check whether a given object to the `ProjectConf` setter in the `ProjectData` class really is a object of the + type `ProjectConf` (ab00c962e164428df2d59de7292eed3c3b1352aa) +- The method for eigenvector centrality now considers whether the network is directed or not + (c0277c36e4ff45cfbb421317a42b6ea8736afe53) +- Fix a bug that caused errors when the core classification within a 
core-periphery classification is empty + (c0277c36e4ff45cfbb421317a42b6ea8736afe53) ## 3.1.2 From 7e5e6a5916880203d7eba661934e22bf9f99e223 Mon Sep 17 00:00:00 2001 From: Christian Hechtl Date: Thu, 17 May 2018 11:05:28 +0200 Subject: [PATCH 48/51] Add equals method to the RangeData class To do so, fix the check for the object type in the equals method of the ProjectData class to also allow RangeData objects. Call this method from the equals method in RangeData and then compare the remaining two data sources (`range` and `revision.callgraph`). Also add a test for this and fix the faulty documentation of `get.data.cut.to.same.date()` This fixes #116 Signed-off-by: Christian Hechtl --- tests/test-data.R | 23 +++++++++++++++++++++++ util-data.R | 26 ++++++++++++++++++++++++-- 2 files changed, 47 insertions(+), 2 deletions(-) diff --git a/tests/test-data.R b/tests/test-data.R index bd38c8d6..f9023332 100644 --- a/tests/test-data.R +++ b/tests/test-data.R @@ -91,3 +91,26 @@ test_that("Compare two ProjectData objects", { expect_true(proj.data.one$equals(proj.data.two), "Two identical ProjectData objects.") }) + +test_that("Compare two RangeData objects", { + + ##initialize a ProjectData object with the ProjectConf + ##cut it on the base of commits and clone the resulting RangeData object + proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, ARTIFACT) + proj.data.base = ProjectData$new(project.conf = proj.conf) + range.data.one = proj.data.base$get.data.cut.to.same.date("commits") + range.data.two = range.data.one$clone() + + ##compare the two equal RangeData objects + expect_true(range.data.one$equals(range.data.two)) + + ##cut the ProjectData object on base of issues in order to get another + ##RangeData object to check for inequality + range.data.three = proj.data.base$get.data.cut.to.same.date("issues") + + expect_false(range.data.one$equals(range.data.three)) + + ##check whether a ProjectData object can be compared to a RangeData object + 
expect_false(range.data.one$equals(proj.data.base)) + +}) diff --git a/util-data.R b/util-data.R index 611375dd..1e031e00 100644 --- a/util-data.R +++ b/util-data.R @@ -683,7 +683,7 @@ ProjectData = R6::R6Class("ProjectData", #' #' @return \code{TRUE} if the objects are equal and \code{FALSE} otherwise equals = function(other.data.object) { - if (!(class(other.data.object)[1] == "ProjectData")) { + if (!("ProjectData" %in% class(other.data.object))) { logging::logerror("You can only compare a ProjectData object against another one.") return(FALSE) } @@ -770,7 +770,7 @@ ProjectData = R6::R6Class("ProjectData", #' #' @param data.sources the specified data sources #' - #' @return a list of the cut data.sources + #' @return the RangeData object with cut data sources get.data.cut.to.same.date = function(data.sources = c("mails", "commits", "issues")) { ## check arguments data.sources = match.arg(arg = data.sources, several.ok = TRUE) @@ -1025,6 +1025,28 @@ RangeData = R6::R6Class("RangeData", inherit = ProjectData, #' @return the revision callgraph get.revision.callgraph = function() { return(private$revision.callgraph) + }, + + #' Compares two RangeData objects by first calling the \code{equals()} method of + #' ProjectData. + #' Then, it compares the additional two data sources (\code{range} and + #' \code{revision.callgraph}). 
+ #' + #' @param other.data.object the object with which to compare + #' + #' @return \code{TRUE} if the objects are equal and \code{FALSE} otherwise + equals = function(other.data.object = NULL) { + if (!("RangeData" %in% class(other.data.object))) { + logging::logerror("You can only compare a RangeData object against another one.") + return(FALSE) + } + if (super$equals(other.data.object = other.data.object)) { + return((identical(self$get.range(), other.data.object$get.range()) && + identical(self$get.revision.callgraph(), + other.data.object$get.revision.callgraph()))) + } else { + return(FALSE) + } } ) ) From f8b5985e936e441361965a79c7e75031205995ef Mon Sep 17 00:00:00 2001 From: Christian Hechtl Date: Thu, 17 May 2018 12:18:39 +0200 Subject: [PATCH 49/51] Restructure equals method in RangeData Add check that one can not compare a ProjectData object to a RangeData object Expand test to that effect Signed-off-by: Christian Hechtl --- tests/test-data.R | 20 ++++++++++++++------ util-data.R | 24 +++++++++++++++++++----- 2 files changed, 33 insertions(+), 11 deletions(-) diff --git a/tests/test-data.R b/tests/test-data.R index f9023332..acbf90a6 100644 --- a/tests/test-data.R +++ b/tests/test-data.R @@ -94,23 +94,31 @@ test_that("Compare two ProjectData objects", { test_that("Compare two RangeData objects", { - ##initialize a ProjectData object with the ProjectConf - ##cut it on the base of commits and clone the resulting RangeData object + ## initialize a ProjectData object with the ProjectConf + ## cut it on the base of commits and clone the resulting RangeData object proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, ARTIFACT) proj.data.base = ProjectData$new(project.conf = proj.conf) range.data.one = proj.data.base$get.data.cut.to.same.date("commits") range.data.two = range.data.one$clone() - ##compare the two equal RangeData objects + ## compare the two equal RangeData objects expect_true(range.data.one$equals(range.data.two)) - ##cut the 
ProjectData object on base of issues in order to get another - ##RangeData object to check for inequality + ## cut the ProjectData object on base of issues in order to get another + ## RangeData object to check for inequality range.data.three = proj.data.base$get.data.cut.to.same.date("issues") expect_false(range.data.one$equals(range.data.three)) - ##check whether a ProjectData object can be compared to a RangeData object + ## check whether a ProjectData object can be compared to a RangeData object expect_false(range.data.one$equals(proj.data.base)) + expect_false(proj.data.base$equals(range.data.one)) + + ## create a RangeData object with the same data sources as proj.data.base + ## and check for inequality + timestamps = proj.data.base$get.data.timestamps(outermost = TRUE) + range.data.four = split.data.time.based(proj.data.base, bins = c(timestamps$start[[1]], timestamps$end[[1]]))[[1]] + + expect_false(proj.data.base$equals(range.data.four)) }) diff --git a/util-data.R b/util-data.R index 1e031e00..6fa134b7 100644 --- a/util-data.R +++ b/util-data.R @@ -688,6 +688,11 @@ ProjectData = R6::R6Class("ProjectData", return(FALSE) } + if (!identical(self$get.class.name(), other.data.object$get.class.name())) { + logging::logerror("You can only compare two instances of the same class.") + return(FALSE) + } + self.data.sources = self$get.cached.data.sources() other.data.sources = other.data.object$get.cached.data.sources() @@ -1036,17 +1041,26 @@ RangeData = R6::R6Class("RangeData", inherit = ProjectData, #' #' @return \code{TRUE} if the objects are equal and \code{FALSE} otherwise equals = function(other.data.object = NULL) { + + ## check whether the given object is a instance of RangeData if (!("RangeData" %in% class(other.data.object))) { logging::logerror("You can only compare a RangeData object against another one.") return(FALSE) } - if (super$equals(other.data.object = other.data.object)) { - return((identical(self$get.range(), other.data.object$get.range()) && 
- identical(self$get.revision.callgraph(), - other.data.object$get.revision.callgraph()))) - } else { + + ## check whether the ranges are equal + if (!identical(self$get.range(), other.data.object$get.range())) { return(FALSE) } + + ## check whether the revision callgraphs are equal + if (!identical(self$get.revision.callgraph(), + other.data.object$get.revision.callgraph())) { + return(FALSE) + } + + ## check the equality of the remaining data sources + return(super$equals(other.data.object = other.data.object)) } ) ) From 80a83ad470304cde03e2565318642e53186556e2 Mon Sep 17 00:00:00 2001 From: Christian Hechtl Date: Thu, 17 May 2018 14:10:50 +0200 Subject: [PATCH 50/51] Fix minor coding style and documentation issues Signed-off-by: Christian Hechtl --- tests/test-data.R | 3 ++- util-data.R | 14 ++++++++------ 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/tests/test-data.R b/tests/test-data.R index acbf90a6..e1951429 100644 --- a/tests/test-data.R +++ b/tests/test-data.R @@ -117,7 +117,8 @@ test_that("Compare two RangeData objects", { ## create a RangeData object with the same data sources as proj.data.base ## and check for inequality timestamps = proj.data.base$get.data.timestamps(outermost = TRUE) - range.data.four = split.data.time.based(proj.data.base, bins = c(timestamps$start[[1]], timestamps$end[[1]]))[[1]] + range.data.four = split.data.time.based(proj.data.base, bins = + c(timestamps[["start"]][[1]], timestamps[["end"]][[1]]))[[1]] expect_false(proj.data.base$equals(range.data.four)) diff --git a/util-data.R b/util-data.R index 6fa134b7..1ab8976b 100644 --- a/util-data.R +++ b/util-data.R @@ -683,11 +683,14 @@ ProjectData = R6::R6Class("ProjectData", #' #' @return \code{TRUE} if the objects are equal and \code{FALSE} otherwise equals = function(other.data.object) { + + ## check whether the given object is an instance of ProjectData if (!("ProjectData" %in% class(other.data.object))) { logging::logerror("You can only compare a ProjectData 
object against another one.") return(FALSE) } + ## check whether the two objects are of the same type if (!identical(self$get.class.name(), other.data.object$get.class.name())) { logging::logerror("You can only compare two instances of the same class.") return(FALSE) @@ -1032,17 +1035,16 @@ RangeData = R6::R6Class("RangeData", inherit = ProjectData, return(private$revision.callgraph) }, - #' Compares two RangeData objects by first calling the \code{equals()} method of - #' ProjectData. - #' Then, it compares the additional two data sources (\code{range} and - #' \code{revision.callgraph}). + #' Compares two RangeData objects by first comparing the ranges. Then it compares + #' the revision callgraphs and if they are equal, it calls the equals method of + #' ProjectData to compare the remaining data sources. #' #' @param other.data.object the object with which to compare #' #' @return \code{TRUE} if the objects are equal and \code{FALSE} otherwise equals = function(other.data.object = NULL) { - ## check whether the given object is a instance of RangeData + ## check whether the given object is an instance of RangeData if (!("RangeData" %in% class(other.data.object))) { logging::logerror("You can only compare a RangeData object against another one.") return(FALSE) @@ -1055,7 +1057,7 @@ RangeData = R6::R6Class("RangeData", inherit = ProjectData, ## check whether the revision callgraphs are equal if (!identical(self$get.revision.callgraph(), - other.data.object$get.revision.callgraph())) { + other.data.object$get.revision.callgraph())) { return(FALSE) } From 510863ed2c090f01aeca3bc7c4d78631dd2ad67c Mon Sep 17 00:00:00 2001 From: Claus Hunsen Date: Thu, 17 May 2018 16:52:33 +0200 Subject: [PATCH 51/51] Version v3.2 Signed-off-by: Claus Hunsen --- NEWS.md | 85 ++++++++++++++++++++------------------------------------- 1 file changed, 30 insertions(+), 55 deletions(-) diff --git a/NEWS.md b/NEWS.md index 2cd0e481..fdbbacfb 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,67 +1,42 
@@ # codeface-extraction-r – Changelog -## unversioned - -### Add relations to authors and artifacts (2f1b4d9b0d6a629163a6dd3111b20930e15fcc13) -- add for new relation types for each edge -- accept vector with more than one relation for `author.relation` and `artifact.relation` in util-conf.R - -### Changes in util-networks (#98, 2f1b4d9b0d6a629163a6dd3111b20930e15fcc13) - -#### Build Networks (2f1b4d9b0d6a629163a6dd3111b20930e15fcc13) -- edit function `get.author.network` to handle more than one relation -- edit function `get.artifact.network` to handle more than one artifact relation -- handle more than one relation type and merge the resulting vertex lists and edge lists in `get.bipartite.network` -- enable for different relations for `authors.to.artifacts` in `get.multi.network`, add information about the relation - and merge vertex sets -- add new vertex attribute `kind` -(7c628fb93eb21f280c7d9da66680f817e107fa24, 7ad49c4ad937c9a6c7398a45179e25d5d5c03faa) -- remove vertex attribute `id` in artifact vertices (7ad49c4ad937c9a6c7398a45179e25d5d5c03faa) - -#### Network and Edge Construction (2f1b4d9b0d6a629163a6dd3111b20930e15fcc13) -- function `construct.network.from.list` split in two functions `construct.edge.list.from.key.value.list` and `construct.network.from.edge.list` -- add function `merge.network.data` und `merge.networks` -- handle more than one relation in function `add.edges.for.bipartite.relation` and set the edge attribute `relation` -- add function `create.empty.edge.list` -- enable for different relations in `get.bipartite.relation` and save the type of the relation in an attribute `relation` - -#### Simplify (021ac8b88e9a181364a51e89807df55cb741ed44) -- iterate over the different types of relations and simplfy the subnetworks relating to the `relation` attribute -- the `EDGE.ATTR.HANDLING` of the attribute `relation` is "first" - -### Changes in test suite (784c417c50eb1de5d0143908a390ead6ba22dbbf, 7ad49c4ad937c9a6c7398a45179e25d5d5c03faa, 
be6ee8cd48dc7692e02b7f1c512870591300fa8a) -- add relation attribute `relation` to result data frames -- remove vertex attribute `id` -- write new tests for networks with more than one relation type - -### Changes in util-plot (b55d3e84a5f9b122dacd0ee52784d930f22d1f4b, f190ca130a15a82e5eed836e9ffc53b8a34aac20) -- colors of the edges depending on the relation type -- line shape depend on the edge type (inter or intra) -- change colors of edges -- remove colors from `plot.fix.type.attributes` -- use palette 'viridis' -- different colors for artifacts - - -## unversioned + +## 3.2 ### Added +- Handling of multiple relations for all types of networks (#98, #15, #11, PR #115) + - Allow several entries for the entries `author.relation` and `artifact.relation` in the `NetworkConf` (2f1b4d9b0d6a629163a6dd3111b20930e15fcc13) + - Add the mandatory edge attribute `relation` representing `author.relation` or `artifact.relation`, respectively, for all types of networks (2f1b4d9b0d6a629163a6dd3111b20930e15fcc13) + - Return data for several relations in `get.bipartite.relation` (2f1b4d9b0d6a629163a6dd3111b20930e15fcc13) + - Retain one edge for each available value of edge attribute `relation` during network simplification (021ac8b88e9a181364a51e89807df55cb741ed44) + - Add new tests and adapt existing ones (784c417c50eb1de5d0143908a390ead6ba22dbbf, 7ad49c4ad937c9a6c7398a45179e25d5d5c03faa, be6ee8cd48dc7692e02b7f1c512870591300fa8a) +- Add the mandatory vertex attribute `kind` describing the actual vertex kind (7c628fb93eb21f280c7d9da66680f817e107fa24, 784c417c50eb1de5d0143908a390ead6ba22dbbf) +- Respect new vertex and edge attributes in plot functions (b55d3e84a5f9b122dacd0ee52784d930f22d1f4b) +- Possibility to merge networks with function `merge.networks` (2f1b4d9b0d6a629163a6dd3111b20930e15fcc13) +- Possibility to merge edge and vertex lists with function `merge.network.data` (2f1b4d9b0d6a629163a6dd3111b20930e15fcc13) +- Add function `create.empty.edge.list` 
(2f1b4d9b0d6a629163a6dd3111b20930e15fcc13) - Possibility to contract imperfect ranges in the end (#104, 8ebcf20b0aba0cb82dcd7e1d1b95e261a866d04e) -- Equals method for the `ProjectData` objects (#116, 00df306a3e6dbdeb81ddc116e88a4854b07afe72) -- Hierarchy network metric as classification metric in the core-peripheral module (8139f34fd809d6750064514a549024df4cbf5863) +- Add method `ProjectData$equals` (#116, 00df306a3e6dbdeb81ddc116e88a4854b07afe72) +- Add author classification by hierarchy to the core-peripheral module (8139f34fd809d6750064514a549024df4cbf5863) ### Changed/Improved -- The reading method for the pasta data now also reads lines without the `message.id` being mapped to a `commit.hash` -- The pasta data get a `revision.set.id` now to show what mails are concerned with the same patch -- Pasta data are now added to the commit data if it is configured (70d9b8bd4cb16636086ca7ab90e817b89844f172) +- Remove the mandatory vertex attribute `artifact.type` due to inconsistent use () +- Remove the mandatory vertex attribute `id` from artifact vertices due to inconsistent use (7ad49c4ad937c9a6c7398a45179e25d5d5c03faa) +- Streamline edge attribute `artifact.type` for uniformity () +- Use color palette 'viridis' for plotting for better flexibility (f190ca130a15a82e5eed836e9ffc53b8a34aac20) +- Edge width in network plots now depends on edge weight, i.e., `width = 0.3 + 0.5 * log(weight)` (d791df8e2c41314f86c36b3af566141e7713f46c) +- Split function `construct.network.from.list` into the two functions `construct.edge.list.from.key.value.list` and `construct.network.from.edge.list` (2f1b4d9b0d6a629163a6dd3111b20930e15fcc13) +- Handle data for more than one relation in function `add.edges.for.bipartite.relation` (2f1b4d9b0d6a629163a6dd3111b20930e15fcc13) +- Retain one edge for each available value of edge attribute `relation` during network simplification (021ac8b88e9a181364a51e89807df55cb741ed44) +- Read also lines from the PaStA data without the `message.id` being 
mapped to a `commit.hash` (992ddf8d582a7a023f000b4fc57f9ff85a7f38f6) - Add column `revision.set.id` to PaStA data to indicate which e-mails are concerned with the same patch (992ddf8d582a7a023f000b4fc57f9ff85a7f38f6) - Add PaStA data to the unfiltered commit data if configured (70d9b8bd4cb16636086ca7ab90e817b89844f172) ### Fixed -- Check whether a given object to the `ProjectConf` setter in the `ProjectData` class really is a object of the - type `ProjectConf` (ab00c962e164428df2d59de7292eed3c3b1352aa) -- The method for eigenvector centrality now considers whether the network is directed or not - (c0277c36e4ff45cfbb421317a42b6ea8736afe53) -- Fix a bug that caused errors when the core classification within a core-periphery classification is empty - (c0277c36e4ff45cfbb421317a42b6ea8736afe53) +- Check whether a given object to the `ProjectConf` setter in the `ProjectData` class really is an object of type `ProjectConf` (ab00c962e164428df2d59de7292eed3c3b1352aa) +- The method for eigenvector centrality now properly considers whether the network is directed or not (c0277c36e4ff45cfbb421317a42b6ea8736afe53) +- Fix a bug that caused errors when the core classification within a core-periphery classification is empty (c0277c36e4ff45cfbb421317a42b6ea8736afe53) + ## 3.1.2