diff --git a/NEWS.md b/NEWS.md index 65cb0639..fdbbacfb 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,43 @@ # codeface-extraction-r – Changelog +## 3.2 + +### Added +- Handling of multiple relations for all types of networks (#98, #15, #11, PR #115) + - Allow several entries for the entries `author.relation` and `artifact.relation` in the `NetworkConf` (2f1b4d9b0d6a629163a6dd3111b20930e15fcc13) + - Add the mandatory edge attribute `relation` representing `author.relation` or `artifact.relation`, respectively, for all types of networks (2f1b4d9b0d6a629163a6dd3111b20930e15fcc13) + - Return data for several relations in `get.bipartite.relation` (2f1b4d9b0d6a629163a6dd3111b20930e15fcc13) + - Retain one edge for each available value of edge attribute `relation` during network simplification (021ac8b88e9a181364a51e89807df55cb741ed44) + - Add new tests and adapt existing ones (784c417c50eb1de5d0143908a390ead6ba22dbbf, 7ad49c4ad937c9a6c7398a45179e25d5d5c03faa, be6ee8cd48dc7692e02b7f1c512870591300fa8a) +- Add the mandatory vertex attribute `kind` describing the actual vertex kind (7c628fb93eb21f280c7d9da66680f817e107fa24, 784c417c50eb1de5d0143908a390ead6ba22dbbf) +- Respect new vertex and edge attributes in plot functions (b55d3e84a5f9b122dacd0ee52784d930f22d1f4b) +- Possibility to merge networks with function `merge.networks` (2f1b4d9b0d6a629163a6dd3111b20930e15fcc13) +- Possibility to merge edge and vertex lists with function `merge.network.data` (2f1b4d9b0d6a629163a6dd3111b20930e15fcc13) +- Add function `create.empty.edge.list` (2f1b4d9b0d6a629163a6dd3111b20930e15fcc13) +- Possibility to contract imperfect ranges in the end (#104, 8ebcf20b0aba0cb82dcd7e1d1b95e261a866d04e) +- Add method `ProjectData$equals` (#116, 00df306a3e6dbdeb81ddc116e88a4854b07afe72) +- Add author classification by hierarchy to the core-peripheral module (8139f34fd809d6750064514a549024df4cbf5863) + +### Changed/Improved +- Remove the mandatory vertex attribute `artifact.type` due to inconsistent use 
() +- Remove the mandatory vertex attribute `id` from artifact vertices due to inconsistent use (7ad49c4ad937c9a6c7398a45179e25d5d5c03faa) +- Streamline edge attribute `artifact.type` for uniformity () +- Use color palette 'viridis' for plotting for better flexibility (f190ca130a15a82e5eed836e9ffc53b8a34aac20) +- Edge width in network plots now depends on edge weight, i.e., `width = 0.3 + 0.5 * log(weight)` (d791df8e2c41314f86c36b3af566141e7713f46c) +- Split function `construct.network.from.list` into the two functions `construct.edge.list.from.key.value.list` and `construct.network.from.edge.list` (2f1b4d9b0d6a629163a6dd3111b20930e15fcc13) +- Handle data for more than one relation in function `add.edges.for.bipartite.relation` (2f1b4d9b0d6a629163a6dd3111b20930e15fcc13) +- Retain one edge for each available value of edge attribute `relation` during network simplification (021ac8b88e9a181364a51e89807df55cb741ed44) +- Read also lines from the PaStA data without the `message.id` being mapped to a `commit.hash` (992ddf8d582a7a023f000b4fc57f9ff85a7f38f6) +- Add column `revision.set.id` to PaStA data to indicate which e-mails are concerned with the same patch (992ddf8d582a7a023f000b4fc57f9ff85a7f38f6) +- Add PaStA data to the unfiltered commit data if configured (70d9b8bd4cb16636086ca7ab90e817b89844f172) + +### Fixed +- Check whether the object passed to the `ProjectConf` setter in the `ProjectData` class really is an object of type `ProjectConf` (ab00c962e164428df2d59de7292eed3c3b1352aa) +- The method for eigenvector centrality now properly considers whether the network is directed or not (c0277c36e4ff45cfbb421317a42b6ea8736afe53) +- Fix a bug that caused errors when the core classification within a core-periphery classification is empty (c0277c36e4ff45cfbb421317a42b6ea8736afe53) + + ## 3.1.2 ### Changed/Improved diff --git a/README.md b/README.md index fa2bfc7f..a8456e34 100644 --- a/README.md +++ b/README.md @@ -43,6 +43,7 @@ When selecting a version to work with, you 
should consider the following points: - `ggraph`: For plotting of networks (needs `udunits2` system library, e.g., `libudunits2-dev` on Ubuntu!) - `markovchain`: For core/peripheral transition probabilities - `lubridate`: For convenient date conversion and parsing +- `viridis`: For plotting of networks with nice colours ## How-To @@ -214,7 +215,7 @@ Updates to the parameters can be done by calling `NetworkConf$update.variables(. **Note**: Default values are shown in *italics*. - `author.relation` - * The relation among authors, encoded as edges in an author network + * The relation(s) among authors, encoded as edges in an author network * **Note**: The author--artifact relation in bipartite and multi networks is configured by `artifact.relation`! * possible values: [*`"mail"`*, `"cochange"`, `"issue"`] - `author.directed` @@ -228,7 +229,7 @@ Updates to the parameters can be done by calling `NetworkConf$update.variables(. * Remove all authors from an author network (including bipartite and multi networks) who are not present in an author network constructed with `artifact.relation` as relation, i.e., all authors that have no biparite relations in a bipartite/multi network are removed. * [`TRUE`, *`FALSE`*] - `artifact.relation` - * The relation among artifacts, encoded as edges in an artifact network + * The relation(s) among artifacts, encoded as edges in an artifact network * **Note**: This relation configures also the author--artifact relation in bipartite and multi networks! * possible values: [*`"cochange"`*, `"callgraph"`, `"mail"`, `"issue"`] - `artifact.directed` @@ -239,13 +240,14 @@ Updates to the parameters can be done by calling `NetworkConf$update.variables(. 
* The list of edge-attribute names and information * a subset of the following as a single vector: - timestamp information: *`"date"`*, `"date.offset"` + - general information: *`"artifact.type"`* - author information: `"author.name"`, `"author.email"` - committer information: `"committer.date"`, `"committer.name"`, `"committer.email"` - e-mail information: *`"message.id"`*, *`"thread"`*, `"subject"` - - commit information: *`"hash"`*, *`"file"`*, *`"artifact.type"`*, *`"artifact"`*, `"changed.files"`, `"added.lines"`, `"deleted.lines"`, `"diff.size"`, `"artifact.diff.size"`, `"synchronicity"` + - commit information: *`"hash"`*, *`"file"`*, *`"artifact"`*, `"changed.files"`, `"added.lines"`, `"deleted.lines"`, `"diff.size"`, `"artifact.diff.size"`, `"synchronicity"` - PaStA information: `"pasta"`, - issue information: *`"issue.id"`*, *`"event.name"`*, `"issue.state"`, `"creation.date"`, `"closing.date"`, `"is.pull.request"` - * **Note**: `"date"` is always included as this information is needed for several parts of the library, e.g., time-based splitting. + * **Note**: `"date"` and `"artifact.type"` are always included as this information is needed for several parts of the library, e.g., time-based splitting. * **Note**: For each type of network that can be built, only the applicable part of the given vector of names is respected. * **Note**: For the edge attributes `"pasta"` and `"synchronicity"`, the project configuration's parameters `pasta` and `synchronicity` need to be set to `TRUE`, respectively (see below). - `simplify` @@ -267,6 +269,22 @@ You can also update the `NetworkConf` object at any time by calling `NetworkBuil For more examples, please look in the file `showcase.R`. 
+### Network properties + +- Mandatory vertex attributes + * *`"type"`*: [`"Author"`, `"Artifact"`] + * *`"kind"`*: [`"Author"`, `"File"`, `"Feature"`, `"Function"`, `"MailThread"`, + `"Issue"`, `"FeatureExpression"`] + * *`"name"`* + +- Mandatory edge attributes + * *`"type"`*: [`"Unipartite"`, `"Bipartite"`] + * *`"artifact.type"`*: [`"File"`, `"Feature"`, `"Function"`, `"Mail"`, + `"IssueEvent"`, `"FeatureExpression"`] + * *`"relation"`*: [`"mail"`, `"cochange"`, `"issue"`, `"callgraph"`] (from `artifact.relation` and `author.relation` attributes in the `NetworkConf` class) + * *`"date"`* + + ## File/Module overview - `util-init.R` diff --git a/install.R b/install.R index 2ba574bb..07d65b49 100644 --- a/install.R +++ b/install.R @@ -22,7 +22,7 @@ ## to our needs. -pacakges = c( +packages = c( "yaml", "R6", "igraph", @@ -34,7 +34,8 @@ pacakges = c( "ggplot2", "ggraph", "markovchain", - "lubridate" + "lubridate", + "viridis" ) @@ -47,7 +48,7 @@ filter.installed.packages = function(packageList) { } -p = filter.installed.packages(pacakges) +p = filter.installed.packages(packages) if (length(p) > 0) { print(sprintf("Installing package '%s'.", p)) install.packages(p, dependencies = TRUE, verbose = FALSE, quiet = FALSE) diff --git a/showcase.R b/showcase.R index c3f62609..4dfe6abc 100644 --- a/showcase.R +++ b/showcase.R @@ -49,10 +49,10 @@ CF.DATA = "/path/to/codeface-data" # path to codeface data CF.SELECTION.PROCESS = "threemonth" # releases, threemonth(, testing) CASESTUDY = "busybox" -ARTIFACT = "feature" # function, feature, file, featureexpression +ARTIFACT = "feature" # function, feature, file, featureexpression (only relevant for cochange) -AUTHOR.RELATION = "mail" # mail, cochange -ARTIFACT.RELATION = "cochange" # cochange, callgraph +AUTHOR.RELATION = "mail" # mail, cochange, issue +ARTIFACT.RELATION = "cochange" # cochange, callgraph, mail, issue ## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / diff --git 
a/tests/codeface-data/results/testing/test_pasta/similar-mailbox b/tests/codeface-data/results/testing/test_pasta/mbox-result similarity index 82% rename from tests/codeface-data/results/testing/test_pasta/similar-mailbox rename to tests/codeface-data/results/testing/test_pasta/mbox-result index ffba64c4..2ace66fa 100644 --- a/tests/codeface-data/results/testing/test_pasta/similar-mailbox +++ b/tests/codeface-data/results/testing/test_pasta/mbox-result @@ -1,5 +1,5 @@ => 72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0 => 5a5ec9675e98187e1e92561e1888aa6f04faa338 - => 3a0ed78458b3976243db6829f63eba3eead26774 + => 1143db502761379c2bfcecc2007fc34282e7ee61 => 0a1a5c523d835459c42f33e863623138555e2526 72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0 diff --git a/tests/test-data-cut.R b/tests/test-data-cut.R index b65d7d00..65acfc5d 100644 --- a/tests/test-data-cut.R +++ b/tests/test-data-cut.R @@ -14,6 +14,7 @@ ## Copyright 2017 by Christian Hechtl ## Copyright 2017 by Felix Prasse ## Copyright 2018 by Claus Hunsen +## Copyright 2018 by Barbara Eckl ## All Rights Reserved. @@ -68,7 +69,8 @@ test_that("Cut commit and mail data to same date range.", { date = get.date.from.string("2016-07-12 16:04:40"), date.offset = as.integer(c(100)), subject = c("Re: Fw: busybox 2 tab"), - thread = sprintf("", c(9))) + thread = sprintf("", c(9)), + artifact.type = "Mail") commit.data = x.data$get.data.cut.to.same.date(data.sources = data.sources)$get.commits() rownames(commit.data) = 1:nrow(commit.data) diff --git a/tests/test-data.R b/tests/test-data.R new file mode 100644 index 00000000..e1951429 --- /dev/null +++ b/tests/test-data.R @@ -0,0 +1,125 @@ +## This file is part of codeface-extraction-r, which is free software: you +## can redistribute it and/or modify it under the terms of the GNU General +## Public License as published by the Free Software Foundation, version 2. 
+## +## This program is distributed in the hope that it will be useful, +## but WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +## GNU General Public License for more details. +## +## You should have received a copy of the GNU General Public License along +## with this program; if not, write to the Free Software Foundation, Inc., +## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +## +## Copyright 2018 by Christian Hechtl +## Copyright 2018 by Claus Hunsen +## All Rights Reserved. + + +context("Tests for ProjectData functionalities.") + +## +## Context +## + +CF.DATA = file.path(".", "codeface-data") +CF.SELECTION.PROCESS = "testing" +CASESTUDY = "test" +ARTIFACT = "feature" + +## use only when debugging this file independently +if (!dir.exists(CF.DATA)) CF.DATA = file.path(".", "tests", "codeface-data") + +test_that("Compare two ProjectData objects", { + + ##initialize a ProjectData object with the ProjectConf and clone it into another one + proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, ARTIFACT) + proj.data.one = ProjectData$new(project.conf = proj.conf) + proj.data.two = proj.data.one$clone() + + expect_true(proj.data.one$equals(proj.data.two), info = "Two identical ProjectData objects.") + + ## Always change one data source in the one object, test for inequality, change it in the + ## second object, as well, and test for equality. 
+ + ##change the second data object + proj.data.one$get.commits() + + expect_false(proj.data.one$equals(proj.data.two), "Two not identical ProjectData objects.") + + proj.data.two$get.commits() + + expect_true(proj.data.one$equals(proj.data.two), "Two identical ProjectData objects.") + + proj.data.two$get.pasta() + + expect_false(proj.data.one$equals(proj.data.two), "Two not identical ProjectData objects.") + + proj.data.one$get.pasta() + + expect_true(proj.data.one$equals(proj.data.two), "Two identical ProjectData objects.") + + proj.data.two$get.mails() + + expect_false(proj.data.one$equals(proj.data.two), "Two not identical ProjectData objects.") + + proj.data.one$get.mails() + + expect_true(proj.data.one$equals(proj.data.two), "Two identical ProjectData objects.") + + proj.data.one$get.issues() + + expect_false(proj.data.one$equals(proj.data.two), "Two not identical ProjectData objects.") + + proj.data.two$get.issues() + + expect_true(proj.data.one$equals(proj.data.two), "Two identical ProjectData objects.") + + proj.data.two$get.authors() + + expect_false(proj.data.one$equals(proj.data.two), "Two not identical ProjectData objects.") + + proj.data.one$get.authors() + + expect_true(proj.data.one$equals(proj.data.two), "Two identical ProjectData objects.") + + proj.data.one$get.synchronicity() + + expect_false(proj.data.one$equals(proj.data.two), "Two not identical ProjectData objects.") + + proj.data.two$get.synchronicity() + + expect_true(proj.data.one$equals(proj.data.two), "Two identical ProjectData objects.") +}) + +test_that("Compare two RangeData objects", { + + ## initialize a ProjectData object with the ProjectConf + ## cut it on the base of commits and clone the resulting RangeData object + proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, ARTIFACT) + proj.data.base = ProjectData$new(project.conf = proj.conf) + range.data.one = proj.data.base$get.data.cut.to.same.date("commits") + range.data.two = range.data.one$clone() + + ## 
compare the two equal RangeData objects + expect_true(range.data.one$equals(range.data.two)) + + ## cut the ProjectData object on base of issues in order to get another + ## RangeData object to check for inequality + range.data.three = proj.data.base$get.data.cut.to.same.date("issues") + + expect_false(range.data.one$equals(range.data.three)) + + ## check whether a ProjectData object can be compared to a RangeData object + expect_false(range.data.one$equals(proj.data.base)) + expect_false(proj.data.base$equals(range.data.one)) + + ## create a RangeData object with the same data sources as proj.data.base + ## and check for inequality + timestamps = proj.data.base$get.data.timestamps(outermost = TRUE) + range.data.four = split.data.time.based(proj.data.base, bins = + c(timestamps[["start"]][[1]], timestamps[["end"]][[1]]))[[1]] + + expect_false(proj.data.base$equals(range.data.four)) + +}) diff --git a/tests/test-misc.R b/tests/test-misc.R index e14981cb..a74c3f67 100644 --- a/tests/test-misc.R +++ b/tests/test-misc.R @@ -274,6 +274,49 @@ test_that("Construct consecutive and overlapping ranges.", { expect_equal(result.raw, expected.raw, info = "Standard overlapping ranges (raw).") ## TODO use expect_identical here? why failing? 
+ ## overlapping ranges without imperfect ranges: + ## 1) expected results + expected.formatted = c( + "2018-01-01 00:00:00-2018-01-01 02:00:00", + "2018-01-01 01:30:00-2018-01-01 03:30:00", + "2018-01-01 03:00:00-2018-01-01 06:05:01" + ) + expected.raw = lapply(expected.formatted, get.range.bounds) + names(expected.raw) = expected.formatted + ## 2) formatted + result.formatted = construct.overlapping.ranges(start, end, time.period = "2 hours", overlap = "30 minutes", + imperfect.range.ratio = 1.0, raw = FALSE) + expect_identical(result.formatted, expected.formatted, + info = "Overlapping ranges without imperfect ranges (formatted).") + ## 3) raw + result.raw = construct.overlapping.ranges(start, end, time.period = "2 hours", overlap = "30 minutes", + imperfect.range.ratio = 1.0, raw = TRUE) + expect_equal(result.raw, expected.raw, + info = "Overlapping ranges without imperfect ranges (raw).") + ## TODO use expect_identical here? why failing? + + ## overlapping ranges with imperfect ranges that last at least 8% of the time period: + ## 1) expected results + expected.formatted = c( + "2018-01-01 00:00:00-2018-01-01 02:00:00", + "2018-01-01 01:30:00-2018-01-01 03:30:00", + "2018-01-01 03:00:00-2018-01-01 05:00:00", + "2018-01-01 04:30:00-2018-01-01 06:05:01" + ) + expected.raw = lapply(expected.formatted, get.range.bounds) + names(expected.raw) = expected.formatted + ## 2) formatted + result.formatted = construct.overlapping.ranges(start, end, time.period = "2 hours", overlap = "30 minutes", + imperfect.range.ratio = 0.08, raw = FALSE) + expect_identical(result.formatted, expected.formatted, + info = "Overlapping ranges with imperfect ranges (at least 8% of the time period) (formatted).") + ## 3) raw + result.raw = construct.overlapping.ranges(start, end, time.period = "2 hours", overlap = "30 minutes", + imperfect.range.ratio = 0.08, raw = TRUE) + expect_equal(result.raw, expected.raw, + info = "Overlapping ranges with imperfect ranges (at least 8% of the time 
period) (raw).") + ## TODO use expect_identical here? why failing? + ## non-overlapping/consecutive ranges: ## 1) expected results expected.formatted = c( @@ -295,6 +338,32 @@ test_that("Construct consecutive and overlapping ranges.", { results.raw = construct.consecutive.ranges(start.date, end.date, time.period = "2 hours", raw = FALSE) expect_equal(result.raw, expected.raw, info = "Non-overlapping ranges (consecutive).") + ## non-overlapping/consecutive ranges without imperfect ranges and exclusive end: + ## 1) expected results + expected.formatted = c( + "2018-01-01 00:00:00-2018-01-01 02:00:00", + "2018-01-01 02:00:00-2018-01-01 04:00:00", + "2018-01-01 04:00:00-2018-01-01 06:05:00" + ) + expected.raw = lapply(expected.formatted, get.range.bounds) + names(expected.raw) = expected.formatted + ## 2) formatted + result.formatted = construct.overlapping.ranges(start.date, end.date, time.period = "2 hours", overlap = 0, + imperfect.range.ratio = 1.0, include.end.date = FALSE, raw = FALSE) + expect_identical(result.formatted, expected.formatted, + info = "Non-overlapping ranges without imperfect ranges and exclusive end (formatted).") + ## 3) raw + result.raw = construct.overlapping.ranges(start.date, end.date, time.period = "2 hours", overlap = 0, + imperfect.range.ratio = 1.0, include.end.date = FALSE, raw = TRUE) + expect_equal(result.raw, expected.raw, + info = "Non-overlapping ranges without imperfect ranges and exclusive end (raw).") + ## TODO use expect_identical here? why failing? 
+ ## 4) matching with consecutive ranges + results.raw = construct.consecutive.ranges(start.date, end.date, time.period = "2 hours", + imperfect.range.ratio = 1.0, include.end.date = FALSE, raw = FALSE) + expect_equal(result.raw, expected.raw, + info = "Non-overlapping ranges without imperfect ranges and exclusive end (consecutive).") + ## illegal overlap expect_error( construct.overlapping.ranges(start.date, end.date, time.period = "2 hours", overlap = "1 year", raw = FALSE), @@ -314,7 +383,7 @@ test_that("Construct cumulative ranges.", { end.date = get.date.from.string(end) end.including = end.date + 1 - ## standard overlapping ranges: + ## standard cumulative ranges: ## 1) expected results expected.formatted = c( "2018-01-01 00:00:00-2018-01-01 02:00:00", @@ -331,6 +400,28 @@ test_that("Construct cumulative ranges.", { result.raw = construct.cumulative.ranges(start, end, time.period = "2 hours", raw = TRUE) expect_equal(result.raw, expected.raw, info = "Cumulative ranges (raw).") ## TODO use expect_identical here? why failing? + + ## cumulative ranges without imperfect ranges: + ## 1) expected results + expected.formatted = c( + "2018-01-01 00:00:00-2018-01-01 02:00:00", + "2018-01-01 00:00:00-2018-01-01 04:00:00", + "2018-01-01 00:00:00-2018-01-01 06:05:01" + ) + expected.raw = lapply(expected.formatted, get.range.bounds) + names(expected.raw) = expected.formatted + ## 2) formatted + result.formatted = construct.cumulative.ranges(start, end, time.period = "2 hours", + imperfect.range.ratio = 1.0, raw = FALSE) + expect_identical(result.formatted, expected.formatted, + info = "Cumulative ranges without imperfect ranges (formatted).") + ## 3) raw + result.raw = construct.cumulative.ranges(start, end, time.period = "2 hours", + imperfect.range.ratio = 1.0, raw = TRUE) + expect_equal(result.raw, expected.raw, + info = "Cumulative ranges without imperfect ranges (raw).") + ## TODO use expect_identical here? why failing? 
+ }) ## @@ -352,7 +443,7 @@ test_that("Construct ranges when difference between start and end is smaller tha ## consecutive ## 1) formatted - result.formatted = construct.consecutive.ranges(start, end, time.period = "1 week", raw = FALSE) + result.formatted = construct.consecutive.ranges(start, end, time.period = "1 week", imperfect.range.ratio = 0.2, raw = FALSE) expect_identical(result.formatted, expected.formatted, info = "Consecutive range (formatted).") ## 2) raw result.raw = construct.consecutive.ranges(start, end, time.period = "1 week", raw = TRUE) @@ -361,7 +452,7 @@ test_that("Construct ranges when difference between start and end is smaller tha ## cumulative ## 1) formatted - result.formatted = construct.cumulative.ranges(start, end, time.period = "1 week", raw = FALSE) + result.formatted = construct.cumulative.ranges(start, end, time.period = "1 week", imperfect.range.ratio = 0.2, raw = FALSE) expect_identical(result.formatted, expected.formatted, info = "Cumulative range (formatted).") ## 2) raw result.raw = construct.cumulative.ranges(start, end, time.period = "1 week", raw = TRUE) @@ -370,7 +461,7 @@ test_that("Construct ranges when difference between start and end is smaller tha ## overlapping ## 1) formatted - result.formatted = construct.overlapping.ranges(start, end, time.period = "1 week", + result.formatted = construct.overlapping.ranges(start, end, time.period = "1 week", imperfect.range.ratio = 0.2, overlap = "1 day", raw = FALSE) expect_identical(result.formatted, expected.formatted, info = "Overlapping range (formatted).") ## 2) raw diff --git a/tests/test-networks-artifact.R b/tests/test-networks-artifact.R index 3e16f47b..d2f1fa5a 100644 --- a/tests/test-networks-artifact.R +++ b/tests/test-networks-artifact.R @@ -13,6 +13,7 @@ ## ## Copyright 2017-2018 by Christian Hechtl ## Copyright 2017 by Claus Hunsen +## Copyright 2018 by Barbara Eckl ## All Rights Reserved. 
@@ -47,22 +48,25 @@ test_that("Network construction of the undirected artifact-cochange network", { network.built = network.builder$get.artifact.network() ## vertex attributes - vertices = c("Base_Feature", "foo", "A") + vertices = data.frame(name = c("Base_Feature", "foo", "A"), + kind = "Feature", + type = TYPE.ARTIFACT) - data = data.frame(from = c("Base_Feature", "Base_Feature"), - to = c("foo", "foo"), - date = get.date.from.string(c("2016-07-12 16:06:32", "2016-07-12 16:06:32")), - hash = c("0a1a5c523d835459c42f33e863623138555e2526", "0a1a5c523d835459c42f33e863623138555e2526"), - file = c("test2.c", "test2.c"), - artifact.type = c("Feature", "Feature"), - artifact = c("Base_Feature", "foo"), - weight = c(1, 1), - type = TYPE.EDGES.INTRA) + data = data.frame( + from = c("Base_Feature", "Base_Feature"), + to = c("foo", "foo"), + date = get.date.from.string(c("2016-07-12 16:06:32", "2016-07-12 16:06:32")), + artifact.type = c("Feature", "Feature"), + hash = c("0a1a5c523d835459c42f33e863623138555e2526", "0a1a5c523d835459c42f33e863623138555e2526"), + file = c("test2.c", "test2.c"), + artifact = c("Base_Feature", "foo"), + weight = 1, + type = TYPE.EDGES.INTRA, + relation = "cochange" + ) ## build expected network network.expected = igraph::graph.data.frame(data, directed = FALSE, vertices = vertices) - network.expected = igraph::set.vertex.attribute(network.expected, "id", value = igraph::get.vertex.attribute(network.expected, "name")) - igraph::V(network.expected)$type = TYPE.ARTIFACT expect_true(igraph::identical_graphs(network.built, network.expected)) }) diff --git a/tests/test-networks-author.R b/tests/test-networks-author.R index 95e319a9..54732c7a 100644 --- a/tests/test-networks-author.R +++ b/tests/test-networks-author.R @@ -14,6 +14,7 @@ ## Copyright 2017 by Claus Hunsen ## Copyright 2017 by Christian Hechtl ## Copyright 2017 by Felix Prasse +## Copyright 2018 by Barbara Eckl ## All Rights Reserved. 
@@ -208,27 +209,31 @@ test_that("Network construction of the undirected author-cochange network", { network.built = network.builder$get.author.network() ## vertex attributes - authors = c("Björn", "Olaf", "Karl", "Thomas") + authors = data.frame(name = c("Björn", "Olaf", "Karl", "Thomas"), + kind = TYPE.AUTHOR, + type = TYPE.AUTHOR) ## edge attributes data = data.frame(comb.1. = c("Björn", "Björn", "Olaf", "Olaf", "Olaf", "Olaf", "Karl", "Karl"), comb.2. = c("Olaf", "Olaf", "Karl", "Karl", "Thomas", "Thomas", "Thomas", "Thomas"), - date = get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 16:00:45", "2016-07-12 16:05:41", "2016-07-12 16:06:10", - "2016-07-12 16:05:41", "2016-07-12 16:06:32", "2016-07-12 16:06:10", "2016-07-12 16:06:32")), - hash = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338", "3a0ed78458b3976243db6829f63eba3eead26774", - "1143db502761379c2bfcecc2007fc34282e7ee61", "3a0ed78458b3976243db6829f63eba3eead26774", "0a1a5c523d835459c42f33e863623138555e2526", + date = get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 16:00:45", "2016-07-12 16:05:41", + "2016-07-12 16:06:10", "2016-07-12 16:05:41", "2016-07-12 16:06:32", + "2016-07-12 16:06:10", "2016-07-12 16:06:32")), + artifact.type = "Feature", + hash = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338", + "3a0ed78458b3976243db6829f63eba3eead26774", "1143db502761379c2bfcecc2007fc34282e7ee61", + "3a0ed78458b3976243db6829f63eba3eead26774", "0a1a5c523d835459c42f33e863623138555e2526", "1143db502761379c2bfcecc2007fc34282e7ee61", "0a1a5c523d835459c42f33e863623138555e2526"), file = c("test.c", "test.c", "test2.c", "test3.c", "test2.c", "test2.c", "test3.c", "test2.c"), - artifact.type = c("Feature", "Feature", "Feature", "Feature", "Feature", "Feature", "Feature", "Feature"), - artifact = c("A", "A", "Base_Feature", "Base_Feature", "Base_Feature", "Base_Feature", "Base_Feature", "Base_Feature"), - weight = c(1, 1, 
1, 1, 1, 1, 1, 1), - type = TYPE.EDGES.INTRA + artifact = c("A", "A", "Base_Feature", "Base_Feature", "Base_Feature", "Base_Feature", + "Base_Feature", "Base_Feature"), + weight = 1, + type = TYPE.EDGES.INTRA, + relation = "cochange" ) ## build expected network network.expected = igraph::graph.data.frame(data, directed = FALSE, vertices = authors) - network.expected = igraph::set.vertex.attribute(network.expected, "id", value = igraph::get.vertex.attribute(network.expected, "name")) - igraph::V(network.expected)$type = TYPE.AUTHOR expect_true(igraph::identical_graphs(network.built, network.expected)) }) @@ -249,26 +254,27 @@ test_that("Network construction of the directed author-cochange network", { network.built = network.builder$get.author.network() ## vertex attributes - authors = c("Björn", "Olaf", "Karl", "Thomas") + authors = data.frame(name = c("Björn", "Olaf", "Karl", "Thomas"), + kind = TYPE.AUTHOR, + type = TYPE.AUTHOR) ## edge attributes data = data.frame(from = c("Olaf", "Karl", "Thomas", "Thomas"), to = c("Björn", "Olaf", "Olaf", "Karl"), date = get.date.from.string(c("2016-07-12 16:00:45", "2016-07-12 16:06:10", "2016-07-12 16:06:32", "2016-07-12 16:06:32")), + artifact.type = "Feature", hash = c("5a5ec9675e98187e1e92561e1888aa6f04faa338", "1143db502761379c2bfcecc2007fc34282e7ee61", "0a1a5c523d835459c42f33e863623138555e2526", "0a1a5c523d835459c42f33e863623138555e2526"), file = c("test.c", "test3.c", "test2.c", "test2.c"), - artifact.type = c("Feature", "Feature", "Feature", "Feature"), artifact = c("A", "Base_Feature", "Base_Feature", "Base_Feature"), - weight = c(1, 1, 1, 1), - type = TYPE.EDGES.INTRA + weight = 1, + type = TYPE.EDGES.INTRA, + relation = "cochange" ) ## build expected network network.expected = igraph::graph.data.frame(data, directed = TRUE, vertices = authors) - network.expected = igraph::set.vertex.attribute(network.expected, "id", value = igraph::get.vertex.attribute(network.expected, "name")) - igraph::V(network.expected)$type = 
TYPE.AUTHOR expect_true(igraph::identical_graphs(network.built, network.expected)) }) @@ -289,26 +295,33 @@ test_that("Network construction of the undirected simplified author-cochange net network.built = network.builder$get.author.network() ## vertex attributes - authors = c("Björn", "Olaf", "Karl", "Thomas") + authors = data.frame(name = c("Björn", "Olaf", "Karl", "Thomas"), + kind = TYPE.AUTHOR, + type = TYPE.AUTHOR) ## edge attributes - data = data.frame(from = c("Björn", "Olaf", "Olaf", "Karl"), - to = c("Olaf", "Karl", "Thomas", "Thomas"), - date = I(list(c(1468339139, 1468339245), c(1468339541, 1468339570), c(1468339541, 1468339592), c(1468339570, 1468339592))), - hash = I(list(c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338"), c("3a0ed78458b3976243db6829f63eba3eead26774", - "1143db502761379c2bfcecc2007fc34282e7ee61"), c("3a0ed78458b3976243db6829f63eba3eead26774", "0a1a5c523d835459c42f33e863623138555e2526"), - c("1143db502761379c2bfcecc2007fc34282e7ee61", "0a1a5c523d835459c42f33e863623138555e2526"))), - file = I(list(c("test.c", "test.c"), c("test2.c", "test3.c"), c("test2.c", "test2.c"), c("test3.c", "test2.c"))), - artifact.type = I(list(c("Feature", "Feature"), c("Feature", "Feature"), c("Feature", "Feature"), c("Feature", "Feature"))), - artifact = I(list(c("A", "A"), c("Base_Feature", "Base_Feature"), c("Base_Feature", "Base_Feature"), c("Base_Feature", "Base_Feature"))), - weight = c(2, 2, 2, 2), - type = TYPE.EDGES.INTRA + data = data.frame( + from = c("Björn", "Olaf", "Olaf", "Karl"), + to = c("Olaf", "Karl", "Thomas", "Thomas"), + date = I(list(c(1468339139, 1468339245), c(1468339541, 1468339570), c(1468339541, 1468339592), + c(1468339570, 1468339592))), + artifact.type = I(list(c("Feature", "Feature"), c("Feature", "Feature"), c("Feature", "Feature"), + c("Feature", "Feature"))), + hash = I(list( + c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338"), + 
c("3a0ed78458b3976243db6829f63eba3eead26774", "1143db502761379c2bfcecc2007fc34282e7ee61"), + c("3a0ed78458b3976243db6829f63eba3eead26774", "0a1a5c523d835459c42f33e863623138555e2526"), + c("1143db502761379c2bfcecc2007fc34282e7ee61", "0a1a5c523d835459c42f33e863623138555e2526"))), + file = I(list(c("test.c", "test.c"), c("test2.c", "test3.c"), c("test2.c", "test2.c"), c("test3.c", "test2.c"))), + artifact = I(list(c("A", "A"), c("Base_Feature", "Base_Feature"), c("Base_Feature", "Base_Feature"), + c("Base_Feature", "Base_Feature"))), + weight = 2, + type = TYPE.EDGES.INTRA, + relation = "cochange" ) ## build expected network network.expected = igraph::graph.data.frame(data, directed = FALSE, vertices = authors) - network.expected = igraph::set.vertex.attribute(network.expected, "id", value = igraph::get.vertex.attribute(network.expected, "name")) - igraph::V(network.expected)$type = TYPE.AUTHOR expect_true(igraph::identical_graphs(network.built, network.expected)) }) @@ -331,7 +344,7 @@ test_that("Network construction of the undirected author-issue network with all network.built = network.builder$get.author.network() vertices = data.frame(name = c("Karl", "Olaf", "Thomas", "udo", "Björn", "Max"), - id = c("Karl", "Olaf", "Thomas", "udo", "Björn", "Max"), + kind = TYPE.AUTHOR, type = TYPE.AUTHOR) edges = data.frame(from = c("Karl", "Karl", "Karl", "Karl", "Karl", "Karl", "Karl", "Karl", "Karl", "Karl", "Karl", @@ -369,6 +382,7 @@ test_that("Network construction of the undirected author-issue network with all "2016-10-13 15:33:56", "2016-12-06 14:03:42", "2016-12-07 15:37:02", "2016-12-07 15:37:02", "2016-12-07 15:37:21", "2016-12-07 15:53:02", "2016-12-07 15:53:02", "2017-02-20 22:25:41", "2017-03-02 17:30:10", "2017-05-23 12:32:21", "2017-05-23 12:32:21", "2017-05-23 12:32:39" )), + artifact.type = "IssueEvent", issue.id = c( "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", @@ -394,12 +408,10 @@ test_that("Network construction of the 
undirected author-issue network with all "closed", "commented", "created", "renamed", "commented", "commented", "commented", "commented", "commented", "merged", "closed", "commented", "commented", "created", "commented", "commented", "merged", "closed", "commented" ), - weight = c( 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1 ), - type = TYPE.EDGES.INTRA) + weight = 1, + type = TYPE.EDGES.INTRA, + relation = "issue" + ) network.expected = igraph::graph.data.frame(edges, directed = FALSE, vertices = vertices) @@ -422,7 +434,7 @@ test_that("Network construction of the undirected author-issue network with just network.built = network.builder$get.author.network() vertices = data.frame(name = c("Karl", "Thomas", "Björn", "Olaf", "Max"), - id = c("Karl", "Thomas", "Björn", "Olaf", "Max"), + kind = TYPE.AUTHOR, type = TYPE.AUTHOR) edges = data.frame(from = c( "Thomas", "Thomas", "Thomas", "Thomas", "Thomas", "Thomas", "Björn", "Björn", "Björn", @@ -435,14 +447,14 @@ test_that("Network construction of the undirected author-issue network with just "2016-08-31 18:21:48", "2016-10-05 01:07:46", "2016-10-13 15:33:56", "2016-12-06 14:03:42", "2016-12-07 15:37:21", "2016-12-07 15:53:02", "2017-02-20 22:25:41", "2017-03-02 17:30:10", "2017-05-23 12:32:39" )), + artifact.type = "IssueEvent", issue.id = c( "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "" ), - event.name = c( "commented", "commented", "commented", "commented", "commented", "commented", - "commented", "commented", "commented", "commented", "commented", "commented", - "commented", "commented", "commented", "commented", "commented", "commented" ), - weight = c(1), - type = TYPE.EDGES.INTRA) + event.name = "commented", + weight = 1, + type = TYPE.EDGES.INTRA, + relation = "issue") network.expected = 
igraph::graph.data.frame(edges, directed = FALSE, vertices = vertices) diff --git a/tests/test-networks-bipartite.R b/tests/test-networks-bipartite.R index 263303a5..d72c498e 100644 --- a/tests/test-networks-bipartite.R +++ b/tests/test-networks-bipartite.R @@ -13,6 +13,7 @@ ## ## Copyright 2017-2018 by Christian Hechtl ## Copyright 2017-2018 by Claus Hunsen +## Copyright 2018 by Barbara Eckl ## All Rights Reserved. @@ -46,30 +47,38 @@ test_that("Construction of the bipartite network for the feature artifact with a ## build network network.built = network.builder$get.bipartite.network() - authors = c("Björn", "Karl", "Olaf", "Thomas") - author.net = create.empty.network(directed = FALSE) + - igraph::vertex(authors, name = authors, type = TYPE.AUTHOR) - - artifact.vertices = c("A", "Base_Feature", "foo") - artifact.net = create.empty.network(directed = FALSE) + - igraph::vertices(artifact.vertices, name = artifact.vertices, type = TYPE.ARTIFACT, artifact.type = "feature") - - network.expected = igraph::disjoint_union(author.net, artifact.net) - - vertex.sequence.for.edges = c(1, 5, 2, 6, 3, 5, 3, 6, 4, 6, 4, 7) - - extra.edge.attributes = list(date = get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 16:06:10", "2016-07-12 16:00:45", - "2016-07-12 16:05:41", "2016-07-12 16:06:32", "2016-07-12 16:06:32")), - hash = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "1143db502761379c2bfcecc2007fc34282e7ee61", - "5a5ec9675e98187e1e92561e1888aa6f04faa338", "3a0ed78458b3976243db6829f63eba3eead26774", - "0a1a5c523d835459c42f33e863623138555e2526", "0a1a5c523d835459c42f33e863623138555e2526"), - file = c("test.c", "test3.c", "test.c", "test2.c", "test2.c", "test2.c"), - artifact.type = c("Feature", "Feature", "Feature", "Feature", "Feature", "Feature"), - artifact = c("A", "Base_Feature", "A", "Base_Feature", "Base_Feature", "foo"), - weight = c(1, 1, 1, 1, 1, 1), - type = TYPE.EDGES.INTER) - - network.expected = igraph::add_edges(network.expected, 
vertex.sequence.for.edges, attr = extra.edge.attributes) + ## construct expected network: + ## 1) construct expected vertices + authors = data.frame( + name = c("Björn", "Karl", "Olaf", "Thomas"), + type = TYPE.AUTHOR, + kind = TYPE.AUTHOR + ) + artifacts = data.frame( + name = c("A", "Base_Feature", "foo"), + type = TYPE.ARTIFACT, + kind = "Feature" + ) + vertices = plyr::rbind.fill(authors, artifacts) + ## 2) construct expected edge attributes + network.expected.data = data.frame( + from = c("Björn", "Karl", "Olaf", "Olaf", "Thomas", "Thomas"), + to = c("A", "Base_Feature", "A", "Base_Feature", "Base_Feature", "foo"), + date = get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 16:06:10", "2016-07-12 16:00:45", + "2016-07-12 16:05:41", "2016-07-12 16:06:32", "2016-07-12 16:06:32")), + artifact.type = c("Feature", "Feature", "Feature", "Feature", "Feature", "Feature"), + hash = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "1143db502761379c2bfcecc2007fc34282e7ee61", + "5a5ec9675e98187e1e92561e1888aa6f04faa338", "3a0ed78458b3976243db6829f63eba3eead26774", + "0a1a5c523d835459c42f33e863623138555e2526", "0a1a5c523d835459c42f33e863623138555e2526"), + file = c("test.c", "test3.c", "test.c", "test2.c", "test2.c", "test2.c"), + artifact = c("A", "Base_Feature", "A", "Base_Feature", "Base_Feature", "foo"), + weight = 1, + type = TYPE.EDGES.INTER, + relation = "cochange" + ) + ## 3) construct expected network + network.expected = igraph::graph.data.frame(network.expected.data, vertices = vertices, + directed = net.conf$get.value("author.directed")) expect_true(igraph::identical_graphs(network.built, network.expected)) }) @@ -91,32 +100,40 @@ test_that("Construction of the bipartite network for the file artifact with auth ## build network network.built = network.builder$get.bipartite.network() - authors = c("Björn", "Olaf", "Thomas") - author.net = create.empty.network(directed = FALSE) + - igraph::vertex(authors, name = authors, type = TYPE.AUTHOR) - - 
artifact.vertices = c("test.c", "test2.c") - artifact.net = create.empty.network(directed = FALSE) + - igraph::vertices(artifact.vertices, name = artifact.vertices, type = TYPE.ARTIFACT, artifact.type = "file") - - network.expected = igraph::disjoint_union(author.net, artifact.net) - - vertex.sequence.for.edges = c(1, 4, 2, 4, 2, 5, 3, 5) - - extra.edge.attributes = list(date = get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 16:00:45", - "2016-07-12 16:05:41", "2016-07-12 16:06:32")), - hash = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338", - "3a0ed78458b3976243db6829f63eba3eead26774", "0a1a5c523d835459c42f33e863623138555e2526"), - file = c("test.c", "test.c", "test2.c", "test2.c"), - artifact.type = c("File", "File", "File", "File"), - artifact = c("test.c", "test.c", "test2.c", "test2.c"), - weight = c(1, 1, 1, 1), - type = TYPE.EDGES.INTER) - - network.expected = igraph::add_edges(network.expected, vertex.sequence.for.edges, attr = extra.edge.attributes) + ## construct expected network: + ## 1) construct expected vertices + authors = data.frame( + name = c("Björn", "Olaf", "Thomas"), + type = TYPE.AUTHOR, + kind = TYPE.AUTHOR + ) + artifacts = data.frame( + name = c("test.c", "test2.c"), + type = TYPE.ARTIFACT, + kind = "File" + ) + vertices = plyr::rbind.fill(authors, artifacts) + ## 2) construct expected edge attributes + network.expected.data = data.frame( + from = c("Björn", "Olaf", "Olaf", "Thomas"), + to = c("test.c", "test.c", "test2.c", "test2.c"), + date = get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 16:00:45", + "2016-07-12 16:05:41", "2016-07-12 16:06:32")), + artifact.type = c("File", "File", "File", "File"), + hash = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338", + "3a0ed78458b3976243db6829f63eba3eead26774", "0a1a5c523d835459c42f33e863623138555e2526"), + file = c("test.c", "test.c", "test2.c", "test2.c"), + artifact = c("test.c", "test.c", 
"test2.c", "test2.c"), + weight = 1, + type = TYPE.EDGES.INTER, + relation = "cochange" + ) + ## 3) construct expected network + network.expected = igraph::graph.data.frame(network.expected.data, vertices = vertices, + directed = net.conf$get.value("author.directed")) expect_true(igraph::identical_graphs(network.built, network.expected)) - }) +}) test_that("Construction of the bipartite network for the function artifact with author.relation = 'cochange' and artifact. @@ -135,32 +152,39 @@ test_that("Construction of the bipartite network for the function artifact with ## build network network.built = network.builder$get.bipartite.network() - authors = c("Björn", "Olaf", "Thomas") - author.net = create.empty.network(directed = FALSE) + - igraph::vertex(authors, name = authors, type = TYPE.AUTHOR) - - artifact.vertices = c("File_Level") - artifact.net = create.empty.network(directed = FALSE) + - igraph::vertices(artifact.vertices, name = artifact.vertices, type = TYPE.ARTIFACT, artifact.type = "function") - - network.expected = igraph::disjoint_union(author.net, artifact.net) - - vertex.sequence.for.edges = c(1, 4, 2, 4, 2, 4, 3, 4) - - extra.edge.attributes = list(date = get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 16:00:45", - "2016-07-12 16:05:41", "2016-07-12 16:06:32")), - hash = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338", - "3a0ed78458b3976243db6829f63eba3eead26774", "0a1a5c523d835459c42f33e863623138555e2526"), - file = c("test.c", "test.c", "test2.c", "test2.c"), - artifact.type = c("Function", "Function", "Function", "Function"), - artifact = c("File_Level", "File_Level", "File_Level", "File_Level"), - weight = c(1, 1, 1, 1), - type = TYPE.EDGES.INTER) - - network.expected = igraph::add_edges(network.expected, vertex.sequence.for.edges, attr = extra.edge.attributes) + ## construct expected network: + ## 1) construct expected vertices + authors = data.frame( + name = c("Björn", "Olaf", "Thomas"), + type 
= TYPE.AUTHOR, + kind = TYPE.AUTHOR + ) + artifacts = data.frame( + name = c("File_Level"), + type = TYPE.ARTIFACT, + kind ="Function" + ) + vertices = plyr::rbind.fill(authors, artifacts) + ## 2) construct expected edge attributes + network.expected.data = data.frame( + from = c("Björn", "Olaf", "Olaf", "Thomas"), + to = c("File_Level", "File_Level", "File_Level", "File_Level"), + date = get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 16:00:45", + "2016-07-12 16:05:41", "2016-07-12 16:06:32")), + artifact.type = c("Function", "Function", "Function", "Function"), + hash = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338", + "3a0ed78458b3976243db6829f63eba3eead26774", "0a1a5c523d835459c42f33e863623138555e2526"), + file = c("test.c", "test.c", "test2.c", "test2.c"), + artifact = c("File_Level", "File_Level", "File_Level", "File_Level"), + weight = 1, + type = TYPE.EDGES.INTER, + relation = "cochange" + ) + ## 3) construct expected network + network.expected = igraph::graph.data.frame(network.expected.data, directed = net.conf$get.value("author.directed"), vertices = vertices) expect_true(igraph::identical_graphs(network.built, network.expected)) - }) +}) test_that("Construction of the bipartite network for the featureexpression artifact with author.relation = 'cochange' and artifact. 
relation = 'cochange'.", { @@ -178,32 +202,40 @@ test_that("Construction of the bipartite network for the featureexpression artif ## build network network.built = network.builder$get.bipartite.network() - authors = c("Björn", "Olaf") - author.net = create.empty.network(directed = FALSE) + - igraph::vertex(authors, name = authors, type = TYPE.AUTHOR) - - artifact.vertices = c("defined(A)", "Base_Feature") - artifact.net = create.empty.network(directed = FALSE) + - igraph::vertices(artifact.vertices, name = artifact.vertices, type = TYPE.ARTIFACT, artifact.type = "featureexpression") - - network.expected = igraph::disjoint_union(author.net, artifact.net) - - vertex.sequence.for.edges = c(1, 3, 2, 3, 2, 4) - - extra.edge.attributes = list(date = get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 16:00:45", - "2016-07-12 16:05:41")), - hash = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338", - "3a0ed78458b3976243db6829f63eba3eead26774"), - file = c("test.c", "test.c", "test2.c"), - artifact.type = c("FeatureExpression", "FeatureExpression", "FeatureExpression"), - artifact = c("defined(A)", "defined(A)", "Base_Feature"), - weight = c(1, 1, 1), - type = TYPE.EDGES.INTER) - - network.expected = igraph::add_edges(network.expected, vertex.sequence.for.edges, attr = extra.edge.attributes) + ## construct expected network: + ## 1) construct expected vertices + authors = data.frame( + name = c("Björn", "Olaf"), + type = TYPE.AUTHOR, + kind = TYPE.AUTHOR + ) + artifacts = data.frame( + name = c("defined(A)", "Base_Feature"), + type = TYPE.ARTIFACT, + kind = "FeatureExpression" + ) + vertices = plyr::rbind.fill(authors, artifacts) + ## 2) construct expected edge attributes + network.expected.data = data.frame( + from = c("Björn", "Olaf", "Olaf"), + to = c("defined(A)", "defined(A)", "Base_Feature"), + date = get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 16:00:45", + "2016-07-12 16:05:41")), + artifact.type = 
c("FeatureExpression", "FeatureExpression", "FeatureExpression"), + hash = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338", + "3a0ed78458b3976243db6829f63eba3eead26774"), + file = c("test.c", "test.c", "test2.c"), + artifact = c("defined(A)", "defined(A)", "Base_Feature"), + weight = 1, + type = TYPE.EDGES.INTER, + relation = "cochange" + ) + ## 3) construct expected network + network.expected = igraph::graph.data.frame(network.expected.data, vertices = vertices, + directed = net.conf$get.value("author.directed")) expect_true(igraph::identical_graphs(network.built, network.expected)) - }) +}) test_that("Construction of the bipartite network for the feature artifact with author.relation = 'cochange' and artifact. relation = 'issue'.", { @@ -221,36 +253,47 @@ test_that("Construction of the bipartite network for the feature artifact with a ## build network network.built = network.builder$get.bipartite.network() - authors = c("Björn", "Karl", "Max", "Olaf", "Thomas") - author.net = create.empty.network(directed = FALSE) + - igraph::vertex(authors, name = authors, type = TYPE.AUTHOR) - - artifact.vertices = c("", "", "", "") - artifact.net = create.empty.network(directed = FALSE) + - igraph::vertices(artifact.vertices, name = artifact.vertices, type = TYPE.ARTIFACT, artifact.type = "issue.id") - - network.expected = igraph::disjoint_union(author.net, artifact.net) - - vertex.sequence.for.edges = c(1, 6, 1, 7, 1, 6, 1, 6, 1, 6, 1, 6, 1, 8, 1, 8, 2, 9, 3, 8, 4, 7, 4, 6, 4, 6, 5, 7, 5, 7) - - extra.edge.attributes = list(date = get.date.from.string(c("2016-07-12 16:05:47", "2016-07-14 17:42:52", "2016-08-31 18:21:48", - "2016-10-13 15:33:56", "2016-12-06 14:03:42", "2016-12-07 15:53:02", - "2017-02-20 22:25:41", "2017-03-02 17:30:10", "2013-05-05 23:28:57", - "2017-05-23 12:32:39", "2016-07-27 22:25:25", "2016-10-05 01:07:46", - "2016-12-07 15:37:21", "2016-07-14 02:03:14", "2016-07-15 08:37:57")), - issue.id = c("", "", "", "", "", 
- "", "", "", "", "", - "", "", "", "", ""), - event.name = c("commented", "commented", "commented", "commented", "commented", - "commented", "commented", "commented", "commented", "commented", - "commented", "commented", "commented", "commented", "commented"), - weight = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), - type = TYPE.EDGES.INTER) - - network.expected = igraph::add_edges(network.expected, vertex.sequence.for.edges, attr = extra.edge.attributes) + ## construct expected network: + ## 1) construct expected vertices + authors = data.frame( + name =c("Björn", "Karl", "Max", "Olaf", "Thomas"), + type = TYPE.AUTHOR, + kind = TYPE.AUTHOR + ) + artifacts = data.frame( + name = c("", "", "", ""), + type = TYPE.ARTIFACT, + kind = "Issue" + ) + vertices = plyr::rbind.fill(authors, artifacts) + ## 2) construct expected edge attributes + network.expected.data = data.frame( + from = c("Björn", "Björn", "Björn", "Björn", "Björn", "Björn", "Björn", "Björn", "Karl", "Max", "Olaf", "Olaf", "Olaf", + "Thomas", "Thomas"), + to = c("", "", "", "", "", + "", "", "", "", "", + "", "", "", "", ""), + date = get.date.from.string(c("2016-07-12 16:05:47", "2016-07-14 17:42:52", "2016-08-31 18:21:48", + "2016-10-13 15:33:56", "2016-12-06 14:03:42", "2016-12-07 15:53:02", + "2017-02-20 22:25:41", "2017-03-02 17:30:10", "2013-05-05 23:28:57", + "2017-05-23 12:32:39", "2016-07-27 22:25:25", "2016-10-05 01:07:46", + "2016-12-07 15:37:21", "2016-07-14 02:03:14", "2016-07-15 08:37:57")), + artifact.type = "IssueEvent", + issue.id = c("", "", "", "", "", + "", "", "", "", "", + "", "", "", "", ""), + event.name = c("commented", "commented", "commented", "commented", "commented", + "commented", "commented", "commented", "commented", "commented", + "commented", "commented", "commented", "commented", "commented"), + weight = 1, + type = TYPE.EDGES.INTER, + relation = "issue" + ) + ## 3) construct expected network + network.expected = igraph::graph.data.frame(network.expected.data, 
directed = net.conf$get.value("author.directed"), vertices = vertices) expect_true(igraph::identical_graphs(network.built, network.expected)) - }) +}) test_that("Construction of the directed bipartite network for the feature artifact with author.relation = 'cochange' and artifact. relation = 'cochange'.", { @@ -269,33 +312,41 @@ test_that("Construction of the directed bipartite network for the feature artifa ## build network network.built = network.builder$get.bipartite.network() - authors = c("Björn", "Karl", "Olaf", "Thomas") - author.net = create.empty.network(directed = TRUE) + - igraph::vertex(authors, name = authors, type = TYPE.AUTHOR) - - artifact.vertices = c("A", "Base_Feature", "foo") - artifact.net = create.empty.network(directed = TRUE) + - igraph::vertices(artifact.vertices, name = artifact.vertices, type = TYPE.ARTIFACT, artifact.type = "feature") - - network.expected = igraph::disjoint_union(author.net, artifact.net) - - vertex.sequence.for.edges = c(1, 5, 2, 6, 3, 5, 3, 6, 4, 6, 4, 7) - - extra.edge.attributes = list(date = get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 16:06:10", "2016-07-12 16:00:45", - "2016-07-12 16:05:41", "2016-07-12 16:06:32", "2016-07-12 16:06:32")), - hash = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "1143db502761379c2bfcecc2007fc34282e7ee61", - "5a5ec9675e98187e1e92561e1888aa6f04faa338", "3a0ed78458b3976243db6829f63eba3eead26774", - "0a1a5c523d835459c42f33e863623138555e2526", "0a1a5c523d835459c42f33e863623138555e2526"), - file = c("test.c", "test3.c", "test.c", "test2.c", "test2.c", "test2.c"), - artifact.type = c("Feature", "Feature", "Feature", "Feature", "Feature", "Feature"), - artifact = c("A", "Base_Feature", "A", "Base_Feature", "Base_Feature", "foo"), - weight = c(1, 1, 1, 1, 1, 1), - type = TYPE.EDGES.INTER) - - network.expected = igraph::add_edges(network.expected, vertex.sequence.for.edges, attr = extra.edge.attributes) + ## construct expected network: + ## 1) construct expected vertices + authors 
= data.frame( + name = c("Björn", "Karl", "Olaf", "Thomas"), + type = TYPE.AUTHOR, + kind = TYPE.AUTHOR + ) + artifacts = data.frame( + name = c("A", "Base_Feature", "foo"), + type = TYPE.ARTIFACT, + kind = "Feature" + ) + vertices = plyr::rbind.fill(authors, artifacts) + ## 2) construct expected edge attributes + network.expected.data = data.frame( + from = c("Björn", "Karl", "Olaf", "Olaf","Thomas", "Thomas"), + to = c("A", "Base_Feature", "A", "Base_Feature", "Base_Feature", "foo"), + date = get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 16:06:10", "2016-07-12 16:00:45", + "2016-07-12 16:05:41", "2016-07-12 16:06:32", "2016-07-12 16:06:32")), + artifact.type = c("Feature", "Feature", "Feature", "Feature", "Feature", "Feature"), + hash = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "1143db502761379c2bfcecc2007fc34282e7ee61", + "5a5ec9675e98187e1e92561e1888aa6f04faa338", "3a0ed78458b3976243db6829f63eba3eead26774", + "0a1a5c523d835459c42f33e863623138555e2526", "0a1a5c523d835459c42f33e863623138555e2526"), + file = c("test.c", "test3.c", "test.c", "test2.c", "test2.c", "test2.c"), + artifact = c("A", "Base_Feature", "A", "Base_Feature", "Base_Feature", "foo"), + weight = 1, + type = TYPE.EDGES.INTER, + relation = "cochange" + ) + ## 3) construct expected network + network.expected = igraph::graph.data.frame(network.expected.data, vertices = vertices, + directed = net.conf$get.value("author.directed")) expect_true(igraph::identical_graphs(network.built, network.expected)) - }) +}) test_that("Construction of the directed bipartite network for the file artifact with author.relation = 'cochange' and artifact. 
relation = 'cochange'.", { @@ -314,32 +365,40 @@ test_that("Construction of the directed bipartite network for the file artifact ## build network network.built = network.builder$get.bipartite.network() - authors = c("Björn", "Olaf", "Thomas") - author.net = create.empty.network(directed = TRUE) + - igraph::vertex(authors, name = authors, type = TYPE.AUTHOR) - - artifact.vertices = c("test.c", "test2.c") - artifact.net = create.empty.network(directed = TRUE) + - igraph::vertices(artifact.vertices, name = artifact.vertices, type = TYPE.ARTIFACT, artifact.type = "file") - - network.expected = igraph::disjoint_union(author.net, artifact.net) - - vertex.sequence.for.edges = c(1, 4, 2, 4, 2, 5, 3, 5) - - extra.edge.attributes = list(date = get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 16:00:45", - "2016-07-12 16:05:41", "2016-07-12 16:06:32")), - hash = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338", - "3a0ed78458b3976243db6829f63eba3eead26774", "0a1a5c523d835459c42f33e863623138555e2526"), - file = c("test.c", "test.c", "test2.c", "test2.c"), - artifact.type = c("File", "File", "File", "File"), - artifact = c("test.c", "test.c", "test2.c", "test2.c"), - weight = c(1, 1, 1, 1), - type = TYPE.EDGES.INTER) - - network.expected = igraph::add_edges(network.expected, vertex.sequence.for.edges, attr = extra.edge.attributes) + ## construct expected network: + ## 1) construct expected vertices + authors = data.frame( + name = c("Björn", "Olaf", "Thomas"), + type = TYPE.AUTHOR, + kind = TYPE.AUTHOR + ) + artifacts = data.frame( + name = c("test.c", "test2.c"), + type = TYPE.ARTIFACT, + kind = "File" + ) + vertices = plyr::rbind.fill(authors, artifacts) + ## 2) construct expected edge attributes + network.expected.data = data.frame( + from = c("Björn", "Olaf", "Olaf", "Thomas"), + to = c("test.c", "test.c", "test2.c", "test2.c"), + date = get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 16:00:45", + "2016-07-12 
16:05:41", "2016-07-12 16:06:32")), + artifact.type = c("File", "File", "File", "File"), + hash = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338", + "3a0ed78458b3976243db6829f63eba3eead26774", "0a1a5c523d835459c42f33e863623138555e2526"), + file = c("test.c", "test.c", "test2.c", "test2.c"), + artifact = c("test.c", "test.c", "test2.c", "test2.c"), + weight = 1, + type = TYPE.EDGES.INTER, + relation = "cochange" + ) + ## 3) construct expected network + network.expected = igraph::graph.data.frame(network.expected.data, vertices = vertices, + directed = net.conf$get.value("author.directed")) expect_true(igraph::identical_graphs(network.built, network.expected)) - }) +}) test_that("Construction of the directed bipartite network for the function artifact with author.relation = 'cochange' and artifact. @@ -359,32 +418,40 @@ test_that("Construction of the directed bipartite network for the function artif ## build network network.built = network.builder$get.bipartite.network() - authors = c("Björn", "Olaf", "Thomas") - author.net = create.empty.network(directed = TRUE) + - igraph::vertex(authors, name = authors, type = TYPE.AUTHOR) - - artifact.vertices = c("File_Level") - artifact.net = create.empty.network(directed = TRUE) + - igraph::vertices(artifact.vertices, name = artifact.vertices, type = TYPE.ARTIFACT, artifact.type = "function") - - network.expected = igraph::disjoint_union(author.net, artifact.net) - - vertex.sequence.for.edges = c(1, 4, 2, 4, 2, 4, 3, 4) - - extra.edge.attributes = list(date = get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 16:00:45", - "2016-07-12 16:05:41", "2016-07-12 16:06:32")), - hash = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338", - "3a0ed78458b3976243db6829f63eba3eead26774", "0a1a5c523d835459c42f33e863623138555e2526"), - file = c("test.c", "test.c", "test2.c", "test2.c"), - artifact.type = c("Function", "Function", "Function", "Function"), - 
artifact = c("File_Level", "File_Level", "File_Level", "File_Level"), - weight = c(1, 1, 1, 1), - type = TYPE.EDGES.INTER) - - network.expected = igraph::add_edges(network.expected, vertex.sequence.for.edges, attr = extra.edge.attributes) + ## construct expected network: + ## 1) construct expected vertices + authors = data.frame( + name = c("Björn", "Olaf", "Thomas"), + type = TYPE.AUTHOR, + kind = TYPE.AUTHOR + ) + artifacts = data.frame( + name = c("File_Level"), + type = TYPE.ARTIFACT, + kind = "Function" + ) + vertices = plyr::rbind.fill(authors, artifacts) + ## 2) construct expected edge attributes + network.expected.data = data.frame( + from = c("Björn", "Olaf", "Olaf", "Thomas"), + to = c("File_Level", "File_Level", "File_Level", "File_Level"), + date = get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 16:00:45", + "2016-07-12 16:05:41", "2016-07-12 16:06:32")), + artifact.type = c("Function", "Function", "Function", "Function"), + hash = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338", + "3a0ed78458b3976243db6829f63eba3eead26774", "0a1a5c523d835459c42f33e863623138555e2526"), + file = c("test.c", "test.c", "test2.c", "test2.c"), + artifact = c("File_Level", "File_Level", "File_Level", "File_Level"), + weight = 1, + type = TYPE.EDGES.INTER, + relation = "cochange" + ) + ## 3) construct expected network + network.expected = igraph::graph.data.frame(network.expected.data, vertices = vertices, + directed = net.conf$get.value("author.directed")) expect_true(igraph::identical_graphs(network.built, network.expected)) - }) +}) test_that("Construction of the directed bipartite network for the featureexpression artifact with author.relation = 'cochange' and artifact. 
relation = 'cochange'.", { @@ -403,29 +470,37 @@ test_that("Construction of the directed bipartite network for the featureexpress ## build network network.built = network.builder$get.bipartite.network() - authors = c("Björn", "Olaf") - author.net = create.empty.network(directed = TRUE) + - igraph::vertex(authors, name = authors, type = TYPE.AUTHOR) - - artifact.vertices = c("defined(A)", "Base_Feature") - artifact.net = create.empty.network(directed = TRUE) + - igraph::vertices(artifact.vertices, name = artifact.vertices, type = TYPE.ARTIFACT, artifact.type = "featureexpression") - - network.expected = igraph::disjoint_union(author.net, artifact.net) - - vertex.sequence.for.edges = c(1, 3, 2, 3, 2, 4) - - extra.edge.attributes = list(date = get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 16:00:45", - "2016-07-12 16:05:41")), - hash = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338", - "3a0ed78458b3976243db6829f63eba3eead26774"), - file = c("test.c", "test.c", "test2.c"), - artifact.type = c("FeatureExpression", "FeatureExpression", "FeatureExpression"), - artifact = c("defined(A)", "defined(A)", "Base_Feature"), - weight = c(1, 1, 1), - type = TYPE.EDGES.INTER) - - network.expected = igraph::add_edges(network.expected, vertex.sequence.for.edges, attr = extra.edge.attributes) + ## construct expected network: + ## 1) construct expected vertices + authors = data.frame( + name = c("Björn", "Olaf"), + type = TYPE.AUTHOR, + kind = TYPE.AUTHOR + ) + artifacts = data.frame( + name = c("defined(A)", "Base_Feature"), + type = TYPE.ARTIFACT, + kind = "FeatureExpression" + ) + vertices = plyr::rbind.fill(authors, artifacts) + ## 2) construct expected edge attributes + network.expected.data = data.frame( + from = c("Björn", "Olaf", "Olaf"), + to = c("defined(A)", "defined(A)", "Base_Feature"), + date = get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 16:00:45", + "2016-07-12 16:05:41")), + artifact.type = 
c("FeatureExpression", "FeatureExpression", "FeatureExpression"), + hash = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338", + "3a0ed78458b3976243db6829f63eba3eead26774"), + file = c("test.c", "test.c", "test2.c"), + artifact = c("defined(A)", "defined(A)", "Base_Feature"), + weight = 1, + type = TYPE.EDGES.INTER, + relation = "cochange" + ) + ## 3) construct expected network + network.expected = igraph::graph.data.frame(network.expected.data, vertices = vertices, + directed = net.conf$get.value("author.directed")) expect_true(igraph::identical_graphs(network.built, network.expected)) - }) +}) diff --git a/tests/test-networks-cut.R b/tests/test-networks-cut.R index 0b162061..491d2f2b 100644 --- a/tests/test-networks-cut.R +++ b/tests/test-networks-cut.R @@ -69,7 +69,8 @@ test_that("Cut commit and mail data to same date range.", { date = get.date.from.string(c("2016-07-12 16:04:40")), date.offset = as.integer(c(100)), subject = c("Re: Fw: busybox 2 tab"), - thread = sprintf("", c(9))) + thread = sprintf("", c(9)), + artifact.type = "Mail") commit.data = x$get.project.data()$get.commits() rownames(commit.data) = 1:nrow(commit.data) diff --git a/tests/test-networks-multi-relation.R b/tests/test-networks-multi-relation.R new file mode 100644 index 00000000..06ea1327 --- /dev/null +++ b/tests/test-networks-multi-relation.R @@ -0,0 +1,300 @@ +## This file is part of codeface-extraction-r, which is free software: you +## can redistribute it and/or modify it under the terms of the GNU General +## Public License as published by the Free Software Foundation, version 2. +## +## This program is distributed in the hope that it will be useful, +## but WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +## GNU General Public License for more details. 
+## +## You should have received a copy of the GNU General Public License along +## with this program; if not, write to the Free Software Foundation, Inc., +## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +## +## Copyright 2018 by Barbara Eckl +## All Rights Reserved. + + +context("Network-building functionality.") + +## +## Context +## + +CF.DATA = file.path(".", "codeface-data") +CF.SELECTION.PROCESS = "testing" +CASESTUDY = "test" +ARTIFACT = "feature" # function, feature, file, featureexpression + +## use only when debugging this file independently +if (!dir.exists(CF.DATA)) CF.DATA = file.path(".", "tests", "codeface-data") + +test_that("Network construction of the undirected author network with relation = c('cochange', 'mail')", { + + ## configurations + proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, ARTIFACT) + proj.conf$update.value("artifact.filter.base", FALSE) + net.conf = NetworkConf$new() + net.conf$update.values(updated.values = list(author.relation = c("cochange", "mail"))) + + ## construct objects + proj.data = ProjectData$new(project.conf = proj.conf) + network.builder = NetworkBuilder$new(project.data = proj.data, network.conf = net.conf) + + ## build network + network.built = network.builder$get.author.network() + + ## vertex attributes + authors = data.frame(name = c("Björn", "Olaf", "Karl", "Thomas", "udo", "Fritz fritz@example.org", "georg", "Hans"), + kind = TYPE.AUTHOR, + type = TYPE.AUTHOR) + + ## edge attributes + data = data.frame(comb.1. = c("Björn", "Björn", "Olaf", "Olaf", "Olaf", "Olaf", "Karl", "Karl", # cochange + "Björn", "Björn", "Olaf", "Olaf"), # mail + comb.2. 
= c("Olaf", "Olaf", "Karl", "Karl", "Thomas", "Thomas", "Thomas", "Thomas", # cochange + "Olaf", "Olaf", "Thomas", "Thomas"), # mail + date = get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 16:00:45", "2016-07-12 16:05:41", # cochange + "2016-07-12 16:06:10", "2016-07-12 16:05:41", "2016-07-12 16:06:32", + "2016-07-12 16:06:10", "2016-07-12 16:06:32", + "2016-07-12 15:58:40", "2016-07-12 15:58:50", "2016-07-12 16:04:40", # mail + "2016-07-12 16:05:37")), + artifact.type = c(rep("Feature", 8), # cochange + rep("Mail", 4)), # mail + hash = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338", + "3a0ed78458b3976243db6829f63eba3eead26774", "1143db502761379c2bfcecc2007fc34282e7ee61", + "3a0ed78458b3976243db6829f63eba3eead26774", "0a1a5c523d835459c42f33e863623138555e2526", + "1143db502761379c2bfcecc2007fc34282e7ee61", "0a1a5c523d835459c42f33e863623138555e2526", + rep(NA, 4)), + file = c("test.c", "test.c", "test2.c", "test3.c", "test2.c", "test2.c", "test3.c", "test2.c", + rep(NA, 4)), + artifact = c("A", "A", "Base_Feature", "Base_Feature", "Base_Feature", "Base_Feature", "Base_Feature", "Base_Feature", + rep(NA, 4)), + weight = 1, + type = TYPE.EDGES.INTRA, + relation = c(rep("cochange", 8), + rep("mail", 4)), + message.id = c(NA, NA, NA, NA, NA, NA, NA, NA, + "<4cbaa9ef0802201124v37f1eec8g89a412dfbfc8383a@mail.gmail.com>", + "<6784529b0802032245r5164f984l342f0f0dc94aa420@mail.gmail.com>", + "<65a1sf31sagd684dfv31@mail.gmail.com>", "<9b06e8d20801220234h659c18a3g95c12ac38248c7e0@mail.gmail.com>"), + thread = c(NA, NA, NA, NA, NA, NA, NA, NA, + "", "", "", "") + ) + + ## build expected network + network.expected = igraph::graph.data.frame(data, vertices = authors, + directed = net.conf$get.value("author.directed")) + + expect_true(igraph::identical_graphs(network.built, network.expected)) +}) + + +test_that("Construction of the bipartite network for the feature artifact with author.relation = c('cochange', 'issue') and 
artifact. + relation = c('issue', 'mail').", { + + ## configurations + proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, ARTIFACT) + proj.conf$update.value("artifact.filter.base", FALSE) + net.conf = NetworkConf$new() + net.conf$update.values(updated.values = list(author.relation = c("cochange", "issue"), artifact.relation = c("issue", "mail"))) + + ## construct objects + proj.data = ProjectData$new(project.conf = proj.conf) + network.builder = NetworkBuilder$new(project.data = proj.data, network.conf = net.conf) + + ## build network + network.built = network.builder$get.bipartite.network() + + ## construct expected network: + ## 1) construct expected vertices + authors1 = data.frame( # author -- issue + name = c("Björn", "Karl", "Max", "Olaf", "Thomas"), + type = TYPE.AUTHOR, + kind = TYPE.AUTHOR + ) + authors2 = data.frame( # author --mail + name = c("Fritz fritz@example.org", "georg", "Hans", "udo"), + type = TYPE.AUTHOR, + kind = TYPE.AUTHOR + ) + issues = data.frame( + name = c("", "", "", ""), + type = TYPE.ARTIFACT, + kind = "Issue" + ) + threads = data.frame( + name = c("", "", "", + "", "", "", "", "", ""), + type = TYPE.ARTIFACT, + kind = "MailThread" + ) + vertices = plyr::rbind.fill(authors1, issues, authors2, threads) + ## 2) construct expected edge attributes + network.expected.data = data.frame( + from = c("Björn", "Björn", "Björn", "Björn", "Björn", "Björn", "Björn", "Björn", "Karl", "Max", # issue + "Olaf", "Olaf", "Olaf", "Thomas", "Thomas", + "Björn", "Björn", "Björn", "Fritz fritz@example.org", "georg", "Hans", "Hans", "Hans", # mail + "Hans", "Hans", "Hans", "Hans", "Olaf", "Olaf", "Thomas", "udo"), + to = c("", "", "", "", "", "", + "", "", "", "", "", "", + "", "", "", "", + "", "", "", "", "", "", + "", "", "", "", "", "", + "", "", ""), + date = get.date.from.string(c("2016-07-12 16:05:47", "2016-07-14 17:42:52", "2016-08-31 18:21:48", # issue + "2016-10-13 15:33:56", "2016-12-06 14:03:42", "2016-12-07 15:53:02", + 
"2017-02-20 22:25:41", "2017-03-02 17:30:10", "2013-05-05 23:28:57", + "2017-05-23 12:32:39", "2016-07-27 22:25:25", "2016-10-05 01:07:46", + "2016-12-07 15:37:21", "2016-07-14 02:03:14", "2016-07-15 08:37:57", + "2004-10-09 18:38:13", "2005-02-09 18:49:49", "2016-07-12 15:58:40", # mail + "2010-07-12 11:05:35", "2010-07-12 12:05:34", "2010-07-12 12:05:40", + "2010-07-12 12:05:41", "2010-07-12 12:05:42", "2010-07-12 12:05:43", + "2010-07-12 12:05:44", "2010-07-12 12:05:45", "2010-07-12 12:05:46", + "2016-07-12 15:58:50", "2016-07-12 16:05:37", "2016-07-12 16:04:40", + "2010-07-12 10:05:36")), + artifact.type = c(rep("IssueEvent", 15), rep("Mail", 16)), + issue.id = c("", "", "", "", "", + "", "", "", "", "", + "", "", "", "", "", + rep(NA,16)), + event.name = c(rep("commented", 15), + rep(NA, 16)), + weight = 1, + type = TYPE.EDGES.INTER, + relation = c(rep("issue", 15), rep("mail", 16)), + message.id = c(rep(NA, 15), + "", "<1107974989.17910.6.camel@jmcmullan>", + "<4cbaa9ef0802201124v37f1eec8g89a412dfbfc8383a@mail.gmail.com>", + "", "", + "", "","", + "", "", "", + "", "<6784529b0802032245r5164f984l342f0f0dc94aa420@mail.gmail.com>", + "<9b06e8d20801220234h659c18a3g95c12ac38248c7e0@mail.gmail.com>", "<65a1sf31sagd684dfv31@mail.gmail.com>", + ""), + thread = c(rep(NA, 15), + "", "", "", "", "", "", + "", "", "", "", "", "", + "", "", "", "") + ) + ## 3) construct expected network + network.expected = igraph::graph.data.frame(network.expected.data, vertices = vertices, + directed = net.conf$get.value("author.directed")) + + expect_true(igraph::identical_graphs(network.built, network.expected)) +}) + + +test_that("Construction of the multi network for the feature artifact with author.relation = c('cochange', 'mail') and artifact. 
+ relation = c('cochange', 'issue').", { + + ## configurations + proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, ARTIFACT) + proj.conf$update.value("artifact.filter.base", FALSE) + net.conf = NetworkConf$new() + net.conf$update.values(updated.values = list(author.relation = c("cochange", "mail"), artifact.relation = c("cochange", "issue"))) + + ## construct objects + proj.data = ProjectData$new(project.conf = proj.conf) + network.builder = NetworkBuilder$new(project.data = proj.data, network.conf = net.conf) + + ## build network + network.built = network.builder$get.multi.network() + + ## build expected network + vertices = data.frame( + name = c("Björn", "Olaf", "Karl", "Thomas", "udo", "Fritz fritz@example.org", "georg", "Hans", + "Base_Feature", "foo", "A", "", "", "", ""), + kind = c(rep(TYPE.AUTHOR, 8), rep("Feature", 3), rep("Issue", 4)), + type = c(rep(TYPE.AUTHOR, 8), rep(TYPE.ARTIFACT, 7)) + ) + row.names(vertices) = c("Björn", "Olaf", "Karl", "Thomas", "udo", "Fritz fritz@example.org", "georg", "Hans", + "Base_Feature", "foo", "A", "", "", "", "") + + edges = data.frame(from = c("Björn", "Björn", "Olaf", "Olaf", "Olaf", "Olaf", "Karl", "Karl", # author cochange + "Björn", "Björn", "Olaf", "Olaf", # author mail + "Base_Feature", "Base_Feature", # artifact cochange + "Björn", "Olaf", "Olaf", "Karl", "Thomas", "Thomas", # bipartite cochange + rep("Björn",8), rep("Olaf",3), "Karl", "Thomas", "Thomas"), # bipartite issue + to = c("Olaf", "Olaf", "Karl", "Karl", "Thomas", "Thomas", "Thomas", "Thomas", # author cochange + "Olaf", "Olaf", "Thomas", "Thomas", # author mail + "foo", "foo", # artifact cochange + "A", "A", "Base_Feature", "Base_Feature", "Base_Feature", "foo", # bipartite cochange + "", "", rep("", 4), rep("", 2), "", + rep("", 2), "", rep("", 2)), # bipartite issue + date = get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 16:00:45", "2016-07-12 16:05:41", # author cochange + "2016-07-12 16:06:10", "2016-07-12 
16:05:41", "2016-07-12 16:06:32", + "2016-07-12 16:06:10", "2016-07-12 16:06:32", + "2016-07-12 15:58:40", "2016-07-12 15:58:50", "2016-07-12 16:04:40", + "2016-07-12 16:05:37", + "2016-07-12 16:06:32", "2016-07-12 16:06:32", # artifact cochange + "2016-07-12 15:58:59", "2016-07-12 16:00:45", "2016-07-12 16:05:41", # bipartite cochange + "2016-07-12 16:06:10", "2016-07-12 16:06:32", "2016-07-12 16:06:32", + "2016-07-12 16:05:47", "2016-07-14 17:42:52", "2016-08-31 18:21:48", # bipartite issue + "2016-10-13 15:33:56", "2016-12-06 14:03:42", "2016-12-07 15:53:02", + "2017-02-20 22:25:41", "2017-03-02 17:30:10", "2016-07-27 22:25:25", + "2016-10-05 01:07:46", "2016-12-07 15:37:21", "2013-05-05 23:28:57", + "2016-07-14 02:03:14", "2016-07-15 08:37:57")), + artifact.type = c(rep("Feature", 8), + rep("Mail", 4), + rep("Feature", 2), + rep("Feature", 6), + rep("IssueEvent", 14)), + hash = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338", # author cochange + "3a0ed78458b3976243db6829f63eba3eead26774", "1143db502761379c2bfcecc2007fc34282e7ee61", + "3a0ed78458b3976243db6829f63eba3eead26774", "0a1a5c523d835459c42f33e863623138555e2526", + "1143db502761379c2bfcecc2007fc34282e7ee61", "0a1a5c523d835459c42f33e863623138555e2526", + NA, NA, NA, NA, # author mail + "0a1a5c523d835459c42f33e863623138555e2526", "0a1a5c523d835459c42f33e863623138555e2526", # artifact cochange + "72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338", # bipartite cochange + "3a0ed78458b3976243db6829f63eba3eead26774", "1143db502761379c2bfcecc2007fc34282e7ee61", + "0a1a5c523d835459c42f33e863623138555e2526", "0a1a5c523d835459c42f33e863623138555e2526", + rep(NA, 14)), # bipartite issue + file = c("test.c", "test.c", "test2.c", "test3.c", "test2.c", "test2.c", "test3.c", "test2.c", # author cochange + NA, NA, NA, NA, + "test2.c", "test2.c", # artifact cochange + "test.c", "test.c", "test2.c", "test3.c", "test2.c", "test2.c", # bipartite 
cochange + rep(NA, 14)), + artifact = c("A", "A", "Base_Feature", "Base_Feature", "Base_Feature", "Base_Feature", "Base_Feature", # author cochange + "Base_Feature", + rep(NA, 4), + "Base_Feature", "foo", # bipartite cochange + "A", "A", "Base_Feature", "Base_Feature", "Base_Feature", "foo", # bipartite cochange + rep(NA, 14)), + weight = 1, + type = c(rep(TYPE.EDGES.INTRA, 14), rep(TYPE.EDGES.INTER, 20)), + relation = c(rep("cochange", 8), + rep("mail", 4), + rep("cochange", 2), + rep("cochange", 6), + rep("issue", 14)), + message.id = c(rep(NA, 8), + "<4cbaa9ef0802201124v37f1eec8g89a412dfbfc8383a@mail.gmail.com>", + "<6784529b0802032245r5164f984l342f0f0dc94aa420@mail.gmail.com>", + "<65a1sf31sagd684dfv31@mail.gmail.com>", + "<9b06e8d20801220234h659c18a3g95c12ac38248c7e0@mail.gmail.com>", + rep(NA, 22)), + thread = c(rep(NA, 8), + "", "", "", "", + rep(NA, 22)), + issue.id = c(rep(NA, 20), + "", "","", "", "", + "", "", "", "", "", "", + "", "", ""), + event.name = c(rep(NA, 20), rep("commented", 14)) + ) + + network.expected = igraph::graph.data.frame(edges, vertices = vertices, + directed = net.conf$get.value("author.directed")) + + expected.edges = igraph::as_data_frame(network.expected, what = "edges") + expected.vertices = igraph::as_data_frame(network.expected, what = "vertices") + + built.edges = igraph::as_data_frame(network.built, what = "edges") + built.vertices = igraph::as_data_frame(network.built, what = "vertices") + + expect_identical(expected.edges, built.edges, info = "Multi network edges") + expect_identical(expected.vertices, built.vertices, info = "Multi network vertices") + ## TODO as soon as the bug in igraph is fixed switch to the expect_true function below + # expect_true(igraph::identical_graphs(network.expected, network.built)) +}) diff --git a/tests/test-networks-multi.R b/tests/test-networks-multi.R index a9384302..fcdcd9df 100644 --- a/tests/test-networks-multi.R +++ b/tests/test-networks-multi.R @@ -13,6 +13,7 @@ ## ## Copyright 2018 
by Christian Hechtl ## Copyright 2018 by Claus Hunsen +## Copyright 2018 by Barbara Eckl ## All Rights Reserved. @@ -49,41 +50,44 @@ test_that("Construction of the multi network for the feature artifact with autho ## build expected network vertices = data.frame(name = c("Björn", "Olaf", "Karl", "Thomas", "Base_Feature", "foo", "A"), - id = c("Björn", "Olaf", "Karl", "Thomas", - "Base_Feature", "foo", "A"), - type = c(rep(TYPE.AUTHOR, 4), rep(TYPE.ARTIFACT, 3))) + kind = c(rep(TYPE.AUTHOR, 4), rep("Feature", 3)), + type = c(rep(TYPE.AUTHOR, 4), rep(TYPE.ARTIFACT, 3)) + ) row.names(vertices) = c("Björn", "Olaf", "Karl", "Thomas", "Base_Feature", "foo", "A") - edges = data.frame(from= c("Björn", "Björn", "Olaf", "Olaf", "Olaf", "Olaf", "Karl", "Karl", - "Base_Feature", "Base_Feature", "Björn", "Olaf", "Olaf", "Karl", "Thomas", - "Thomas"), - to = c("Olaf", "Olaf", "Karl", "Karl", "Thomas", "Thomas", "Thomas", "Thomas", "foo", - "foo", "A", "A", "Base_Feature", "Base_Feature", "Base_Feature", "foo"), - date = get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 16:00:45", "2016-07-12 16:05:41", - "2016-07-12 16:06:10", "2016-07-12 16:05:41", "2016-07-12 16:06:32", - "2016-07-12 16:06:10", "2016-07-12 16:06:32", "2016-07-12 16:06:32", - "2016-07-12 16:06:32", "2016-07-12 15:58:59", "2016-07-12 16:00:45", - "2016-07-12 16:05:41", "2016-07-12 16:06:10", "2016-07-12 16:06:32", - "2016-07-12 16:06:32")), - hash = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338", - "3a0ed78458b3976243db6829f63eba3eead26774", "1143db502761379c2bfcecc2007fc34282e7ee61", - "3a0ed78458b3976243db6829f63eba3eead26774", "0a1a5c523d835459c42f33e863623138555e2526", - "1143db502761379c2bfcecc2007fc34282e7ee61", "0a1a5c523d835459c42f33e863623138555e2526", - "0a1a5c523d835459c42f33e863623138555e2526", "0a1a5c523d835459c42f33e863623138555e2526", - "72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338", - 
"3a0ed78458b3976243db6829f63eba3eead26774", "1143db502761379c2bfcecc2007fc34282e7ee61", - "0a1a5c523d835459c42f33e863623138555e2526", "0a1a5c523d835459c42f33e863623138555e2526"), - file = c("test.c", "test.c", "test2.c", "test3.c", "test2.c", "test2.c", "test3.c", "test2.c", - "test2.c", "test2.c", "test.c", "test.c", "test2.c", "test3.c", "test2.c", "test2.c"), - artifact.type = c("Feature", "Feature", "Feature", "Feature", "Feature", "Feature", "Feature", - "Feature", "Feature", "Feature", "Feature", "Feature", "Feature", "Feature", - "Feature", "Feature"), - artifact = c("A", "A", "Base_Feature", "Base_Feature", "Base_Feature", "Base_Feature", "Base_Feature", - "Base_Feature", "Base_Feature", "foo", "A", "A", "Base_Feature", "Base_Feature", "Base_Feature", - "foo"), - weight = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), - type = c(rep(TYPE.EDGES.INTRA, 10), rep(TYPE.EDGES.INTER, 6))) + edges = data.frame( + from = c("Björn", "Björn", "Olaf", "Olaf", "Olaf", "Olaf", "Karl", "Karl", + "Base_Feature", "Base_Feature", "Björn", "Olaf", "Olaf", "Karl", "Thomas", + "Thomas"), + to = c("Olaf", "Olaf", "Karl", "Karl", "Thomas", "Thomas", "Thomas", "Thomas", "foo", + "foo", "A", "A", "Base_Feature", "Base_Feature", "Base_Feature", "foo"), + date = get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 16:00:45", "2016-07-12 16:05:41", + "2016-07-12 16:06:10", "2016-07-12 16:05:41", "2016-07-12 16:06:32", + "2016-07-12 16:06:10", "2016-07-12 16:06:32", "2016-07-12 16:06:32", + "2016-07-12 16:06:32", "2016-07-12 15:58:59", "2016-07-12 16:00:45", + "2016-07-12 16:05:41", "2016-07-12 16:06:10", "2016-07-12 16:06:32", + "2016-07-12 16:06:32")), + artifact.type = c("Feature", "Feature", "Feature", "Feature", "Feature", "Feature", "Feature", + "Feature", "Feature", "Feature", "Feature", "Feature", "Feature", "Feature", + "Feature", "Feature"), + hash = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338", + 
"3a0ed78458b3976243db6829f63eba3eead26774", "1143db502761379c2bfcecc2007fc34282e7ee61", + "3a0ed78458b3976243db6829f63eba3eead26774", "0a1a5c523d835459c42f33e863623138555e2526", + "1143db502761379c2bfcecc2007fc34282e7ee61", "0a1a5c523d835459c42f33e863623138555e2526", + "0a1a5c523d835459c42f33e863623138555e2526", "0a1a5c523d835459c42f33e863623138555e2526", + "72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338", + "3a0ed78458b3976243db6829f63eba3eead26774", "1143db502761379c2bfcecc2007fc34282e7ee61", + "0a1a5c523d835459c42f33e863623138555e2526", "0a1a5c523d835459c42f33e863623138555e2526"), + file = c("test.c", "test.c", "test2.c", "test3.c", "test2.c", "test2.c", "test3.c", "test2.c", + "test2.c", "test2.c", "test.c", "test.c", "test2.c", "test3.c", "test2.c", "test2.c"), + artifact = c("A", "A", "Base_Feature", "Base_Feature", "Base_Feature", "Base_Feature", "Base_Feature", + "Base_Feature", "Base_Feature", "foo", "A", "A", "Base_Feature", "Base_Feature", "Base_Feature", + "foo"), + weight = 1, + type = c(rep(TYPE.EDGES.INTRA, 10), rep(TYPE.EDGES.INTER, 6)), + relation = "cochange" + ) network.expected = igraph::graph.data.frame(edges, directed = FALSE, vertices = vertices) diff --git a/tests/test-read.R b/tests/test-read.R index 5e5eab24..49e7f81e 100644 --- a/tests/test-read.R +++ b/tests/test-read.R @@ -11,7 +11,7 @@ ## with this program; if not, write to the Free Software Foundation, Inc., ## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. ## -## Copyright 2017 by Christian Hechtl +## Copyright 2017-2018 by Christian Hechtl ## Copyright 2017 by Felix Prasse ## Copyright 2018 by Claus Hunsen ## All Rights Reserved. 
@@ -172,7 +172,8 @@ test_that("Read the mail data.", { "=?KOI8-R?Q?=EF=D4=D7=C5=D4:_Some_patches?= 2", "Re: busybox 1", "=?KOI8-R?Q?=EF=D4=D7=C5=D4:_Some_patches?= tab", "Re: Fw: busybox 2 tab", "Re: Fw: busybox 10"), - thread = sprintf("", c(1, 2, 3, 4, 5, 6, 6, 6, 6, 6, 6, 7, 8, 8, 8, 9, 9)) + thread = sprintf("", c(1, 2, 3, 4, 5, 6, 6, 6, 6, 6, 6, 7, 8, 8, 8, 9, 9)), + artifact.type = "Mail" ) ## delete the line with the empty date mail.data.expected = mail.data.expected[-13, ] @@ -215,13 +216,16 @@ test_that("Read and parse the pasta data.", { ## build the expected data.frame pasta.data.expected = data.frame(message.id = c("", "", - "", "", - "", "", "", - ""), + "", "", + "", "", "", + ""), commit.hash = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338", - "3a0ed78458b3976243db6829f63eba3eead26774", "1143db502761379c2bfcecc2007fc34282e7ee61", - "1143db502761379c2bfcecc2007fc34282e7ee61", "1143db502761379c2bfcecc2007fc34282e7ee61", - "0a1a5c523d835459c42f33e863623138555e2526", "72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0")) + NA, "1143db502761379c2bfcecc2007fc34282e7ee61", + "1143db502761379c2bfcecc2007fc34282e7ee61", "1143db502761379c2bfcecc2007fc34282e7ee61", + "0a1a5c523d835459c42f33e863623138555e2526", "72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0"), + revision.set.id = c("", "", "", + "", "", "", + "", "")) ## check the results expect_identical(pasta.data.read, pasta.data.expected, info = "PaStA data.") @@ -239,21 +243,44 @@ test_that("Read and parse the issue data.", { ## build the expected data.frame issue.data.expected = data.frame(issue.id = sprintf("", rep(c(2, 48, 51, 48, 2, 48, 51, 57), c(6, 2, 5, 5, 1, 3, 8, 6))), issue.state = rep(c("CLOSED", "OPEN", "CLOSED", "OPEN", "CLOSED", "OPEN", "CLOSED", "CLOSED"), c(6, 2, 5, 5, 1, 3, 8, 6)), - creation.date = get.date.from.string(rep(c("2013-04-21 23:52:09", "2016-04-17 02:06:38", "2016-07-12 15:59:25", "2016-04-17 02:06:38", "2013-04-21 23:52:09", "2016-04-17 02:06:38", 
"2016-07-12 15:59:25", "2016-12-07 15:53:02"), c(6, 2, 5, 5, 1, 3, 8, 6))), - closing.date = get.date.from.string(rep(c("2013-05-25 20:02:08", NA, "2016-12-07 15:37:02", NA, "2014-05-25 20:02:08", NA, "2016-12-07 15:37:02", "2017-05-23 12:32:21"), c(6, 2, 5, 5, 1, 3, 8, 6))), + creation.date = get.date.from.string(rep(c("2013-04-21 23:52:09", "2016-04-17 02:06:38", "2016-07-12 15:59:25", "2016-04-17 02:06:38", + "2013-04-21 23:52:09", "2016-04-17 02:06:38", "2016-07-12 15:59:25", "2016-12-07 15:53:02"), + c(6, 2, 5, 5, 1, 3, 8, 6))), + closing.date = get.date.from.string(rep(c("2013-05-25 20:02:08", NA, "2016-12-07 15:37:02", NA, "2014-05-25 20:02:08", NA, "2016-12-07 15:37:02", + "2017-05-23 12:32:21"), c(6, 2, 5, 5, 1, 3, 8, 6))), is.pull.request = rep(c(TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, TRUE), c(6, 2, 5, 5, 1, 3, 8, 6)), - author.name = c("Karl", "Karl", "Karl", "Olaf", "Olaf", "Karl", "udo", "udo", "Thomas", "Thomas", "Björn", "Björn", "Björn", "Thomas", "Björn", "Björn", "Björn", "Thomas", "Thomas", "Björn", "Björn", "Olaf", "Björn", "Olaf", "Björn", "Björn", "Olaf", "Olaf", "Olaf", "Björn", "Björn", "Björn", "Björn", "Max", "Max", "Max"), - author.email = c("karl@example.org", "karl@example.org", "karl@example.org", "olaf@example.org", "olaf@example.org", "karl@example.org", "udo@example.org", "udo@example.org", "thomas@example.org", "thomas@example.org", "bjoern@example.org", "bjoern@example.org", "bjoern@example.org", "thomas@example.org", "bjoern@example.org", "bjoern@example.org", "bjoern@example.org", "thomas@example.org", "thomas@example.org", "bjoern@example.org", "bjoern@example.org", "olaf@example.org", "bjoern@example.org", "olaf@example.org", "bjoern@example.org", "bjoern@example.org", "olaf@example.org", "olaf@example.org", "olaf@example.org", "bjoern@example.org", "bjoern@example.org", "bjoern@example.org", "bjoern@example.org", "max@example.org", "max@example.org", "max@example.org"), - date = get.date.from.string(c("2013-04-21 
23:52:09", "2013-05-05 23:28:57", "2013-05-05 23:28:57", "2013-05-25 20:02:08", "2013-05-25 20:02:08", "2013-06-01 22:37:03", "2016-04-17 02:07:37", "2016-04-17 02:07:37", "2016-07-12 15:59:25", "2016-07-12 15:59:25", "2016-07-12 15:59:25", "2016-07-12 16:03:23", "2016-07-12 16:05:47", "2016-07-14 02:03:14", "2016-07-14 17:42:52", "2016-07-15 08:37:57", "2016-07-15 08:37:57", "2016-07-15 08:37:57", "2016-07-19 10:47:25", "2016-07-27 22:25:25", "2016-07-27 22:25:25", "2016-07-27 22:25:25", "2016-08-31 18:21:48", "2016-10-05 01:07:46", "2016-10-13 15:33:56", "2016-12-06 14:03:42", "2016-12-07 15:37:02", "2016-12-07 15:37:02", "2016-12-07 15:37:21", "2016-12-07 15:53:02", "2016-12-07 15:53:02", "2017-02-20 22:25:41", "2017-03-02 17:30:10", "2017-05-23 12:32:21", "2017-05-23 12:32:21", "2017-05-23 12:32:39")), + author.name = c("Karl", "Karl", "Karl", "Olaf", "Olaf", "Karl", "udo", "udo", "Thomas", "Thomas", "Björn", "Björn", "Björn", "Thomas", + "Björn", "Björn", "Björn", "Thomas", "Thomas", "Björn", "Björn", "Olaf", "Björn", "Olaf", "Björn", "Björn", "Olaf", "Olaf", + "Olaf", "Björn", "Björn", "Björn", "Björn", "Max", "Max", "Max"), + author.email = c("karl@example.org", "karl@example.org", "karl@example.org", "olaf@example.org", "olaf@example.org", "karl@example.org", + "udo@example.org", "udo@example.org", "thomas@example.org", "thomas@example.org", "bjoern@example.org", "bjoern@example.org", + "bjoern@example.org", "thomas@example.org", "bjoern@example.org", "bjoern@example.org", "bjoern@example.org", "thomas@example.org", + "thomas@example.org", "bjoern@example.org", "bjoern@example.org", "olaf@example.org", "bjoern@example.org", "olaf@example.org", + "bjoern@example.org", "bjoern@example.org", "olaf@example.org", "olaf@example.org", "olaf@example.org", "bjoern@example.org", + "bjoern@example.org", "bjoern@example.org", "bjoern@example.org", "max@example.org", "max@example.org", "max@example.org"), + date = get.date.from.string(c("2013-04-21 23:52:09", 
"2013-05-05 23:28:57", "2013-05-05 23:28:57", "2013-05-25 20:02:08", "2013-05-25 20:02:08", + "2013-06-01 22:37:03", "2016-04-17 02:07:37", "2016-04-17 02:07:37", "2016-07-12 15:59:25", "2016-07-12 15:59:25", + "2016-07-12 15:59:25", "2016-07-12 16:03:23", "2016-07-12 16:05:47", "2016-07-14 02:03:14", "2016-07-14 17:42:52", + "2016-07-15 08:37:57", "2016-07-15 08:37:57", "2016-07-15 08:37:57", "2016-07-19 10:47:25", "2016-07-27 22:25:25", + "2016-07-27 22:25:25", "2016-07-27 22:25:25", "2016-08-31 18:21:48", "2016-10-05 01:07:46", "2016-10-13 15:33:56", + "2016-12-06 14:03:42", "2016-12-07 15:37:02", "2016-12-07 15:37:02", "2016-12-07 15:37:21", "2016-12-07 15:53:02", + "2016-12-07 15:53:02", "2017-02-20 22:25:41", "2017-03-02 17:30:10", "2017-05-23 12:32:21", "2017-05-23 12:32:21", + "2017-05-23 12:32:39")), ref.name = c(rep("", 6), rep("Karl", 2), rep("Björn", 2), rep("", 5), rep("Thomas", 2), rep("", 2), rep("udo", 2), rep("", 15)), - event.name = c("created", "commented", "referenced", "merged", "closed", "head_ref_deleted", "mentioned", "subscribed", "mentioned", "subscribed", "created", "renamed", "commented", "commented", "commented", "mentioned", "subscribed", "commented", "referenced", "mentioned", "subscribed", "commented", "commented", "commented", "commented", "commented", "merged", "closed", "commented", "commented", "created", "commented", "commented", "merged", "closed", "commented")) + event.name = c("created", "commented", "referenced", "merged", "closed", "head_ref_deleted", "mentioned", "subscribed", "mentioned", "subscribed", + "created", "renamed", "commented", "commented", "commented", "mentioned", "subscribed", "commented", "referenced", "mentioned", + "subscribed", "commented", "commented", "commented", "commented", "commented", "merged", "closed", "commented", "commented", "created", + "commented", "commented", "merged", "closed", "commented"), + artifact.type = "IssueEvent" + ) ## calculate event IDs issue.data.expected[["event.id"]] = 
sapply( paste(issue.data.expected[["issue.id"]], issue.data.expected[["author.name"]], issue.data.expected[["date"]], sep = "_"), digest::sha1 ) ## set row names as integers - attr(issue.data.expected, "row.names") = as.integer(c(1, 2, 3, 4, 5, 6, 8, 9, 18, 19, 20, 21, 22, 10, 11, 12, 13, 14, 7, 15, 16, 17, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36)) + attr(issue.data.expected, "row.names") = as.integer(c(1, 2, 3, 4, 5, 6, 8, 9, 18, 19, 20, 21, 22, 10, 11, 12, 13, 14, 7, 15, 16, 17, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, + 35, 36)) ## check the results expect_identical(issue.data.read, issue.data.expected, info = "Issue data.") diff --git a/util-conf.R b/util-conf.R index 6275dabe..f614791e 100644 --- a/util-conf.R +++ b/util-conf.R @@ -16,6 +16,7 @@ ## Copyright 2017 by Christian Hechtl ## Copyright 2017 by Felix Prasse ## Copyright 2017-2018 by Thomas Bock +## Copyright 2018 by Barbara Eckl ## All Rights Reserved. @@ -89,7 +90,6 @@ Conf = R6::R6Class("Conf", #' - allowed, and #' - allowed.number. 
check.value = function(value, name) { - browser(expr = name == "revisions") if (!exists(name, where = private[["attributes"]])) { result = c(existing = FALSE) } else { @@ -652,7 +652,7 @@ NetworkConf = R6::R6Class("NetworkConf", inherit = Conf, default = "mail", type = "character", allowed = c("mail", "cochange", "issue"), - allowed.number = 1 + allowed.number = Inf ), author.directed = list( default = FALSE, @@ -677,7 +677,7 @@ NetworkConf = R6::R6Class("NetworkConf", inherit = Conf, default = "cochange", type = "character", allowed = c("cochange", "callgraph", "mail", "issue"), - allowed.number = 1 + allowed.number = Inf ), artifact.directed = list( default = FALSE, @@ -687,15 +687,17 @@ NetworkConf = R6::R6Class("NetworkConf", inherit = Conf, ), edge.attributes = list( default = c( - "date", # general + "date", "artifact.type", # general "message.id", "thread", # mail data - "hash", "file", "artifact.type", "artifact", # commit data + "hash", "file", "artifact", # commit data "issue.id", "event.name" # issue data ), type = "character", allowed = c( # the date "date", "date.offset", + # the artifact type indicating the edge + "artifact.type", # author information "author.name", "author.email", # committer information @@ -703,7 +705,7 @@ NetworkConf = R6::R6Class("NetworkConf", inherit = Conf, # e-mail information "message.id", "thread", "subject", ## commit information - "hash", "file", "artifact.type", "artifact", "changed.files", "added.lines", + "hash", "file", "artifact", "changed.files", "added.lines", "deleted.lines", "diff.size", "artifact.diff.size", "synchronicity", # pasta information "pasta", @@ -752,9 +754,13 @@ NetworkConf = R6::R6Class("NetworkConf", inherit = Conf, update.values = function(updated.values = list(), stop.on.error = FALSE) { super$update.values(updated.values = updated.values, stop.on.error = stop.on.error) - ## 1) "date" always as edge attribute + ## 1) "date" and "artifact.type" always as edge attribute name = "edge.attributes" - 
private[["attributes"]][[name]][["value"]] = unique(c(self$get.value(name), "date")) + private[["attributes"]][[name]][["value"]] = unique(c( + "date", + "artifact.type", + self$get.value(name) + )) ## return invisible invisible() @@ -809,7 +815,6 @@ get.configuration.string = function(conf, title = deparse(substitute(conf))) { if (len == 1) { field = paste0(" = ", paste(struct, collapse = ", "), "\n") } else if (len > 7) { - # browser() entries = paste0(indentation, indentation.item, indentation.item, struct) field = paste0( " = [\n", diff --git a/util-core-peripheral.R b/util-core-peripheral.R index 3eb71d2a..e8e0f859 100644 --- a/util-core-peripheral.R +++ b/util-core-peripheral.R @@ -16,6 +16,7 @@ ## Copyright 2017 by Sofie Kemper ## Copyright 2017 by Claus Hunsen ## Copyright 2017 by Felix Prasse +## Copyright 2018 by Christian Hechtl ## All Rights Reserved. ## ## This file is derived from following Codeface script: @@ -56,7 +57,8 @@ LONGTERM.CORE.THRESHOLD = 0.5 ## For count-based network metrics, the raw data has to be given. For network-based metrics, ## the network has to be given. 
get.author.class.by.type = function(network = NULL, data = NULL, - type = c("network.degree", "network.eigen", "commit.count", "loc.count")) { + type = c("network.degree", "network.eigen", "network.hierarchy", + "commit.count", "loc.count")) { logging::logdebug("get.author.class.by.type: starting.") type = match.arg(type) @@ -69,6 +71,7 @@ get.author.class.by.type = function(network = NULL, data = NULL, result = switch(type, "network.degree" = get.author.class.network.degree(network = network), "network.eigen" = get.author.class.network.eigen(network = network), + "network.hierarchy" = get.author.class.network.hierarchy(network = network), "commit.count" = get.author.class.commit.count(range.data = data), "loc.count" = get.author.class.loc.count(range.data = data)) @@ -82,7 +85,8 @@ get.author.class.by.type = function(network = NULL, data = NULL, ## The data can either be given as list of raw range data (for the count-based metrics) ## or as list of networks for the network-based metrics). get.author.class.overview = function(network.list = NULL, range.data.list = NULL, - type = c("network.degree", "network.eigen", "commit.count", "loc.count")) { + type = c("network.degree", "network.eigen", "network.hierarchy", + "commit.count", "loc.count")) { logging::logdebug("get.author.class.overview: starting.") type = match.arg(type) @@ -393,6 +397,32 @@ get.author.class.network.degree = function(network = NULL, result.limit = NULL) return(res) } +get.author.class.network.hierarchy = function(network = NULL, result.limit = NULL) { + + logging::logdebug("get.author.class.network.hierarchy: starting.") + + if(is.null(network)) { + logging::logerror("For the network-based hierarchy-centrality analysis, the network is needed.") + stop("The network has to be given for this analysis.") + } else if(igraph::vcount(network) == 0) { + logging::logwarn("The given network is empty. 
Returning empty classification...") + ## return an empty classification + return(list("core" = data.frame("author.name" = character(0), "centrality" = numeric(0)), + "peripheral" = data.frame("author.name" = character(0), "centrality" = numeric(0)))) + } + + hierarchy.base.df = metrics.hierarchy(network) + hierarchy.calculated = hierarchy.base.df$deg / hierarchy.base.df$cc + + hierarchy.df = data.frame(author.name = row.names(hierarchy.base.df), hierarchy = hierarchy.calculated) + + ## Get the author classification based on the centrality + res = get.author.class(hierarchy.df, "hierarchy", result.limit = result.limit) + + logging::logdebug("get.author.class.network.hierarchy: finished.") + return(res) +} + ## * Eigenvector-based classification -------------------------------------- ## Classify the authors of the specified version range into core and peripheral @@ -414,7 +444,8 @@ get.author.class.network.eigen = function(network = NULL, range.data = NULL, res } ## Get eigenvectors for all authors - centrality.vec = sort(igraph::eigen_centrality(network)$vector, decreasing= TRUE) + ## The method ignores the directed parameter if the given network is undirected + centrality.vec = sort(igraph::eigen_centrality(network, directed = TRUE)$vector, decreasing= TRUE) ## In case no collaboration occured, all centrality values are set to 0 if (igraph::ecount(network) == 0) { @@ -991,10 +1022,15 @@ get.author.class = function(author.data.frame, calc.base.name, result.limit = NU ## (1) check which positions are over the threshold ## (2) check which positions are equal to the threshold (use all.equal due to rounding errors) author.cumsum = cumsum(author.data[[calc.base.name]]) - author.class.threshold.idx = min(which( + buffer.value = which( author.cumsum > author.class.threshold | sapply(author.cumsum, function(x) isTRUE(all.equal(x, author.class.threshold))) - )) + ) + if (is.infinite(min(buffer.value))) { + author.class.threshold.idx = 0 + } else { + 
author.class.threshold.idx = min(buffer.value) + } ## classify authors according to threshold core.classification = rep(FALSE, nrow(author.data)) diff --git a/util-data.R b/util-data.R index b36cd2e9..1ab8976b 100644 --- a/util-data.R +++ b/util-data.R @@ -14,7 +14,7 @@ ## Copyright 2016-2018 by Claus Hunsen ## Copyright 2017-2018 by Thomas Bock ## Copyright 2017 by Raphael Nömmer -## Copyright 2017 by Christian Hechtl +## Copyright 2017-2018 by Christian Hechtl ## Copyright 2017 by Felix Prasse ## Copyright 2017 by Ferdinand Frank ## All Rights Reserved. @@ -295,7 +295,7 @@ ProjectData = R6::R6Class("ProjectData", #' @param reset.environment parameter to determine whether the environment #' has to be reset or not set.project.conf = function(project.conf, reset.environment = FALSE) { - private$project.conf = project.conf + private$project.conf = verify.argument.for.parameter(project.conf, "ProjectConf", class(self)[1]) if (reset.environment) { self$reset.environment() @@ -411,6 +411,12 @@ ProjectData = R6::R6Class("ProjectData", self$get.data.path(), private$project.conf$get.value("artifact") ) + + ## add pasta data if wanted + if(private$project.conf$get.value("pasta")) { + logging::loginfo("Adding pasta data.") + private$commits = private$add.pasta.data(private$commits) + } } private$extract.timestamps(source = "commits") @@ -508,7 +514,8 @@ ProjectData = R6::R6Class("ProjectData", private$mails = read.mails(self$get.data.path()) ## add pasta data if wanted - if (private$project.conf$get.value("pasta")) { + if(private$project.conf$get.value("pasta")) { + logging::loginfo("Adding pasta data.") private$mails = private$add.pasta.data(private$mails) } } @@ -599,13 +606,13 @@ ProjectData = R6::R6Class("ProjectData", #' Get single pasta items. #' For a given 'message.id', the associated 'commit.hash' is returned. #' For a given 'commit.hash', the associated 'message.id' or IDs are returned. 
+ #' If there is no assigned 'commit.hash' for the given 'message.id', "" is returned. #' #' @param message.id the message ID to get the corresponding commit hash #' @param commit.hash the commit hash to get the corresponding message ID #' #' @return the selected pasta data get.pasta.items = function(message.id = NULL, commit.hash = NULL) { - logging::loginfo("Getting pasta items started.") #if neither message.id nor commit.hash are specified break the code if (is.null(message.id) && is.null(commit.hash)) { logging::logwarn("Neither message.id nor commit.hash specified.") @@ -619,13 +626,101 @@ ProjectData = R6::R6Class("ProjectData", ## else gather all message.ids which contain the given commit.hash and return them if (!is.null(message.id)) { result = private$pasta[private$pasta[["message.id"]] == message.id, "commit.hash"] + if (!(length(result) == 0) && is.na(result)) { + result = "" + } return(result) } else { result = private$pasta[private$pasta[["commit.hash"]] == commit.hash, "message.id"] + result = result[!is.na(result)] return(result) } }, + #' Get the names of all data sources that are currently cached in the + #' ProjectData object. The possible data sources are: + #' 'commits', 'mails', 'issues', 'authors', synchronicity', and 'pasta'. + #' 'data.timestamps' are tested implicitly every time as they only contain + #' the earliest and latest date of one data source. 
+ #' + #' @return a vector containing all the names + get.cached.data.sources = function() { + result = c() + + ## main data sources + if (!is.null(private$commits)) { + result = c(result, "commits") + } + if (!is.null(private$mails)) { + result = c(result, "mails") + } + if (!is.null(private$issues)) { + result = c(result, "issues") + } + + ## author data + if (!is.null(private$authors)) { + result = c(result, "authors") + } + + ## additional data sources + if (!is.null(private$synchronicity)) { + result = c(result, "synchronicity") + } + if (!is.null(private$pasta)) { + result = c(result, "pasta") + } + + return(result) + }, + + #' Compares two ProjectData objects by first comparing the names of the + #' cached data sources of the two. + #' Then, it compares the ProjectConf objects and, in the end, the cached data + #' sources. + #' + #' @param other.data.object the object with which to compare + #' + #' @return \code{TRUE} if the objects are equal and \code{FALSE} otherwise + equals = function(other.data.object) { + + ## check whether the given object is an instance of ProjectData + if (!("ProjectData" %in% class(other.data.object))) { + logging::logerror("You can only compare a ProjectData object against another one.") + return(FALSE) + } + + ## check whether the two objects are of the same type + if (!identical(self$get.class.name(), other.data.object$get.class.name())) { + logging::logerror("You can only compare two instances of the same class.") + return(FALSE) + } + + self.data.sources = self$get.cached.data.sources() + other.data.sources = other.data.object$get.cached.data.sources() + + ## compare the list of cached data sources + if (!identical(self.data.sources, other.data.sources)) { + return(FALSE) + } + + ## compare the two ProjectConf objects + if (!identical(self$get.project.conf()$get.conf.as.string(), + other.data.object$get.project.conf()$get.conf.as.string())) { + return(FALSE) + } + + ## compare the cached data sources + for (source in 
self.data.sources) { + function.call = paste0("get.", source) + if (!identical(self[[function.call]](), other.data.object[[function.call]]())) { + return(FALSE) + } + } + + return(TRUE) + }, + ## * * data cutting ------------------------------------------------ #' Get the timestamps (earliest and latest date of activity) of the specified @@ -683,7 +778,7 @@ ProjectData = R6::R6Class("ProjectData", #' #' @param data.sources the specified data sources #' - #' @return a list of the cut data.sources + #' @return the RangeData object with cut data sources get.data.cut.to.same.date = function(data.sources = c("mails", "commits", "issues")) { ## check arguments data.sources = match.arg(arg = data.sources, several.ok = TRUE) @@ -938,6 +1033,36 @@ RangeData = R6::R6Class("RangeData", inherit = ProjectData, #' @return the revision callgraph get.revision.callgraph = function() { return(private$revision.callgraph) + }, + + #' Compares two RangeData objects by first comparing the ranges. Then it compares + #' the revision callgraphs and if they are equal, it calls the equals method of + #' ProjectData to compare the remaining data sources. 
+ #' + #' @param other.data.object the object with which to compare + #' + #' @return \code{TRUE} if the objects are equal and \code{FALSE} otherwise + equals = function(other.data.object = NULL) { + + ## check whether the given object is an instance of RangeData + if (!("RangeData" %in% class(other.data.object))) { + logging::logerror("You can only compare a RangeData object against another one.") + return(FALSE) + } + + ## check whether the ranges are equal + if (!identical(self$get.range(), other.data.object$get.range())) { + return(FALSE) + } + + ## check whether the revision callgraphs are equal + if (!identical(self$get.revision.callgraph(), + other.data.object$get.revision.callgraph())) { + return(FALSE) + } + + ## check the equality of the remaining data sources + return(super$equals(other.data.object = other.data.object)) } ) ) diff --git a/util-misc.R b/util-misc.R index d3f24535..4442870b 100644 --- a/util-misc.R +++ b/util-misc.R @@ -361,7 +361,7 @@ construct.ranges = function(revs, sliding.window = FALSE, raw = FALSE) { #' #' Important: As the start of each range is supposed to be inclusive and the end of each range #' exclusive, 1 second is added to \code{end}. This way, the date \code{end} will be *included* -#' in the last range. +#' in the last range. To avoid that behavior, set parameter \code{include.end.date} to \code{FALSE}. #' #' Note: You may want to use the function \code{ProjectData$get.data.timestamps} with this #' function here. @@ -371,15 +371,25 @@ construct.ranges = function(revs, sliding.window = FALSE, raw = FALSE) { #' last range (see above) #' @param time.period The time period describing the length of the ranges, a character #' string, e.g., "3 mins" or "15 days" +#' @param imperfect.range.ratio The ratio of the \code{time.period} the last range has to last at least. +#' That is, if the last range would be shorter than ratio * \code{time.period}, +#' the last range will be combined with the second last range. 
+#' A ratio of 0.0 means that the last range could be as small as possible. +#' A ratio of 1.0 means that the last range has to last at least \code{time.period}. +#' [default: 0.0] +#' @param include.end.date whether to include the end date or not [default: TRUE] #' @param raw whether to return pairs of POSIXct objects or strings rather than #' formatted strings [default: FALSE] #' #' @return the constructed ranges, either formatted or raw; the raw ranges are a named list, #' for which the formatted ranges are the names -construct.consecutive.ranges = function(start, end, time.period, raw = FALSE) { +construct.consecutive.ranges = function(start, end, time.period, imperfect.range.ratio = 0.0, + include.end.date = TRUE, raw = FALSE) { ## just construct overlapping ranges without any overlap ;) - ranges = construct.overlapping.ranges(start, end, time.period, overlap = 0, raw) + ranges = construct.overlapping.ranges(start, end, time.period, overlap = 0, + imperfect.range.ratio = imperfect.range.ratio, + include.end.date = include.end.date, raw) return(ranges) } @@ -399,7 +409,7 @@ construct.consecutive.ranges = function(start, end, time.period, raw = FALSE) { #' #' Important: As the start of each range is supposed to be inclusive and the end of each range #' exclusive, 1 second is added to \code{end}. This way, the date \code{end} will be *included* -#' in the last range. +#' in the last range. To avoid that behavior, set parameter \code{include.end.date} to \code{FALSE}. #' #' Note: You may want to use the function \code{ProjectData$get.data.timestamps} with this #' function here. @@ -413,12 +423,20 @@ construct.consecutive.ranges = function(start, end, time.period, raw = FALSE) { #' (e.g., "3 mins" or "15 days") or a numeric indication the percentage of #' overlap (e.g., 1/4). Should be more than 0 seconds and must not be larger #' than the given \code{time.period}. 
+#' @param imperfect.range.ratio The ratio of the \code{time.period} the last range has to last at least. +#' That is, if the last range would be shorter than ratio * \code{time.period}, +#' the last range will be combined with the second last range. +#' A ratio of 0.0 means that the last range could be as small as possible. +#' A ratio of 1.0 means that the last range has to last at least \code{time.period}. +#' [default: 0.0] +#' @param include.end.date whether to include the end date or not [default: TRUE] #' @param raw whether to return pairs of POSIXct objects or strings rather than #' formatted strings [default: FALSE] #' #' @return the constructed ranges, either formatted or raw; the raw ranges are a named list, #' for which the formatted ranges are the names -construct.overlapping.ranges = function(start, end, time.period, overlap, raw = FALSE) { +construct.overlapping.ranges = function(start, end, time.period, overlap, imperfect.range.ratio = 0.0, + include.end.date = TRUE, raw = FALSE) { ## convert given periods to lubridate stuff: ## 1) time period @@ -431,7 +449,11 @@ construct.overlapping.ranges = function(start, end, time.period, overlap, raw = } ## 3) the dates for theirselves start.date = get.date.from.string(start) - end.date = get.date.from.string(end) + 1 ## add 1 for inclusion of end.date + if (include.end.date) { + end.date = get.date.from.string(end) + 1 ## add 1 for inclusion of end.date + } else { + end.date = get.date.from.string(end) + } ## check the breaking case if (overlap >= time.period) { @@ -478,6 +500,23 @@ construct.overlapping.ranges = function(start, end, time.period, overlap, raw = return(c(bin.start, bin.end)) }) + # if wanted, check for imperfect range in the end: + if (imperfect.range.ratio > 0) { + ## 1) get the last range + last.range = ranges.raw[[bins.number]] + ## 2) get the last range's duration + last.range.duration = lubridate::as.duration(lubridate::interval(last.range[1], last.range[2])) + ## 3) check if the last 
range is too short + is.too.short = last.range.duration < imperfect.range.ratio * time.period + ## 4) combine the last range with the second-last one, if the last one is too short + if (bins.number > 1 && is.too.short) { + ## extend second-last range until end.date + ranges.raw[[bins.number - 1]][2] = end.date + ## remove last range + ranges.raw = ranges.raw[-bins.number] + } + } + ## construct actual range strings (without names) ranges = sapply(ranges.raw, construct.ranges, sliding.window = FALSE, raw = FALSE) ranges = unname(ranges) @@ -508,7 +547,7 @@ construct.overlapping.ranges = function(start, end, time.period, overlap, raw = #' #' Important: As the start of each range is supposed to be inclusive and the end of each range #' exclusive, 1 second is added to \code{end}. This way, the date \code{end} will be *included* -#' in the last range. +#' in the last range. To avoid that behavior, set parameter \code{include.end.date} to \code{FALSE}. #' #' Note: You may want to use the function \code{ProjectData$get.data.timestamps} with this #' function here. @@ -518,15 +557,25 @@ construct.overlapping.ranges = function(start, end, time.period, overlap, raw = #' last range (see above) #' @param time.period The time period describing the length of the ranges, a character #' string, e.g., "3 mins" or "15 days" +#' @param imperfect.range.ratio The ratio of the \code{time.period} the last range has to last at least. +#' That is, if the last range would be shorter than ratio * \code{time.period}, +#' the last range will be combined with the second last range. +#' A ratio of 0.0 means that the last range could be as small as possible. +#' A ratio of 1.0 means that the last range has to last at least \code{time.period}. 
+#' [default: 0.0] +#' @param include.end.date whether to include the end date or not [default: TRUE] #' @param raw whether to return pairs of POSIXct objects or strings rather than #' formatted strings [default: FALSE] #' #' @return the constructed ranges, either formatted or raw; the raw ranges are a named list, #' for which the formatted ranges are the names -construct.cumulative.ranges = function(start, end, time.period, raw = FALSE) { +construct.cumulative.ranges = function(start, end, time.period, imperfect.range.ratio = 0.0, + include.end.date = TRUE, raw = FALSE) { ## get the consecutive ranges to alter them afterwards - ranges.consecutive = construct.overlapping.ranges(start, end, time.period, overlap = 0, raw = TRUE) + ranges.consecutive = construct.overlapping.ranges(start, end, time.period, overlap = 0, + imperfect.range.ratio = imperfect.range.ratio, + include.end.date = include.end.date, raw = TRUE) ## set the start of each range to global start date ranges.raw = lapply(ranges.consecutive, function(range.bounds) { diff --git a/util-networks-metrics.R b/util-networks-metrics.R index 4d1ec5d3..5a35c8c2 100644 --- a/util-networks-metrics.R +++ b/util-networks-metrics.R @@ -14,6 +14,7 @@ ## Copyright 2015 by Thomas Bock ## Copyright 2017 by Raphael Nömmer ## Copyright 2017-2018 by Claus Hunsen +## Copyright 2017-2018 by Christian Hechtl ## All Rights Reserved. @@ -190,12 +191,15 @@ metrics.scale.freeness = function(network) { #' @param network the network to be examined #' #' @return A dataframe containing the logarithm of the node degree and the logarithm -#' of the local clustering coefficient for each node. +#' of the local clustering coefficient for each node as well as the non-logarithmic values +#' for these. 
metrics.hierarchy = function(network) { degrees = igraph::degree(network, mode = "total") cluster.coeff = igraph::transitivity(network, type = "local", vids = NULL) degrees.without.cluster.coeff = subset(degrees, !(is.nan(cluster.coeff) | cluster.coeff == 0)) cluster.coeff = subset(cluster.coeff, !(is.nan(cluster.coeff) | cluster.coeff == 0)) - return(data.frame(log.deg = log(degrees.without.cluster.coeff), log.cc = log(cluster.coeff))) + return(data.frame(deg = degrees.without.cluster.coeff, cc = cluster.coeff, + log.deg = log(degrees.without.cluster.coeff), log.cc = log(cluster.coeff))) } + diff --git a/util-networks.R b/util-networks.R index 2948a9f4..bab3f274 100644 --- a/util-networks.R +++ b/util-networks.R @@ -15,6 +15,7 @@ ## Copyright 2017 by Raphael Nömmer ## Copyright 2017 by Christian Hechtl ## Copyright 2017-2018 by Thomas Bock +## Copyright 2018 by Barbara Eckl ## All Rights Reserved. @@ -48,6 +49,7 @@ EDGE.ATTR.HANDLING = list( ## network-analytic data weight = "sum", type = "first", + relation = "first", ## commit data changed.files = "sum", @@ -85,8 +87,27 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", artifacts.network.mail = NULL, # igraph artifacts.network.issue = NULL, # igraph - ## * * data cutting --------------------------------------------- + ## * * relation-to-vertex-kind mapping ----------------------------- + #' Determine which vertex kind should be chose for the vertex depending on the relation + #' between the vertices. 
+ #' + #' @param relation the given relation + #' + #' @return the vertex kind to be used + get.vertex.kind.for.relation = function(relation) { + + vertex.kind = switch(relation, + cochange = private$proj.data$get.project.conf.entry("artifact.codeface"), + callgraph = private$proj.data$get.project.conf.entry("artifact.codeface"), + mail = "MailThread", + issue = "Issue" + ) + + return(vertex.kind) + }, + + ## * * data cutting ------------------------------------------------ #' Cut the data sources of the data object to the same date ranges. cut.data.to.same.timestamps = function() { @@ -120,13 +141,21 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", return(private$authors.network.cochange) } - ## construct network based on artifact2author data - author.net = construct.network.from.list( + ## construct edge list based on artifact2author data + author.net.data = construct.edge.list.from.key.value.list( private$proj.data$get.artifact2author(), network.conf = private$network.conf, directed = private$network.conf$get.value("author.directed") ) + ## construct network from obtained data + author.net = construct.network.from.edge.list( + author.net.data[["vertices"]], + author.net.data[["edges"]], + network.conf = private$network.conf, + directed = private$network.conf$get.value("author.directed") + ) + ## store network private$authors.network.cochange = author.net logging::logdebug("get.author.network.cochange: finished.") @@ -148,17 +177,27 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", return(private$authors.network.mail) } - author.relation = construct.network.from.list( + + ## construct edge list based on thread2author data + author.net.data = construct.edge.list.from.key.value.list( private$proj.data$get.thread2author(), network.conf = private$network.conf, directed = private$network.conf$get.value("author.directed") ) + ## construct network from obtained data + author.net = construct.network.from.edge.list( + author.net.data[["vertices"]], + 
author.net.data[["edges"]], + network.conf = private$network.conf, + directed = private$network.conf$get.value("author.directed") + ) + ## store network - private$authors.network.mail = author.relation + private$authors.network.mail = author.net logging::logdebug("get.author.network.mail: finished.") - return(author.relation) + return(author.net) }, ##get the issue based author relation as network @@ -170,16 +209,25 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", return(private$authors.network.issue) } - author.relation = construct.network.from.list( + ## construct edge list based on issue2author data + author.net.data = construct.edge.list.from.key.value.list( private$proj.data$get.issue2author(), network.conf = private$network.conf, directed = private$network.conf$get.value("author.directed") ) - private$authors.network.issue = author.relation + ## construct network from obtained data + author.net = construct.network.from.edge.list( + author.net.data[["vertices"]], + author.net.data[["edges"]], + network.conf = private$network.conf, + directed = private$network.conf$get.value("author.directed") + ) + + private$authors.network.issue = author.net logging::logdebug("get.author.network.issue: finished.") - return(author.relation) + return(author.net) }, ## * * artifact networks ------------------------------------------- @@ -198,8 +246,18 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", return(private$artifacts.network.cochange) } - artifacts.net = construct.network.from.list( - private$proj.data$get.commit2artifact(), + ## construct edge list based on commit2artifact data + artifacts.net.data.raw = private$proj.data$get.commit2artifact() + artifacts.net.data = construct.edge.list.from.key.value.list( + artifacts.net.data.raw, + network.conf = private$network.conf, + directed = FALSE + ) + + ## construct network from obtained data + artifacts.net = construct.network.from.edge.list( + artifacts.net.data[["vertices"]], + artifacts.net.data[["edges"]], network.conf = 
private$network.conf, directed = FALSE ) @@ -243,6 +301,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", ## read network from disk artifacts.net = igraph::read.graph(file, format = "pajek") + # set vertex labels properly (copy "id" attribute to "name" attribute) artifacts.net = igraph::set.vertex.attribute( artifacts.net, @@ -277,7 +336,12 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", } ## (3) set processed names inside graph object artifacts.net = igraph::set.vertex.attribute(artifacts.net, "name", value = names) - artifacts.net = igraph::set.vertex.attribute(artifacts.net, "id", value = names) + + ## set edge attribute 'artifact.type' as the raw data do not contain this! + artifacts.net = igraph::set.edge.attribute( + artifacts.net, "artifact.type", + value = private$proj.data$get.project.conf.entry("artifact.codeface") + ) ## store network private$artifacts.network.callgraph = artifacts.net @@ -308,9 +372,9 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", ## construct empty network directed = private$network.conf$get.value("artifact.directed") - artifacts = names(private$proj.data$get.thread2author()) # thread IDs - artifacts.net = create.empty.network(directed = directed) + - igraph::vertices(artifacts, type = TYPE.ARTIFACT) + artifacts.net.data.raw = private$proj.data$get.thread2author() + artifacts = names(artifacts.net.data.raw) # thread IDs + artifacts.net = create.empty.network(directed = directed) + igraph::vertices(artifacts) ## store network private$artifacts.network.mail = artifacts.net @@ -341,9 +405,9 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", ## construct empty network directed = private$network.conf$get.value("artifact.directed") - artifacts = names(private$proj.data$get.issue2author()) # thread IDs - artifacts.net = create.empty.network(directed = directed) + - igraph::vertices(artifacts, type = TYPE.ARTIFACT) + artifacts.net.data.raw = private$proj.data$get.issue2author() + artifacts = names(artifacts.net.data.raw) # thread IDs + 
artifacts.net = create.empty.network(directed = directed) + igraph::vertices(artifacts) ## store network private$artifacts.network.issue = artifacts.net @@ -352,44 +416,48 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", return(artifacts.net) }, - ## * * bipartite relation ------------------------------------------ + ## * * bipartite relations ------------------------------------------ - #' Get the key-value data for the bipartite relation, - #' which is implied by the "artifact.relation" from the network configuration. + #' Get the key-value data for the bipartite relations, + #' which are implied by the "artifact.relation" from the network configuration. #' - #' @return the data for the bipartite relation, with the attribute - #' 'artifact.type' denoting the artifact type for the relation - get.bipartite.relation = function() { - logging::logdebug("get.bipartite.relation: starting.") - - relation = private$network.conf$get.variable("artifact.relation") - logging::logdebug("Using bipartite relation '%s'.", relation) - - switch( - relation, - cochange = { - bip.relation = private$proj.data$get.author2artifact() - artifact.type =private$proj.data$get.project.conf.entry("artifact") - }, - callgraph = { - bip.relation = private$proj.data$get.author2artifact() - artifact.type =private$proj.data$get.project.conf.entry("artifact") - }, - mail = { - bip.relation = private$proj.data$get.author2thread() - artifact.type = "thread" - }, - issue = { - bip.relation = private$proj.data$get.author2issue() - artifact.type = "issue.id" - } - ) - - ## set artifact.type attribute - attr(bip.relation, "artifact.type") = artifact.type + #' @return the list of data for the bipartite relations, each with the attributes + #' 'vertex.kind' denoting the artifact type for the relations + #' and 'relation' denoting the respective network-configuration entry + get.bipartite.relations = function() { + logging::logdebug("get.bipartite.relations: starting.") + + relations = 
private$network.conf$get.variable("artifact.relation") + logging::logdebug("Using bipartite relations '%s'.", relations) + + bip.relations = lapply(relations, function(relation) { + ## get the actual relation and the artifact type + switch( + relation, + cochange = { + bip.relation = private$proj.data$get.author2artifact() + }, + callgraph = { + bip.relation = private$proj.data$get.author2artifact() + }, + mail = { + bip.relation = private$proj.data$get.author2thread() + }, + issue = { + bip.relation = private$proj.data$get.author2issue() + } + ) + + ## set vertex.kind and relation attributes + attr(bip.relation, "vertex.kind") = private$get.vertex.kind.for.relation(relation) + attr(bip.relation, "relation") = relation + + return (bip.relation) + }) + names(bip.relations) = relations - logging::logdebug("get.bipartite.relation: finished.") - return(bip.relation) + logging::logdebug("get.bipartite.relations: finished.") + return(bip.relations) } ), @@ -402,7 +470,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", #' given data object and the network configuration #' #' @param project.data the given data object - #' @param network.conf the network configuration + #' @param network.conf the network configuration initialize = function(project.data, network.conf) { ## check arguments @@ -450,7 +518,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", #' #' @param network.conf the new network configuration. 
#' @param reset.environment parameter to determine whether the environment - #' has to be reset or not + #' has to be reset or not [default: FALSE] set.network.conf = function(network.conf, reset.environment = FALSE) { private$network.conf = network.conf @@ -497,14 +565,24 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", logging::loginfo("Constructing author network.") ## construct network - relation = private$network.conf$get.value("author.relation") - net = switch( - relation, - cochange = private$get.author.network.cochange(), - mail = private$get.author.network.mail(), - issue = private$get.author.network.issue(), - stop(sprintf("The author relation '%s' does not exist.", relation)) - ) + relations = private$network.conf$get.value("author.relation") + networks = lapply(relations, function(relation) { + network = switch( + relation, + cochange = private$get.author.network.cochange(), + mail = private$get.author.network.mail(), + issue = private$get.author.network.issue(), + stop(sprintf("The author relation '%s' does not exist.", rel)) + ## TODO construct edge lists here and merge those (inline the private methods) + ) + + ## set edge attributes on all edges + igraph::E(network)$type = TYPE.EDGES.INTRA + igraph::E(network)$relation = relation + + return(network) + }) + net = merge.networks(networks) ## add all missing authors to the network if wanted if (private$network.conf$get.value("author.all.authors")) { @@ -517,15 +595,18 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", if (private$network.conf$get.value("author.only.committers")) { ## authors-artifact relation authors.from.net = igraph::get.vertex.attribute(net, "name") - authors.from.artifacts = names(private$get.bipartite.relation()) + authors.from.artifacts = lapply(private$get.bipartite.relations(), function(bipartite.relation) { + return(names(bipartite.relation)) + }) + authors.from.artifacts = unlist(authors.from.artifacts) if (!is.null(authors.from.artifacts)) { net = 
igraph::delete.vertices(net, setdiff(authors.from.net, authors.from.artifacts)) } } - ## set vertex and edge attributes for identifaction + ## set vertex attributes for identifaction + igraph::V(net)$kind = TYPE.AUTHOR igraph::V(net)$type = TYPE.AUTHOR - igraph::E(net)$type = TYPE.EDGES.INTRA ## add range attribute for later analysis (if available) if ("RangeData" %in% class(private$proj.data)) { @@ -542,20 +623,31 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", logging::loginfo("Constructing artifact network.") ## construct network - relation = private$network.conf$get.value("artifact.relation") - - net = switch( - relation, - cochange = private$get.artifact.network.cochange(), - callgraph = private$get.artifact.network.callgraph(), - mail = private$get.artifact.network.mail(), - issue = private$get.artifact.network.issue(), - stop(sprintf("The artifact relation '%s' does not exist.", relation)) - ) + relations = private$network.conf$get.value("artifact.relation") + networks = lapply(relations, function(relation) { + network = switch( + relation, + cochange = private$get.artifact.network.cochange(), + callgraph = private$get.artifact.network.callgraph(), + mail = private$get.artifact.network.mail(), + issue = private$get.artifact.network.issue(), + stop(sprintf("The artifact relation '%s' does not exist.", relation)) + ) + + ## set edge attributes on all edges + igraph::E(network)$type = TYPE.EDGES.INTRA + igraph::E(network)$relation = relation + + ## set vertex attribute 'kind' on all edges, corresponding to relation + vertex.kind = private$get.vertex.kind.for.relation(relation) + network = igraph::set.vertex.attribute(network, "kind", value = vertex.kind) + + return(network) + }) + net = merge.networks(networks) ## set vertex and edge attributes for identifaction igraph::V(net)$type = TYPE.ARTIFACT - igraph::E(net)$type = TYPE.EDGES.INTRA ## add range attribute for later analysis (if available) if ("RangeData" %in% class(private$proj.data)) { @@ -570,44 
+662,70 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", #' @return the bipartite network get.bipartite.network = function() { ## get data by the chosen relation - net.to.net = private$get.bipartite.relation() - artifact.type = attr(net.to.net, "artifact.type") + bipartite.relation.data = private$get.bipartite.relations() + directed = private$network.conf$get.value("author.directed") - ## extract vertices for author network - if (private$network.conf$get.value("author.all.authors")) { - authors = private$proj.data$get.authors()[[ "author.name" ]] - } else { - authors = names(net.to.net) - } + vertex.data = lapply(bipartite.relation.data, function(net.to.net) { - ## construct networks from vertices: - directed = private$network.conf$get.value("author.directed") - ## 1) author network - authors.net = create.empty.network(directed = directed) + - igraph::vertices(authors, name = authors, type = TYPE.AUTHOR) - ## 2) artifact network - artifact.vertices = unique(unlist(lapply(net.to.net, function(df) { - return(df[["data.vertices"]]) - }))) - artifact.net = create.empty.network(directed = directed) + - igraph::vertices(artifact.vertices, name = artifact.vertices, type = TYPE.ARTIFACT, artifact.type = artifact.type) - ## 3) combine both networks and bipartite relation - u = combine.networks(authors.net, artifact.net, net.to.net, network.conf = private$network.conf) + ## extract vertices for author network + if (private$network.conf$get.value("author.all.authors")) { + author.vertices = private$proj.data$get.authors()[[ "author.name" ]] + } else { + author.vertices = names(net.to.net) + } + + ## select all artifact vertices + artifact.vertices = unique(unlist(lapply(net.to.net, function(df) { + return(df[["data.vertices"]]) + }))) + + ## get vertex.kind from bipartite relation + vertex.kind = attr(net.to.net, "vertex.kind") + + ## join author and artifact vertices to the list of all vertices in the network + vertices = data.frame( + name = c(author.vertices, 
artifact.vertices), + type = c( + rep(TYPE.AUTHOR, length(author.vertices)), + rep(TYPE.ARTIFACT, length(artifact.vertices)) + ), + kind = c( + rep(TYPE.AUTHOR, length(author.vertices)), + rep(vertex.kind , length(artifact.vertices)) + ) + ) + return(vertices) + }) + + ## Merge network data and construct a vertex-only network for now + vertex.data = merge.network.data(vertex.data = vertex.data, edge.data = NULL)[["vertices"]] + vertex.network = igraph::graph.data.frame( + d = create.empty.edge.list(), + vertices = vertex.data, + directed = directed + ) + + ## Add edges to the vertex network + network = add.edges.for.bipartite.relation( + vertex.network, + bipartite.relations = bipartite.relation.data, + private$network.conf + ) ## remove vertices that are not committers if wanted if (private$network.conf$get.value("author.only.committers")) { committers = unique(private$proj.data$get.commits()[["author.name"]]) - authors = igraph::get.vertex.attribute(u, "name", igraph::V(u)[ type == TYPE.AUTHOR ]) + authors = igraph::get.vertex.attribute(network, "name", igraph::V(network)[ type == TYPE.AUTHOR ]) authors.to.remove = setdiff(authors, committers) - u = igraph::delete.vertices(u, authors.to.remove) + network = igraph::delete.vertices(network, authors.to.remove) } ## add range attribute for later analysis (if available) if ("RangeData" %in% class(private$proj.data)) { - attr(u, "range") = private$proj.data$get.range() + attr(network, "range") = private$proj.data$get.range() } - return(u) + return(network) }, #' Get all networks as list. 
@@ -618,7 +736,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", logging::loginfo("Constructing all networks.") ## author-artifact relation - authors.to.artifacts = private$get.bipartite.relation() + authors.to.artifacts = private$get.bipartite.relations() ## bipartite network bipartite.net = self$get.bipartite.network() ## author relation @@ -646,6 +764,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", networks = self$get.networks() authors.to.artifacts = networks[["authors.to.artifacts"]] authors.net = networks[["authors.net"]] + igraph::V(authors.net)$kind = TYPE.AUTHOR artifacts.net = networks[["artifacts.net"]] ## remove authors from author-artifact relation, if needed: @@ -653,16 +772,33 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", ## configured by 'author.only.committers', for example), thus, we need to remove any authors ## from the author--artifact relation that are superfluous authors.from.net = igraph::get.vertex.attribute(authors.net, "name") - authors.from.artifacts = names(authors.to.artifacts) - authors.to.artifacts = authors.to.artifacts[ intersect(authors.from.net, authors.from.artifacts) ] + ## save relation and intersect the author vertices from the author network and the + ## bipartite networks + authors.to.artifacts = mapply(function(a2a.rel, relation.type) { + authors.from.artifacts = names(a2a.rel) + a2a = a2a.rel[ intersect(authors.from.net, authors.from.artifacts) ] + attr(a2a, "relation") = relation.type + return(a2a) + }, authors.to.artifacts, names(authors.to.artifacts), SIMPLIFY = FALSE) + ## unify vertices with artifacts from bipartite and artifact relation: ## when the source for artifacts is different for the bipartite relation and the artifact relation (e.g., ## "cochange" and "callgraph"), then the vertex names need to be unified so that all vertices are ## represented properly and, more important here, all bipartite relations can be added - artifacts.all = unique(plyr::rbind.fill(authors.to.artifacts)[[ "data.vertices" 
]]) + artifacts = lapply(authors.to.artifacts, function(a2a.rel) { + artifact.list = plyr::rbind.fill(a2a.rel)[ c("data.vertices", "artifact.type") ] + artifact.list = unique(artifact.list) + return(artifact.list) + }) + artifacts.all = plyr::rbind.fill(artifacts) + artifacts.from.net = igraph::get.vertex.attribute(artifacts.net, "name") - browser(expr = length(setdiff(artifacts.all, artifacts.from.net)) > 0) - artifacts.net = artifacts.net + igraph::vertices(setdiff(artifacts.all, artifacts.from.net), type = TYPE.ARTIFACT) + artifacts.to.add = setdiff(artifacts.all[["data.vertices"]], artifacts.from.net) + artifacts.to.add.kind = artifacts.all[ + artifacts.all[["data.vertices"]] %in% artifacts.to.add, "artifact.type" + ] + artifacts.net = artifacts.net + igraph::vertices(artifacts.to.add, type = TYPE.ARTIFACT, + kind = artifacts.to.add.kind) ## check directedness and adapt artifact network if needed if (igraph::is.directed(authors.net) && !igraph::is.directed(artifacts.net)) { @@ -678,6 +814,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", gc() ## combine the networks + ## TODO use merge.networks and add.edges.for.bipartite.relations here (remove combine.networks then!) u = combine.networks(authors.net, artifacts.net, authors.to.artifacts, network.conf = private$network.conf) @@ -696,21 +833,24 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", ## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / ## Network and edge construction ------------------------------------------- -#' Construct a dependency network from the given list of lists. +#' Construct a edge list from the given list of lists. #' For example for a list of authors per thread, where all authors are connected if they are #' in the same thread (sublist). #' +#' Important: The input needs to be compatible with the function \code{get.key.to.value.from.df}. 
+#' #' If directed the order of things in the sublist is respected and the 'edge.attr's hold the #' vector of possible edge attributes in the given list. #' #' @param list the list of lists with data #' @param network.conf the network configuration -#' @param directed whether or not the network should be directed +#' @param directed whether or not the network should be directed [default: FALSE] #' -#' @return the built network -construct.network.from.list = function(list, network.conf, directed = FALSE) { +#' @return a list of two data.frames named 'vertices' and 'edges' (compatible with return value +#' of \code{igraph::as.data.frame}) +construct.edge.list.from.key.value.list = function(list, network.conf, directed = FALSE) { logging::loginfo("Create edges.") - logging::logdebug("construct.network.from.list: starting.") + logging::logdebug("construct.edge.list.from.key.value.list: starting.") # initialize an edge list to fill and the set of nodes nodes.processed = c() @@ -833,10 +973,35 @@ construct.network.from.list = function(list, network.conf, directed = FALSE) { } + logging::logdebug("construct.edge.list.from.key.value.list: finished.") + + return(list( + vertices = data.frame( + name = nodes.processed + ), + edges = edge.list + )) +} + +#' Construct a network from the given lists of vertices and edges. +#' For example for a list of authors per thread, where all authors are connected if they are +#' in the same thread (sublist). +#' +#' If directed the order of things in the sublist is respected and the 'edge.attr's hold the +#' vector of possible edge attributes in the given list. 
+#' +#' @param vertices data frame with vertex data +#' @param edge.list list of edges +#' @param network.conf the network configuration +#' @param directed whether or not the network should be directed [default: FALSE] +#' +#' @return the built network +construct.network.from.edge.list = function(vertices, edge.list, network.conf, directed = FALSE) { + logging::logdebug("construct.network.from.edge.list: starting.") logging::loginfo("Construct network from edges.") ## get unique list of vertices to produce - nodes.processed = unique(nodes.processed) + nodes.processed = unique(vertices) # if we do not have nodes AND the edge.list is empty, return rightaway if (length(nodes.processed) == 0) { @@ -854,14 +1019,14 @@ construct.network.from.list = function(list, network.conf, directed = FALSE) { net = igraph::graph.data.frame(edge.list, directed = directed, vertices = nodes.processed) } - net = igraph::set.vertex.attribute(net, "id", value = igraph::get.vertex.attribute(net, "name")) net = igraph::set.edge.attribute(net, "weight", value = 1) # transform multiple edges to edge weights - if (network.conf$get.value("simplify")) + if (network.conf$get.value("simplify")) { net = simplify.network(net) + } - logging::logdebug("construct.network.from.list: finished.") + logging::logdebug("construct.network.from.edge.list: finished.") return(net) } @@ -898,46 +1063,106 @@ combine.networks = function(net1, net2, net1.to.net2, network.conf) { return(u) } +#' Merges a list vertex data frame and merges a list of edge +#' data frames +#' +#' @param vertex.data the list of vertex data frames +#' @param edge.data the list of edge data frames +#' +#' @return list containing one edge data frame and one vertex data frame +merge.network.data = function(vertex.data, edge.data) { + logging::logdebug("merge.network.data: starting.") + + vertices = plyr::rbind.fill(vertex.data) + + ## select unique vertices + vertices = unique.data.frame(vertices) + edges = plyr::rbind.fill(edge.data) + + 
logging::logdebug("merge.network.data: finished.") + return(list( + vertices = vertices, + edges = edges + )) +} + +#' Merges a list of networks to one big network +#' +#' @param networks the list of networks +#' +#' @return the built one network +merge.networks = function(networks) { + logging::logdebug("merge.networks: starting.") + + ## list with all vertex data frames + vertex.data = lapply(networks, function(network) { + return(igraph::as_data_frame(network, what = "vertices")) + }) + + ## list of all edge data frames + edge.data = lapply(networks, function(network) { + return(igraph::as_data_frame(network, what = "edges")) + }) + + ## merge all edge and vertex data frames + new.network.data = merge.network.data(vertex.data, edge.data) + + ## build whole network form edge and vertex data frame + whole.network = igraph::graph.data.frame( + new.network.data[["edges"]], + vertices = new.network.data[["vertices"]], + directed = igraph::is.directed(networks[[1]]) + ) + + logging::logdebug("merge.networks: finished.") + + return(whole.network) +} + #' Add vertex relations to the given network. 
#' #' @param net the network -#' @param net1.to.net2 the vertex relations to add to the given network +#' @param bipartite.relations the list of the vertex relations to add to the given network for +#' all configured relations #' @param network.conf the network configuration #' #' @return the adjusted network -add.edges.for.bipartite.relation = function(net, net1.to.net2, network.conf) { +add.edges.for.bipartite.relation = function(net, bipartite.relations, network.conf) { - ## construct edges (i.e., a vertex sequence with c(source, target, source, target, ...)) - vertex.sequence.for.edges = parallel::mcmapply(function(d, a.df) { - a = a.df[["data.vertices"]] - new.edges = lapply(a, function(vert) { - igraph::V(net)[d, vert] # get two vertices from source network: c(author, artifact) - }) - return(new.edges) - }, names(net1.to.net2), net1.to.net2) + ## iterate about all bipartite.relations depending on the relation type + for (net1.to.net2 in bipartite.relations) { - ## get extra edge attributes - extra.edge.attributes.df = parallel::mclapply(net1.to.net2, function(a.df) { - cols.which = network.conf$get.value("edge.attributes") %in% colnames(a.df) - return(a.df[, network.conf$get.value("edge.attributes")[cols.which], drop = FALSE]) - }) - extra.edge.attributes.df = plyr::rbind.fill(extra.edge.attributes.df) - extra.edge.attributes.df["weight"] = 1 # add weight + ## construct edges (i.e., a vertex sequence with c(source, target, source, target, ...)) + vertex.sequence.for.edges = parallel::mcmapply(function(d, a.df) { + a = a.df[["data.vertices"]] + new.edges = lapply(a, function(vert) { + igraph::V(net)[d, vert] # get two vertices from source network: c(author, artifact) + }) + return(new.edges) + }, names(net1.to.net2), net1.to.net2) - extra.edge.attributes = as.list(extra.edge.attributes.df) + ## get extra edge attributes + extra.edge.attributes.df = parallel::mclapply(net1.to.net2, function(a.df) { + cols.which = network.conf$get.value("edge.attributes") %in% 
colnames(a.df) + return(a.df[, network.conf$get.value("edge.attributes")[cols.which], drop = FALSE]) + }) + extra.edge.attributes.df = plyr::rbind.fill(extra.edge.attributes.df) + extra.edge.attributes.df["weight"] = 1 # add weight + extra.edge.attributes.df["type"] = TYPE.EDGES.INTER # add egde type + extra.edge.attributes.df["relation"] = attr(net1.to.net2, "relation") # add relation type - ## set edge type - extra.edge.attributes = c(extra.edge.attributes, list(type = TYPE.EDGES.INTER)) + extra.edge.attributes = as.list(extra.edge.attributes.df) - ## add the vertex sequences as edges to the network - new.net = igraph::add_edges(net, unlist(vertex.sequence.for.edges), attr = extra.edge.attributes) + ## add the vertex sequences as edges to the network + net = igraph::add_edges(net, unlist(vertex.sequence.for.edges), attr = extra.edge.attributes) + } - return(new.net) + return(net) } #' Create an empty network that does not break the algorithms. #' -#' @param directed whether or not the network should be directed +#' @param directed whether or not the network should be directed [default: TRUE] #' #' @return the new empty network create.empty.network = function(directed = TRUE) { @@ -952,6 +1177,14 @@ create.empty.network = function(directed = TRUE) { return(net) } +#' Create an empty data frame that does not break the algorithms and +#' represents an edge list. +#' +#' @return the new empty data frame as edge list +create.empty.edge.list = function() { + return(data.frame(from = character(0), to = character(0))) +} + ## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / ## Network simplification -------------------------------------------------- @@ -959,14 +1192,41 @@ create.empty.network = function(directed = TRUE) { #' Simplify a given network. 
#' #' @param network the given network +#' @param remove.multiple whether to contract multiple edges between the same pair of nodes [default: TRUE] +#' @param remove.loops whether to remove loops [default: TRUE] #' #' @return the simplified network -simplify.network = function(network) { +simplify.network = function(network, remove.multiple = TRUE, remove.loops = TRUE) { logging::logdebug("simplify.network: starting.") logging::loginfo("Simplifying network.") - ## simplify networks (contract edges and remove loops) - network = igraph::simplify(network, edge.attr.comb = EDGE.ATTR.HANDLING, remove.loops = TRUE) + if (length(unique(igraph::get.edge.attribute(network, "relation"))) > 1) { + ## data frame of the network + edge.data = igraph::as_data_frame(network, what = "edges") + vertex.data = igraph::as_data_frame(network, what = "vertices") + + ## select edges of one relation, build the network and simplify this network + networks = lapply(unique(edge.data[["relation"]]), + function(relation) { + network.data = edge.data[edge.data[["relation"]] == relation, ] + net = igraph::graph_from_data_frame(d = network.data, + vertices = vertex.data, + directed = igraph::is.directed(network)) + + ## simplify networks (contract edges and remove loops) + net = igraph::simplify(net, edge.attr.comb = EDGE.ATTR.HANDLING, + remove.multiple = remove.multiple, + remove.loops = remove.loops) + ## TODO perform simplification on edge list? + return(net) + }) + + ## merge the simplified networks + network = merge.networks(networks) + } else { + network = igraph::simplify(network, edge.attr.comb = EDGE.ATTR.HANDLING, + remove.multiple = remove.multiple, remove.loops = remove.loops) + } logging::logdebug("simplify.network: finished.") return(network) @@ -975,16 +1235,19 @@ simplify.network = function(network) { #' Simplify a list of networks. 
#' #' @param networks the list of networks +#' @param remove.multiple whether to contract multiple edges between the same pair of nodes [default: TRUE] +#' @param remove.loops whether to remove loops [default: TRUE] #' #' @return the simplified networks -simplify.networks = function(networks) { +simplify.networks = function(networks, remove.multiple = TRUE, remove.loops = TRUE) { logging::logdebug("simplify.networks: starting.") logging::loginfo( "Simplifying networks (names = [%s]).", paste(names(networks), collapse = ", ") ) - nets = parallel::mclapply(networks, simplify.network) + nets = parallel::mclapply(networks, simplify.network, remove.multiple = remove.multiple, + remove.loops = remove.loops) logging::logdebug("simplify.networks: finished.") return(nets) diff --git a/util-plot.R b/util-plot.R index c4c16c64..095cbb60 100644 --- a/util-plot.R +++ b/util-plot.R @@ -12,6 +12,7 @@ ## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. ## ## Copyright 2017-2018 by Claus Hunsen +## Copyright 2018 by Barbara Eckl ## All Rights Reserved. 
@@ -35,23 +36,8 @@ PLOT.VERTEX.TYPE.ARTIFACT = TYPE.ARTIFACT # "Artifact" PLOT.VERTEX.SIZE = 10 PLOT.VERTEX.SIZE.LEGEND = PLOT.VERTEX.SIZE / 2 -## colors for vertices and edges (colored) -PLOT.COLORS.BY.TYPE.VERTEX = c("#00AEFF", "#FF8B00") -names(PLOT.COLORS.BY.TYPE.VERTEX) = c(TYPE.AUTHOR, TYPE.ARTIFACT) -PLOT.COLORS.BY.TYPE.EDGE = c("#999999", "#14CC3B") -names(PLOT.COLORS.BY.TYPE.EDGE) = c(TYPE.EDGES.INTRA, TYPE.EDGES.INTER) - -## colors for vertices and edges (grayscale) -PLOT.COLORS.BY.TYPE.VERTEX.GRAY = c("gray40", "gray30") -names(PLOT.COLORS.BY.TYPE.VERTEX.GRAY) = c(TYPE.AUTHOR, TYPE.ARTIFACT) -PLOT.COLORS.BY.TYPE.EDGE.GRAY = c("gray60", "gray40") -names(PLOT.COLORS.BY.TYPE.EDGE.GRAY) = c(TYPE.EDGES.INTRA, TYPE.EDGES.INTER) - -## shapes of vertices and edges -PLOT.SHAPE.VERTEX = c(16, 15) # (authors, artifacts) -names(PLOT.SHAPE.VERTEX) = c(TYPE.AUTHOR, TYPE.ARTIFACT) -PLOT.SHAPE.EDGE = c("dashed", "solid") # (unipartite, bipartite) -names(PLOT.SHAPE.EDGE) = c(TYPE.EDGES.INTRA, TYPE.EDGES.INTER) +## vertex-label color +PLOT.VERTEX.LABEL.COLOR = "gray60" ## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / @@ -67,17 +53,16 @@ names(PLOT.SHAPE.EDGE) = c(TYPE.EDGES.INTRA, TYPE.EDGES.INTER) #' #' Note: The names for the vertex types are taken from the variables \code{PLOT.VERTEX.TYPE.AUTHOR} and #' \code{PLOT.VERTEX.TYPE.ARTIFACT}. The defaults are \code{"Developer"} and \code{TYPE.ARTIFACT}, respectively. +#' All loops are deleted for plotting the network. 
#' #' @param network the network to plot and print #' @param labels logical indicating whether vertex lables should be plotted [default: TRUE] -#' @param grayscale logical indicating whether the plot is to be in grayscale, by default, it is colored -#' [default: FALSE] #' #' @return the network (invisibly) #' #' @aliases plot.print.network -plot.network = function(network, labels = TRUE, grayscale = FALSE) { - plot.print.network(network, labels = labels, grayscale = grayscale) +plot.network = function(network, labels = TRUE) { + plot.print.network(network, labels = labels) } #' Construct a ggplot2/ggraph plot object for the given network and print it directly. @@ -90,17 +75,16 @@ plot.network = function(network, labels = TRUE, grayscale = FALSE) { #' #' Note: The names for the vertex types are taken from the variables \code{PLOT.VERTEX.TYPE.AUTHOR} and #' \code{PLOT.VERTEX.TYPE.ARTIFACT}. The defaults are \code{"Developer"} and \code{TYPE.ARTIFACT}, respectively. +#' All loops are deleted for plotting the network. #' #' @param network the network to plot and print #' @param labels logical indicating whether vertex lables should be plotted [default: TRUE] -#' @param grayscale logical indicating whether the plot is to be in grayscale, by default, it is colored -#' [default: FALSE] #' #' @return the network (invisibly) #' #' @aliases plot.network -plot.print.network = function(network, labels = TRUE, grayscale = FALSE) { - p = plot.get.plot.for.network(network, labels = labels, grayscale = grayscale) +plot.print.network = function(network, labels = TRUE) { + p = plot.get.plot.for.network(network, labels = labels) print(p) } @@ -114,27 +98,13 @@ plot.print.network = function(network, labels = TRUE, grayscale = FALSE) { #' #' Note: The names for the vertex types are taken from the variables \code{PLOT.VERTEX.TYPE.AUTHOR} and #' \code{PLOT.VERTEX.TYPE.ARTIFACT}. The defaults are \code{"Developer"} and \code{TYPE.ARTIFACT}, respectively. 
+#' All loops are deleted for plotting the network. #' #' @param network the network to plot #' @param labels logical indicating whether vertex lables should be plotted [default: TRUE] -#' @param grayscale logical indicating whether the plot is to be in grayscale, by default, it is colored -#' [default: FALSE] #' #' @return a ggplot2/ggraph plot object -plot.get.plot.for.network = function(network, labels = TRUE, grayscale = FALSE) { - ## set colors for grayscale - if (grayscale) { - colors.vertex = PLOT.COLORS.BY.TYPE.VERTEX.GRAY - colors.edge = PLOT.COLORS.BY.TYPE.EDGE.GRAY - colors.vertex.label = "white" - } - ## set colors for colored - else { - colors.vertex = PLOT.COLORS.BY.TYPE.VERTEX - colors.edge = PLOT.COLORS.BY.TYPE.EDGE - colors.vertex.label = "black" - } - +plot.get.plot.for.network = function(network, labels = TRUE) { ## check if network is empty if (igraph::vcount(network) == 0) { network = create.empty.network(directed = igraph::is.directed(network)) + @@ -146,8 +116,11 @@ plot.get.plot.for.network = function(network, labels = TRUE, grayscale = FALSE) PLOT.VERTEX.TYPES = c(PLOT.VERTEX.TYPE.AUTHOR, PLOT.VERTEX.TYPE.ARTIFACT) names(PLOT.VERTEX.TYPES) = c(TYPE.AUTHOR, TYPE.ARTIFACT) + ## remove loops because of weird behavior when plotting + network = igraph::delete.edges(network, igraph::E(network)[igraph::is.loop(network)]) + ## fix the type attributes (add new ones, also named) - network = plot.fix.type.attributes(network, colors.vertex = colors.vertex, colors.edge = colors.edge) + network = plot.fix.type.attributes(network) ## set network layout if (!("layout" %in% igraph::list.graph.attributes(network))) { @@ -161,7 +134,7 @@ plot.get.plot.for.network = function(network, labels = TRUE, grayscale = FALSE) if (igraph::ecount(network) > 0) { p = p + ggraph::geom_edge_fan( - mapping = ggplot2::aes(colour = edge.type, linetype = edge.type), + mapping = ggplot2::aes(colour = relation, linetype = edge.type, width = 0.3 + 0.5 * log(weight)), end_cap = 
ggraph::circle(PLOT.VERTEX.SIZE + 3, "pt"), start_cap = ggraph::circle(PLOT.VERTEX.SIZE + 3, "pt"), arrow = if (igraph::is.directed(network)) { @@ -176,22 +149,29 @@ plot.get.plot.for.network = function(network, labels = TRUE, grayscale = FALSE) p = p + ## plot vertices - ggraph::geom_node_point(ggplot2::aes(color = vertex.type, shape = vertex.type), size = PLOT.VERTEX.SIZE) + - ggraph::geom_node_text(ggplot2::aes(label = if (labels) name else c("")), size = 3.5, color = colors.vertex.label) + + ggraph::geom_node_point(ggplot2::aes(color = kind, shape = vertex.type), size = PLOT.VERTEX.SIZE) + + ggraph::geom_node_text(ggplot2::aes(label = if (labels) name else c("")), size = 3.5, color = PLOT.VERTEX.LABEL.COLOR) + + ## scale vertices (colors and styles) - ggplot2::scale_shape_manual("Vertices", values = PLOT.SHAPE.VERTEX, labels = PLOT.VERTEX.TYPES) + - ggplot2::scale_color_manual("Vertices", values = colors.vertex, labels = PLOT.VERTEX.TYPES) + + ggplot2::scale_shape_discrete(name = "Vertex Types", solid = TRUE) + + viridis::scale_color_viridis(name = "Vertices", option = "plasma", discrete = TRUE, + end = 0.8, begin = 0.05) + ## scale edges (colors and styles) - ggraph::scale_edge_linetype_manual("Relations", values = PLOT.SHAPE.EDGE) + - ggraph::scale_edge_colour_manual("Relations", values = colors.edge) + + ggraph::scale_edge_linetype(name = "Relation Types") + + ggplot2::discrete_scale(name = "Relations", "edge_colour", "viridis", + viridis::viridis_pal(option = "viridis", end = 0.8, begin = 0.25)) + + ## BROKEN RIGHT NOW due to bug in scale_edge_colour_viridis(): + # ggraph::scale_edge_colour_viridis(name = "Relations", option = "magma", discrete = TRUE, + # end = 0.85, begin = 0, direction = 1) + ## theme ggplot2::theme_light() + ggplot2::guides( ## reduce size of symbols in legend - shape = ggplot2::guide_legend(override.aes = list(size = PLOT.VERTEX.SIZE.LEGEND)) + shape = ggplot2::guide_legend(override.aes = list(size = PLOT.VERTEX.SIZE.LEGEND)), + color 
= ggplot2::guide_legend(override.aes = list(size = PLOT.VERTEX.SIZE.LEGEND)) ) + ggplot2::theme( legend.position = "bottom", @@ -226,29 +206,17 @@ plot.get.plot.for.network = function(network, labels = TRUE, grayscale = FALSE) #' - TYPE.ARTIFACT = TRUE. #' #' Furthermore, the following attributes are added to either vertices or edges: -#' - vertex.color = a color code (hex or else) for coloring the vertices (see 'colors.vertex' parameter), -#' - edge.color = a color code (hex or else) for coloring the edges (see 'colors.edge' parameter), #' - vertex.type = a copy of the old vertex attribute 'type', and #' - edge.type = a copy of the old edge attribute 'type'. #' #' @param network the igraph object to augment -#' @param colors.vertex a vector of length 2, the entries named with the values of 'TYPE.AUTHOR' and 'TYPE.ARTIFACT' -#' [default: PLOT.COLORS.BY.TYPE.VERTEX] -#' @param colors.edge a vector of length 2, the entries named with the values of 'TYPE.EDGES.INTER' and 'TYPE.EDGES.INTRA' -#' [default: PLOT.COLORS.BY.TYPE.EDGE] #' #' @return the old network with the new and changed vertex and edge attributes -plot.fix.type.attributes = function(network, colors.vertex = PLOT.COLORS.BY.TYPE.VERTEX, colors.edge = PLOT.COLORS.BY.TYPE.EDGE) { +plot.fix.type.attributes = function(network) { ## copy type attribute to vertex.type and edge.type network = igraph::set.vertex.attribute(network, "vertex.type", value = igraph::get.vertex.attribute(network, "type")) network = igraph::set.edge.attribute(network, "edge.type", value = igraph::get.edge.attribute(network, "type")) - ## add edge and vertex colors - network = igraph::set.vertex.attribute(network, "vertex.color", - value = colors.vertex[ igraph::get.vertex.attribute(network, "vertex.type") ]) - network = igraph::set.edge.attribute(network, "edge.color", - value = colors.edge[ igraph::get.edge.attribute(network, "edge.type") ]) - ## adjust 'type' attribute for vertices for bipartite plotting (we need Booleans there) types = 
igraph::get.vertex.attribute(network, "type") network = igraph::remove.vertex.attribute(network, "type") diff --git a/util-read.R b/util-read.R index 4e62df73..1f05d8f3 100644 --- a/util-read.R +++ b/util-read.R @@ -13,7 +13,7 @@ ## ## Copyright 2016-2018 by Claus Hunsen ## Copyright 2017 by Raphael Nömmer -## Copyright 2017 by Christian Hechtl +## Copyright 2017-2018 by Christian Hechtl ## Copyright 2017 by Felix Prasse ## Copyright 2017 by Thomas Bock ## All Rights Reserved. @@ -216,6 +216,9 @@ read.mails = function(data.path) { ## set pattern for thread ID for better recognition mail.data[["thread"]] = sprintf("", mail.data[["thread"]]) + ## set proper artifact type for proper vertex attribute 'artifact.type' + mail.data["artifact.type"] = "Mail" + ## remove mails without a proper date as they mess up directed mail-based networks ## this basically only applies for project-level analysis empty.dates = which(mail.data[["date"]] == "" | is.na(mail.data[["date"]])) @@ -287,9 +290,10 @@ read.authors = function(data.path) { ## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / ## PaStA data -------------------------------------------------------------- -#' Read and parse the pasta data from the 'similar-mailbox' file. +#' Read and parse the pasta data from the 'mbox-result' file. #' The form in the file is : ... => commit.hash commit.hash2 .... -#' The parsed form is a data frame with message IDs as keys and commit hashes as values. +#' The parsed form is a data frame with message IDs as keys, commit hashes as values, and a revision set id. +#' If the message ID does not get mapped to a commit hash, the value for the commit hash is \code{NA}. 
#' #' @param data.path the path to the pasta data #' @@ -300,7 +304,7 @@ read.pasta = function(data.path) { KEY.SEPERATOR = " " ## get file name of pasta data - filepath = file.path(data.path, "similar-mailbox") + filepath = file.path(data.path, "mbox-result") ## read data from disk [can be empty] lines = suppressWarnings(try(readLines(filepath), silent = TRUE)) @@ -312,13 +316,13 @@ read.pasta = function(data.path) { return(data.frame()) } - result.list = parallel::mclapply(lines, function(line) { + result.list = parallel::mcmapply(lines, seq_along(lines), SIMPLIFY = FALSE, FUN = function(line, line.id) { #line = lines[i] if ( nchar(line) == 0 ) { return(NULL) } - if (!grepl(SEPERATOR, line)) { + if (!grepl("<", line)) { logging::logwarn("Faulty line: %s", line) return(NULL) } @@ -327,16 +331,21 @@ read.pasta = function(data.path) { # 2) split keys # 3) split values # 4) insert all key-value pairs by iteration (works also if there is only one key) - line.split = unlist(strsplit(line, SEPERATOR)) - keys = line.split[1] - values = line.split[2] - keys.split = unlist(strsplit(keys, KEY.SEPERATOR)) - values.split = unlist(strsplit(values, KEY.SEPERATOR)) + if (grepl(SEPERATOR, line)) { + line.split = unlist(strsplit(line, SEPERATOR)) + keys = line.split[1] + values = line.split[2] + keys.split = unlist(strsplit(keys, KEY.SEPERATOR)) + values.split = unlist(strsplit(values, KEY.SEPERATOR)) + } else { + keys.split = unlist(strsplit(line, KEY.SEPERATOR)) + values.split = NA + } # Transform data to data.frame - #df = data.frame(message.id = keys.split, commit.hash = values.split) df = merge(keys.split, values.split) colnames(df) = c("message.id", "commit.hash") + df["revision.set.id"] = sprintf("", line.id) return(df) }) result.df = plyr::rbind.fill(result.list) @@ -381,6 +390,9 @@ read.issues = function(data.path) { ## set pattern for issue ID for better recognition issue.data[["issue.id"]] = sprintf("", issue.data[["issue.id"]]) + ## set proper artifact type for proper 
vertex attribute 'artifact.type' + issue.data["artifact.type"] = "IssueEvent" + ## convert 'is.pull.request' column to logicals issue.data[["is.pull.request"]] = as.logical(issue.data[["is.pull.request"]])