se-sic · clhunsen · Jan 15, 2019 · Nov 29, 2018 · Dec 2, 2018 · Dec 4, 2018
diff --git a/NEWS.md b/NEWS.md
@@ -2,8 +2,29 @@
 
 ## Unversioned
 
+### Added
+- In addition to the ProjectConf parameter `artifact.filter.base`, which configures whether the base artifact should be
+included in the `get.commits.filtered` method, there is now the similar parameter `filter.untracked.files` which does
+the same thing for untracked files (11428d9847fd44f982cd094a3248bd13fb6b7b58)
+- Edges are not being constructed in the author network between authors that only modify untracked files. For authors
+it can be configured if the edges should be created or not using the new NetworkConf parameter `base.artifact.edges`
+(c60c2f6e44b6f34cccb2714eccc7674158c83dde)
+- The public `get.commits.filtered.uncached` method was added which allows for external filtering of the commits by
+specifying if untracked files and/or the base artifact should be filtered (this method does not take advantage of
+caching, whereas the `get.commits.filtered` method does) (11428d9847fd44f982cd094a3248bd13fb6b7b58)
+
 ### Changed/Improved
 - Change shape of `Vertices` in the legend of plots to avoid confusion (f4fb4807cfd87d9d552a9ede92ea65ae4a386a04)
+- Commits that do not change any artifact are considered to be carried out on a metafile called `untracked.file`
+(11428d9847fd44f982cd094a3248bd13fb6b7b58)
+- Remove `get.commits.raw`, `set.commits.raw` and `read.commits.raw` functions (64a94863c9e70ac8c75e443bc15cd7facbf2111d,
+c26e582e4ad6bf1eaeb08202fc3e00394332a013)
+- Remove the `get.commits.filtered.empty` method and the corresponding `filter.commits.empty` method; the functionality
+has been moved to the altered `get.commits.filtered` and `filter.commits` methods, respectively (11428d9847fd44f982cd094a3248bd13fb6b7b58)
+- The `filter.commits` method now takes parameters which configure if untracked files and/or the base artifact should be
+filtered out (11428d9847fd44f982cd094a3248bd13fb6b7b58)
+- Filtering by artifact kind (e.g. filtering out either Feature or FeatureExpression) is now being done in the
+`get.commits` method instead of the `get.commits.filtered` method (894c9a5c181fef14dcb71fa23699bebbcbcd2b4f)
 
 ### Fixed
 - Fix error when resetting an `ProjectData` environment (c64cab84e928a2a4c89a6df12440ba7ca06e6263)

diff --git a/showcase.R b/showcase.R
@@ -16,6 +16,7 @@
 ## Copyright 2017 by Christian Hechtl <[email protected]>
 ## Copyright 2017 by Felix Prasse <[email protected]>
 ## Copyright 2017-2018 by Thomas Bock <[email protected]>
+## Copyright 2018 by Jakob Kronawitter <[email protected]>
 ## All Rights Reserved.
 
 
@@ -85,7 +86,7 @@ x = NetworkBuilder$new(project.data = x.data, network.conf = net.conf)
 # x.data$get.synchronicity()
 # x.data$group.artifacts.by.data.column("commits", "author.name")
 # x.data$get.commits.filtered()
-# x.data$get.commits.filtered.empty()
+# x.data$get.commits.filtered.uncached(remove.untracked.files = TRUE, remove.base.artifact = FALSE)
 # x.data$get.mails()
 # x.data$get.authors()
 # x.data$get.data.path()
@@ -126,7 +127,6 @@ y = NetworkBuilder$new(project.data = y.data, network.conf = net.conf)
 # y.data$get.synchronicity()
 # y.data$group.artifacts.by.data.column("commits", "author.name")
 # y.data$get.commits.filtered()
-# y.data$get.commits.filtered.empty()
 # y.data$get.mails()
 # y.data$get.authors()
 # y.data$get.data.path()

diff --git a/tests/test-data-cut.R b/tests/test-data-cut.R
@@ -16,6 +16,7 @@
 ## Copyright 2018 by Claus Hunsen <[email protected]>
 ## Copyright 2018 by Barbara Eckl <[email protected]>
 ## Copyright 2018 by Thomas Bock <[email protected]>
+## Copyright 2018 by Jakob Kronawitter <[email protected]>
 ## All Rights Reserved.
 
 
@@ -44,26 +45,22 @@ test_that("Cut commit and mail data to same date range.", {
 
     x.data = ProjectData$new(proj.conf)
 
-    commit.data.expected = data.frame(commit.id = sprintf("<commit-%s>", c(32712, 32712, 32713, 32713)),
-                                      date = get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 15:58:59", "2016-07-12 16:00:45",
-                                                                  "2016-07-12 16:00:45")),
-                                      author.name = c("Björn", "Björn", "Olaf", "Olaf"),
-                                      author.email = c("[email protected]", "[email protected]", "[email protected]",
-                                                     "[email protected]"),
-                                      committer.date = get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 15:58:59", "2016-07-20 10:00:44",
-                                                                              "2016-07-20 10:00:44")),
-                                      committer.name = c("Björn", "Björn", "Björn", "Björn"),
-                                      committer.email = c("[email protected]", "[email protected]", "[email protected]", "[email protected]"),
-                                      hash = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0",
-                                             "5a5ec9675e98187e1e92561e1888aa6f04faa338", "5a5ec9675e98187e1e92561e1888aa6f04faa338"),
-                                      changed.files = as.integer(c(1, 1, 1, 1)),
-                                      added.lines = as.integer(c(1, 1, 1, 1)),
-                                      deleted.lines = as.integer(c(1, 1, 0, 0)),
-                                      diff.size = as.integer(c(2, 2, 1, 1)),
-                                      file = c("test.c", "test.c", "test.c", "test.c"),
-                                      artifact = c("A", "defined(A)", "A", "defined(A)"),
-                                      artifact.type = c("Feature", "FeatureExpression", "Feature", "FeatureExpression"),
-                                      artifact.diff.size = as.integer(c(1, 1, 1, 1)))
+    commit.data.expected = data.frame(commit.id = sprintf("<commit-%s>", c(32712, 32713)),
+                                      date = get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 16:00:45")),
+                                      author.name = c("Björn", "Olaf"),
+                                      author.email = c("[email protected]", "[email protected]"),
+                                      committer.date = get.date.from.string(c("2016-07-12 15:58:59", "2016-07-20 10:00:44")),
+                                      committer.name = c("Björn", "Björn"),
+                                      committer.email = c("[email protected]", "[email protected]"),
+                                      hash = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338"),
+                                      changed.files = as.integer(c(1, 1)),
+                                      added.lines = as.integer(c(1, 1)),
+                                      deleted.lines = as.integer(c(1, 0)),
+                                      diff.size = as.integer(c(2, 1)),
+                                      file = c("test.c", "test.c"),
+                                      artifact = c("A", "A"),
+                                      artifact.type = c("Feature", "Feature"),
+                                      artifact.diff.size = as.integer(c(1, 1)))
 
     mail.data.expected = data.frame(author.name = c("Thomas"),
                                     author.email = c("[email protected]"),

diff --git a/tests/test-networks-cut.R b/tests/test-networks-cut.R
@@ -14,6 +14,7 @@
 ## Copyright 2017 by Christian Hechtl <[email protected]>
 ## Copyright 2018 by Claus Hunsen <[email protected]>
 ## Copyright 2018 by Thomas Bock <[email protected]>
+## Copyright 2018 by Jakob Kronawitter <[email protected]>
 ## All Rights Reserved.
 
 
@@ -44,26 +45,22 @@ test_that("Cut commit and mail data to same date range.", {
     x.data = ProjectData$new(proj.conf)
     x = NetworkBuilder$new(x.data, net.conf)
 
-    commit.data.expected = data.frame(commit.id = sprintf("<commit-%s>", c(32712, 32712, 32713, 32713)),
-                                      date = get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 15:58:59", "2016-07-12 16:00:45",
-                                                                  "2016-07-12 16:00:45")),
-                                      author.name = c("Björn", "Björn", "Olaf", "Olaf"),
-                                      author.email = c("[email protected]", "[email protected]", "[email protected]",
-                                                     "[email protected]"),
-                                      committer.date = get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 15:58:59", "2016-07-20 10:00:44",
-                                                                              "2016-07-20 10:00:44")),
-                                      committer.name = c("Björn", "Björn", "Björn", "Björn"),
-                                      committer.email = c("[email protected]", "[email protected]", "[email protected]", "[email protected]"),
-                                      hash = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0",
-                                             "5a5ec9675e98187e1e92561e1888aa6f04faa338", "5a5ec9675e98187e1e92561e1888aa6f04faa338"),
-                                      changed.files = as.integer(c(1, 1, 1, 1)),
-                                      added.lines = as.integer(c(1, 1, 1, 1)),
-                                      deleted.lines = as.integer(c(1, 1, 0, 0)),
-                                      diff.size = as.integer(c(2, 2, 1, 1)),
-                                      file = c("test.c", "test.c", "test.c", "test.c"),
-                                      artifact = c("A", "defined(A)", "A", "defined(A)"),
-                                      artifact.type = c("Feature", "FeatureExpression", "Feature", "FeatureExpression"),
-                                      artifact.diff.size = as.integer(c(1, 1, 1, 1)))
+    commit.data.expected = data.frame(commit.id = sprintf("<commit-%s>", c(32712, 32713)),
+                                      date = get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 16:00:45")),
+                                      author.name = c("Björn", "Olaf"),
+                                      author.email = c("[email protected]", "[email protected]"),
+                                      committer.date = get.date.from.string(c("2016-07-12 15:58:59", "2016-07-20 10:00:44")),
+                                      committer.name = c("Björn", "Björn"),
+                                      committer.email = c("[email protected]", "[email protected]"),
+                                      hash = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338"),
+                                      changed.files = as.integer(c(1, 1)),
+                                      added.lines = as.integer(c(1, 1)),
+                                      deleted.lines = as.integer(c(1, 0)),
+                                      diff.size = as.integer(c(2, 1)),
+                                      file = c("test.c", "test.c"),
+                                      artifact = c("A", "A"),
+                                      artifact.type = c("Feature", "Feature"),
+                                      artifact.diff.size = as.integer(c(1, 1)))
 
     mail.data.expected = data.frame(author.name = c("Thomas"),
                                     author.email = c("[email protected]"),

diff --git a/tests/test-read.R b/tests/test-read.R
@@ -15,6 +15,7 @@
 ## Copyright 2017 by Felix Prasse <[email protected]>
 ## Copyright 2018 by Claus Hunsen <[email protected]>
 ## Copyright 2018 by Thomas Bock <[email protected]>
+## Copyright 2018 by Jakob Kronawitter <[email protected]>
 ## All Rights Reserved.
 
 
@@ -88,7 +89,7 @@ test_that("Read the raw commit data with the file artifact.", {
     proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, "file")
 
     ## read the actual data
-    commit.data.read = read.commits.raw(proj.conf$get.value("datapath"), proj.conf$get.value("artifact"))
+    commit.data.read = read.commits(proj.conf$get.value("datapath"), proj.conf$get.value("artifact"))
 
     ## build the expected data.frame
     commit.data.expected = data.frame(commit.id = sprintf("<commit-%s>", c(32716, 32717, 32718, 32719, 32715)),

diff --git a/tests/test-split.R b/tests/test-split.R
@@ -15,6 +15,7 @@
 ## Copyright 2017 by Felix Prasse <[email protected]>
 ## Copyright 2018 by Thomas Bock <[email protected]>
 ## Copyright 2018 by Christian Hechtl <[email protected]>
+## Copyright 2018 by Jakob Kronawitter <[email protected]>
 ## All Rights Reserved.
 
 
@@ -93,9 +94,9 @@ test_that("Split a data object time-based (split.basis == 'commits').", {
     ## check data for all ranges
     expected.data = list(
         commits = list(
-            "2016-07-12 15:58:59-2016-07-12 16:01:59" = data$commits[1:4, ],
-            "2016-07-12 16:01:59-2016-07-12 16:04:59" = data.frame(),
-            "2016-07-12 16:04:59-2016-07-12 16:06:33" = data$commits[5:9, ]
+            "2016-07-12 15:58:59-2016-07-12 16:01:59" = data$commits[1:2, ],
+            "2016-07-12 16:01:59-2016-07-12 16:04:59" = data$commits[0, ],
+            "2016-07-12 16:04:59-2016-07-12 16:06:33" = data$commits[3:6, ]
         ),
         mails = list(
             "2016-07-12 15:58:59-2016-07-12 16:01:59" = data.frame(),
@@ -168,10 +169,10 @@ test_that("Split a data object time-based (split.basis == 'mails').", {
     ## check data for all ranges
     expected.data = list(
         commits = list(
-            "2004-10-09 18:38:13-2007-10-10 12:38:13" = data.frame(),
-            "2007-10-10 12:38:13-2010-10-10 06:38:13" = data.frame(),
-            "2010-10-10 06:38:13-2013-10-10 00:38:13" = data.frame(),
-            "2013-10-10 00:38:13-2016-07-12 16:05:38" = data$commits[1:4, ]
+            "2004-10-09 18:38:13-2007-10-10 12:38:13" = data$commits[0, ],
+            "2007-10-10 12:38:13-2010-10-10 06:38:13" = data$commits[0, ],
+            "2010-10-10 06:38:13-2013-10-10 00:38:13" = data$commits[0, ],
+            "2013-10-10 00:38:13-2016-07-12 16:05:38" = data$commits[1:2, ]
         ),
         mails = list(
             "2004-10-09 18:38:13-2007-10-10 12:38:13" = data$mails[rownames(data$mails) %in% 1:2, ],
@@ -247,9 +248,9 @@ test_that("Split a data object time-based (split.basis == 'issues').", {
     ## check data for all ranges
     expected.data = list(
         commits = list(
-            "2013-04-21 23:52:09-2015-04-22 11:52:09" = data.frame(),
+            "2013-04-21 23:52:09-2015-04-22 11:52:09" = data$commits[0, ],
             "2015-04-22 11:52:09-2017-04-21 23:52:09" = data$commits,
-            "2017-04-21 23:52:09-2017-05-23 12:32:40" = data.frame()
+            "2017-04-21 23:52:09-2017-05-23 12:32:40" = data$commits[0, ]
         ),
         mails = list(
             "2013-04-21 23:52:09-2015-04-22 11:52:09" = data.frame(),
@@ -496,9 +497,9 @@ test_that("Split a data object activity-based (activity.type = 'commits').", {
     ## check data for all ranges
     expected.data = list(
         commits = list(
-            "2016-07-12 15:58:59-2016-07-12 16:05:41" = data$commits[1:4, ],
-            "2016-07-12 16:05:41-2016-07-12 16:06:32" = data$commits[5:7, ],
-            "2016-07-12 16:06:32-2016-07-12 16:06:33" = data$commits[8:9, ]
+            "2016-07-12 15:58:59-2016-07-12 16:05:41" = data$commits[1:2, ],
+            "2016-07-12 16:05:41-2016-07-12 16:06:32" = data$commits[3:4, ],
+            "2016-07-12 16:06:32-2016-07-12 16:06:33" = data$commits[5:6, ]
         ),
         mails = list(
             "2016-07-12 15:58:59-2016-07-12 16:05:41" = data$mails[rownames(data$mails) %in% 16:17, ],
@@ -591,8 +592,8 @@ test_that("Split a data object activity-based (activity.type = 'commits').", {
     ## check data for all ranges
     expected.data = list(
         commits = list(
-            "2016-07-12 15:58:59-2016-07-12 16:06:10" = data$commits[1:6, ],
-            "2016-07-12 16:06:10-2016-07-12 16:06:33" = data$commits[7:9, ]
+            "2016-07-12 15:58:59-2016-07-12 16:06:10" = data$commits[1:3, ],
+            "2016-07-12 16:06:10-2016-07-12 16:06:33" = data$commits[4:6, ]
         ),
         mails = list(
             "2016-07-12 15:58:59-2016-07-12 16:06:10" = data$mails[rownames(data$mails) %in% 16:17, ],
@@ -675,12 +676,12 @@ test_that("Split a data object activity-based (activity.type = 'mails').", {
     ## check data for all ranges
     expected.data = list(
         commits = list(
-            "2004-10-09 18:38:13-2010-07-12 11:05:35" = data.frame(),
-            "2010-07-12 11:05:35-2010-07-12 12:05:41" = data.frame(),
-            "2010-07-12 12:05:41-2010-07-12 12:05:44" = data.frame(),
-            "2010-07-12 12:05:44-2016-07-12 15:58:40" = data.frame(),
-            "2016-07-12 15:58:40-2016-07-12 16:05:37" = data$commits[1:4, ],
-            "2016-07-12 16:05:37-2016-07-12 16:05:38" = data.frame()
+            "2004-10-09 18:38:13-2010-07-12 11:05:35" = data$commits[0, ],
+            "2010-07-12 11:05:35-2010-07-12 12:05:41" = data$commits[0, ],
+            "2010-07-12 12:05:41-2010-07-12 12:05:44" = data$commits[0, ],
+            "2010-07-12 12:05:44-2016-07-12 15:58:40" = data$commits[0, ],
+            "2016-07-12 15:58:40-2016-07-12 16:05:37" = data$commits[1:2, ],
+            "2016-07-12 16:05:37-2016-07-12 16:05:38" = data$commits[0, ]
         ),
         mails = list(
             "2004-10-09 18:38:13-2010-07-12 11:05:35" = data$mails[rownames(data$mails) %in% 1:3, ],
@@ -742,7 +743,7 @@ test_that("Split a data object activity-based (activity.type = 'mails').", {
     ## check data for all ranges
     expected.data = list(
         commits = list(
-            "2004-10-09 18:38:13-2016-07-12 16:05:38" = data$commits[1:4, ]
+            "2004-10-09 18:38:13-2016-07-12 16:05:38" = data$commits[1:2, ]
         ),
         mails = list(
             "2004-10-09 18:38:13-2016-07-12 16:05:38" = data$mails
@@ -785,8 +786,8 @@ test_that("Split a data object activity-based (activity.type = 'mails').", {
     ## check data for all ranges
     expected.data = list(
         commits = list(
-            "2004-10-09 18:38:13-2010-07-12 12:05:43" = data.frame(),
-            "2010-07-12 12:05:43-2016-07-12 16:05:38" = data$commits[1:4, ]
+            "2004-10-09 18:38:13-2010-07-12 12:05:43" = data$commits[0, ],
+            "2010-07-12 12:05:43-2016-07-12 16:05:38" = data$commits[1:2, ]
         ),
         mails = list(
             "2004-10-09 18:38:13-2010-07-12 12:05:43" = data$mails[rownames(data$mails) %in% 1:8, ],
@@ -866,10 +867,10 @@ test_that("Split a data object activity-based (activity.type = 'issues').", {
     ## check data for all ranges
     expected.data = list(
         commits = list(
-            "2013-04-21 23:52:09-2016-07-12 16:05:47" = data$commits[1:6, ],
-            "2016-07-12 16:05:47-2016-08-31 18:21:48" = data$commits[7:9, ],
-            "2016-08-31 18:21:48-2017-02-20 22:25:41" = data.frame(),
-            "2017-02-20 22:25:41-2017-05-23 12:32:40" = data.frame()
+            "2013-04-21 23:52:09-2016-07-12 16:05:47" = data$commits[1:3, ],
+            "2016-07-12 16:05:47-2016-08-31 18:21:48" = data$commits[4:6, ],
+            "2016-08-31 18:21:48-2017-02-20 22:25:41" = data$commits[0, ],
+            "2017-02-20 22:25:41-2017-05-23 12:32:40" = data$commits[0, ]
         ),
         mails = list(
             "2013-04-21 23:52:09-2016-07-12 16:05:47" = data$mails[rownames(data$mails) %in% 14:17, ],
@@ -967,7 +968,7 @@ test_that("Split a data object activity-based (activity.type = 'issues').", {
     expected.data = list(
         commits = list(
             "2013-04-21 23:52:09-2016-07-27 22:25:25" = data$commits,
-            "2016-07-27 22:25:25-2017-05-23 12:32:40" = data.frame()
+            "2016-07-27 22:25:25-2017-05-23 12:32:40" = data$commits[0, ]
         ),
         mails = list(
             "2013-04-21 23:52:09-2016-07-27 22:25:25" = data$mails[rownames(data$mails) %in% 14:17, ],