diff --git a/NEWS.md b/NEWS.md index 3756263b..d4e457bc 100644 --- a/NEWS.md +++ b/NEWS.md @@ -27,6 +27,7 @@ ### Fixed - Fix the creation of edgelists for issue-based artifact-networks by correctly iterating over the issue data (PR #264, 321d85043112971c04998249c14a0677a32c9004) +- Fix a bug in `extract.timestamps` that occurs when the first `data.source` contains empty data, which causes the return value to be of type numeric instead of POSIXct (PR #270, 10696e4cf4ae92371917ed8ccaec2b0183da145c, 646c01a42ad8decfbc9040030e790e51cb65cffd) ## 4.4 diff --git a/tests/test-data-cut.R b/tests/test-data-cut.R index d1f3ef2a..26b1bf08 100644 --- a/tests/test-data-cut.R +++ b/tests/test-data-cut.R @@ -18,6 +18,7 @@ ## Copyright 2018 by Thomas Bock ## Copyright 2020 by Thomas Bock ## Copyright 2018 by Jakob Kronawitter +## Copyright 2024 by Maximilian Löffler ## All Rights Reserved. @@ -82,3 +83,63 @@ test_that("Cut commit and mail data to same date range.", { expect_identical(mail.data, mail.data.expected, info = "Cut mail data.") }) + +test_that("Cut data to same date range with one empty data source.", { + + ## configurations + + proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, ARTIFACT) + + ## in order to properly test whether the data types of timestamps originating from empty data are correct, + ## ensure that the first provided data source contains empty data. This is important as R usually uses the + ## first entry of a data frame to determine its data type. However, the data type of the project timestamps + ## should be properly set regardless of the empty data. 
+ proj.conf$update.value("issues.locked", TRUE) + data.sources = c("issues", "mails", "commits") + + ## construct objects + + x.data = ProjectData$new(proj.conf) + x.data$set.issues(NULL) + + commit.data.expected = data.frame(commit.id = sprintf("<commit-%s>", c(32712, 32713)), + date = get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 16:00:45")), + author.name = c("Björn", "Olaf"), + author.email = c("bjoern@example.org", "olaf@example.org"), + committer.date = get.date.from.string(c("2016-07-12 15:58:59", "2016-07-20 10:00:44")), + committer.name = c("Björn", "Björn"), + committer.email = c("bjoern@example.org", "bjoern@example.org"), + hash = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338"), + changed.files = as.integer(c(1, 1)), + added.lines = as.integer(c(1, 1)), + deleted.lines = as.integer(c(1, 0)), + diff.size = as.integer(c(2, 1)), + file = c("test.c", "test.c"), + artifact = c("A", "A"), + artifact.type = c("Feature", "Feature"), + artifact.diff.size = as.integer(c(1, 1))) + + mail.data.expected = data.frame(author.name = c("Thomas", "Olaf"), + author.email = c("thomas@example.org", "olaf@example.org"), + message.id = c("<65a1sf31sagd684dfv31@mail.gmail.com>", "<9b06e8d20801220234h659c18a3g95c12ac38248c7e0@mail.gmail.com>"), + date = get.date.from.string(c("2016-07-12 16:04:40", "2016-07-12 16:05:37")), + date.offset = as.integer(c(100, 200)), + subject = c("Re: Fw: busybox 2 tab", "Re: Fw: busybox 10"), + thread = sprintf("<thread-%s>", c("13#9", "13#9")), + artifact.type = c("Mail", "Mail")) + + issue.data.expected = create.empty.issues.list() + + commit.data = x.data$get.data.cut.to.same.date(data.sources = data.sources)$get.commits.unfiltered() + rownames(commit.data) = 1:nrow(commit.data) + + mail.data = x.data$get.data.cut.to.same.date(data.sources = data.sources)$get.mails() + rownames(mail.data) = 1:nrow(mail.data) + + issue.data = x.data$get.data.cut.to.same.date(data.sources = data.sources)$get.issues() + + 
expect_identical(commit.data, commit.data.expected, info = "Cut Raw commit data.") + expect_identical(mail.data, mail.data.expected, info = "Cut mail data.") + expect_identical(issue.data, issue.data.expected, info = "Cut issue data (empty).") + +}) diff --git a/util-data.R b/util-data.R index 90c01ca4..f137a928 100644 --- a/util-data.R +++ b/util-data.R @@ -25,7 +25,7 @@ ## Copyright 2021 by Johannes Hostert ## Copyright 2021 by Mirabdulla Yusifli ## Copyright 2022 by Jonathan Baumann -## Copyright 2022-2023 by Maximilian Löffler +## Copyright 2022-2024 by Maximilian Löffler ## Copyright 2024 by Leo Sendelbach ## All Rights Reserved. @@ -113,7 +113,7 @@ DATASOURCE.TO.ARTIFACT.COLUMN = list( ## the maximum time difference between subsequent mails of a patchstack -PATCHSTACK.MAIL.DECAY.THRESHOLD = "30 seconds" +PATCHSTACK.MAIL.DECAY.THRESHOLD = lubridate::as.duration("30 seconds") ## configuration parameters that do not reset the environment when changed CONF.PARAMETERS.NO.RESET.ENVIRONMENT = c("commit.messages", @@ -283,8 +283,7 @@ ProjectData = R6::R6Class("ProjectData", ## of 'PATCHSTACK.MAIL.DECAY.THRESHOLD' while (i < nrow(thread) && running) { if (thread[1, "author.name"] == thread[i + 1, "author.name"] && - thread[i + 1, "date"] - thread[i, "date"] <= - lubridate::as.duration(PATCHSTACK.MAIL.DECAY.THRESHOLD)) { + thread[i + 1, "date"] - thread[i, "date"] <= PATCHSTACK.MAIL.DECAY.THRESHOLD) { i = i + 1 } else { running = FALSE @@ -797,8 +796,8 @@ ProjectData = R6::R6Class("ProjectData", } ## NAs otherwise else { - source.date.min = NA - source.date.max = NA + source.date.min = as.POSIXct(NA) + source.date.max = as.POSIXct(NA) } ## remove old line if existing