Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix a bug in 'extract.timestamps' when dealing with an empty data source #270

Merged
merged 4 commits into from
Oct 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
### Fixed

- Fix the creation of edgelists for issue-based artifact-networks by correctly iterating over the issue data (PR #264, 321d85043112971c04998249c14a0677a32c9004)
- Fix a bug in `extract.timestamps` that occurred when the first `data.source` contained empty data, causing the return value to be of type numeric instead of POSIXct (PR #270, 10696e4cf4ae92371917ed8ccaec2b0183da145c, 646c01a42ad8decfbc9040030e790e51cb65cffd)

## 4.4

Expand Down
61 changes: 61 additions & 0 deletions tests/test-data-cut.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
## Copyright 2018 by Thomas Bock <[email protected]>
## Copyright 2020 by Thomas Bock <[email protected]>
## Copyright 2018 by Jakob Kronawitter <[email protected]>
## Copyright 2024 by Maximilian Löffler <[email protected]>
## All Rights Reserved.


Expand Down Expand Up @@ -82,3 +83,63 @@ test_that("Cut commit and mail data to same date range.", {
expect_identical(mail.data, mail.data.expected, info = "Cut mail data.")

})

## Regression test: cutting several data sources to their common date range must
## still work (and yield correctly typed data) when the first requested data
## source is empty.
test_that("Cut data to same date range with one empty data source.", {

    ## configurations

    proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, ARTIFACT)

    ## In order to properly test whether the data types of timestamps originating from empty data are
    ## correct, ensure that the first provided data source contains empty data. This is important as R
    ## usually uses the first entry of a data frame to determine its data type. However, the data type
    ## of the project timestamps should be set properly regardless of the empty data.
    ## NOTE(review): 'issues.locked' presumably prevents the issue data from being (re-)read from disk,
    ## so that the 'NULL' issues set below stay in place -- confirm against 'ProjectConf'.
    proj.conf$update.value("issues.locked", TRUE)
    ## the (empty) issues deliberately come first
    data.sources = c("issues", "mails", "commits")

    ## construct objects

    x.data = ProjectData$new(proj.conf)
    ## make the issue data the empty data source
    x.data$set.issues(NULL)

    ## expected data after cutting

    commit.data.expected = data.frame(
        commit.id = sprintf("<commit-%s>", c(32712, 32713)),
        date = get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 16:00:45")),
        author.name = c("Björn", "Olaf"),
        author.email = c("[email protected]", "[email protected]"),
        committer.date = get.date.from.string(c("2016-07-12 15:58:59", "2016-07-20 10:00:44")),
        committer.name = c("Björn", "Björn"),
        committer.email = c("[email protected]", "[email protected]"),
        hash = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338"),
        changed.files = as.integer(c(1, 1)),
        added.lines = as.integer(c(1, 1)),
        deleted.lines = as.integer(c(1, 0)),
        diff.size = as.integer(c(2, 1)),
        file = c("test.c", "test.c"),
        artifact = c("A", "A"),
        artifact.type = c("Feature", "Feature"),
        artifact.diff.size = as.integer(c(1, 1))
    )

    mail.data.expected = data.frame(
        author.name = c("Thomas", "Olaf"),
        author.email = c("[email protected]", "[email protected]"),
        message.id = c("<[email protected]>", "<[email protected]>"),
        date = get.date.from.string(c("2016-07-12 16:04:40", "2016-07-12 16:05:37")),
        date.offset = as.integer(c(100, 200)),
        subject = c("Re: Fw: busybox 2 tab", "Re: Fw: busybox 10"),
        thread = sprintf("<thread-%s>", c("13#9", "13#9")),
        artifact.type = c("Mail", "Mail")
    )

    issue.data.expected = create.empty.issues.list()

    ## actual data after cutting

    ## Reset the row names so that they match the freshly constructed expected data frames.
    ## 'seq_len' is used instead of '1:nrow(...)' so that an unexpectedly empty result does not
    ## abort here with an unrelated row-name error but is reported by the expectations below.
    commit.data = x.data$get.data.cut.to.same.date(data.sources = data.sources)$get.commits.unfiltered()
    rownames(commit.data) = seq_len(nrow(commit.data))

    mail.data = x.data$get.data.cut.to.same.date(data.sources = data.sources)$get.mails()
    rownames(mail.data) = seq_len(nrow(mail.data))

    issue.data = x.data$get.data.cut.to.same.date(data.sources = data.sources)$get.issues()

    ## compare

    expect_identical(commit.data, commit.data.expected, info = "Cut Raw commit data.")
    expect_identical(mail.data, mail.data.expected, info = "Cut mail data.")
    expect_identical(issue.data, issue.data.expected, info = "Cut issue data (empty).")

})
11 changes: 5 additions & 6 deletions util-data.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
## Copyright 2021 by Johannes Hostert <[email protected]>
## Copyright 2021 by Mirabdulla Yusifli <[email protected]>
## Copyright 2022 by Jonathan Baumann <[email protected]>
## Copyright 2022-2023 by Maximilian Löffler <[email protected]>
## Copyright 2022-2024 by Maximilian Löffler <[email protected]>
## Copyright 2024 by Leo Sendelbach <[email protected]>
## All Rights Reserved.

Expand Down Expand Up @@ -113,7 +113,7 @@ DATASOURCE.TO.ARTIFACT.COLUMN = list(


## the maximum time difference between subsequent mails of a patchstack
PATCHSTACK.MAIL.DECAY.THRESHOLD = "30 seconds"
PATCHSTACK.MAIL.DECAY.THRESHOLD = lubridate::as.duration("30 seconds")
bockthom marked this conversation as resolved.
Show resolved Hide resolved

## configuration parameters that do not reset the environment when changed
CONF.PARAMETERS.NO.RESET.ENVIRONMENT = c("commit.messages",
Expand Down Expand Up @@ -283,8 +283,7 @@ ProjectData = R6::R6Class("ProjectData",
## of 'PATCHSTACK.MAIL.DECAY.THRESHOLD'
while (i < nrow(thread) && running) {
if (thread[1, "author.name"] == thread[i + 1, "author.name"] &&
thread[i + 1, "date"] - thread[i, "date"] <=
lubridate::as.duration(PATCHSTACK.MAIL.DECAY.THRESHOLD)) {
thread[i + 1, "date"] - thread[i, "date"] <= PATCHSTACK.MAIL.DECAY.THRESHOLD) {
i = i + 1
} else {
running = FALSE
Expand Down Expand Up @@ -797,8 +796,8 @@ ProjectData = R6::R6Class("ProjectData",
}
## NAs otherwise
else {
source.date.min = NA
source.date.max = NA
source.date.min = as.POSIXct(NA)
source.date.max = as.POSIXct(NA)
}

## remove old line if existing
Expand Down