Skip to content

Commit

Permalink
Merge pull request #270 from maxloeffler/dev
Browse files Browse the repository at this point in the history
Fix a bug in 'extract.timestamps' when dealing with an empty data source

Reviewed-by: Thomas Bock <[email protected]>
  • Loading branch information
bockthom authored Oct 23, 2024
2 parents 34137e8 + 1d1fe7f commit 751b72f
Show file tree
Hide file tree
Showing 3 changed files with 67 additions and 6 deletions.
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
### Fixed

- Fix the creation of edgelists for issue-based artifact-networks by correctly iterating over the issue data (PR #264, 321d85043112971c04998249c14a0677a32c9004)
- Fix a bug in `extract.timestamps` that occurs when the first `data.source` contains empty data, causing the returned timestamps to be of type numeric instead of POSIXct (PR #270, 10696e4cf4ae92371917ed8ccaec2b0183da145c, 646c01a42ad8decfbc9040030e790e51cb65cffd)

## 4.4

Expand Down
61 changes: 61 additions & 0 deletions tests/test-data-cut.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
## Copyright 2018 by Thomas Bock <[email protected]>
## Copyright 2020 by Thomas Bock <[email protected]>
## Copyright 2018 by Jakob Kronawitter <[email protected]>
## Copyright 2024 by Maximilian Löffler <[email protected]>
## All Rights Reserved.


Expand Down Expand Up @@ -82,3 +83,63 @@ test_that("Cut commit and mail data to same date range.", {
expect_identical(mail.data, mail.data.expected, info = "Cut mail data.")

})

## Regression test: cutting several data sources to the same date range must still work when
## one of the data sources (here: the first one, 'issues') contains no data at all.
test_that("Cut data to same date range with one empty data source.", {

    ## configurations

    proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, ARTIFACT)

    ## in order to properly test whether the data types of timestamps originating from empty data are correct,
    ## ensure that the first provided data source contains empty data. This is important as R usually uses the
    ## first entry of a data frame to determine the data type of it. However, the data type of the project timestamps
    ## should be properly set regardless of the empty data.
    proj.conf$update.value("issues.locked", TRUE)
    data.sources = c("issues", "mails", "commits")

    ## construct objects

    x.data = ProjectData$new(proj.conf)
    ## explicitly set the issue data to NULL so that 'issues' is the empty data source
    x.data$set.issues(NULL)

    ## expected data

    ## NOTE(review): the e-mail and message-id literals below read "[email protected]", which looks like
    ## an obfuscation artifact rather than real test data -- confirm against the repository's test data.
    commit.data.expected = data.frame(commit.id = sprintf("<commit-%s>", c(32712, 32713)),
                                      date = get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 16:00:45")),
                                      author.name = c("Björn", "Olaf"),
                                      author.email = c("[email protected]", "[email protected]"),
                                      committer.date = get.date.from.string(c("2016-07-12 15:58:59", "2016-07-20 10:00:44")),
                                      committer.name = c("Björn", "Björn"),
                                      committer.email = c("[email protected]", "[email protected]"),
                                      hash = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338"),
                                      changed.files = as.integer(c(1, 1)),
                                      added.lines = as.integer(c(1, 1)),
                                      deleted.lines = as.integer(c(1, 0)),
                                      diff.size = as.integer(c(2, 1)),
                                      file = c("test.c", "test.c"),
                                      artifact = c("A", "A"),
                                      artifact.type = c("Feature", "Feature"),
                                      artifact.diff.size = as.integer(c(1, 1)))

    mail.data.expected = data.frame(author.name = c("Thomas", "Olaf"),
                                    author.email = c("[email protected]", "[email protected]"),
                                    message.id = c("<[email protected]>", "<[email protected]>"),
                                    date = get.date.from.string(c("2016-07-12 16:04:40", "2016-07-12 16:05:37")),
                                    date.offset = as.integer(c(100, 200)),
                                    subject = c("Re: Fw: busybox 2 tab", "Re: Fw: busybox 10"),
                                    thread = sprintf("<thread-%s>", c("13#9", "13#9")),
                                    artifact.type = c("Mail", "Mail"))

    issue.data.expected = create.empty.issues.list()

    ## actual data

    ## reset the row names so that they match the freshly constructed expected data frames;
    ## 'seq_len' is used instead of '1:nrow(...)' so that an unexpectedly empty result is reported
    ## by the expectations below and not by a failing row-name assignment ('1:0' yields 'c(1, 0)')
    commit.data = x.data$get.data.cut.to.same.date(data.sources = data.sources)$get.commits.unfiltered()
    rownames(commit.data) = seq_len(nrow(commit.data))

    mail.data = x.data$get.data.cut.to.same.date(data.sources = data.sources)$get.mails()
    rownames(mail.data) = seq_len(nrow(mail.data))

    issue.data = x.data$get.data.cut.to.same.date(data.sources = data.sources)$get.issues()

    ## expectations

    expect_identical(commit.data, commit.data.expected, info = "Cut Raw commit data.")
    expect_identical(mail.data, mail.data.expected, info = "Cut mail data.")
    expect_identical(issue.data, issue.data.expected, info = "Cut issue data (empty).")

})
11 changes: 5 additions & 6 deletions util-data.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
## Copyright 2021 by Johannes Hostert <[email protected]>
## Copyright 2021 by Mirabdulla Yusifli <[email protected]>
## Copyright 2022 by Jonathan Baumann <[email protected]>
## Copyright 2022-2023 by Maximilian Löffler <[email protected]>
## Copyright 2022-2024 by Maximilian Löffler <[email protected]>
## Copyright 2024 by Leo Sendelbach <[email protected]>
## All Rights Reserved.

Expand Down Expand Up @@ -113,7 +113,7 @@ DATASOURCE.TO.ARTIFACT.COLUMN = list(


## the maximum time difference between subsequent mails of a patchstack
PATCHSTACK.MAIL.DECAY.THRESHOLD = "30 seconds"
PATCHSTACK.MAIL.DECAY.THRESHOLD = lubridate::as.duration("30 seconds")

## configuration parameters that do not reset the environment when changed
CONF.PARAMETERS.NO.RESET.ENVIRONMENT = c("commit.messages",
Expand Down Expand Up @@ -283,8 +283,7 @@ ProjectData = R6::R6Class("ProjectData",
## of 'PATCHSTACK.MAIL.DECAY.THRESHOLD'
while (i < nrow(thread) && running) {
if (thread[1, "author.name"] == thread[i + 1, "author.name"] &&
thread[i + 1, "date"] - thread[i, "date"] <=
lubridate::as.duration(PATCHSTACK.MAIL.DECAY.THRESHOLD)) {
thread[i + 1, "date"] - thread[i, "date"] <= PATCHSTACK.MAIL.DECAY.THRESHOLD) {
i = i + 1
} else {
running = FALSE
Expand Down Expand Up @@ -797,8 +796,8 @@ ProjectData = R6::R6Class("ProjectData",
}
## NAs otherwise
else {
source.date.min = NA
source.date.max = NA
source.date.min = as.POSIXct(NA)
source.date.max = as.POSIXct(NA)
}

## remove old line if existing
Expand Down

0 comments on commit 751b72f

Please sign in to comment.