Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Change commit filtering and network building regarding the untracked files and base artifact #149

Merged
merged 31 commits into from
Jan 15, 2019
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
64a9486
Remove get.commits.raw function from util-data.R
Nov 29, 2018
894c9a5
Move artifact kind filtering functionality into the get.commits method
Dec 2, 2018
e74e15d
Adjust read.commits to return a valid data.frame instead of an empty one
Dec 4, 2018
11428d9
Restructure get.commits and get.commits.filtered(.empty) methods
Dec 6, 2018
c26e582
Delete set.commits.raw and read.commits.raw methods.
Dec 6, 2018
51617bb
Adjust two testcases to work with the new get.commits method
Dec 6, 2018
67a4fbe
Adapt test cases to new changes and improve empty dataframe creation
Dec 7, 2018
c60c2f6
Change edge generation behaviour for base and untracked files artifact
Dec 8, 2018
fada26d
Adjust copyright headers of modified files
Dec 10, 2018
43f185d
Update changelog
Dec 10, 2018
5ea65b9
Add global constant 'UNTRACKED.FILE' and adjust documentation
Dec 15, 2018
ec8c6dd
Update default behavior of 'Conf' objects
clhunsen Dec 14, 2018
0d7c222
Fix nodes for networks without edges
bockthom Dec 16, 2018
6580427
Improve edge creation concerning untracked files and the base artifact
Dec 16, 2018
dde0dd7
Leave artifact column empty if artifact == file or artifact == funtion
Dec 17, 2018
d11d0fb
Add 'UNTRACKED.FILE constant' back into the constant 'BASE.ARTIFACTS'
Dec 17, 2018
32a7162
Alter inline comments with wrong information
Dec 17, 2018
466d8eb
Change names of network and project configuration options
Dec 18, 2018
7e27a18
Further improve construction of edgeless networks
clhunsen Dec 17, 2018
dc8873e
Update changelog.
Dec 18, 2018
137d833
Fix setting authors in co-change-based author networks
clhunsen Dec 18, 2018
e709786
Update README
Dec 19, 2018
a5802b0
Update documentation and showcase.R
Dec 20, 2018
67dcf31
Rename variable 'list' to 'author.groups' and adjust documentation
Dec 20, 2018
5f0f529
Add additional utility functions for easier empty dataframe creation
Dec 20, 2018
6043e5c
Change null checking behaviour of two methods
Dec 20, 2018
418d1dc
Update README
Jan 7, 2019
523daef
Move empty dataframe creation utility functions into util-read.R
Jan 7, 2019
f8281c7
Adjust comments for the column names of commonly used dataframes
Jan 9, 2019
01217a8
Update changelog
Jan 9, 2019
ae58902
Adjust copyright headers
Jan 14, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 28 additions & 28 deletions tests/test-split.R
Original file line number Diff line number Diff line change
Expand Up @@ -93,9 +93,9 @@ test_that("Split a data object time-based (split.basis == 'commits').", {
## check data for all ranges
expected.data = list(
commits = list(
"2016-07-12 15:58:59-2016-07-12 16:01:59" = data$commits[1:4, ],
"2016-07-12 16:01:59-2016-07-12 16:04:59" = data.frame(),
"2016-07-12 16:04:59-2016-07-12 16:06:33" = data$commits[5:9, ]
"2016-07-12 15:58:59-2016-07-12 16:01:59" = data$commits[1:2, ],
"2016-07-12 16:01:59-2016-07-12 16:04:59" = data$commits[0, ],
"2016-07-12 16:04:59-2016-07-12 16:06:33" = data$commits[3:6, ]
),
mails = list(
"2016-07-12 15:58:59-2016-07-12 16:01:59" = data.frame(),
Expand Down Expand Up @@ -168,10 +168,10 @@ test_that("Split a data object time-based (split.basis == 'mails').", {
## check data for all ranges
expected.data = list(
commits = list(
"2004-10-09 18:38:13-2007-10-10 12:38:13" = data.frame(),
"2007-10-10 12:38:13-2010-10-10 06:38:13" = data.frame(),
"2010-10-10 06:38:13-2013-10-10 00:38:13" = data.frame(),
"2013-10-10 00:38:13-2016-07-12 16:05:38" = data$commits[1:4, ]
"2004-10-09 18:38:13-2007-10-10 12:38:13" = data$commits[0, ],
"2007-10-10 12:38:13-2010-10-10 06:38:13" = data$commits[0, ],
"2010-10-10 06:38:13-2013-10-10 00:38:13" = data$commits[0, ],
"2013-10-10 00:38:13-2016-07-12 16:05:38" = data$commits[1:2, ]
),
mails = list(
"2004-10-09 18:38:13-2007-10-10 12:38:13" = data$mails[rownames(data$mails) %in% 1:2, ],
Expand Down Expand Up @@ -247,9 +247,9 @@ test_that("Split a data object time-based (split.basis == 'issues').", {
## check data for all ranges
expected.data = list(
commits = list(
"2013-04-21 23:52:09-2015-04-22 11:52:09" = data.frame(),
"2013-04-21 23:52:09-2015-04-22 11:52:09" = data$commits[0, ],
"2015-04-22 11:52:09-2017-04-21 23:52:09" = data$commits,
"2017-04-21 23:52:09-2017-05-23 12:32:40" = data.frame()
"2017-04-21 23:52:09-2017-05-23 12:32:40" = data$commits[0, ]
),
mails = list(
"2013-04-21 23:52:09-2015-04-22 11:52:09" = data.frame(),
Expand Down Expand Up @@ -496,9 +496,9 @@ test_that("Split a data object activity-based (activity.type = 'commits').", {
## check data for all ranges
expected.data = list(
commits = list(
"2016-07-12 15:58:59-2016-07-12 16:05:41" = data$commits[1:4, ],
"2016-07-12 16:05:41-2016-07-12 16:06:32" = data$commits[5:7, ],
"2016-07-12 16:06:32-2016-07-12 16:06:33" = data$commits[8:9, ]
"2016-07-12 15:58:59-2016-07-12 16:05:41" = data$commits[1:2, ],
"2016-07-12 16:05:41-2016-07-12 16:06:32" = data$commits[3:4, ],
"2016-07-12 16:06:32-2016-07-12 16:06:33" = data$commits[5:6, ]
),
mails = list(
"2016-07-12 15:58:59-2016-07-12 16:05:41" = data$mails[rownames(data$mails) %in% 16:17, ],
Expand Down Expand Up @@ -591,8 +591,8 @@ test_that("Split a data object activity-based (activity.type = 'commits').", {
## check data for all ranges
expected.data = list(
commits = list(
"2016-07-12 15:58:59-2016-07-12 16:06:10" = data$commits[1:6, ],
"2016-07-12 16:06:10-2016-07-12 16:06:33" = data$commits[7:9, ]
"2016-07-12 15:58:59-2016-07-12 16:06:10" = data$commits[1:3, ],
"2016-07-12 16:06:10-2016-07-12 16:06:33" = data$commits[4:6, ]
),
mails = list(
"2016-07-12 15:58:59-2016-07-12 16:06:10" = data$mails[rownames(data$mails) %in% 16:17, ],
Expand Down Expand Up @@ -675,12 +675,12 @@ test_that("Split a data object activity-based (activity.type = 'mails').", {
## check data for all ranges
expected.data = list(
commits = list(
"2004-10-09 18:38:13-2010-07-12 11:05:35" = data.frame(),
"2010-07-12 11:05:35-2010-07-12 12:05:41" = data.frame(),
"2010-07-12 12:05:41-2010-07-12 12:05:44" = data.frame(),
"2010-07-12 12:05:44-2016-07-12 15:58:40" = data.frame(),
"2016-07-12 15:58:40-2016-07-12 16:05:37" = data$commits[1:4, ],
"2016-07-12 16:05:37-2016-07-12 16:05:38" = data.frame()
"2004-10-09 18:38:13-2010-07-12 11:05:35" = data$commits[0, ],
"2010-07-12 11:05:35-2010-07-12 12:05:41" = data$commits[0, ],
"2010-07-12 12:05:41-2010-07-12 12:05:44" = data$commits[0, ],
"2010-07-12 12:05:44-2016-07-12 15:58:40" = data$commits[0, ],
"2016-07-12 15:58:40-2016-07-12 16:05:37" = data$commits[1:2, ],
"2016-07-12 16:05:37-2016-07-12 16:05:38" = data$commits[0, ]
),
mails = list(
"2004-10-09 18:38:13-2010-07-12 11:05:35" = data$mails[rownames(data$mails) %in% 1:3, ],
Expand Down Expand Up @@ -742,7 +742,7 @@ test_that("Split a data object activity-based (activity.type = 'mails').", {
## check data for all ranges
expected.data = list(
commits = list(
"2004-10-09 18:38:13-2016-07-12 16:05:38" = data$commits[1:4, ]
"2004-10-09 18:38:13-2016-07-12 16:05:38" = data$commits[1:2, ]
),
mails = list(
"2004-10-09 18:38:13-2016-07-12 16:05:38" = data$mails
Expand Down Expand Up @@ -785,8 +785,8 @@ test_that("Split a data object activity-based (activity.type = 'mails').", {
## check data for all ranges
expected.data = list(
commits = list(
"2004-10-09 18:38:13-2010-07-12 12:05:43" = data.frame(),
"2010-07-12 12:05:43-2016-07-12 16:05:38" = data$commits[1:4, ]
"2004-10-09 18:38:13-2010-07-12 12:05:43" = data$commits[0, ],
"2010-07-12 12:05:43-2016-07-12 16:05:38" = data$commits[1:2, ]
),
mails = list(
"2004-10-09 18:38:13-2010-07-12 12:05:43" = data$mails[rownames(data$mails) %in% 1:8, ],
Expand Down Expand Up @@ -866,10 +866,10 @@ test_that("Split a data object activity-based (activity.type = 'issues').", {
## check data for all ranges
expected.data = list(
commits = list(
"2013-04-21 23:52:09-2016-07-12 16:05:47" = data$commits[1:6, ],
"2016-07-12 16:05:47-2016-08-31 18:21:48" = data$commits[7:9, ],
"2016-08-31 18:21:48-2017-02-20 22:25:41" = data.frame(),
"2017-02-20 22:25:41-2017-05-23 12:32:40" = data.frame()
"2013-04-21 23:52:09-2016-07-12 16:05:47" = data$commits[1:3, ],
"2016-07-12 16:05:47-2016-08-31 18:21:48" = data$commits[4:6, ],
"2016-08-31 18:21:48-2017-02-20 22:25:41" = data$commits[0, ],
"2017-02-20 22:25:41-2017-05-23 12:32:40" = data$commits[0, ]
),
mails = list(
"2013-04-21 23:52:09-2016-07-12 16:05:47" = data$mails[rownames(data$mails) %in% 14:17, ],
Expand Down Expand Up @@ -967,7 +967,7 @@ test_that("Split a data object activity-based (activity.type = 'issues').", {
expected.data = list(
commits = list(
"2013-04-21 23:52:09-2016-07-27 22:25:25" = data$commits,
"2016-07-27 22:25:25-2017-05-23 12:32:40" = data.frame()
"2016-07-27 22:25:25-2017-05-23 12:32:40" = data$commits[0, ]
),
mails = list(
"2013-04-21 23:52:09-2016-07-27 22:25:25" = data$mails[rownames(data$mails) %in% 14:17, ],
Expand Down
52 changes: 52 additions & 0 deletions util-misc.R
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,58 @@ match.arg.or.default = function(arg, choices, default = NULL, several.ok = FALSE
}
}

## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / /
## Empty dataframe creation-------------------------------------------------

#' Create an empty data.frame (i.e., one with zero rows) with the specified columns. Unless all columns should
#' have the default datatype \code{logical}, the second parameter \code{data.types} should specify the datatypes.
#'
#' @param columns a character vector containing all the column names
#' @param data.types a character vector of the same length as \code{columns}; the datatypes can be \code{integer},
#'                   \code{numeric}, \code{POSIXct}, \code{character}, \code{factor} or \code{logical}
#'                   (the names are matched case-insensitively) [default: NULL]
#'
#' @return the newly created empty data.frame
get.empty.dataframe = function(columns, data.types = NULL) {
    if (!is.null(data.types) && length(data.types) != length(columns)) {
        stop("If specified, the length of the two given vectors columns and data.types must be the same.")
    }

    ## a zero-row matrix without data yields columns of type 'logical', which is the default datatype
    empty.df = data.frame(matrix(nrow = 0, ncol = length(columns)))
    colnames(empty.df) = columns

    ## convert each column to the requested datatype;
    ## 'seq_along' performs no iteration at all if 'data.types' is NULL or empty
    for (i in seq_along(data.types)) {
        column = empty.df[[i]]
        empty.df[[i]] = switch(tolower(data.types[[i]]),
            "posixct" = as.POSIXct(column),
            "integer" = as.integer(column),
            "numeric" = as.numeric(column),
            "logical" = as.logical(column),
            "character" = as.character(column),
            "factor" = as.factor(column),
            ## unnamed last argument: fallback for all unsupported datatypes
            stop(paste("Unknown datatype specified:", data.types[[i]]))
        )
    }

    return(empty.df)
}

## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / /
## Stacktrace --------------------------------------------------------------
Expand Down
6 changes: 4 additions & 2 deletions util-read.R
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,10 @@ read.commits = function(data.path, artifact) {
logging::logwarn("Datapath: %s", data.path)

# return a dataframe with the correct columns but zero rows
commit.data = data.frame(matrix(nrow = 0, ncol = length(commit.data.columns)))
colnames(commit.data) = commit.data.columns
commit.data = get.empty.dataframe(commit.data.columns, data.types =
jkronaw marked this conversation as resolved.
Show resolved Hide resolved
c("character", "POSIXct", "character", "character", "POSIXct",
"character", "character", "character", "numeric", "numeric", "numeric",
"numeric", "character", "character", "character", "numeric"))
jkronaw marked this conversation as resolved.
Show resolved Hide resolved
return(commit.data)
}
colnames(commit.data) = commit.data.columns
jkronaw marked this conversation as resolved.
Show resolved Hide resolved
Expand Down