Skip to content

Commit

Permalink
Moved assign_types to preprocessing package
Browse files Browse the repository at this point in the history
  • Loading branch information
frankcorneliusmartin committed Jan 25, 2024
1 parent e0dc187 commit 0153deb
Show file tree
Hide file tree
Showing 7 changed files with 66 additions and 17 deletions.
2 changes: 1 addition & 1 deletion vtg.preprocessing/DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,4 @@ License: `use_mit_license()`, `use_gpl3_license()` or friends to pick a
license
Encoding: UTF-8
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.2.3
RoxygenNote: 7.3.1
6 changes: 1 addition & 5 deletions vtg.preprocessing/NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,5 +1 @@
# Generated by roxygen2: do not edit by hand

export(extend_data)
export(subset_data)
export(factorize)
exportPattern("^[[:alpha:]]+")
Original file line number Diff line number Diff line change
@@ -1,27 +1,35 @@
#' Assign numeric or factor type to columns specified.
#' Set the types of the columns of a dataframe
#'
#' @param data dataframe
#' @param types containing the types to set to the columns
#' 'types': {'column_name': {'type': 'numeric' | 'factor'},
#' 'column_name': {
#' 'type': 'factor',
#' 'levels': ['a', 'b', 'c'],
#' 'ref': 'a'
#' }
#' }
#'
#' @return formatted dataframe
#' @return dataframe with the specified types
#'
#' @export
#'
assign_types <- function(data, types) {

column_names <- names(types)

# TODO validate types, if fails return error
# types should be a list with elements $type and (if $type == "factor")
# $levels and (if $type == "factor" and $ref != NULL) $ref

# for each column specified in types set the appropiate type
# for each specified column in types set the appropriate type
for (i in seq_len(length(types))) {

column_name <- column_names[i]
specs <- types[[i]]
type_ <- specs$type

if (type_ == "numeric") {

data[[column_name]] <- as.numeric(data[[column_name]])

} else if (type_ == "factor") {
# TODO check if this is what we want: we basically filter the data here!

Expand All @@ -37,10 +45,9 @@ assign_types <- function(data, types) {
}

} else {
# TODO error message, wrong type
vtg::log$error("Wrong type specified: ", type_, ". Continuing with next column.")
}
}


return(data)
}
7 changes: 5 additions & 2 deletions vtg.preprocessing/README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
## TODO
- [ ] `time2` is missing in the extend data. I commented it out for now.
- [ ] The `age` column is read as a string. This is because there was an `other` value in the column.
- [ ] ...


`devtools::document("./")`
`install.packages("./vtg.preprocessing/", repos=NULL, source=T)`

26 changes: 26 additions & 0 deletions vtg.preprocessing/man/assign_types.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

17 changes: 17 additions & 0 deletions vtg.preprocessing/man/factorize.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion vtg.summary/src/R/common_checks_rpc.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ common_checks_rpc <- function(data, columns, types) {

# Assign types
if (!is.null(types)) {
data <- vtg.summary::assign_types(data, types)
vtg.preprocessing::assign_types(data, types)
}

# check if all columns are either numeric or factors. If not, return error
Expand Down

0 comments on commit 0153deb

Please sign in to comment.