diff --git a/R/EvaluationSummary.R b/R/EvaluationSummary.R index 34924fd7..e35b5f1b 100644 --- a/R/EvaluationSummary.R +++ b/R/EvaluationSummary.R @@ -451,11 +451,14 @@ averagePrecision <- function(prediction){ } - -calibrationInLarge <- function(prediction){ +#' Calculate the calibration in large +#' @param prediction A prediction dataframe +#' @return data.frame with meanPredictionRisk, observedRisk, and N +#' @keywords internal +calibrationInLarge <- function(prediction) { result <- data.frame(meanPredictionRisk = mean(prediction$value), - observedRisk = sum(prediction$outcomeCount)/nrow(prediction), + observedRisk = sum(prediction$outcomeCount) / nrow(prediction), N = nrow(prediction) ) diff --git a/R/Glm.R b/R/Glm.R index 2f729513..4cbb779c 100644 --- a/R/Glm.R +++ b/R/Glm.R @@ -25,7 +25,7 @@ #' @param data An object of type \code{plpData} - the patient level prediction #' data extracted from the CDM. #' @param cohort The population dataframe created using -#' /code{createStudyPopulation} who will have their risks predicted or a cohort +#' \code{createStudyPopulation} who will have their risks predicted or a cohort #' without the outcome known #' @export #' @return A dataframe containing the prediction for each person in the @@ -75,7 +75,7 @@ predictGlm <- function(plpModel, data, cohort) { #' PatientLevelPrediction package. #' @param coefficients A dataframe containing two columns, coefficients and #' covariateId, both of type numeric. The covariateId column must contain -#' valid covariateIds that match those used in the /code{FeatureExtraction} +#' valid covariateIds that match those used in the \code{FeatureExtraction} #' package. #' @param intercept A numeric value representing the intercept of the model. #' @param finalMapping A string representing the final mapping from the diff --git a/R/Plotting.R b/R/Plotting.R index e3cadcda..a4f390cb 100644 --- a/R/Plotting.R +++ b/R/Plotting.R @@ -1259,14 +1259,14 @@ plotNetBenefit <- function(plpResult, } plots[[i]] <- ggplot2::ggplot(data = nbData, ggplot2::aes(x = .data$threshold)) + - ggplot2::geom_line(ggplot2::aes(y = treatAll, color = "Treat All", linetype = "Treat All")) + - ggplot2::geom_line(ggplot2::aes(y = treatNone, color = "Treat None", linetype = "Treat None")) + - ggplot2::geom_line(ggplot2::aes(y = netBenefit, color = "Net Benefit", linetype = "Net Benefit")) + + ggplot2::geom_line(ggplot2::aes(y = .data$treatAll, color = "Treat All", linetype = "Treat All")) + + ggplot2::geom_line(ggplot2::aes(y = .data$treatNone, color = "Treat None", linetype = "Treat None")) + + ggplot2::geom_line(ggplot2::aes(y = .data$netBenefit, color = "Net Benefit", linetype = "Net Benefit")) + ggplot2::scale_color_manual( name = "Strategies", values = c( - "Net Benefit" = "blue", - "Treat All" = "red", + "Model" = "blue", + "Treat all" = "red", "Treat None" = "brown" ) ) + @@ -1290,7 +1290,7 @@ plotNetBenefit <- function(plpResult, if (!is.null(saveLocation)) { if (!dir.exists(saveLocation)) { - dir.create(saveLocation, recursive = T) + dir.create(saveLocation, recursive = TRUE) } ggplot2::ggsave(file.path(saveLocation, fileName), plot, width = 5, height = 4.5, dpi = 400) } diff --git a/R/Recalibration.R b/R/Recalibration.R index 2053f7ed..f31ec322 100644 --- a/R/Recalibration.R +++ b/R/Recalibration.R @@ -20,264 +20,272 @@ #' recalibratePlpRefit #' #' @description -#' Train various models using a default parameter gird search or user specified parameters +#' Recalibrating a model by refitting it #' -#' @details -#' The user can define the machine learning model to train (regularised logistic regression, random forest, -#' gradient boosting machine, neural network and ) -#' #' @param plpModel The trained plpModel (runPlp$model) #' @param newPopulation The population created using createStudyPopulation() who will have their risks predicted #' @param newData An object of type \code{plpData} - the patient level prediction #' data extracted from the CDM. +#' @param returnModel Logical: return the refitted model #' @return -#' An object of class \code{runPlp} that is recalibrated on the new data +#' An prediction dataframe with the predictions of the recalibrated model added #' #' @export recalibratePlpRefit <- function( - plpModel, - newPopulation, - newData -){ - if (is.null(newPopulation)){ - stop("NULL population") - } - if (!inherits(x = newData, what = "plpData")){ - stop("Incorrect plpData class") - } - if (!inherits(x = plpModel, what = "plpModel")){ - stop("plpModel is not of class plpModel") - } - - #get selected covariates - includeCovariateIds <- plpModel$covariateImportance %>% - dplyr::filter(.data$covariateValue != 0) %>% - dplyr::select("covariateId") %>% + plpModel, + newPopulation, + newData, + returnModel = FALSE) { + checkNotNull(plpModel) + checkNotNull(newPopulation) + checkNotNull(newData) + checkIsClass(plpModel, "plpModel") + checkIsClass(newData, "plpData") + checkBoolean(returnModel) + + # get selected covariates + includeCovariateIds <- plpModel$covariateImportance %>% + dplyr::filter(.data$covariateValue != 0) %>% + dplyr::select("covariateId") %>% dplyr::pull() - + # check which covariates are included in new data containedIds <- newData$covariateData$covariateRef %>% dplyr::collect() noShrinkage <- intersect(includeCovariateIds, containedIds$covariateId) - + # add intercept noShrinkage <- append(noShrinkage, 0, 0) - + setLassoRefit <- setLassoLogisticRegression( includeCovariateIds = includeCovariateIds, - noShrinkage = noShrinkage, + noShrinkage = noShrinkage, maxIterations = 10000 # increasing this due to test code often not converging ) - - newData$labels <- newPopulation - + + newData$labels <- newPopulation + newData$folds <- data.frame( - rowId = newData$labels$rowId, - index = sample(2, length(newData$labels$rowId), replace = T) - ) - + rowId = newData$labels$rowId, + index = sample(2, length(newData$labels$rowId), replace = TRUE) + ) + # add dummy settings to fit model - attr(newData, "metaData")$outcomeId <- attr(newPopulation, 'metaData')$outcomeId - attr(newData, "metaData")$targetId <- attr(newPopulation, 'metaData')$targetId - attr(newData, "metaData")$restrictPlpDataSettings <- attr(newPopulation, 'metaData')$restrictPlpDataSettings + attr(newData, "metaData")$outcomeId <- attr(newPopulation, "metaData")$outcomeId + attr(newData, "metaData")$targetId <- attr(newPopulation, "metaData")$targetId + attr(newData, "metaData")$restrictPlpDataSettings <- attr(newPopulation, "metaData")$restrictPlpDataSettings attr(newData, "metaData")$covariateSettings <- newData$metaData$covariateSettings - attr(newData, "metaData")$populationSettings <- attr(newPopulation, 'metaData')$populationSettings + attr(newData, "metaData")$populationSettings <- attr(newPopulation, "metaData")$populationSettings attr(newData$covariateData, "metaData")$featureEngineeringSettings <- PatientLevelPrediction::createFeatureEngineeringSettings() attr(newData$covariateData, "metaData")$preprocessSettings <- PatientLevelPrediction::createPreprocessSettings() attr(newData, "metaData")$splitSettings <- PatientLevelPrediction::createDefaultSplitSetting() attr(newData, "metaData")$sampleSettings <- PatientLevelPrediction::createSampleSettings() - - newModel <- tryCatch({ - fitPlp( - trainData = newData, - modelSettings = setLassoRefit, - analysisId = 'recalibrationRefit', - analysisPath = NULL - ) - }, - error = function(e){ParallelLogger::logInfo(e); return(NULL)} + + newModel <- tryCatch( + { + fitPlp( + trainData = newData, + modelSettings = setLassoRefit, + analysisId = "recalibrationRefit", + analysisPath = NULL + ) + }, + error = function(e) { + ParallelLogger::logInfo(e) + return(NULL) + } ) - if(is.null(newModel)){ - ParallelLogger::logInfo('Recalibration fit failed') + if (is.null(newModel)) { + ParallelLogger::logInfo("Recalibration fit failed") return(NULL) } - - newModel$prediction$evaluationType <- 'recalibrationRefit' + + newModel$prediction$evaluationType <- "recalibrationRefit" oldPred <- predictPlp( - plpModel = plpModel, - plpData = newData, - population = newPopulation, + plpModel = plpModel, + plpData = newData, + population = newPopulation, timepoint = 0 - ) - - oldPred$evaluationType <- 'validation' - + ) + + oldPred$evaluationType <- "validation" + prediction <- rbind( - oldPred, + oldPred, newModel$prediction[, colnames(oldPred)] - ) + ) - if(!is.null(newModel$covariateImportance)){ - adjust <- newModel$covariateImportance %>% - dplyr::filter(.data$covariateValue != 0) %>% + if (!is.null(newModel$covariateImportance)) { + adjust <- newModel$covariateImportance %>% + dplyr::filter(.data$covariateValue != 0) %>% dplyr::select( - "covariateId", + "covariateId", "covariateValue" ) - } else{ + } else { adjust <- c() } - - newIntercept <- newModel$model$coefficients[names(newModel$model$coefficients) == '(Intercept)'] - + + newIntercept <- newModel$model$coefficients[names(newModel$model$coefficients) == "(Intercept)"] + attr(prediction, "metaData")$recalibratePlpRefit <- list(adjust = adjust, newIntercept = newIntercept) - return(prediction) + if (returnModel) { + return(list(prediction = prediction, model = newModel)) + } else { + return(prediction) + } } #' recalibratePlp #' #' @description -#' Train various models using a default parameter gird search or user specified parameters -#' -#' @details -#' The user can define the machine learning model to train (regularised logistic regression, random forest, -#' gradient boosting machine, neural network and ) +#' Recalibrating a model using the recalibrationInTheLarge or weakRecalibration methods #' +#' @details +#' 'recalibrationInTheLarge' calculates a single correction factor for the +#' average predicted risks to match the average observed risks. +#' 'weakRecalibration' fits a glm model to the logit of the predicted risks, +#' also known as Platt scaling/logistic recalibration. +#' #' @param prediction A prediction dataframe #' @param analysisId The model analysisId #' @param typeColumn The column name where the strata types are specified #' @param method Method used to recalibrate ('recalibrationInTheLarge' or 'weakRecalibration' ) #' @return -#' An object of class \code{runPlp} that is recalibrated on the new data +#' A prediction dataframe with the recalibrated predictions added #' #' @export -recalibratePlp <- function(prediction, analysisId, typeColumn = 'evaluationType', - method = c('recalibrationInTheLarge', 'weakRecalibration')){ +recalibratePlp <- function(prediction, analysisId, typeColumn = "evaluationType", + method = c("recalibrationInTheLarge", "weakRecalibration")) { # check input: - if (!inherits(x = prediction, what = 'data.frame')){ - stop("Incorrect prediction") - } - - if(!method %in% c('recalibrationInTheLarge', 'weakRecalibration')){ + if (!inherits(x = prediction, what = "data.frame")) { + stop("Incorrect prediction") + } + + if (!method %in% c("recalibrationInTheLarge", "weakRecalibration")) { stop("Unknown recalibration method type. must be of type: recalibrationInTheLarge, weakRecalibration") } - + prediction <- do.call(method, list(prediction = prediction, columnType = typeColumn)) - + return(prediction) - } - -recalibrationInTheLarge <- function(prediction, columnType = 'evaluationType'){ - - if(attr(prediction, "metaData")$modelType == 'binary'){ +#' recalibrationInTheLarge +#' +#' @description +#' Recalibrate a model using the recalibrationInTheLarge method which calculates a single correction factor +#' for the average predicted risks to match the average observed risks +#' +#' @param prediction A prediction dataframe +#' @param columnType The column name where the strata types are specified +#' @return +#' An prediction dataframe with the recalibrated predictions added +#' @keywords internal +recalibrationInTheLarge <- function(prediction, columnType = "evaluationType") { + if (attr(prediction, "metaData")$modelType == "binary") { misCal <- calibrationInLarge(prediction) - obsOdds <- misCal$observedRisk/ (1-misCal$observedRisk) - predOdds <- misCal$meanPredictionRisk/ (1 - misCal$meanPredictionRisk) + obsOdds <- misCal$observedRisk / (1 - misCal$observedRisk) + predOdds <- misCal$meanPredictionRisk / (1 - misCal$meanPredictionRisk) correctionFactor <- log(obsOdds / predOdds) - + recalibrated <- prediction - recalibrated$value = logFunct(inverseLog(recalibrated$value) + correctionFactor) - - recalibrated[,columnType] <- 'recalibrationInTheLarge' + recalibrated$value <- stats::plogis(stats::qlogis(recalibrated$value) + correctionFactor) + + recalibrated[, columnType] <- "recalibrationInTheLarge" prediction <- rbind(prediction, recalibrated) - attr(prediction, 'metaData')$recalibrationInTheLarge = list(correctionFactor = correctionFactor) - + attr(prediction, "metaData")$recalibrationInTheLarge <- + list(correctionFactor = correctionFactor) + return(prediction) } - - if(attr(prediction, "metaData")$modelType == 'survival'){ - ParallelLogger::logError('Survival recal in the large not currently available') + if (attr(prediction, "metaData")$modelType == "survival") { + ParallelLogger::logError("Survival recal in the large not currently available") } - - } - -weakRecalibration <- function(prediction, columnType = 'evaluationType'){ - +#' weakRecalibration +#' +#' @description +#' Recalibrate a model using the weakRecalibration method which fits a glm model +#' to the logit of the predicted risks. +#' Alsi known as Platt scaling/logistic recalibration +#' @param prediction A prediction dataframe +#' @param columnType The column name where the strata types are specified +#' @return +#' An prediction dataframe with the recalibrated predictions added +#' @keywords internal +weakRecalibration <- function(prediction, columnType = "evaluationType") { # if binary: - if(attr(prediction, "metaData")$modelType == 'binary'){ + if (attr(prediction, "metaData")$modelType == "binary") { recalibrated <- prediction - recalibrated$value[recalibrated$value==0] <- 0.000000000000001 - recalibrated$value[recalibrated$value==1] <- 1-0.000000000000001 - - y <- ifelse(recalibrated$outcomeCount>0, 1, 0) - inverseLog <- inverseLog(recalibrated$value) - refit <- suppressWarnings(stats::glm(y ~ inverseLog, family = 'binomial')) - - recalibrated$value <- logFunct((inverseLog * refit$coefficients[2]) + refit$coefficients[1]) - - recalibrated[,columnType] <- 'weakRecalibration' + epsilon <- .Machine$double.eps + recalibrated$value <- pmin(pmax(recalibrated$value, epsilon), 1 - epsilon) + + y <- ifelse(recalibrated$outcomeCount > 0, 1, 0) + + # convert risk probailities to logits + inverseLog <- stats::qlogis(recalibrated$value) + refit <- suppressWarnings(stats::glm(y ~ inverseLog, family = "binomial")) + + recalibrated$value <- stats::plogis((inverseLog * refit$coefficients[2]) + refit$coefficients[1]) + + recalibrated[, columnType] <- "weakRecalibration" prediction <- rbind(prediction, recalibrated) - attr(prediction, 'metaData')$weakRecalibration = list( - adjustGradient = refit$coefficients[2], + attr(prediction, "metaData")$weakRecalibration <- list( + adjustGradient = refit$coefficients[2], adjustIntercept = refit$coefficients[1] - ) - + ) + return(prediction) - } - + } + # add if survival - if(attr(prediction, "metaData")$modelType == 'survival'){ - + if (attr(prediction, "metaData")$modelType == "survival") { recalibrated <- prediction - + # this will make the recalibration work if the baselineSurvival is missing baseline <- ifelse(is.null(attr(recalibrated, "baselineSurvival")), 0.9, attr(recalibrated, "baselineSurvival")) - ParallelLogger::logInfo(paste0('recal initial baseline hazard: ',baseline)) - + ParallelLogger::logInfo(paste0("recal initial baseline hazard: ", baseline)) + offset <- ifelse(is.null(attr(recalibrated, "offset")), 0, attr(recalibrated, "offset")) - ParallelLogger::logInfo(paste0('recal initial offset: ',offset)) - + ParallelLogger::logInfo(paste0("recal initial offset: ", offset)) + timepoint <- ifelse(is.null(attr(recalibrated, "timePoint")), 365, attr(recalibrated, "timePoint")) - ParallelLogger::logInfo(paste0('recal initial timepoint: ',timepoint)) - - if(!is.null(baseline)){ - lp <- log(log(1-recalibrated$value)/log(baseline)) + offset - } else{ + ParallelLogger::logInfo(paste0("recal initial timepoint: ", timepoint)) + + if (!is.null(baseline)) { + lp <- log(log(1 - recalibrated$value) / log(baseline)) + offset + } else { lp <- log(recalibrated$value) } - - + + t <- apply(cbind(recalibrated$daysToCohortEnd, recalibrated$survivalTime), 1, min) - y <- ifelse(recalibrated$outcomeCount>0,1,0) # observed outcome - y[t>timepoint] <- 0 - t[t>timepoint] <- timepoint - S<- survival::Surv(t, y) + y <- ifelse(recalibrated$outcomeCount > 0, 1, 0) # observed outcome + y[t > timepoint] <- 0 + t[t > timepoint] <- timepoint + S <- survival::Surv(t, y) #### Intercept + Slope recalibration - f.slope <- survival::coxph(S~lp) - h.slope <- max(survival::basehaz(f.slope)$hazard) # maximum OK because of prediction_horizon + f.slope <- survival::coxph(S ~ lp) + h.slope <- max(survival::basehaz(f.slope)$hazard) # maximum OK because of prediction_horizon lp.slope <- stats::predict(f.slope) - recalibrated$value <- 1-exp(-h.slope*exp(lp.slope)) + recalibrated$value <- 1 - exp(-h.slope * exp(lp.slope)) # 1-h.slope^exp(lp.slope) - - - recalibrated[,columnType] <- 'weakRecalibration' + + + recalibrated[, columnType] <- "weakRecalibration" prediction <- rbind(prediction, recalibrated) - attr(prediction, 'metaData')$weakRecalibration = list( - adjustGradient = f.slope$coefficients['lp'], + attr(prediction, "metaData")$weakRecalibration <- list( + adjustGradient = f.slope$coefficients["lp"], adjustIntercept = h.slope ) - - return(prediction) - - } - -} - -logFunct <- function(values){ - return(1/(1 + exp(0 - values))) -} -inverseLog <- function(values){ - res <- log(values/(1-values)) - return(res) + return(prediction) + } } - diff --git a/man/calibrationInLarge.Rd b/man/calibrationInLarge.Rd new file mode 100644 index 00000000..28505d42 --- /dev/null +++ b/man/calibrationInLarge.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/EvaluationSummary.R +\name{calibrationInLarge} +\alias{calibrationInLarge} +\title{Calculate the calibration in large} +\usage{ +calibrationInLarge(prediction) +} +\arguments{ +\item{prediction}{A prediction dataframe} +} +\value{ +data.frame with meanPredictionRisk, observedRisk, and N +} +\description{ +Calculate the calibration in large +} +\keyword{internal} diff --git a/man/createGlmModel.Rd b/man/createGlmModel.Rd index 6a6509fd..1ba11ca0 100644 --- a/man/createGlmModel.Rd +++ b/man/createGlmModel.Rd @@ -9,7 +9,7 @@ createGlmModel(coefficients, intercept = 0, finalMapping = "logistic") \arguments{ \item{coefficients}{A dataframe containing two columns, coefficients and covariateId, both of type numeric. The covariateId column must contain -valid covariateIds that match those used in the /code{FeatureExtraction} +valid covariateIds that match those used in the \code{FeatureExtraction} package.} \item{intercept}{A numeric value representing the intercept of the model.} diff --git a/man/predictGlm.Rd b/man/predictGlm.Rd index e7e09a57..6f9e21cc 100644 --- a/man/predictGlm.Rd +++ b/man/predictGlm.Rd @@ -14,7 +14,7 @@ prediction model} data extracted from the CDM.} \item{cohort}{The population dataframe created using -/code{createStudyPopulation} who will have their risks predicted or a cohort +\code{createStudyPopulation} who will have their risks predicted or a cohort without the outcome known} } \value{ diff --git a/man/recalibratePlp.Rd b/man/recalibratePlp.Rd index 148e3c3e..139bd00b 100644 --- a/man/recalibratePlp.Rd +++ b/man/recalibratePlp.Rd @@ -21,12 +21,14 @@ recalibratePlp( \item{method}{Method used to recalibrate ('recalibrationInTheLarge' or 'weakRecalibration' )} } \value{ -An object of class \code{runPlp} that is recalibrated on the new data +A prediction dataframe with the recalibrated predictions added } \description{ -Train various models using a default parameter gird search or user specified parameters +Recalibrating a model using the recalibrationInTheLarge or weakRecalibration methods } \details{ -The user can define the machine learning model to train (regularised logistic regression, random forest, -gradient boosting machine, neural network and ) +'recalibrationInTheLarge' calculates a single correction factor for the +average predicted risks to match the average observed risks. +'weakRecalibration' fits a glm model to the logit of the predicted risks, +also known as Platt scaling/logistic recalibration. } diff --git a/man/recalibratePlpRefit.Rd b/man/recalibratePlpRefit.Rd index 41202c9b..ec83f986 100644 --- a/man/recalibratePlpRefit.Rd +++ b/man/recalibratePlpRefit.Rd @@ -4,7 +4,7 @@ \alias{recalibratePlpRefit} \title{recalibratePlpRefit} \usage{ -recalibratePlpRefit(plpModel, newPopulation, newData) +recalibratePlpRefit(plpModel, newPopulation, newData, returnModel = FALSE) } \arguments{ \item{plpModel}{The trained plpModel (runPlp$model)} @@ -13,14 +13,12 @@ recalibratePlpRefit(plpModel, newPopulation, newData) \item{newData}{An object of type \code{plpData} - the patient level prediction data extracted from the CDM.} + +\item{returnModel}{Logical: return the refitted model} } \value{ -An object of class \code{runPlp} that is recalibrated on the new data +An prediction dataframe with the predictions of the recalibrated model added } \description{ -Train various models using a default parameter gird search or user specified parameters -} -\details{ -The user can define the machine learning model to train (regularised logistic regression, random forest, -gradient boosting machine, neural network and ) +Recalibrating a model by refitting it } diff --git a/man/recalibrationInTheLarge.Rd b/man/recalibrationInTheLarge.Rd new file mode 100644 index 00000000..4a678d2e --- /dev/null +++ b/man/recalibrationInTheLarge.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Recalibration.R +\name{recalibrationInTheLarge} +\alias{recalibrationInTheLarge} +\title{recalibrationInTheLarge} +\usage{ +recalibrationInTheLarge(prediction, columnType = "evaluationType") +} +\arguments{ +\item{prediction}{A prediction dataframe} + +\item{columnType}{The column name where the strata types are specified} +} +\value{ +An prediction dataframe with the recalibrated predictions added +} +\description{ +Recalibrate a model using the recalibrationInTheLarge method which calculates a single correction factor +for the average predicted risks to match the average observed risks +} +\keyword{internal} diff --git a/man/weakRecalibration.Rd b/man/weakRecalibration.Rd new file mode 100644 index 00000000..24f1639c --- /dev/null +++ b/man/weakRecalibration.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Recalibration.R +\name{weakRecalibration} +\alias{weakRecalibration} +\title{weakRecalibration} +\usage{ +weakRecalibration(prediction, columnType = "evaluationType") +} +\arguments{ +\item{prediction}{A prediction dataframe} + +\item{columnType}{The column name where the strata types are specified} +} +\value{ +An prediction dataframe with the recalibrated predictions added +} +\description{ +Recalibrate a model using the weakRecalibration method which fits a glm model +to the logit of the predicted risks. +Alsi known as Platt scaling/logistic recalibration +} +\keyword{internal} diff --git a/tests/testthat/test-recalibration.R b/tests/testthat/test-recalibration.R index 33e71719..5a8de290 100644 --- a/tests/testthat/test-recalibration.R +++ b/tests/testthat/test-recalibration.R @@ -21,16 +21,16 @@ context("Recalibration") prediction <- data.frame( rowId = 1:100, - value = c(runif(20)/30,runif(80)/300), - outcomeCount = c(runif(20)>0.5, runif(80)>0.9)*1, + value = c(runif(20) / 30, runif(80) / 300), + outcomeCount = c(runif(20) > 0.5, runif(80) > 0.9) * 1, gender = sample(c(8507, 1111), 100, replace = T), - ageYear = sample(1:100,100, replace = T ), - survivalTime = rep(365,100), - evaluationType = rep('Test', 100) - ) + ageYear = sample(1:100, 100, replace = T), + survivalTime = rep(365, 100), + evaluationType = rep("Test", 100) +) metaData <- list( - modelType = "binary", + modelType = "binary", targetId = 1, outcomeId = outcomeId, timepoint = 365 @@ -39,75 +39,80 @@ metaData <- list( attr(prediction, "metaData") <- metaData test_that("recalibrationInTheLarge", { - - test <- recalibratePlp(prediction, analysisId = 'Analysis_1', - method = 'recalibrationInTheLarge') - - testthat::expect_true(sum(test$evaluationType == 'recalibrationInTheLarge') == 100) - + test <- recalibratePlp(prediction, + analysisId = "Analysis_1", + method = "recalibrationInTheLarge" + ) + + testthat::expect_true(sum(test$evaluationType == "recalibrationInTheLarge") == 100) }) -#'weakRecalibration' +#' weakRecalibration' test_that("weakRecalibration", { + test <- recalibratePlp(prediction, + analysisId = "Analysis_1", + method = "weakRecalibration" + ) -test <- recalibratePlp(prediction, analysisId = 'Analysis_1', - method = 'weakRecalibration') - -testthat::expect_true(sum(test$evaluationType == 'weakRecalibration') == 100) - + testthat::expect_true(sum(test$evaluationType == "weakRecalibration") == 100) }) - - test_that("recalibratePlpRefit", { - - newPop <- plpResult$prediction %>% dplyr::select(-"value") %>% dplyr::filter(.data$evaluationType %in% c('Test','Train')) - attr(newPop, 'metaData') <- list( - targetId = 1, + newPop <- plpResult$prediction %>% + dplyr::select(-"value") %>% + dplyr::filter(.data$evaluationType %in% c("Test", "Train")) + attr(newPop, "metaData") <- list( + targetId = 1, outcomeId = outcomeId, restrictPlpDataSettings = PatientLevelPrediction::createRestrictPlpDataSettings(), populationSettings = PatientLevelPrediction::createStudyPopulationSettings() ) - + testRecal <- recalibratePlpRefit( - plpModel = plpResult$model, - newPopulation = newPop, + plpModel = plpResult$model, + newPopulation = newPop, newData = plpData ) - - if(!is.null(testRecal)){ + + if (!is.null(testRecal)) { testthat::expect_true( - sum(testRecal$evaluationType == 'recalibrationRefit')>0 + sum(testRecal$evaluationType == "recalibrationRefit") > 0 ) - - testthat::expect_s3_class(testRecal, 'data.frame') + testthat::expect_s3_class(testRecal, "data.frame") } + testRecalWithModel <- recalibratePlpRefit( + plpModel = plpResult$model, + newPopulation = newPop, + newData = plpData, + returnModel = TRUE + ) + expect_type(testRecalWithModel, "list") + expect_s3_class(testRecalWithModel$model, "plpModel") + expect_s3_class(testRecalWithModel$prediction, "data.frame") + # add more test... }) test_that("survival", { -# survival -metaData <- list( - modelType = "survival", - targetId = 1, - outcomeId = outcomeId, - timepoint = 365 -) + # survival + metaData <- list( + modelType = "survival", + targetId = 1, + outcomeId = outcomeId, + timepoint = 365 + ) -attr(prediction, "metaData") <- metaData + attr(prediction, "metaData") <- metaData - test <- recalibratePlp(prediction, analysisId = 'Analysis_1', - method = 'weakRecalibration') + test <- recalibratePlp(prediction, + analysisId = "Analysis_1", + method = "weakRecalibration" + ) - testthat::expect_true(sum(test$evaluationType == 'weakRecalibration') == 100) - + testthat::expect_true(sum(test$evaluationType == "weakRecalibration") == 100) }) - - - -