From 454b24a2fa0bb260203ba7787e485e7d2333f124 Mon Sep 17 00:00:00 2001 From: Dominique Makowski Date: Thu, 14 Nov 2024 13:04:29 +0000 Subject: [PATCH 1/5] Update performance_roc.R --- R/performance_roc.R | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/R/performance_roc.R b/R/performance_roc.R index 3b02d7b61..78be71a6b 100644 --- a/R/performance_roc.R +++ b/R/performance_roc.R @@ -4,6 +4,15 @@ #' @description This function calculates a simple ROC curves of x/y coordinates #' based on response and predictions of a binomial model. #' +#' It returns the area under the curve (AUC) as a percentage, which corresponds +#' to the probability that a randomly chosen observation of "condition 1" is correctly +#' classified by the model as having a higher probability of being "condition 1" than +#' a randomly chosen "condition 2" observation. +#' +#' Applying `as.data.frame()` to the ouput returns a data frame containing the following: +#' - `Sensitivity` (that actually corresponds to `1 - Specificity`): It is the False Positive Rate. +#' - `Sensitivity`: It is the True Positive Rate, which is the proportion of correctly classified "condition 1" observations. +#' #' @param x A numeric vector, representing the outcome (0/1), or a model with #' binomial outcome. #' @param predictions If `x` is numeric, a numeric vector of same length From ac1e372ae0c7ef1d5ba5cf1463052de716c422f4 Mon Sep 17 00:00:00 2001 From: Dominique Makowski Date: Thu, 14 Nov 2024 13:58:43 +0000 Subject: [PATCH 2/5] add as.numeric method --- NAMESPACE | 1 + R/performance_roc.R | 16 ++++++++++++++++ man/performance_roc.Rd | 12 ++++++++++++ 3 files changed, 29 insertions(+) diff --git a/NAMESPACE b/NAMESPACE index d35d82547..f3c9e008a 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -26,6 +26,7 @@ S3method(as.data.frame,r2_bayes) S3method(as.data.frame,r2_loo) S3method(as.data.frame,r2_nakagawa) S3method(as.numeric,check_outliers) +S3method(as.numeric,performance_roc) S3method(check_autocorrelation,default) S3method(check_collinearity,BFBayesFactor) S3method(check_collinearity,MixMod) diff --git a/R/performance_roc.R b/R/performance_roc.R index 78be71a6b..8a22bfb6c 100644 --- a/R/performance_roc.R +++ b/R/performance_roc.R @@ -42,6 +42,7 @@ #' #' model <- glm(y ~ Sepal.Length + Sepal.Width, data = train_data, family = "binomial") #' as.data.frame(performance_roc(model, new_data = test_data)) +#' as.numeric(performance_roc(model)) #' #' roc <- performance_roc(model, new_data = test_data) #' area_under_curve(roc$Specificity, roc$Sensitivity) @@ -118,6 +119,21 @@ print.performance_roc <- function(x, ...) { } +#' @export +as.numeric.performance_roc <- function(x, ...) { + if (length(unique(x$Model)) == 1) { + auc <- bayestestR::area_under_curve(x$Specificity, x$Sensitivity) + } else { + dat <- split(x, f = x$Model) + + auc <- c() + for (i in seq_along(dat)) { + auc <- c(auc, bayestestR::area_under_curve(dat[[i]]$Specificity, dat[[i]]$Sensitivity)) + } + } + auc +} + # utilities --------------------------- diff --git a/man/performance_roc.Rd b/man/performance_roc.Rd index e14ef04c9..ac6501015 100644 --- a/man/performance_roc.Rd +++ b/man/performance_roc.Rd @@ -28,6 +28,17 @@ model name. \description{ This function calculates a simple ROC curves of x/y coordinates based on response and predictions of a binomial model. + +It returns the area under the curve (AUC) as a percentage, which corresponds +to the probability that a randomly chosen observation of "condition 1" is correctly +classified by the model as having a higher probability of being "condition 1" than +a randomly chosen "condition 2" observation. + +Applying \code{as.data.frame()} to the ouput returns a data frame containing the following: +\itemize{ +\item \code{Sensitivity} (that actually corresponds to \code{1 - Specificity}): It is the False Positive Rate. +\item \code{Sensitivity}: It is the True Positive Rate, which is the proportion of correctly classified "condition 1" observations. +} } \note{ There is also a \href{https://easystats.github.io/see/articles/performance.html}{\code{plot()}-method} @@ -45,6 +56,7 @@ train_data <- iris[-folds, ] model <- glm(y ~ Sepal.Length + Sepal.Width, data = train_data, family = "binomial") as.data.frame(performance_roc(model, new_data = test_data)) +as.numeric(performance_roc(model)) roc <- performance_roc(model, new_data = test_data) area_under_curve(roc$Specificity, roc$Sensitivity) From 31cccee7ad8fc8aaa94ba5644f13bc63b83a7e70 Mon Sep 17 00:00:00 2001 From: Dominique Makowski Date: Thu, 14 Nov 2024 14:01:00 +0000 Subject: [PATCH 3/5] spelling --- R/performance_roc.R | 2 +- man/performance_roc.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/performance_roc.R b/R/performance_roc.R index 8a22bfb6c..ef10cc56c 100644 --- a/R/performance_roc.R +++ b/R/performance_roc.R @@ -9,7 +9,7 @@ #' classified by the model as having a higher probability of being "condition 1" than #' a randomly chosen "condition 2" observation. #' -#' Applying `as.data.frame()` to the ouput returns a data frame containing the following: +#' Applying `as.data.frame()` to the output returns a data frame containing the following: #' - `Sensitivity` (that actually corresponds to `1 - Specificity`): It is the False Positive Rate. #' - `Sensitivity`: It is the True Positive Rate, which is the proportion of correctly classified "condition 1" observations. #' diff --git a/man/performance_roc.Rd b/man/performance_roc.Rd index ac6501015..42a8cc8f7 100644 --- a/man/performance_roc.Rd +++ b/man/performance_roc.Rd @@ -34,7 +34,7 @@ to the probability that a randomly chosen observation of "condition 1" is correc classified by the model as having a higher probability of being "condition 1" than a randomly chosen "condition 2" observation. -Applying \code{as.data.frame()} to the ouput returns a data frame containing the following: +Applying \code{as.data.frame()} to the output returns a data frame containing the following: \itemize{ \item \code{Sensitivity} (that actually corresponds to \code{1 - Specificity}): It is the False Positive Rate. \item \code{Sensitivity}: It is the True Positive Rate, which is the proportion of correctly classified "condition 1" observations. From b9f037a74299ee3fefaf3b3f103123e6c0cba48c Mon Sep 17 00:00:00 2001 From: Daniel Date: Sun, 24 Nov 2024 17:30:13 +0100 Subject: [PATCH 4/5] fix --- R/performance_roc.R | 9 --------- 1 file changed, 9 deletions(-) diff --git a/R/performance_roc.R b/R/performance_roc.R index 68c9a8b8b..927875b06 100644 --- a/R/performance_roc.R +++ b/R/performance_roc.R @@ -17,15 +17,6 @@ #' - `Sensitivity`: It is the True Positive Rate, which is the proportion of #' correctly classified "condition 1" observations. #' -#' It returns the area under the curve (AUC) as a percentage, which corresponds -#' to the probability that a randomly chosen observation of "condition 1" is correctly -#' classified by the model as having a higher probability of being "condition 1" than -#' a randomly chosen "condition 2" observation. -#' -#' Applying `as.data.frame()` to the output returns a data frame containing the following: -#' - `Sensitivity` (that actually corresponds to `1 - Specificity`): It is the False Positive Rate. -#' - `Sensitivity`: It is the True Positive Rate, which is the proportion of correctly classified "condition 1" observations. -#' #' @param x A numeric vector, representing the outcome (0/1), or a model with #' binomial outcome. #' @param predictions If `x` is numeric, a numeric vector of same length From 4fe8cf9bb83a4d0bfd1d9f830fc4ba422669708f Mon Sep 17 00:00:00 2001 From: Daniel Date: Sun, 24 Nov 2024 17:34:13 +0100 Subject: [PATCH 5/5] fix --- R/performance_roc.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/performance_roc.R b/R/performance_roc.R index 927875b06..e7d0f0006 100644 --- a/R/performance_roc.R +++ b/R/performance_roc.R @@ -197,5 +197,5 @@ as.numeric.performance_roc <- function(x, ...) { if (inherits(x, "model_fit")) { x <- x$fit } - inherits(x, c("glm", "glmerMod", "logitor", "logitmfx", "probitmfx")) + inherits(x, c("glm", "glmerMod", "logitor", "logitmfx", "probitmfx", "glmmTMB")) }