From 3cb5a12623ad963825f390060d0e0853ad94a6ff Mon Sep 17 00:00:00 2001
From: RemPsyc <RemPsyc@users.noreply.github.com>
Date: Fri, 24 Jun 2022 23:23:18 -0400
Subject: [PATCH 1/7] addresses #177 & #49 & #47 for winsorizing based on the
 MAD

---
 R/winsorize.R | 57 ++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 41 insertions(+), 16 deletions(-)

diff --git a/R/winsorize.R b/R/winsorize.R
index 84a32e9e1..6cf500157 100644
--- a/R/winsorize.R
+++ b/R/winsorize.R
@@ -19,10 +19,12 @@
 #' @param data Dataframe or vector.
 #' @param threshold The amount of winsorization.
 #' @param verbose Toggle warnings.
+#' @param robust Logical, if TRUE, winsorizing is done via the median absolute deviation (MAD).
 #' @param ... Currently not used.
 #'
 #' @examples
 #' winsorize(iris$Sepal.Length, threshold = 0.2)
+#' winsorize(iris$Sepal.Length, threshold = 3, robust = TRUE)
 #' winsorize(iris, threshold = 0.2)
 #' @inherit data_rename seealso
 #' @export
@@ -43,27 +45,50 @@ winsorize.character <- winsorize.factor
 winsorize.logical <- winsorize.factor
 
 #' @export
-winsorize.data.frame <- function(data, threshold = 0.2, verbose = TRUE, ...) {
-  out <- sapply(data, winsorize, threshold = threshold, verbose = verbose)
+winsorize.data.frame <- function(data, threshold = 0.2, verbose = TRUE, robust = FALSE, ...) {
+  out <- sapply(data, winsorize, threshold = threshold, verbose = verbose, robust = robust)
   as.data.frame(out)
 }
 
 #' @rdname winsorize
 #' @export
-winsorize.numeric <- function(data, threshold = 0.2, verbose = TRUE, ...) {
-  if (threshold < 0 || threshold > 1) {
-    if (isTRUE(verbose)) {
-      warning("'threshold' for winsorization must be a scalar between 0 and 1. Did not winsorize data.", call. = FALSE)
-    }
-    return(data)
+winsorize.numeric <- function(data, threshold = 0.2, verbose = TRUE, robust = FALSE, ...) {
+  if(robust == FALSE) {
+
+    if (threshold < 0 || threshold > 0.5) {
+      if (isTRUE(verbose)) {
+        warning("'threshold' for winsorization must be a scalar between 0 and 0.5. Did not winsorize data.", call. = FALSE)
+        }
+      return(data)
+      }
+
+    y <- sort(data)
+    n <- length(data)
+    ibot <- floor(threshold * n) + 1
+    itop <- length(data) - ibot + 1
+    xbot <- y[ibot]
+    xtop <- y[itop]
+
+    winval <- data
+    winval[winval <= xbot] <- xbot
+    winval[winval >= xtop] <- xtop
+    return(winval)
   }
 
-  y <- sort(data)
-  n <- length(data)
-  ibot <- floor(threshold * n) + 1
-  itop <- length(data) - ibot + 1
-  xbot <- y[ibot]
-  xtop <- y[itop]
-  winval <- ifelse(data <= xbot, xbot, data)
-  ifelse(winval >= xtop, xtop, winval)
+  if(robust == TRUE) {
+
+    if (threshold <= 0) {
+      if (isTRUE(verbose)) {
+        warning("'threshold' for winsorization must be a scalar greater than 1. Did not winsorize data.", call. = FALSE)
+      }
+      return(data)
+    }
+
+    med <- median(data, na.rm = TRUE)
+    y <- data - med
+    sc <- mad(y, center = 0, na.rm = TRUE) * threshold
+    y[y > sc] <- sc
+    y[y < -sc] <- -sc
+    y + med
+  }
 }

From 807b8fe99ef099ee05f191b2b7d9a8fac5cc76f1 Mon Sep 17 00:00:00 2001
From: RemPsyc <RemPsyc@users.noreply.github.com>
Date: Fri, 24 Jun 2022 23:36:27 -0400
Subject: [PATCH 2/7] forgot to push updated documentation

---
 R/winsorize.R    | 6 +++---
 man/winsorize.Rd | 5 ++++-
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/R/winsorize.R b/R/winsorize.R
index 6cf500157..89f69b5ab 100644
--- a/R/winsorize.R
+++ b/R/winsorize.R
@@ -79,14 +79,14 @@ winsorize.numeric <- function(data, threshold = 0.2, verbose = TRUE, robust = FA
 
     if (threshold <= 0) {
       if (isTRUE(verbose)) {
-        warning("'threshold' for winsorization must be a scalar greater than 1. Did not winsorize data.", call. = FALSE)
+        warning("'threshold' for winsorization must be a scalar greater than 0. Did not winsorize data.", call. = FALSE)
       }
       return(data)
     }
 
-    med <- median(data, na.rm = TRUE)
+    med <- stats::median(data, na.rm = TRUE)
     y <- data - med
-    sc <- mad(y, center = 0, na.rm = TRUE) * threshold
+    sc <- stats::mad(y, center = 0, na.rm = TRUE) * threshold
     y[y > sc] <- sc
     y[y < -sc] <- -sc
     y + med
diff --git a/man/winsorize.Rd b/man/winsorize.Rd
index 85a08920e..4d082facc 100644
--- a/man/winsorize.Rd
+++ b/man/winsorize.Rd
@@ -7,7 +7,7 @@
 \usage{
 winsorize(data, ...)
 
-\method{winsorize}{numeric}(data, threshold = 0.2, verbose = TRUE, ...)
+\method{winsorize}{numeric}(data, threshold = 0.2, verbose = TRUE, robust = FALSE, ...)
 }
 \arguments{
 \item{data}{Dataframe or vector.}
@@ -17,6 +17,8 @@ winsorize(data, ...)
 \item{threshold}{The amount of winsorization.}
 
 \item{verbose}{Toggle warnings.}
+
+\item{robust}{Logical, if TRUE, winsorizing is done via the median absolute deviation (MAD).}
 }
 \value{
 A dataframe with winsorized columns or a winsorized vector.
@@ -37,6 +39,7 @@ their more standard forms.
 }
 \examples{
 winsorize(iris$Sepal.Length, threshold = 0.2)
+winsorize(iris$Sepal.Length, threshold = 3, robust = TRUE)
 winsorize(iris, threshold = 0.2)
 }
 \seealso{

From 0a7cc4eb37a5f50ec9a5d11645d6f16dbea21222 Mon Sep 17 00:00:00 2001
From: RemPsyc <RemPsyc@users.noreply.github.com>
Date: Sat, 25 Jun 2022 20:05:19 -0400
Subject: [PATCH 3/7] new argument "method", updated NEWS, resolved failed
 test, #179

---
 NEWS.md                             |  1 +
 R/winsorize.R                       | 35 ++++++++++++++++++++---------
 man/winsorize.Rd                    | 16 ++++++++++---
 tests/testthat/test-winsorization.R |  5 +++--
 4 files changed, 41 insertions(+), 16 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index 9b22a1033..5d363f620 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -19,6 +19,7 @@ CHANGES
 
 * Some of the text formatting helpers (like `text_concatenate()`) gain an
   `enclose` argument, to wrap text elements with surrounding characters.
+* `winsorize` now accepts "zscore" method (in addition to "percentile"). Additionally, when `robust` is also set to `TRUE`, winsorizes via the median and median absolute deviation (MAD); else via the mean and standard deviation. (@rempsyc, #177, #49, #47).
 
 NEW FUNCTIONS
 
diff --git a/R/winsorize.R b/R/winsorize.R
index 89f69b5ab..590d762e7 100644
--- a/R/winsorize.R
+++ b/R/winsorize.R
@@ -19,12 +19,14 @@
 #' @param data Dataframe or vector.
 #' @param threshold The amount of winsorization.
 #' @param verbose Toggle warnings.
-#' @param robust Logical, if TRUE, winsorizing is done via the median absolute deviation (MAD).
+#' @param method One of "percentile" or "zscore".
+#' @param robust Logical, if TRUE, winsorizing through the "zscore" method is done via the median and the median absolute deviation (MAD); if FALSE, via the mean and the standard deviation.
 #' @param ... Currently not used.
 #'
 #' @examples
 #' winsorize(iris$Sepal.Length, threshold = 0.2)
-#' winsorize(iris$Sepal.Length, threshold = 3, robust = TRUE)
+#' winsorize(iris$Sepal.Length, threshold = 2, method = "zscore")
+#' winsorize(iris$Sepal.Length, threshold = 2, method = "zscore", robust = TRUE)
 #' winsorize(iris, threshold = 0.2)
 #' @inherit data_rename seealso
 #' @export
@@ -52,8 +54,8 @@ winsorize.data.frame <- function(data, threshold = 0.2, verbose = TRUE, robust =
 
 #' @rdname winsorize
 #' @export
-winsorize.numeric <- function(data, threshold = 0.2, verbose = TRUE, robust = FALSE, ...) {
-  if(robust == FALSE) {
+winsorize.numeric <- function(data, threshold = 0.2, verbose = TRUE, method = "percentile", robust = FALSE, ...) {
+  if(method == "percentile") {
 
     if (threshold < 0 || threshold > 0.5) {
       if (isTRUE(verbose)) {
@@ -75,7 +77,7 @@ winsorize.numeric <- function(data, threshold = 0.2, verbose = TRUE, robust = FA
     return(winval)
   }
 
-  if(robust == TRUE) {
+  if(method == "zscore") {
 
     if (threshold <= 0) {
       if (isTRUE(verbose)) {
@@ -84,11 +86,22 @@ winsorize.numeric <- function(data, threshold = 0.2, verbose = TRUE, robust = FA
       return(data)
     }
 
-    med <- stats::median(data, na.rm = TRUE)
-    y <- data - med
-    sc <- stats::mad(y, center = 0, na.rm = TRUE) * threshold
-    y[y > sc] <- sc
-    y[y < -sc] <- -sc
-    y + med
+    if(isTRUE(robust)) {
+      med <- stats::median(data, na.rm = TRUE)
+      y <- data - med
+      winval <- stats::mad(y, center = 0, na.rm = TRUE) * threshold
+      y[y > winval] <- winval
+      y[y < -winval] <- -winval
+      return(y + med)
+    }
+
+    if(isFALSE(robust)) {
+      m <- mean(data, na.rm = TRUE)
+      y <- data - m
+      winval <- stats::sd(y, na.rm = TRUE) * threshold
+      y[y > winval] <- winval
+      y[y < -winval] <- -winval
+      y + m
+    }
   }
 }
diff --git a/man/winsorize.Rd b/man/winsorize.Rd
index 4d082facc..aa284ac22 100644
--- a/man/winsorize.Rd
+++ b/man/winsorize.Rd
@@ -7,7 +7,14 @@
 \usage{
 winsorize(data, ...)
 
-\method{winsorize}{numeric}(data, threshold = 0.2, verbose = TRUE, robust = FALSE, ...)
+\method{winsorize}{numeric}(
+  data,
+  threshold = 0.2,
+  verbose = TRUE,
+  method = "percentile",
+  robust = FALSE,
+  ...
+)
 }
 \arguments{
 \item{data}{Dataframe or vector.}
@@ -18,7 +25,9 @@ winsorize(data, ...)
 
 \item{verbose}{Toggle warnings.}
 
-\item{robust}{Logical, if TRUE, winsorizing is done via the median absolute deviation (MAD).}
+\item{method}{One of "percentile" or "zscore".}
+
+\item{robust}{Logical, if TRUE, winsorizing through the "zscore" method is done via the median and the median absolute deviation (MAD); if FALSE, via the mean and the standard deviation.}
 }
 \value{
 A dataframe with winsorized columns or a winsorized vector.
@@ -39,7 +48,8 @@ their more standard forms.
 }
 \examples{
 winsorize(iris$Sepal.Length, threshold = 0.2)
-winsorize(iris$Sepal.Length, threshold = 3, robust = TRUE)
+winsorize(iris$Sepal.Length, threshold = 2, method = "zscore")
+winsorize(iris$Sepal.Length, threshold = 2, method = "zscore", robust = TRUE)
 winsorize(iris, threshold = 0.2)
 }
 \seealso{
diff --git a/tests/testthat/test-winsorization.R b/tests/testthat/test-winsorization.R
index dcccfe888..c618ad7c9 100644
--- a/tests/testthat/test-winsorization.R
+++ b/tests/testthat/test-winsorization.R
@@ -11,11 +11,11 @@ test_that("with missing values", {
 test_that("winsorize: threshold must be between 0 and 1", {
   expect_warning(
     winsorize(sample(1:10, 5), threshold = -0.1),
-    regexp = "must be a scalar between 0 and 1"
+    regexp = "must be a scalar between 0 and 0.5"
   )
   expect_warning(
     winsorize(sample(1:10, 5), threshold = 1.1),
-    regexp = "must be a scalar between 0 and 1"
+    regexp = "must be a scalar between 0 and 0.5"
   )
   x <- sample(1:10, 5)
   suppressWarnings({
@@ -38,3 +38,4 @@ test_that("winsorize on data.frame", {
   )
   expect_equal(names(iris2), names(iris))
 })
+

From 658b2b4965a6378c1ccbfc3a53ae478546f5e518 Mon Sep 17 00:00:00 2001
From: "Mattan S. Ben-Shachar" <matanshm@post.bgu.ac.il>
Date: Sun, 26 Jun 2022 11:15:00 +0300
Subject: [PATCH 4/7] update winsorize.numeric

added raw method

made the code easier to maintain by modularizing it

made doc more explicit about the methods

updated examples to visualize the effect

update NEWS
---
 NEWS.md          |  2 +-
 R/winsorize.R    | 84 ++++++++++++++++++++++++++++++------------------
 man/winsorize.Rd | 34 +++++++++++++++-----
 3 files changed, 79 insertions(+), 41 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index 5d363f620..adef3d618 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -19,7 +19,7 @@ CHANGES
 
 * Some of the text formatting helpers (like `text_concatenate()`) gain an
   `enclose` argument, to wrap text elements with surrounding characters.
-* `winsorize` now accepts "zscore" method (in addition to "percentile"). Additionally, when `robust` is also set to `TRUE`, winsorizes via the median and median absolute deviation (MAD); else via the mean and standard deviation. (@rempsyc, #177, #49, #47).
+* `winsorize` now accepts "raw" and "zscore" methods (in addition to "percentile"). Additionally, when `robust` is set to `TRUE` together with `method = "zscore"`, winsorizes via the median and median absolute deviation (MAD); else via the mean and standard deviation. (@rempsyc, #177, #49, #47).
 
 NEW FUNCTIONS
 
diff --git a/R/winsorize.R b/R/winsorize.R
index 590d762e7..c13d921de 100644
--- a/R/winsorize.R
+++ b/R/winsorize.R
@@ -17,17 +17,33 @@
 #' A dataframe with winsorized columns or a winsorized vector.
 #'
 #' @param data Dataframe or vector.
-#' @param threshold The amount of winsorization.
+#' @param threshold The amount of winsorization, depends on the value of `method`:
+#' - For `method = "percentile"`: the amount to windzorize from *each* tail.
+#' - For `method = "zscore"`: the number of *SD*/*MAD*-deviations from the *mean*/*median* (see `robust`)
+#' - For `method = "raw"`: a vector of length 2 with the lower and upper bound for winsorization.
 #' @param verbose Toggle warnings.
-#' @param method One of "percentile" or "zscore".
+#' @param method One of "percentile" (default), "zscore" or "raw".
 #' @param robust Logical, if TRUE, winsorizing through the "zscore" method is done via the median and the median absolute deviation (MAD); if FALSE, via the mean and the standard deviation.
 #' @param ... Currently not used.
 #'
 #' @examples
-#' winsorize(iris$Sepal.Length, threshold = 0.2)
-#' winsorize(iris$Sepal.Length, threshold = 2, method = "zscore")
-#' winsorize(iris$Sepal.Length, threshold = 2, method = "zscore", robust = TRUE)
+#' hist(iris$Sepal.Length, main = "Original data")
+#'
+#' hist(winsorize(iris$Sepal.Length, threshold = 0.2),
+#'      xlim = c(4, 8), main = "Percentile Winz")
+#'
+#' hist(winsorize(iris$Sepal.Length, threshold = 1.5, method = "zscore"),
+#'      xlim = c(4, 8), main = "Mean+-SD Winz")
+#'
+#' hist(winsorize(iris$Sepal.Length, threshold = 1.5, method = "zscore", robust = TRUE),
+#'      xlim = c(4, 8), main = "Median+-MAD Winz")
+#'
+#' hist(winsorize(iris$Sepal.Length, threshold = c(5, 7.5), method = "raw"),
+#'      xlim = c(4, 8), main = "Raw Thresholds")
+#'
+#' # Also works on a data frame:
 #' winsorize(iris, threshold = 0.2)
+#'
 #' @inherit data_rename seealso
 #' @export
 winsorize <- function(data, ...) {
@@ -47,34 +63,41 @@ winsorize.character <- winsorize.factor
 winsorize.logical <- winsorize.factor
 
 #' @export
-winsorize.data.frame <- function(data, threshold = 0.2, verbose = TRUE, robust = FALSE, ...) {
-  out <- sapply(data, winsorize, threshold = threshold, verbose = verbose, robust = robust)
-  as.data.frame(out)
+winsorize.data.frame <- function(data, threshold = 0.2, method = "percentile", robust = FALSE,
+                                 verbose = TRUE, ...) {
+  data <- lapply(data, winsorize, threshold = threshold, method = method, robust = robust, verbose = verbose)
+  as.data.frame(data)
 }
 
 #' @rdname winsorize
 #' @export
-winsorize.numeric <- function(data, threshold = 0.2, verbose = TRUE, method = "percentile", robust = FALSE, ...) {
-  if(method == "percentile") {
+winsorize.numeric <- function(data, threshold = 0.2, method = "percentile", robust = FALSE,
+                              verbose = TRUE, ...) {
+  method <- match.arg(method, choices = c("percentile", "zscore", "raw"))
+
+  if (method == "raw") {
+    if (length(threshold) != 2L) {
+      if (isTRUE(verbose)) {
+        warning("threshold must be of length 2 for lower and upper bound. Did not winsorize data.", call. = FALSE)
+      }
+      return(data)
+    }
+  }
 
+  if(method == "percentile") {
     if (threshold < 0 || threshold > 0.5) {
       if (isTRUE(verbose)) {
         warning("'threshold' for winsorization must be a scalar between 0 and 0.5. Did not winsorize data.", call. = FALSE)
-        }
-      return(data)
       }
+      return(data)
+    }
 
     y <- sort(data)
     n <- length(data)
     ibot <- floor(threshold * n) + 1
     itop <- length(data) - ibot + 1
-    xbot <- y[ibot]
-    xtop <- y[itop]
 
-    winval <- data
-    winval[winval <= xbot] <- xbot
-    winval[winval >= xtop] <- xtop
-    return(winval)
+    threshold <- c(y[ibot], y[itop])
   }
 
   if(method == "zscore") {
@@ -87,21 +110,18 @@ winsorize.numeric <- function(data, threshold = 0.2, verbose = TRUE, method = "p
     }
 
     if(isTRUE(robust)) {
-      med <- stats::median(data, na.rm = TRUE)
-      y <- data - med
-      winval <- stats::mad(y, center = 0, na.rm = TRUE) * threshold
-      y[y > winval] <- winval
-      y[y < -winval] <- -winval
-      return(y + med)
+      centeral <- stats::median(data, na.rm = TRUE)
+      deviation <- stats::mad(data, center = centeral, na.rm = TRUE)
+    } else {
+      centeral <- mean(data, na.rm = TRUE)
+      deviation <- stats::sd(data, na.rm = TRUE)
     }
 
-    if(isFALSE(robust)) {
-      m <- mean(data, na.rm = TRUE)
-      y <- data - m
-      winval <- stats::sd(y, na.rm = TRUE) * threshold
-      y[y > winval] <- winval
-      y[y < -winval] <- -winval
-      y + m
-    }
+    threshold <- centeral + c(-1, 1) * deviation * threshold
   }
+
+
+  data[data < threshold[1]] <- threshold[1]
+  data[data > threshold[2]] <- threshold[2]
+  return(data)
 }
diff --git a/man/winsorize.Rd b/man/winsorize.Rd
index aa284ac22..0872163d5 100644
--- a/man/winsorize.Rd
+++ b/man/winsorize.Rd
@@ -10,9 +10,9 @@ winsorize(data, ...)
 \method{winsorize}{numeric}(
   data,
   threshold = 0.2,
-  verbose = TRUE,
   method = "percentile",
   robust = FALSE,
+  verbose = TRUE,
   ...
 )
 }
@@ -21,13 +21,18 @@ winsorize(data, ...)
 
 \item{...}{Currently not used.}
 
-\item{threshold}{The amount of winsorization.}
-
-\item{verbose}{Toggle warnings.}
+\item{threshold}{The amount of winsorization, depends on the value of \code{method}:
+\itemize{
+\item For \code{method = "percentile"}: the amount to windzorize from \emph{each} tail.
+\item For \code{method = "zscore"}: the number of \emph{SD}/\emph{MAD}-deviations from the \emph{mean}/\emph{median} (see \code{robust})
+\item For \code{method = "raw"}: a vector of length 2 with the lower and upper bound for winsorization.
+}}
 
-\item{method}{One of "percentile" or "zscore".}
+\item{method}{One of "percentile" (default), "zscore" or "raw".}
 
 \item{robust}{Logical, if TRUE, winsorizing through the "zscore" method is done via the median and the median absolute deviation (MAD); if FALSE, via the mean and the standard deviation.}
+
+\item{verbose}{Toggle warnings.}
 }
 \value{
 A dataframe with winsorized columns or a winsorized vector.
@@ -47,10 +52,23 @@ percentile. Winsorized estimators are usually more robust to outliers than
 their more standard forms.
 }
 \examples{
-winsorize(iris$Sepal.Length, threshold = 0.2)
-winsorize(iris$Sepal.Length, threshold = 2, method = "zscore")
-winsorize(iris$Sepal.Length, threshold = 2, method = "zscore", robust = TRUE)
+hist(iris$Sepal.Length, main = "Original data")
+
+hist(winsorize(iris$Sepal.Length, threshold = 0.2),
+     xlim = c(4, 8), main = "Percentile Winz")
+
+hist(winsorize(iris$Sepal.Length, threshold = 1.5, method = "zscore"),
+     xlim = c(4, 8), main = "Mean+-SD Winz")
+
+hist(winsorize(iris$Sepal.Length, threshold = 1.5, method = "zscore", robust = TRUE),
+     xlim = c(4, 8), main = "Median+-MAD Winz")
+
+hist(winsorize(iris$Sepal.Length, threshold = c(5, 7.5), method = "raw"),
+     xlim = c(4, 8), main = "Raw Thresholds")
+
+# Also works on a data frame:
 winsorize(iris, threshold = 0.2)
+
 }
 \seealso{
 \itemize{

From c626eca6cd9bf31b6f00e5f2fa28665705c3d017 Mon Sep 17 00:00:00 2001
From: RemPsyc <RemPsyc@users.noreply.github.com>
Date: Sun, 26 Jun 2022 07:21:43 -0400
Subject: [PATCH 5/7] minor modifications to docs

---
 DESCRIPTION      |  1 +
 R/winsorize.R    | 10 +++++-----
 man/winsorize.Rd | 10 +++++-----
 3 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index e2b920e1e..244ee8839 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -50,6 +50,7 @@ Suggests:
     rmarkdown,
     rstanarm,
     see,
+    tidyr,
     testthat (>= 3.0.0)
 VignetteBuilder: 
     knitr
diff --git a/R/winsorize.R b/R/winsorize.R
index c13d921de..50a0ac92e 100644
--- a/R/winsorize.R
+++ b/R/winsorize.R
@@ -18,11 +18,11 @@
 #'
 #' @param data Dataframe or vector.
 #' @param threshold The amount of winsorization, depends on the value of `method`:
-#' - For `method = "percentile"`: the amount to windzorize from *each* tail.
+#' - For `method = "percentile"`: the amount to winsorize from *each* tail.
 #' - For `method = "zscore"`: the number of *SD*/*MAD*-deviations from the *mean*/*median* (see `robust`)
 #' - For `method = "raw"`: a vector of length 2 with the lower and upper bound for winsorization.
 #' @param verbose Toggle warnings.
-#' @param method One of "percentile" (default), "zscore" or "raw".
+#' @param method One of "percentile" (default), "zscore", or "raw".
 #' @param robust Logical, if TRUE, winsorizing through the "zscore" method is done via the median and the median absolute deviation (MAD); if FALSE, via the mean and the standard deviation.
 #' @param ... Currently not used.
 #'
@@ -30,13 +30,13 @@
 #' hist(iris$Sepal.Length, main = "Original data")
 #'
 #' hist(winsorize(iris$Sepal.Length, threshold = 0.2),
-#'      xlim = c(4, 8), main = "Percentile Winz")
+#'      xlim = c(4, 8), main = "Percentile Winsorization")
 #'
 #' hist(winsorize(iris$Sepal.Length, threshold = 1.5, method = "zscore"),
-#'      xlim = c(4, 8), main = "Mean+-SD Winz")
+#'      xlim = c(4, 8), main = "Mean (+/- SD) Winsorization")
 #'
 #' hist(winsorize(iris$Sepal.Length, threshold = 1.5, method = "zscore", robust = TRUE),
-#'      xlim = c(4, 8), main = "Median+-MAD Winz")
+#'      xlim = c(4, 8), main = "Median (+/- MAD) Winsorization")
 #'
 #' hist(winsorize(iris$Sepal.Length, threshold = c(5, 7.5), method = "raw"),
 #'      xlim = c(4, 8), main = "Raw Thresholds")
diff --git a/man/winsorize.Rd b/man/winsorize.Rd
index 0872163d5..d13c8b6d6 100644
--- a/man/winsorize.Rd
+++ b/man/winsorize.Rd
@@ -23,12 +23,12 @@ winsorize(data, ...)
 
 \item{threshold}{The amount of winsorization, depends on the value of \code{method}:
 \itemize{
-\item For \code{method = "percentile"}: the amount to windzorize from \emph{each} tail.
+\item For \code{method = "percentile"}: the amount to winsorize from \emph{each} tail.
 \item For \code{method = "zscore"}: the number of \emph{SD}/\emph{MAD}-deviations from the \emph{mean}/\emph{median} (see \code{robust})
 \item For \code{method = "raw"}: a vector of length 2 with the lower and upper bound for winsorization.
 }}
 
-\item{method}{One of "percentile" (default), "zscore" or "raw".}
+\item{method}{One of "percentile" (default), "zscore", or "raw".}
 
 \item{robust}{Logical, if TRUE, winsorizing through the "zscore" method is done via the median and the median absolute deviation (MAD); if FALSE, via the mean and the standard deviation.}
 
@@ -55,13 +55,13 @@ their more standard forms.
 hist(iris$Sepal.Length, main = "Original data")
 
 hist(winsorize(iris$Sepal.Length, threshold = 0.2),
-     xlim = c(4, 8), main = "Percentile Winz")
+     xlim = c(4, 8), main = "Percentile Winsorization")
 
 hist(winsorize(iris$Sepal.Length, threshold = 1.5, method = "zscore"),
-     xlim = c(4, 8), main = "Mean+-SD Winz")
+     xlim = c(4, 8), main = "Mean (+/- SD) Winsorization")
 
 hist(winsorize(iris$Sepal.Length, threshold = 1.5, method = "zscore", robust = TRUE),
-     xlim = c(4, 8), main = "Median+-MAD Winz")
+     xlim = c(4, 8), main = "Median (+/- MAD) Winsorization")
 
 hist(winsorize(iris$Sepal.Length, threshold = c(5, 7.5), method = "raw"),
      xlim = c(4, 8), main = "Raw Thresholds")

From d45ca3f815b03b6319c417253e336fbf4b9fd10f Mon Sep 17 00:00:00 2001
From: RemPsyc <RemPsyc@users.noreply.github.com>
Date: Sun, 26 Jun 2022 08:41:20 -0400
Subject: [PATCH 6/7] removed tidyr from Suggests, replaced
 `tidyr::pivot_longer` with `datawizard::data_to_long` in vignette

---
 DESCRIPTION                    |  1 -
 vignettes/standardize_data.Rmd | 25 ++++++++++++-------------
 2 files changed, 12 insertions(+), 14 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 244ee8839..e2b920e1e 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -50,7 +50,6 @@ Suggests:
     rmarkdown,
     rstanarm,
     see,
-    tidyr,
     testthat (>= 3.0.0)
 VignetteBuilder: 
     knitr
diff --git a/vignettes/standardize_data.Rmd b/vignettes/standardize_data.Rmd
index afd22b249..8c3d815a0 100644
--- a/vignettes/standardize_data.Rmd
+++ b/vignettes/standardize_data.Rmd
@@ -73,17 +73,16 @@ We can see that different methods give different central and variation values:
 
 ```{r, eval=FALSE}
 library(dplyr)
-library(tidyr)
 
 hardlyworking %>%
   select(starts_with("xtra_hours")) %>%
-  pivot_longer(everything()) %>%
-  group_by(name) %>%
+  data_to_long() %>%
+  group_by(Name) %>%
   summarise(
-    mean = mean(value),
-    sd = sd(value),
-    median = median(value),
-    mad = mad(value)
+    mean = mean(Value),
+    sd = sd(Value),
+    median = median(Value),
+    mad = mad(Value)
   )
 ```
 
@@ -113,13 +112,13 @@ hardlyworking_z <- standardize(hardlyworking)
 ```{r, eval=FALSE}
 hardlyworking_z %>%
   select(-xtra_hours_z, -xtra_hours_zr) %>%
-  pivot_longer(everything()) %>%
-  group_by(name) %>%
+  data_to_long() %>%
+  group_by(Name) %>%
   summarise(
-    mean = mean(value),
-    sd = sd(value),
-    median = median(value),
-    mad = mad(value)
+    mean = mean(Value),
+    sd = sd(Value),
+    median = median(Value),
+    mad = mad(Value)
   )
 ```
 

From 03f85bf5ba50ab0b2a2a16d39d1489a9309ca548 Mon Sep 17 00:00:00 2001
From: RemPsyc <RemPsyc@users.noreply.github.com>
Date: Sun, 26 Jun 2022 15:29:55 -0400
Subject: [PATCH 7/7] added new tests for new winsorization methods,
 insight::format_message(), data[] <- lapply...

---
 R/winsorize.R                                        | 10 +++++-----
 ...convert_data_to_numeric.md => data_to_numeric.md} |  6 +++---
 tests/testthat/test-winsorization.R                  | 12 ++++++++++++
 3 files changed, 20 insertions(+), 8 deletions(-)
 rename tests/testthat/_snaps/{convert_data_to_numeric.md => data_to_numeric.md} (82%)

diff --git a/R/winsorize.R b/R/winsorize.R
index 50a0ac92e..e74d5fae8 100644
--- a/R/winsorize.R
+++ b/R/winsorize.R
@@ -65,8 +65,8 @@ winsorize.logical <- winsorize.factor
 #' @export
 winsorize.data.frame <- function(data, threshold = 0.2, method = "percentile", robust = FALSE,
                                  verbose = TRUE, ...) {
-  data <- lapply(data, winsorize, threshold = threshold, method = method, robust = robust, verbose = verbose)
-  as.data.frame(data)
+  data[] <- lapply(data, winsorize, threshold = threshold, method = method, robust = robust, verbose = verbose)
+  data
 }
 
 #' @rdname winsorize
@@ -78,7 +78,7 @@ winsorize.numeric <- function(data, threshold = 0.2, method = "percentile", robu
   if (method == "raw") {
     if (length(threshold) != 2L) {
       if (isTRUE(verbose)) {
-        warning("threshold must be of length 2 for lower and upper bound. Did not winsorize data.", call. = FALSE)
+        warning(insight::format_message("threshold must be of length 2 for lower and upper bound. Did not winsorize data."), call. = FALSE)
       }
       return(data)
     }
@@ -87,7 +87,7 @@ winsorize.numeric <- function(data, threshold = 0.2, method = "percentile", robu
   if(method == "percentile") {
     if (threshold < 0 || threshold > 0.5) {
       if (isTRUE(verbose)) {
-        warning("'threshold' for winsorization must be a scalar between 0 and 0.5. Did not winsorize data.", call. = FALSE)
+        warning(insight::format_message("'threshold' for winsorization must be a scalar between 0 and 0.5. Did not winsorize data."), call. = FALSE)
       }
       return(data)
     }
@@ -104,7 +104,7 @@ winsorize.numeric <- function(data, threshold = 0.2, method = "percentile", robu
 
     if (threshold <= 0) {
       if (isTRUE(verbose)) {
-        warning("'threshold' for winsorization must be a scalar greater than 0. Did not winsorize data.", call. = FALSE)
+        warning(insight::format_message("'threshold' for winsorization must be a scalar greater than 0. Did not winsorize data."), call. = FALSE)
       }
       return(data)
     }
diff --git a/tests/testthat/_snaps/convert_data_to_numeric.md b/tests/testthat/_snaps/data_to_numeric.md
similarity index 82%
rename from tests/testthat/_snaps/convert_data_to_numeric.md
rename to tests/testthat/_snaps/data_to_numeric.md
index 8ccb2e767..029f13c37 100644
--- a/tests/testthat/_snaps/convert_data_to_numeric.md
+++ b/tests/testthat/_snaps/data_to_numeric.md
@@ -1,7 +1,7 @@
 # convert dataframe to numeric
 
     Code
-      convert_data_to_numeric(head(ToothGrowth))
+      data_to_numeric(head(ToothGrowth))
     Output
          len supp.OJ supp.VC dose
       1  4.2       0       1  0.5
@@ -14,7 +14,7 @@
 ---
 
     Code
-      convert_data_to_numeric(head(ToothGrowth), dummy_factors = FALSE)
+      data_to_numeric(head(ToothGrowth), dummy_factors = FALSE)
     Output
          len supp dose
       1  4.2    2  0.5
@@ -27,7 +27,7 @@
 # convert factor to numeric
 
     Code
-      convert_data_to_numeric(f)
+      data_to_numeric(f)
     Output
          a c i s t
       1  0 0 0 1 0
diff --git a/tests/testthat/test-winsorization.R b/tests/testthat/test-winsorization.R
index c618ad7c9..f6ebb23f0 100644
--- a/tests/testthat/test-winsorization.R
+++ b/tests/testthat/test-winsorization.R
@@ -17,6 +17,18 @@ test_that("winsorize: threshold must be between 0 and 1", {
     winsorize(sample(1:10, 5), threshold = 1.1),
     regexp = "must be a scalar between 0 and 0.5"
   )
+  expect_warning(
+    winsorize(sample(1:10, 5), method = "zscore", threshold = -3),
+    regexp = "must be a scalar greater than 0"
+  )
+  expect_warning(
+    winsorize(sample(1:10, 5), method = "zscore", threshold = -3, robust = TRUE),
+    regexp = "must be a scalar greater than 0"
+  )
+  expect_warning(
+    winsorize(sample(1:10, 5), method = "raw", threshold = 1.1),
+    regexp = "must be of length 2 for lower and upper bound"
+  )
   x <- sample(1:10, 5)
   suppressWarnings({
     y <- winsorize(x, threshold = -0.1)