From 3859277e16fd308eafb9a3d3c8b3a837622b7a09 Mon Sep 17 00:00:00 2001 From: Lixun Zhang Date: Mon, 21 Dec 2015 16:03:40 -0500 Subject: [PATCH 1/2] Add an update.dataset() function --- R/datasets.R | 36 ++++++++++++++++++++++++++++++++++ inst/examples/example_update.R | 17 ++++++++++++++++ 2 files changed, 53 insertions(+) create mode 100644 inst/examples/example_update.R diff --git a/R/datasets.R b/R/datasets.R index f631674..44c5a51 100644 --- a/R/datasets.R +++ b/R/datasets.R @@ -228,3 +228,39 @@ delete.datasets <- function(ws, name, host){ refresh(ws, "datasets") ans } + +#' Update an existing dataset in an AzureML workspace from an R data frame. +#' +#' Replace an existing AzureML dataset with the contents of an R data frame using the \code{GenericTSV} format: the dataset called \code{name} is deleted and \code{x} is then uploaded under the same name. +#' +#' @inheritParams refresh +#' @param x An R data frame object +#' @param name A character name for an existing AzureML dataset +#' @param description An optional character description of the dataset; if left empty, the description of the existing dataset is reused +#' @param family_id An optional AzureML family identifier; if left empty, the family identifier of the existing dataset is reused +#' @param ... Optional additional options meant for \code{write.table}. NOTE(review): the function body does not pass \code{...} on to \code{upload.dataset}; confirm whether it should. +#' @note The additional \code{\link[utils]{write.table}} options may not include \code{sep} or \code{row.names} or \code{file}, but any other options are accepted. +#' +#' @return A single-row data frame of "Datasets" class that corresponds to the updated object now available in ws$datasets. +#' @importFrom curl curl_escape new_handle handle_setheaders handle_reset handle_setopt curl_fetch_memory +#' @importFrom jsonlite fromJSON +#' @export +#' @family dataset functions +#' @example inst/examples/example_update.R + +update.dataset <- function(x, ws, name, description = "", family_id="", ...) 
+{ + # use description and family_id from the existing dataset if no new values are provided + if (description == "") description = ws$datasets$Description[ws$datasets$Name == name] + if (family_id == "") family_id = ws$datasets$FamilyId[ws$datasets$Name == name] + + # delete existing data and save status + dr <- delete.datasets(ws, name) + + # take care of exceptions from delete: dataset does not exist or it's Microsoft dataset + if (dim(dr)[1] == 0) stop("The dataset named '", name, "' does not exist") + else if (dr$Deleted == FALSE) stop("Update not allowed for Microsoft default datasets") + + # upload + upload.dataset(x, ws, name, description, family_id) +} \ No newline at end of file diff --git a/inst/examples/example_update.R b/inst/examples/example_update.R new file mode 100644 index 0000000..2a8dc13 --- /dev/null +++ b/inst/examples/example_update.R @@ -0,0 +1,17 @@ +\dontrun{ + library(AzureML) + + ws <- workspace() + + # Upload the R airquality data.frame to the workspace. + upload.dataset(airquality, ws, "airquality") + + # Replace the uploaded dataset with just the first 10 rows of airquality + update.dataset(airquality[1:10, ], ws, "airquality") + + # Download the updated dataset to check its content + download.datasets(ws, name = "airquality") + + # Now delete what we've just uploaded + delete.datasets(ws, "airquality") +} From 8331e6368d6a454133ecbcaf39be0525a33500e2 Mon Sep 17 00:00:00 2001 From: Lixun Zhang Date: Tue, 22 Dec 2015 11:12:12 -0500 Subject: [PATCH 2/2] Modify update.dataset() function to 1)make sure of data type GenericTSV and 2) check data existence before deletion --- R/datasets.R | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/R/datasets.R b/R/datasets.R index 44c5a51..a766a89 100644 --- a/R/datasets.R +++ b/R/datasets.R @@ -248,19 +248,23 @@ delete.datasets <- function(ws, name, host){ #' @family dataset functions #' @example inst/examples/example_update.R -update.dataset <- function(x, ws, name, description = "", 
family_id="", ...) +update.dataset <- function(x, ws, name, description = "", family_id = "", ...) { + + # refuse Microsoft sample datasets, and fail early when the name is not among the user's own datasets + if (name %in% datasets(ws, filter = "samples")$Name) stop("Update not allowed for Microsoft sample datasets.") + else if (!(name %in% datasets(ws, filter = "my datasets")$Name)) stop("The dataset named '", name, "' does not exist.") + + # only an existing dataset whose DataTypeId is GenericTSV (compared case-insensitively) may be updated + if (tolower(ws$datasets$DataTypeId[ws$datasets$Name == name]) != "generictsv") stop("The existing dataset is not of type GenericTSV.") + # use description and family_id from the existing dataset if no new values are provided if (description == "") description = ws$datasets$Description[ws$datasets$Name == name] if (family_id == "") family_id = ws$datasets$FamilyId[ws$datasets$Name == name] - # delete existing data and save status + # delete the existing dataset before uploading; NOTE(review): the result `dr` is not inspected here, so a failed delete or a failed upload afterwards is presumably not handled -- confirm dr <- delete.datasets(ws, name) - # take care of exceptions from delete: dataset does not exist or it's Microsoft dataset - if (dim(dr)[1] == 0) stop("The dataset named '", name, "' does not exist") - else if (dr$Deleted == FALSE) stop("Update not allowed for Microsoft default datasets") - # upload upload.dataset(x, ws, name, description, family_id) -} \ No newline at end of file +}