Skip to content

Commit

Permalink
download non-tabular datasets with export.socrata #126
Browse files Browse the repository at this point in the history
  • Loading branch information
nicklucius committed May 6, 2017
1 parent 4efc592 commit afd8d64
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 12 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ Description: Provides easier interaction with
format and manages throttling by 'Socrata'.
Users can upload data to Socrata portals directly
from R.
Version: 1.8.0-1
Version: 1.8.0-2
Date: 2017-05-05
Author: Hugh Devlin, Ph. D., Tom Schenk, Jr., and John Malc
Maintainer: "Tom Schenk Jr." <[email protected]>
Expand Down
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Generated by roxygen2: do not edit by hand

export(export.socrata)
export(fieldName)
export(isFourByFour)
export(ls.socrata)
Expand All @@ -17,3 +18,4 @@ importFrom(jsonlite,fromJSON)
importFrom(mime,guess_type)
importFrom(plyr,rbind.fill)
importFrom(utils,read.csv)
importFrom(utils,write.csv)
43 changes: 32 additions & 11 deletions R/RSocrata.R
Original file line number Diff line number Diff line change
Expand Up @@ -469,6 +469,8 @@ write.socrata <- function(dataframe, dataset_json_endpoint, update_mode, email,
#' @param url - the base URL of a domain (e.g., "data.cityofchicago.org")
#' @return a Gzipped file whose filename contains the four-by-four and the timestamp of when the download began
#' @author Tom Schenk Jr \email{tom.schenk@@cityofchicago.org}
#' @importFrom httr GET
#' @importFrom utils write.csv
#' @export
export.socrata <- function(url) {
dir.create(basename(url), showWarnings = FALSE) # Create directory based on URL
Expand All @@ -480,16 +482,35 @@ export.socrata <- function(url) {

# Download data
downloadUrl <- ls$distribution[[i]]$downloadURL[1] # Currently grabs CSV, which is the first element
d <- read.socrata(downloadUrl)

# Construct the filename output
downloadTimeChr <- gsub('\\s+','_',downloadTime) # Remove spaces and replaces with underscore
downloadTimeChr <- gsub(':', '', downloadTimeChr) # Removes colon from timestamp to be valid filename
filename <- httr::parse_url(ls$identifier[i])
filename$path <- substr(filename$path, 11, 19)
filename <- paste0(filename$hostname, "/", filename$path, "_", downloadTimeChr, ".", default_format, ".gz")

# Write file
write.csv(d, file = gzfile(filename))
if (grepl(".csv", downloadUrl)) {
d <- read.socrata(downloadUrl)

# Construct the filename output
default_format <- "csv"
downloadTimeChr <- gsub('\\s+','_',downloadTime) # Remove spaces and replaces with underscore
downloadTimeChr <- gsub(':', '', downloadTimeChr) # Removes colon from timestamp to be valid filename
filename <- httr::parse_url(ls$identifier[i])
filename$path <- substr(filename$path, 11, 19)
filename <- paste0(filename$hostname, "/", filename$path, "_", downloadTimeChr, ".", default_format, ".gz")

# Write file
write.csv(d, file = gzfile(filename))

} else {
response <- GET(downloadUrl)

# Construct the filename output
default_format <- response$headers$`content-disposition`
default_format <- strsplit(default_format, "filename=")[[1]][2]
downloadTimeChr <- gsub('\\s+','_',downloadTime) # Remove spaces and replaces with underscore
downloadTimeChr <- gsub(':', '', downloadTimeChr) # Removes colon from timestamp to be valid filename
filename <- httr::parse_url(ls$identifier[i])
filename$path <- substr(filename$path, 11, 19)
filename <- paste0(filename$hostname, "/", filename$path, "_", downloadTimeChr, ".", default_format)

# Write file
writeBin(response$content, filename)
}

}
}

0 comments on commit afd8d64

Please sign in to comment.