From d0b64348cb3ded89c75eea9a5a2ec4cae2a4bfd2 Mon Sep 17 00:00:00 2001
From: Jordan Bradford <36420801+jrdnbradford@users.noreply.github.com>
Date: Sun, 1 Sep 2024 21:32:45 -0400
Subject: [PATCH 1/5] Fixes

---
 README.Rmd | 5 +++--
 README.md | 14 +++++++-------
 2 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/README.Rmd b/README.Rmd
index 14d6c5e..0ac7f34 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -32,7 +32,7 @@ tmp <- packageDescription( basename(getwd()) )
 ```
 
 ```{r, results='asis', echo=FALSE}
-cat(paste0(tmp$Title, "."))
+cat(paste0(tmp$Title, ". "))
 cat(tmp$Description)
 ```
 
@@ -379,4 +379,5 @@ paths_allowed("petermeissner.de/I_want_to_scrape_this_now", verbose = TRUE)
 
 - https://www.robotstxt.org/robotstxt.html
 
-[![ropensci\_footer](https://raw.githubusercontent.com/ropensci/robotstxt/master/logo/github_footer.png)](https://ropensci.org)
+[![ropensci\_footer](https://ropensci.org/public_images/github_footer.png)](https://ropensci.org/)
+
diff --git a/README.md b/README.md
index 086df3a..fa8085c 100644
--- a/README.md
+++ b/README.md
@@ -19,7 +19,7 @@ coverage](https://codecov.io/gh/ropensci/robotstxt/graph/badge.svg)](https://app
 
 A ‘robots.txt’ Parser and ‘Webbot’/‘Spider’/‘Crawler’ Permissions
-Checker.Provides functions to download and parse ‘robots.txt’ files.
+Checker. Provides functions to download and parse ‘robots.txt’ files.
 Ultimately the package makes it easy to check if bots (spiders,
 crawler, scrapers, …) are allowed to access specific resources on a
 domain.
 
@@ -430,7 +430,7 @@ The last HTTP request is stored in an object
 ``` r
 rt_last_http$request
 ## Response [https://petermeissner.de/robots.txt]
-## Date: 2024-08-31 17:33
+## Date: 2024-09-02 01:32
 ## Status: 200
 ## Content-Type: text/plain
 ## Size: 20 B
@@ -472,7 +472,7 @@ was going on in the client-server exchange.
 ``` r
 attr(rt, "request")
 ## Response [https://petermeissner.de/robots.txt]
-## Date: 2024-08-31 17:33
+## Date: 2024-09-02 01:32
 ## Status: 200
 ## Content-Type: text/plain
 ## Size: 20 B
@@ -524,7 +524,7 @@ rt_req$all_headers
 ## [1] "nginx/1.10.3 (Ubuntu)"
 ##
 ## $date
-## [1] "Sat, 31 Aug 2024 17:33:06 GMT"
+## [1] "Mon, 02 Sep 2024 01:32:23 GMT"
 ##
 ## $`content-type`
 ## [1] "text/html"
@@ -554,7 +554,7 @@ rt_req$all_headers
 ## [1] "nginx/1.10.3 (Ubuntu)"
 ##
 ## $date
-## [1] "Sat, 31 Aug 2024 17:33:06 GMT"
+## [1] "Mon, 02 Sep 2024 01:32:24 GMT"
 ##
 ## $`content-type`
 ## [1] "text/plain"
@@ -613,7 +613,7 @@ as.list(rt)
 ##
 ## $request
 ## Response [https://petermeissner.de/robots.txt]
-## Date: 2024-08-31 17:33
+## Date: 2024-09-02 01:32
 ## Status: 200
 ## Content-Type: text/plain
 ## Size: 20 B
@@ -647,4 +647,4 @@ paths_allowed("petermeissner.de/I_want_to_scrape_this_now", verbose = TRUE)
 -
 -
 
-[![ropensci_footer](https://raw.githubusercontent.com/ropensci/robotstxt/master/logo/github_footer.png)](https://ropensci.org)
+[![ropensci_footer](https://ropensci.org/public_images/github_footer.png)](https://ropensci.org/)

From 077ffaf1fe4b20bbe947227877487cb7370011fe Mon Sep 17 00:00:00 2001
From: Jordan Bradford <36420801+jrdnbradford@users.noreply.github.com>
Date: Sun, 1 Sep 2024 21:38:07 -0400
Subject: [PATCH 2/5] Fix `null_to_default` typo

---
 DESCRIPTION | 2 +-
 NEWS.md | 5 +++++
 R/null_to_default.R | 6 +++---
 R/rt_request_handler.R | 4 ++--
 man/{null_to_defeault.Rd => null_to_default.Rd} | 10 +++++-----
 5 files changed, 16 insertions(+), 11 deletions(-)
 rename man/{null_to_defeault.Rd => null_to_default.Rd} (65%)

diff --git a/DESCRIPTION b/DESCRIPTION
index 1df44a7..53fc9f7 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -47,6 +47,6 @@ Suggests:
 Depends:
     R (>= 3.0.0)
 VignetteBuilder: knitr
-RoxygenNote: 7.2.3
+RoxygenNote: 7.3.2
 Encoding: UTF-8
 Config/testthat/edition: 3
diff --git a/NEWS.md b/NEWS.md
index f8d47ad..ca13bb5 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,6 +1,11 @@
 NEWS robotstxt
 ==========================================================================
 
+0.7.15.9000
+--------------------------------------------------------------------------
+
+- `null_to_default` typo fixed
+
 0.7.15 | 2024-08-24
 --------------------------------------------------------------------------
 
diff --git a/R/null_to_default.R b/R/null_to_default.R
index 5ef6195..9864cc9 100644
--- a/R/null_to_default.R
+++ b/R/null_to_default.R
@@ -1,13 +1,13 @@
-#' null_to_defeault
+#' null_to_default
 #'
 #' @param x value to check and return
 #' @param d value to return in case x is NULL
 #'
-null_to_defeault <-
+null_to_default <-
   function(x, d){
     if ( is.null(x) ){
       d
     }else{
       x
     }
-  }
\ No newline at end of file
+  }
diff --git a/R/rt_request_handler.R b/R/rt_request_handler.R
index 893ea22..7932a14 100644
--- a/R/rt_request_handler.R
+++ b/R/rt_request_handler.R
@@ -80,7 +80,7 @@ rt_request_handler <-
 
   # encoding suplied or not
   encoding_supplied <-
-    grepl("charset", null_to_defeault(request$headers$`content-type`, ""))
+    grepl("charset", null_to_default(request$headers$`content-type`, ""))
 
   if ( encoding_supplied == TRUE ) {
 
@@ -211,7 +211,7 @@ rt_request_handler <-
 
   ## file type mismatch
   file_type_mismatch <-
-    !(grepl("text/plain", null_to_defeault(request$headers$`content-type`, "")))
+    !(grepl("text/plain", null_to_default(request$headers$`content-type`, "")))
 
   if ( file_type_mismatch == TRUE ){
     res <-
diff --git a/man/null_to_defeault.Rd b/man/null_to_default.Rd
similarity index 65%
rename from man/null_to_defeault.Rd
rename to man/null_to_default.Rd
index 6ce2810..31b1a07 100644
--- a/man/null_to_defeault.Rd
+++ b/man/null_to_default.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/null_to_default.R
-\name{null_to_defeault}
-\alias{null_to_defeault}
-\title{null_to_defeault}
+\name{null_to_default}
+\alias{null_to_default}
+\title{null_to_default}
 \usage{
-null_to_defeault(x, d)
+null_to_default(x, d)
 }
 \arguments{
 \item{x}{value to check and return}
@@ -12,5 +12,5 @@ null_to_defeault(x, d)
 \item{d}{value to return in case x is NULL}
 }
 \description{
-null_to_defeault
+null_to_default
 }

From 3b88f81bf5b98925a532875141fc17a83359017d Mon Sep 17 00:00:00 2001
From: Jordan Bradford <36420801+jrdnbradford@users.noreply.github.com>
Date: Sun, 1 Sep 2024 22:06:04 -0400
Subject: [PATCH 3/5] Update function docs

---
 R/as_list.R | 6 ++----
 R/fix_url.R | 4 ++--
 R/get_robotstxt.R | 2 +-
 R/get_robotstxt_http_get.R | 7 +++----
 R/get_robotstxts.R | 3 +--
 R/guess_domain.R | 4 ++--
 R/http_domain_changed.R | 2 +-
 R/http_subdomain_changed.R | 4 ++--
 R/http_was_redirected.R | 2 +-
 R/is_suspect_robotstxt.R | 6 ++----
 R/is_valid_robotstxt.R | 6 ++----
 R/null_to_default.R | 2 +-
 R/parse_robotstxt.R | 23 ++---------------------
 R/parse_url.R | 6 +-----
 R/paths_allowed.R | 4 ++--
 R/paths_allowed_worker_spiderbar.R | 4 +---
 R/print_robotstxt.R | 3 +--
 R/print_robotstxt_text.R | 7 +++----
 R/remove_domain.R | 4 +---
 R/request_handler_handler.R | 6 +++---
 R/robotstxt.R | 5 +++--
 R/rt_cache.R | 3 +--
 R/rt_get_comments.R | 4 +---
 R/rt_get_fields.R | 3 +--
 R/rt_get_fields_worker.R | 3 +--
 R/rt_get_useragent.R | 4 ++--
 R/rt_request_handler.R | 14 ++++----------
 R/sanitize_path.R | 3 +--
 R/tools.R | 7 +++----
 man/as.list.robotstxt_text.Rd | 4 ++--
 man/fix_url.Rd | 4 ++--
 man/get_robotstxt.Rd | 4 ++--
 man/get_robotstxt_http_get.Rd | 8 ++++----
 man/get_robotstxts.Rd | 4 ++--
 man/guess_domain.Rd | 4 ++--
 man/http_domain_changed.Rd | 4 ++--
 man/http_subdomain_changed.Rd | 6 +++---
 man/http_was_redirected.Rd | 4 ++--
 man/is_suspect_robotstxt.Rd | 4 ++--
 man/is_valid_robotstxt.Rd | 6 +++---
 man/named_list.Rd | 4 ++--
 man/null_to_default.Rd | 4 ++--
 man/parse_robotstxt.Rd | 4 ++--
 man/parse_url.Rd | 4 ++--
 man/paths_allowed.Rd | 6 +++---
 man/paths_allowed_worker_spiderbar.Rd | 6 +++---
 man/print.robotstxt.Rd | 4 ++--
 man/print.robotstxt_text.Rd | 4 ++--
 man/remove_domain.Rd | 4 ++--
 man/request_handler_handler.Rd | 4 ++--
 man/robotstxt.Rd | 4 ++--
 man/rt_cache.Rd | 4 ++--
 man/rt_get_comments.Rd | 4 ++--
 man/rt_get_fields.Rd | 4 ++--
 man/rt_get_fields_worker.Rd | 4 ++--
 man/rt_get_rtxt.Rd | 2 +-
 man/rt_get_useragent.Rd | 4 ++--
 man/rt_list_rtxt.Rd | 2 +-
 man/rt_request_handler.Rd | 6 +++---
 man/sanitize_path.Rd | 4 ++--
 60 files changed, 118 insertions(+), 167 deletions(-)

diff --git a/R/as_list.R b/R/as_list.R
index 08fc547..cabc891 100644
--- a/R/as_list.R
+++ b/R/as_list.R
@@ -1,6 +1,4 @@
-
-
-#' Method as.list() for class robotstxt_text
+#' Convert robotstxt_text to list
 #'
 #' @param x class robotstxt_text object to be transformed into list
 #' @param ... 
further arguments (inherited from \code{base::as.list()}) @@ -17,4 +15,4 @@ as.list.robotstxt_text <- res$request <- attr(x, "request") res - } \ No newline at end of file + } diff --git a/R/fix_url.R b/R/fix_url.R index 50a51f7..4fc4a78 100644 --- a/R/fix_url.R +++ b/R/fix_url.R @@ -1,4 +1,4 @@ -#' fix_url +#' Add http protocal if missing from URL #' #' #' @param url a character string containing a single URL @@ -10,4 +10,4 @@ fix_url <- url <- paste0("http://", url) } url - } \ No newline at end of file + } diff --git a/R/get_robotstxt.R b/R/get_robotstxt.R index 42e7947..b1778cc 100644 --- a/R/get_robotstxt.R +++ b/R/get_robotstxt.R @@ -1,4 +1,4 @@ -#' downloading robots.txt file +#' Download a robots.txt file #' #' @param domain domain from which to download robots.txt file #' @param warn warn about being unable to download domain/robots.txt because of diff --git a/R/get_robotstxt_http_get.R b/R/get_robotstxt_http_get.R index 369176e..472eefb 100644 --- a/R/get_robotstxt_http_get.R +++ b/R/get_robotstxt_http_get.R @@ -1,5 +1,4 @@ - -#' storage for http request response objects +#' Storage for http request response objects #' #' @rdname get_robotstxt_http_get #' @@ -7,11 +6,11 @@ rt_last_http <- new.env() rt_last_http$request <- list() -#' get_robotstxt() worker function to execute HTTP request +#' Execute HTTP request for get_robotstxt() #' #' @param ssl_verifypeer either 1 (default) or 0, if 0 it disables SSL peer verification, which #' might help with robots.txt file retrieval -#' @param domain the domain to get tobots.txt. file for +#' @param domain the domain to get robots.txt. file for #' @param user_agent the user agent to use for HTTP request header #' #' @export diff --git a/R/get_robotstxts.R b/R/get_robotstxts.R index fbf755c..36ed107 100644 --- a/R/get_robotstxts.R +++ b/R/get_robotstxts.R @@ -1,5 +1,4 @@ - -#' function to get multiple robotstxt files +#' Download multiple robotstxt files #' #' @inheritParams get_robotstxt #' @param use_futures Should future::future_lapply be used for possible diff --git a/R/guess_domain.R b/R/guess_domain.R index 1ba030e..4f2b142 100644 --- a/R/guess_domain.R +++ b/R/guess_domain.R @@ -1,4 +1,4 @@ -#' function guessing domain from path +#' Guess a domain from path #' @param x path aka URL from which to infer domain guess_domain <- function(x){ @@ -23,4 +23,4 @@ guess_domain <- function(x){ return(domain) } -} \ No newline at end of file +} diff --git a/R/http_domain_changed.R b/R/http_domain_changed.R index a017548..66af645 100644 --- a/R/http_domain_changed.R +++ b/R/http_domain_changed.R @@ -1,4 +1,4 @@ -#' http_domain_changed +#' Check if HTTP domain changed #' #' @param response an httr response object, e.g. from a call to httr::GET() #' diff --git a/R/http_subdomain_changed.R b/R/http_subdomain_changed.R index 0f95fc7..a9ec4db 100644 --- a/R/http_subdomain_changed.R +++ b/R/http_subdomain_changed.R @@ -1,8 +1,8 @@ -#' http_subdomain_changed +#' Check if HTTP subdomain changed #' #' @param response an httr response object, e.g. from a call to httr::GET() #' -#' @return logical of length 1 indicating whether or not any domain change +#' @return logical of length 1 indicating whether or not any subdomain change #' happened during the HTTP request #' #' diff --git a/R/http_was_redirected.R b/R/http_was_redirected.R index ef0ef24..010d78f 100644 --- a/R/http_was_redirected.R +++ b/R/http_was_redirected.R @@ -1,4 +1,4 @@ -#' http_was_redirected +#' Check if HTTP redirect occurred #' #' @param response an httr response object, e.g. 
from a call to httr::GET() #' diff --git a/R/is_suspect_robotstxt.R b/R/is_suspect_robotstxt.R index a916fdb..aad3ef9 100644 --- a/R/is_suspect_robotstxt.R +++ b/R/is_suspect_robotstxt.R @@ -1,6 +1,6 @@ -#' is_suspect_robotstxt +#' Check if file is valid / parsable robots.txt file #' -#' function that checks if file is valid / parsable robots.txt file +#' Function that checks if file is valid / parsable robots.txt file #' #' @param text content of a robots.txt file provides as character vector #' @@ -26,5 +26,3 @@ is_suspect_robotstxt <- function(text){ # return default return(FALSE) } - - diff --git a/R/is_valid_robotstxt.R b/R/is_valid_robotstxt.R index 77ac906..8aa367c 100644 --- a/R/is_valid_robotstxt.R +++ b/R/is_valid_robotstxt.R @@ -1,6 +1,6 @@ -#' function that checks if file is valid / parsable robots.txt file +#' Validate if a file is valid / parsable robots.txt file #' -#' @param text content of a robots.txt file provides as character vector +#' @param text content of a robots.txt file provided as character vector #' @param check_strickt_ascii whether or not to check if content does adhere to the specification of RFC to use plain text aka ASCII #' #' @export @@ -39,5 +39,3 @@ is_valid_robotstxt <- function(text, check_strickt_ascii = FALSE){ ) } - - diff --git a/R/null_to_default.R b/R/null_to_default.R index 9864cc9..530e5e2 100644 --- a/R/null_to_default.R +++ b/R/null_to_default.R @@ -1,4 +1,4 @@ -#' null_to_default +#' Return default value if NULL #' #' @param x value to check and return #' @param d value to return in case x is NULL diff --git a/R/parse_robotstxt.R b/R/parse_robotstxt.R index 168ce6b..e819c16 100644 --- a/R/parse_robotstxt.R +++ b/R/parse_robotstxt.R @@ -1,9 +1,8 @@ -#' function parsing robots.txt -#' @param txt content of the robots.txt file +#' Parse a robots.txt file +#' @param txt content of the robots.txt file #' @return a named list with useragents, comments, permissions, sitemap #' @export parse_robotstxt <- function(txt){ - # return res <- list( useragents = rt_get_useragent(txt), @@ -21,21 +20,3 @@ parse_robotstxt <- function(txt){ ) return(res) } - - - - - - - - - - - - - - - - - - diff --git a/R/parse_url.R b/R/parse_url.R index 2e393c2..ed768f4 100644 --- a/R/parse_url.R +++ b/R/parse_url.R @@ -1,13 +1,9 @@ - - - -#' parse_url +#' Parse a URL #' #' @param url url to parse into its components #' #' @return data.frame with columns protocol, domain, path #' -#' #' @keywords internal #' #' @examples diff --git a/R/paths_allowed.R b/R/paths_allowed.R index 199f04e..8822dd1 100644 --- a/R/paths_allowed.R +++ b/R/paths_allowed.R @@ -1,4 +1,4 @@ -#' check if a bot has permissions to access page(s) +#' Check if a bot has permissions to access page(s) #' #' #' @param domain Domain for which paths should be checked. Defaults to "auto". @@ -6,7 +6,7 @@ #' argument. Note however, that these are educated guesses which might utterly #' fail. To be on the safe side, provide appropriate domains manually. #' @param bot name of the bot, defaults to "*" -#' @param paths paths for which to check bot's permission, defaults to "/". Please, note that path to a folder should end with a trailing slash ("/"). +#' @param paths paths for which to check bot's permission, defaults to "/". Please note that path to a folder should end with a trailing slash ("/"). 
#' @param check_method at the moment only kept for backward compatibility reasons - do not use parameter anymore --> will let the function simply use the default #' @param robotstxt_list either NULL -- the default -- or a list of character #' vectors with one vector per path to check diff --git a/R/paths_allowed_worker_spiderbar.R b/R/paths_allowed_worker_spiderbar.R index 94e2aeb..e21fc86 100644 --- a/R/paths_allowed_worker_spiderbar.R +++ b/R/paths_allowed_worker_spiderbar.R @@ -1,6 +1,4 @@ - - -#' paths_allowed_worker spiderbar flavor +#' Check if a spiderbar bot has permissions to access page(s) #' #' @inheritParams paths_allowed #' diff --git a/R/print_robotstxt.R b/R/print_robotstxt.R index 5ed245c..85aa65d 100644 --- a/R/print_robotstxt.R +++ b/R/print_robotstxt.R @@ -1,4 +1,4 @@ -#' printing robotstxt +#' Print robotstxt #' @param x robotstxt instance to be printed #' @param ... goes down the sink #' @export @@ -29,4 +29,3 @@ print.robotstxt <- function(x, ...){ invisible(x) } - diff --git a/R/print_robotstxt_text.R b/R/print_robotstxt_text.R index 808dfe0..dbffd4a 100644 --- a/R/print_robotstxt_text.R +++ b/R/print_robotstxt_text.R @@ -1,11 +1,10 @@ - -#' printing robotstxt_text +#' Print robotstxt's text #' @param x character vector aka robotstxt$text to be printed #' @param ... goes down the sink #' @export print.robotstxt_text <- function(x, ...){ - # rpint part of the robots.txt file + # print part of the robots.txt file cat("[robots.txt]\n--------------------------------------\n\n") tmp <- unlist(strsplit(x, "\n")) cat(tmp[seq_len(min(length(tmp), 50))], sep ="\n") @@ -29,4 +28,4 @@ print.robotstxt_text <- function(x, ...){ # return invisible(x) -} \ No newline at end of file +} diff --git a/R/remove_domain.R b/R/remove_domain.R index ddea714..c921e03 100644 --- a/R/remove_domain.R +++ b/R/remove_domain.R @@ -1,4 +1,4 @@ -#' function to remove domain from path +#' Remove domain from path #' @param x path aka URL from which to first infer domain and then remove it remove_domain <- function(x){ unlist(lapply( @@ -12,5 +12,3 @@ remove_domain <- function(x){ } )) } - - diff --git a/R/request_handler_handler.R b/R/request_handler_handler.R index 81637f8..d3f8805 100644 --- a/R/request_handler_handler.R +++ b/R/request_handler_handler.R @@ -1,10 +1,10 @@ -#' request_handler_handler +#' Handle robotstxt handlers #' #' Helper function to handle robotstxt handlers. #' #' @param request the request object returned by call to httr::GET() #' @param handler the handler either a character string entailing various options or a function producing a specific list, see return. -#' @param res a list a list with elements '[handler names], ...', 'rtxt', and 'cache' +#' @param res a list with elements '[handler names], ...', 'rtxt', and 'cache' #' @param info info to add to problems list #' @param warn if FALSE warnings and messages are suppressed #' @@ -72,4 +72,4 @@ request_handler_handler <- # return res - } \ No newline at end of file + } diff --git a/R/robotstxt.R b/R/robotstxt.R index 1f2ddb8..7df7682 100644 --- a/R/robotstxt.R +++ b/R/robotstxt.R @@ -1,4 +1,4 @@ -#' Generate a representations of a robots.txt file +#' Generate a representation of a robots.txt file #' #' The function generates a list that entails data resulting from parsing a robots.txt file #' as well as a function called check that enables to ask the representation if bot (or @@ -6,6 +6,7 @@ #' #' @param domain Domain for which to generate a representation. 
If text equals to NULL, #' the function will download the file from server - the default. +#' #' @param text If automatic download of the robots.txt is not preferred, the text can be #' supplied directly. #' @inheritParams get_robotstxt @@ -20,7 +21,7 @@ #' @field domain character vector holding domain name for which the robots.txt #' file is valid; will be set to NA if not supplied on initialization #' -#' @field text character vector of text of robots.txt file; either supplied on +#' @field character vector of text of robots.txt file; either supplied on #' initialization or automatically downloaded from domain supplied on #' initialization #' diff --git a/R/rt_cache.R b/R/rt_cache.R index 6d921b4..cd25ebe 100644 --- a/R/rt_cache.R +++ b/R/rt_cache.R @@ -1,3 +1,2 @@ -#' get_robotstxt() cache +#' Get the robotstxt cache rt_cache <- new.env( parent = emptyenv() ) - diff --git a/R/rt_get_comments.R b/R/rt_get_comments.R index 2006a07..c01059a 100644 --- a/R/rt_get_comments.R +++ b/R/rt_get_comments.R @@ -1,5 +1,4 @@ - -#' extracting comments from robots.txt +#' Extract comments from robots.txt #' @param txt content of the robots.txt file #' @keywords internal rt_get_comments <- function(txt){ @@ -8,4 +7,3 @@ rt_get_comments <- function(txt){ ccontent <- stringr::str_extract(txt[clines], "#.*") data.frame(line=clines, comment=ccontent, stringsAsFactors = FALSE) } - diff --git a/R/rt_get_fields.R b/R/rt_get_fields.R index 70647e8..b1ed964 100644 --- a/R/rt_get_fields.R +++ b/R/rt_get_fields.R @@ -1,5 +1,4 @@ - -#' extracting permissions from robots.txt +#' Extract permissions from robots.txt #' @param txt content of the robots.txt file #' @param regex regular expression specify field #' @param invert invert selection made via regex? diff --git a/R/rt_get_fields_worker.R b/R/rt_get_fields_worker.R index 9fcda79..f366464 100644 --- a/R/rt_get_fields_worker.R +++ b/R/rt_get_fields_worker.R @@ -1,5 +1,4 @@ - -#' extracting robotstxt fields +#' Extract robotstxt fields #' @param txt content of the robots.txt file #' @param type name or names of the fields to be returned, defaults to all #' fields diff --git a/R/rt_get_useragent.R b/R/rt_get_useragent.R index b32be3f..6d98f4f 100644 --- a/R/rt_get_useragent.R +++ b/R/rt_get_useragent.R @@ -1,4 +1,4 @@ -#' extracting HTTP useragents from robots.txt +#' Extract HTTP useragents from robots.txt #' @param txt content of the robots.txt file #' @keywords internal # rt_get_useragent <- function(txt){ @@ -19,4 +19,4 @@ rt_get_useragent <- function(txt){ pattern = stringr::regex("U.*:| |\n", ignore_case = TRUE), replacement = "" ) -} \ No newline at end of file +} diff --git a/R/rt_request_handler.R b/R/rt_request_handler.R index 7932a14..3ea1861 100644 --- a/R/rt_request_handler.R +++ b/R/rt_request_handler.R @@ -1,15 +1,13 @@ - -#' rt_request_handler +#' Handle robotstxt object retrieved from HTTP request #' #' A helper function for get_robotstxt() that will extract the robots.txt file -#' from the HTTP request result object. furthermore it will inform -#' get_robotstxt() if the request should be cached and which problems occured. +#' from the HTTP request result object. It will inform get_robotstxt() if the +#' request should be cached and which problems occurred. #' #' #' #' @param request result of an HTTP request (e.g. 
httr::GET()) #' -#' #' @param on_server_error request state handler for any 5xx status #' #' @param on_client_error request state handler for any 4xx HTTP status that is @@ -31,8 +29,8 @@ #' @param on_suspect_content request state handler for content that seems to be #' something else than a robots.txt file (usually a JSON, XML or HTML) #' -#' #' @param warn suppress warnings +#' #' @param encoding The text encoding to assume if no encoding is provided in the #' headers of the response #' @@ -99,8 +97,6 @@ rt_request_handler <- } - - ## server error server_error <- request$status_code >= 500 @@ -207,8 +203,6 @@ rt_request_handler <- } - - ## file type mismatch file_type_mismatch <- !(grepl("text/plain", null_to_default(request$headers$`content-type`, ""))) diff --git a/R/sanitize_path.R b/R/sanitize_path.R index 4a778f6..5afd78c 100644 --- a/R/sanitize_path.R +++ b/R/sanitize_path.R @@ -1,4 +1,4 @@ -#' making paths uniform +#' Make paths uniform #' @param path path to be sanitized #' @return sanitized path #' @keywords internal @@ -7,4 +7,3 @@ sanitize_path <- function(path){ path <- ifelse( !grepl("^/", path), paste0("/", path), path) return(path) } - diff --git a/R/tools.R b/R/tools.R index e44e675..d36fd67 100644 --- a/R/tools.R +++ b/R/tools.R @@ -1,4 +1,4 @@ -#' make automatically named list +#' Create a named list #' @param ... things to be put in list #' @keywords internal named_list <- function(...){ @@ -7,7 +7,7 @@ named_list <- function(...){ thelist } -#' load robots.txt files saved along with the package +#' Load robots.txt files saved along with the package #' #' load robots.txt files saved along with the package: #' these functions are very handy for testing (not used otherwise) @@ -23,7 +23,7 @@ rt_get_rtxt <- function(name=sample(rt_list_rtxt(),1)){ ) } -#' list robots.txt files saved along with the package +#' List robots.txt files saved along with the package #' #' list robots.txt files saved along with the package: #' these functions ar very handy for testing (not used otherwise) @@ -31,4 +31,3 @@ rt_get_rtxt <- function(name=sample(rt_list_rtxt(),1)){ rt_list_rtxt <- function(){ list.files(system.file("robotstxts", package = "robotstxt" )) } - diff --git a/man/as.list.robotstxt_text.Rd b/man/as.list.robotstxt_text.Rd index 7b353d1..9a720a7 100644 --- a/man/as.list.robotstxt_text.Rd +++ b/man/as.list.robotstxt_text.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/as_list.R \name{as.list.robotstxt_text} \alias{as.list.robotstxt_text} -\title{Method as.list() for class robotstxt_text} +\title{Convert robotstxt_text to list} \usage{ \method{as.list}{robotstxt_text}(x, ...) 
} @@ -12,5 +12,5 @@ \item{...}{further arguments (inherited from \code{base::as.list()})} } \description{ -Method as.list() for class robotstxt_text +Convert robotstxt_text to list } diff --git a/man/fix_url.Rd b/man/fix_url.Rd index 235fdc6..16d21a2 100644 --- a/man/fix_url.Rd +++ b/man/fix_url.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/fix_url.R \name{fix_url} \alias{fix_url} -\title{fix_url} +\title{Add http protocal if missing from URL} \usage{ fix_url(url) } @@ -10,5 +10,5 @@ fix_url(url) \item{url}{a character string containing a single URL} } \description{ -fix_url +Add http protocal if missing from URL } diff --git a/man/get_robotstxt.Rd b/man/get_robotstxt.Rd index d4384bf..f45650c 100644 --- a/man/get_robotstxt.Rd +++ b/man/get_robotstxt.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/get_robotstxt.R \name{get_robotstxt} \alias{get_robotstxt} -\title{downloading robots.txt file} +\title{Download a robots.txt file} \usage{ get_robotstxt( domain, @@ -66,5 +66,5 @@ than 'text/plain'} something else than a robots.txt file (usually a JSON, XML or HTML)} } \description{ -downloading robots.txt file +Download a robots.txt file } diff --git a/man/get_robotstxt_http_get.Rd b/man/get_robotstxt_http_get.Rd index 7394273..f14bd91 100644 --- a/man/get_robotstxt_http_get.Rd +++ b/man/get_robotstxt_http_get.Rd @@ -4,7 +4,7 @@ \name{rt_last_http} \alias{rt_last_http} \alias{get_robotstxt_http_get} -\title{storage for http request response objects} +\title{Storage for http request response objects} \format{ An object of class \code{environment} of length 1. } @@ -18,7 +18,7 @@ get_robotstxt_http_get( ) } \arguments{ -\item{domain}{the domain to get tobots.txt. file for} +\item{domain}{the domain to get robots.txt. file for} \item{user_agent}{the user agent to use for HTTP request header} @@ -26,8 +26,8 @@ get_robotstxt_http_get( might help with robots.txt file retrieval} } \description{ -storage for http request response objects +Storage for http request response objects -get_robotstxt() worker function to execute HTTP request +Execute HTTP request for get_robotstxt() } \keyword{datasets} diff --git a/man/get_robotstxts.Rd b/man/get_robotstxts.Rd index 2df0a1b..5aefe89 100644 --- a/man/get_robotstxts.Rd +++ b/man/get_robotstxts.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/get_robotstxts.R \name{get_robotstxts} \alias{get_robotstxts} -\title{function to get multiple robotstxt files} +\title{Download multiple robotstxt files} \usage{ get_robotstxts( domain, @@ -70,5 +70,5 @@ than 'text/plain'} something else than a robots.txt file (usually a JSON, XML or HTML)} } \description{ -function to get multiple robotstxt files +Download multiple robotstxt files } diff --git a/man/guess_domain.Rd b/man/guess_domain.Rd index 5e2b731..9d8ce48 100644 --- a/man/guess_domain.Rd +++ b/man/guess_domain.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/guess_domain.R \name{guess_domain} \alias{guess_domain} -\title{function guessing domain from path} +\title{Guess a domain from path} \usage{ guess_domain(x) } @@ -10,5 +10,5 @@ guess_domain(x) \item{x}{path aka URL from which to infer domain} } \description{ -function guessing domain from path +Guess a domain from path } diff --git a/man/http_domain_changed.Rd b/man/http_domain_changed.Rd index 9c960fb..592b524 100644 --- a/man/http_domain_changed.Rd +++ b/man/http_domain_changed.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/http_domain_changed.R \name{http_domain_changed} \alias{http_domain_changed} -\title{http_domain_changed} 
+\title{Check if HTTP domain changed} \usage{ http_domain_changed(response) } @@ -14,5 +14,5 @@ logical of length 1 indicating whether or not any domain change happened during the HTTP request } \description{ -http_domain_changed +Check if HTTP domain changed } diff --git a/man/http_subdomain_changed.Rd b/man/http_subdomain_changed.Rd index c29070f..beae495 100644 --- a/man/http_subdomain_changed.Rd +++ b/man/http_subdomain_changed.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/http_subdomain_changed.R \name{http_subdomain_changed} \alias{http_subdomain_changed} -\title{http_subdomain_changed} +\title{Check if HTTP subdomain changed} \usage{ http_subdomain_changed(response) } @@ -10,9 +10,9 @@ http_subdomain_changed(response) \item{response}{an httr response object, e.g. from a call to httr::GET()} } \value{ -logical of length 1 indicating whether or not any domain change +logical of length 1 indicating whether or not any subdomain change happened during the HTTP request } \description{ -http_subdomain_changed +Check if HTTP subdomain changed } diff --git a/man/http_was_redirected.Rd b/man/http_was_redirected.Rd index 3133926..2da5fd7 100644 --- a/man/http_was_redirected.Rd +++ b/man/http_was_redirected.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/http_was_redirected.R \name{http_was_redirected} \alias{http_was_redirected} -\title{http_was_redirected} +\title{Check if HTTP redirect occurred} \usage{ http_was_redirected(response) } @@ -14,5 +14,5 @@ logical of length 1 indicating whether or not any redirect happened during the HTTP request } \description{ -http_was_redirected +Check if HTTP redirect occurred } diff --git a/man/is_suspect_robotstxt.Rd b/man/is_suspect_robotstxt.Rd index 7d83ea0..62473f3 100644 --- a/man/is_suspect_robotstxt.Rd +++ b/man/is_suspect_robotstxt.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/is_suspect_robotstxt.R \name{is_suspect_robotstxt} \alias{is_suspect_robotstxt} -\title{is_suspect_robotstxt} +\title{Check if file is valid / parsable robots.txt file} \usage{ is_suspect_robotstxt(text) } @@ -10,5 +10,5 @@ is_suspect_robotstxt(text) \item{text}{content of a robots.txt file provides as character vector} } \description{ -function that checks if file is valid / parsable robots.txt file +Function that checks if file is valid / parsable robots.txt file } diff --git a/man/is_valid_robotstxt.Rd b/man/is_valid_robotstxt.Rd index b459a31..7a23257 100644 --- a/man/is_valid_robotstxt.Rd +++ b/man/is_valid_robotstxt.Rd @@ -2,15 +2,15 @@ % Please edit documentation in R/is_valid_robotstxt.R \name{is_valid_robotstxt} \alias{is_valid_robotstxt} -\title{function that checks if file is valid / parsable robots.txt file} +\title{Validate if a file is valid / parsable robots.txt file} \usage{ is_valid_robotstxt(text, check_strickt_ascii = FALSE) } \arguments{ -\item{text}{content of a robots.txt file provides as character vector} +\item{text}{content of a robots.txt file provided as character vector} \item{check_strickt_ascii}{whether or not to check if content does adhere to the specification of RFC to use plain text aka ASCII} } \description{ -function that checks if file is valid / parsable robots.txt file +Validate if a file is valid / parsable robots.txt file } diff --git a/man/named_list.Rd b/man/named_list.Rd index 8165db0..630b71f 100644 --- a/man/named_list.Rd +++ b/man/named_list.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/tools.R \name{named_list} \alias{named_list} -\title{make automatically named list} +\title{Create a named list} \usage{ 
named_list(...) } @@ -10,6 +10,6 @@ named_list(...) \item{...}{things to be put in list} } \description{ -make automatically named list +Create a named list } \keyword{internal} diff --git a/man/null_to_default.Rd b/man/null_to_default.Rd index 31b1a07..590bf39 100644 --- a/man/null_to_default.Rd +++ b/man/null_to_default.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/null_to_default.R \name{null_to_default} \alias{null_to_default} -\title{null_to_default} +\title{Return default value if NULL} \usage{ null_to_default(x, d) } @@ -12,5 +12,5 @@ null_to_default(x, d) \item{d}{value to return in case x is NULL} } \description{ -null_to_default +Return default value if NULL } diff --git a/man/parse_robotstxt.Rd b/man/parse_robotstxt.Rd index b3c3c57..2ff320a 100644 --- a/man/parse_robotstxt.Rd +++ b/man/parse_robotstxt.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/parse_robotstxt.R \name{parse_robotstxt} \alias{parse_robotstxt} -\title{function parsing robots.txt} +\title{Parse a robots.txt file} \usage{ parse_robotstxt(txt) } @@ -13,5 +13,5 @@ parse_robotstxt(txt) a named list with useragents, comments, permissions, sitemap } \description{ -function parsing robots.txt +Parse a robots.txt file } diff --git a/man/parse_url.Rd b/man/parse_url.Rd index 62ef016..f6f2eea 100644 --- a/man/parse_url.Rd +++ b/man/parse_url.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/parse_url.R \name{parse_url} \alias{parse_url} -\title{parse_url} +\title{Parse a URL} \usage{ parse_url(url) } @@ -13,7 +13,7 @@ parse_url(url) data.frame with columns protocol, domain, path } \description{ -parse_url +Parse a URL } \examples{ diff --git a/man/paths_allowed.Rd b/man/paths_allowed.Rd index 1cd178f..e75a545 100644 --- a/man/paths_allowed.Rd +++ b/man/paths_allowed.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/paths_allowed.R \name{paths_allowed} \alias{paths_allowed} -\title{check if a bot has permissions to access page(s)} +\title{Check if a bot has permissions to access page(s)} \usage{ paths_allowed( paths = "/", @@ -28,7 +28,7 @@ paths_allowed( ) } \arguments{ -\item{paths}{paths for which to check bot's permission, defaults to "/". Please, note that path to a folder should end with a trailing slash ("/").} +\item{paths}{paths for which to check bot's permission, defaults to "/". Please note that path to a folder should end with a trailing slash ("/").} \item{domain}{Domain for which paths should be checked. Defaults to "auto". If set to "auto" function will try to guess the domain by parsing the paths @@ -86,5 +86,5 @@ than 'text/plain'} something else than a robots.txt file (usually a JSON, XML or HTML)} } \description{ -check if a bot has permissions to access page(s) +Check if a bot has permissions to access page(s) } diff --git a/man/paths_allowed_worker_spiderbar.Rd b/man/paths_allowed_worker_spiderbar.Rd index bffaea9..b8919c9 100644 --- a/man/paths_allowed_worker_spiderbar.Rd +++ b/man/paths_allowed_worker_spiderbar.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/paths_allowed_worker_spiderbar.R \name{paths_allowed_worker_spiderbar} \alias{paths_allowed_worker_spiderbar} -\title{paths_allowed_worker spiderbar flavor} +\title{Check if a spiderbar bot has permissions to access page(s)} \usage{ paths_allowed_worker_spiderbar(domain, bot, paths, robotstxt_list) } @@ -14,11 +14,11 @@ fail. To be on the safe side, provide appropriate domains manually.} \item{bot}{name of the bot, defaults to "*"} -\item{paths}{paths for which to check bot's permission, defaults to "/". 
Please, note that path to a folder should end with a trailing slash ("/").} +\item{paths}{paths for which to check bot's permission, defaults to "/". Please note that path to a folder should end with a trailing slash ("/").} \item{robotstxt_list}{either NULL -- the default -- or a list of character vectors with one vector per path to check} } \description{ -paths_allowed_worker spiderbar flavor +Check if a spiderbar bot has permissions to access page(s) } diff --git a/man/print.robotstxt.Rd b/man/print.robotstxt.Rd index 816ab0a..2c24608 100644 --- a/man/print.robotstxt.Rd +++ b/man/print.robotstxt.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/print_robotstxt.R \name{print.robotstxt} \alias{print.robotstxt} -\title{printing robotstxt} +\title{Print robotstxt} \usage{ \method{print}{robotstxt}(x, ...) } @@ -12,5 +12,5 @@ \item{...}{goes down the sink} } \description{ -printing robotstxt +Print robotstxt } diff --git a/man/print.robotstxt_text.Rd b/man/print.robotstxt_text.Rd index 2033e8e..0ecbccb 100644 --- a/man/print.robotstxt_text.Rd +++ b/man/print.robotstxt_text.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/print_robotstxt_text.R \name{print.robotstxt_text} \alias{print.robotstxt_text} -\title{printing robotstxt_text} +\title{Print robotstxt's text} \usage{ \method{print}{robotstxt_text}(x, ...) } @@ -12,5 +12,5 @@ \item{...}{goes down the sink} } \description{ -printing robotstxt_text +Print robotstxt's text } diff --git a/man/remove_domain.Rd b/man/remove_domain.Rd index 37de302..65866d9 100644 --- a/man/remove_domain.Rd +++ b/man/remove_domain.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/remove_domain.R \name{remove_domain} \alias{remove_domain} -\title{function to remove domain from path} +\title{Remove domain from path} \usage{ remove_domain(x) } @@ -10,5 +10,5 @@ remove_domain(x) \item{x}{path aka URL from which to first infer domain and then remove it} } \description{ -function to remove domain from path +Remove domain from path } diff --git a/man/request_handler_handler.Rd b/man/request_handler_handler.Rd index a6e6cc4..7cd9f3d 100644 --- a/man/request_handler_handler.Rd +++ b/man/request_handler_handler.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/request_handler_handler.R \name{request_handler_handler} \alias{request_handler_handler} -\title{request_handler_handler} +\title{Handle robotstxt handlers} \usage{ request_handler_handler(request, handler, res, info = TRUE, warn = TRUE) } @@ -11,7 +11,7 @@ request_handler_handler(request, handler, res, info = TRUE, warn = TRUE) \item{handler}{the handler either a character string entailing various options or a function producing a specific list, see return.} -\item{res}{a list a list with elements '[handler names], ...', 'rtxt', and 'cache'} +\item{res}{a list with elements '[handler names], ...', 'rtxt', and 'cache'} \item{info}{info to add to problems list} diff --git a/man/robotstxt.Rd b/man/robotstxt.Rd index e6af5ab..a6e0e6b 100644 --- a/man/robotstxt.Rd +++ b/man/robotstxt.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/robotstxt.R \name{robotstxt} \alias{robotstxt} -\title{Generate a representations of a robots.txt file} +\title{Generate a representation of a robots.txt file} \usage{ robotstxt( domain = NULL, @@ -79,7 +79,7 @@ particular bots) are allowed to access a resource on the domain. 
\item{\code{domain}}{character vector holding domain name for which the robots.txt file is valid; will be set to NA if not supplied on initialization} -\item{\code{text}}{character vector of text of robots.txt file; either supplied on +\item{\code{character}}{vector of text of robots.txt file; either supplied on initialization or automatically downloaded from domain supplied on initialization} diff --git a/man/rt_cache.Rd b/man/rt_cache.Rd index ce3d87f..9b48e9f 100644 --- a/man/rt_cache.Rd +++ b/man/rt_cache.Rd @@ -3,7 +3,7 @@ \docType{data} \name{rt_cache} \alias{rt_cache} -\title{get_robotstxt() cache} +\title{Get the robotstxt cache} \format{ An object of class \code{environment} of length 0. } @@ -11,6 +11,6 @@ An object of class \code{environment} of length 0. rt_cache } \description{ -get_robotstxt() cache +Get the robotstxt cache } \keyword{datasets} diff --git a/man/rt_get_comments.Rd b/man/rt_get_comments.Rd index f141d9c..0c09504 100644 --- a/man/rt_get_comments.Rd +++ b/man/rt_get_comments.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/rt_get_comments.R \name{rt_get_comments} \alias{rt_get_comments} -\title{extracting comments from robots.txt} +\title{Extract comments from robots.txt} \usage{ rt_get_comments(txt) } @@ -10,6 +10,6 @@ rt_get_comments(txt) \item{txt}{content of the robots.txt file} } \description{ -extracting comments from robots.txt +Extract comments from robots.txt } \keyword{internal} diff --git a/man/rt_get_fields.Rd b/man/rt_get_fields.Rd index 81e6a08..0885e7c 100644 --- a/man/rt_get_fields.Rd +++ b/man/rt_get_fields.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/rt_get_fields.R \name{rt_get_fields} \alias{rt_get_fields} -\title{extracting permissions from robots.txt} +\title{Extract permissions from robots.txt} \usage{ rt_get_fields(txt, regex = "", invert = FALSE) } @@ -14,6 +14,6 @@ rt_get_fields(txt, regex = "", invert = FALSE) \item{invert}{invert selection made via regex?} } \description{ -extracting permissions from robots.txt +Extract permissions from robots.txt } \keyword{internal} diff --git a/man/rt_get_fields_worker.Rd b/man/rt_get_fields_worker.Rd index 30d8920..8cd0031 100644 --- a/man/rt_get_fields_worker.Rd +++ b/man/rt_get_fields_worker.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/rt_get_fields_worker.R \name{rt_get_fields_worker} \alias{rt_get_fields_worker} -\title{extracting robotstxt fields} +\title{Extract robotstxt fields} \usage{ rt_get_fields_worker(txt, type = "all", regex = NULL, invert = FALSE) } @@ -17,6 +17,6 @@ fields} \item{invert}{field selection} } \description{ -extracting robotstxt fields +Extract robotstxt fields } \keyword{internal} diff --git a/man/rt_get_rtxt.Rd b/man/rt_get_rtxt.Rd index 067d522..dfdf590 100644 --- a/man/rt_get_rtxt.Rd +++ b/man/rt_get_rtxt.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/tools.R \name{rt_get_rtxt} \alias{rt_get_rtxt} -\title{load robots.txt files saved along with the package} +\title{Load robots.txt files saved along with the package} \usage{ rt_get_rtxt(name = sample(rt_list_rtxt(), 1)) } diff --git a/man/rt_get_useragent.Rd b/man/rt_get_useragent.Rd index 6cbbe1a..d267f46 100644 --- a/man/rt_get_useragent.Rd +++ b/man/rt_get_useragent.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/rt_get_useragent.R \name{rt_get_useragent} \alias{rt_get_useragent} -\title{extracting HTTP useragents from robots.txt} +\title{Extract HTTP useragents from robots.txt} \usage{ rt_get_useragent(txt) } @@ -10,6 +10,6 @@ rt_get_useragent(txt) \item{txt}{content of the robots.txt 
file} } \description{ -extracting HTTP useragents from robots.txt +Extract HTTP useragents from robots.txt } \keyword{internal} diff --git a/man/rt_list_rtxt.Rd b/man/rt_list_rtxt.Rd index 0d20fe4..d73f5ae 100644 --- a/man/rt_list_rtxt.Rd +++ b/man/rt_list_rtxt.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/tools.R \name{rt_list_rtxt} \alias{rt_list_rtxt} -\title{list robots.txt files saved along with the package} +\title{List robots.txt files saved along with the package} \usage{ rt_list_rtxt() } diff --git a/man/rt_request_handler.Rd b/man/rt_request_handler.Rd index 8d02ade..ea56479 100644 --- a/man/rt_request_handler.Rd +++ b/man/rt_request_handler.Rd @@ -12,7 +12,7 @@ \alias{on_sub_domain_change_default} \alias{on_file_type_mismatch_default} \alias{on_suspect_content_default} -\title{rt_request_handler} +\title{Handle robotstxt object retrieved from HTTP request} \format{ An object of class \code{list} of length 4. @@ -97,7 +97,7 @@ a list with three items following the following schema: \cr \code{ } \description{ A helper function for get_robotstxt() that will extract the robots.txt file -from the HTTP request result object. furthermore it will inform -get_robotstxt() if the request should be cached and which problems occured. +from the HTTP request result object. It will inform get_robotstxt() if the +request should be cached and which problems occurred. } \keyword{datasets} diff --git a/man/sanitize_path.Rd b/man/sanitize_path.Rd index 1da481c..cdc22ef 100644 --- a/man/sanitize_path.Rd +++ b/man/sanitize_path.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/sanitize_path.R \name{sanitize_path} \alias{sanitize_path} -\title{making paths uniform} +\title{Make paths uniform} \usage{ sanitize_path(path) } @@ -13,6 +13,6 @@ sanitize_path(path) sanitized path } \description{ -making paths uniform +Make paths uniform } \keyword{internal} From ccd36ab7d435affc357c74a380b6423daaa66f3b Mon Sep 17 00:00:00 2001 From: Jordan Bradford <36420801+jrdnbradford@users.noreply.github.com> Date: Sun, 1 Sep 2024 22:07:16 -0400 Subject: [PATCH 4/5] Update `NEWS` --- NEWS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/NEWS.md b/NEWS.md index ca13bb5..484e716 100644 --- a/NEWS.md +++ b/NEWS.md @@ -5,6 +5,7 @@ NEWS robotstxt -------------------------------------------------------------------------- - `null_to_default` typo fixed +- Updates to function documentation 0.7.15 | 2024-08-24 -------------------------------------------------------------------------- From 607ea03353e130c2bad489421dc60f1b69fb6bae Mon Sep 17 00:00:00 2001 From: Jordan Bradford <36420801+jrdnbradford@users.noreply.github.com> Date: Mon, 2 Sep 2024 09:46:43 -0400 Subject: [PATCH 5/5] Bump dev version --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 53fc9f7..af4fb76 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -2,7 +2,7 @@ Package: robotstxt Date: 2024-08-25 Type: Package Title: A 'robots.txt' Parser and 'Webbot'/'Spider'/'Crawler' Permissions Checker -Version: 0.7.15 +Version: 0.7.15.9000 Authors@R: c( person( "Pedro", "Baltazar", role = c("ctb"),
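
Usage sketch for the helper renamed in PATCH 2/5: `null_to_default()` returns a supplied default when its input is NULL and returns the input itself otherwise. The definition below mirrors the body shown in the `R/null_to_default.R` hunk; the `headers` object is a made-up stand-in for an httr response header list.

``` r
# Return `d` when `x` is NULL, otherwise return `x` unchanged
# (the helper renamed from `null_to_defeault` in PATCH 2/5).
null_to_default <- function(x, d) {
  if (is.null(x)) {
    d
  } else {
    x
  }
}

# Typical call site, as in rt_request_handler(): fall back to "" when a
# response carries no content-type header (hypothetical header list).
headers <- list(`content-type` = NULL)
grepl("charset", null_to_default(headers$`content-type`, ""))
## [1] FALSE
```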
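The README and `paths_allowed()` hunks in PATCH 1/5 and PATCH 3/5 concern the permission-checking workflow; a short illustration of that workflow follows. It assumes the exported `paths_allowed()` and `robotstxt()` interfaces as documented in the patched Rd files, needs network access, and uses `petermeissner.de` (the domain from the README examples) plus an illustrative `/images/` path.

``` r
library(robotstxt)

# May a generic bot ("*") fetch these paths? Note the trailing slash on
# the folder path, as the updated @param docs recommend.
paths_allowed(
  paths  = c("/", "/images/"),
  domain = "petermeissner.de",
  bot    = "*"
)

# Or parse the robots.txt once and query the representation repeatedly.
rtxt <- robotstxt(domain = "petermeissner.de")
rtxt$check(paths = "/I_want_to_scrape_this_now", bot = "*")
```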