-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathjob_scrape_tool.r
72 lines (61 loc) · 1.61 KB
/
job_scrape_tool.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
#' Scrape job details from saved Indeed pages into a data.frame and CSV
#'
#' Reads every page in the hard-coded list of local test files, extracts the
#' job title, location, date, salary and summary with CSS selectors, and
#' appends the resulting rows to `job_search_data.csv`. Both the page files
#' and the CSV are resolved against the parent of the current working
#' directory; the caller's working directory is restored before returning.
#'
#' The CSV header is written only when the file does not exist yet (or is
#' empty), so repeated runs keep appending rows to one well-formed table.
#'
#' @param value Currently unused; kept so existing calls keep working.
#' @return Invisibly, a data.frame with one row per page and the columns
#'   `job_title`, `job_location`, `job_date`, `job_salary`, `job_summary`
#'   and `job_site`. A field that cannot be extracted is `NA`.
#'
#' @author Phil Steinke, \email{[email protected]}
#'
#' @examples
#' \dontrun{
#' jobs <- jobScrapeTool()
#' }
#'
#' @import RCurl
#' @import jsonlite
#' @importFrom rvest read_html html_node html_text
#' @importFrom stringr str_trim
#' @export
#'
jobScrapeTool <-
function(value) {
  # Inputs and output live one directory up. Restore the caller's working
  # directory on exit so the function leaves no lasting side effect.
  old_wd <- setwd("./..")
  on.exit(setwd(old_wd), add = TRUE)

  urls <- c("Lead_Senior_fed.htm", "Javascript_dev_Traffio.html") # local test URL's
  job_site <- "Indeed"
  csv_path <- "job_search_data.csv"

  # Trimmed text of the first node matching `css` in `page`. Extraction is
  # best-effort: a failure yields NA but is reported instead of being hidden.
  parse_field <- function(page, css) {
    tryCatch(
      str_trim(html_text(html_node(page, css))),
      error = function(e) {
        warning(
          "Could not extract '", css, "': ", conditionMessage(e),
          call. = FALSE
        )
        NA_character_
      }
    )
  }

  # One-row data.frame describing the job found in a single page.
  scrape_page <- function(url) {
    page <- read_html(url)
    data.frame(
      job_title = parse_field(page, "b.jobtitle font"),
      job_location = parse_field(page, "span.location"),
      job_date = parse_field(page, "date"), # days ago on Indeed
      job_salary = parse_field(page, "span.no-wrap"),
      job_summary = parse_field(page, "span.summary"),
      job_site = job_site,
      stringsAsFactors = FALSE
    )
  }

  job_search_data <- do.call(rbind, lapply(urls, scrape_page))

  # Appending the header (and per-run row names) on every call corrupts the
  # accumulated CSV, so write the header once and omit row names.
  write_header <- !file.exists(csv_path) || file.size(csv_path) == 0
  write.table(
    job_search_data,
    csv_path,
    sep = ",",
    row.names = FALSE,
    col.names = write_header,
    qmethod = "double", # CSV convention: embedded quotes are doubled
    append = TRUE
  )

  invisible(job_search_data)
}
# ---- Package build script ----
# These top-level statements run every time this file is sourced; they are
# not part of jobScrapeTool().
# NOTE(review): the project path below is hard-coded to one machine — confirm
# before running elsewhere.
setwd("~/code/scrapy")
# Generate a package skeleton named "Scrapy" containing jobScrapeTool.
# NOTE(review): package.skeleton() presumably refuses to overwrite an existing
# "Scrapy" directory on a second run — verify.
package.skeleton(name = "Scrapy", list = c("jobScrapeTool"))
library(devtools)
# to automatically generate the documentation:
# (called with no arguments, so devtools' default package path is used —
# presumably the current working directory rather than the new "Scrapy"
# folder; TODO confirm this is intended)
document()
# to build the package
build()
# to install the package
install()