-
Notifications
You must be signed in to change notification settings - Fork 1
/
12강_실습코드_youtube.R
57 lines (46 loc) · 1.69 KB
/
12강_실습코드_youtube.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# 0. Load packages ####
library(httr)
library(rvest)
library(RSelenium)
library(rJava)
# library() stops with an error when a package is missing; require() would
# only return FALSE and let the script fail later at the first call that
# needs the package.
library(binman)
library(wdman)
# 1. Start Selenium and connect to Chrome ####
port <- 4445L # port shared by the chromedriver process and the client
# chromedriver build to launch, defined once so the server and the client
# cannot drift apart. NOTE(review): presumably this has to be one of the
# versions reported by list_versions() below -- confirm on the target machine.
chrome_version <- "85.0.4183.87"
list_versions(appname = "chromedriver")
# Keep the chromedriver process handle under its own name. Assigning both the
# process handle and the client to `driver` discarded the handle, so the
# process could no longer be stopped from the script.
chrome_server <- chrome(port = port, version = chrome_version)
driver <- remoteDriver(
  remoteServerAddr = "localhost",
  port = port,
  browserName = "chrome",
  version = chrome_version
)
driver$open() # connect to the server and open a browser session
# Navigate to the video page whose comments are scraped below.
driver$navigate("https://www.youtube.com/watch?v=H8YW1tlsmE8")
# When finished, driver$close() ends the browser session and
# chrome_server$stop() ends the chromedriver process.
# 2. Locate the element that receives the key presses ####
element <- driver$findElement("css", "body")
# 2.1 Scroll down a fixed number of times
n_scrolls <- 2L
# Start from a known state instead of probing the workspace with exists(),
# which would also find a stale `pagesource` left over from an earlier run.
pagesource <- NULL
for (i in seq_len(n_scrolls)) {
  element$sendKeysToElement(list(key = "page_down"))
  # Wait before reading the page so that whatever this scroll triggered has
  # had time to load; reading first would miss the result of the last scroll.
  Sys.sleep(2)
  current_source <- driver$getPageSource()[[1]]
  if (identical(pagesource, current_source)) {
    # Page did not change after this scroll. The count is the loop index; the
    # previous message referenced `n` and `counter`, which are never defined.
    writeLines(paste0("Scrolled down ", i, " times.\n"))
  } else {
    pagesource <- current_source
  }
}
# 3. Scraping: extract user IDs and comments
html <- read_html(pagesource) # parse the captured page source
# Author names: text of the nodes matched by the header-author selector.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned.
youtube_user_IDs <- html %>%
  html_nodes("div#header-author > a > span") %>%
  html_text(trim = TRUE)
# Comment bodies: text of the nodes matched by the content-text selector.
youtube_user_comments <- html %>%
  html_nodes("yt-formatted-string#content-text") %>%
  html_text(trim = TRUE)
# 4. Build the data frame ####
# The two vectors come from independent selectors. data.frame() recycles the
# shorter one without warning when its length divides the longer one, which
# would pair IDs with the wrong comments, so mismatched lengths are rejected.
if (length(youtube_user_IDs) != length(youtube_user_comments)) {
  stop(
    "Scraped ", length(youtube_user_IDs), " user IDs but ",
    length(youtube_user_comments), " comments; cannot pair them row by row.",
    call. = FALSE
  )
}
df <- data.frame(youtube_user_IDs, youtube_user_comments)
head(df, 2)
# 5. Write the result to a csv file ####
write.csv(df, "comment.csv")
# Reference: https://awesomeopensource.com/project/yusuzech/r-web-scraping-cheat-sheet