Skip to content

Commit ecd6520

Browse files
committed
update renv
Need to update because of a CRAN test failure.
1 parent a14b8f9 commit ecd6520

File tree

7 files changed

+814
-464
lines changed

7 files changed

+814
-464
lines changed

.Rprofile

-3
Original file line numberDiff line numberDiff line change
@@ -1,4 +1 @@
11
source("renv/activate.R")
2-
if (interactive()) {
3-
suppressMessages(require(usethis))
4-
}

R/getContent.R

+19-38
Original file line numberDiff line numberDiff line change
@@ -15,55 +15,38 @@
1515

1616
getContent <-
1717
function(turl,
18-
col = c("url",
19-
"original_url",
20-
"section",
21-
"datetime",
22-
"edittime",
23-
"press",
24-
"title",
25-
"body",
26-
"value")) {
27-
18+
col = c(
19+
"url",
20+
"original_url",
21+
"section",
22+
"datetime",
23+
"edittime",
24+
"press",
25+
"title",
26+
"body"
27+
)) {
2828
httr2::request(turl) %>%
2929
httr2::req_user_agent("N2H4 by chanyub.park <[email protected]>") %>%
3030
httr2::req_method("GET") %>%
3131
httr2::req_perform() -> root
3232

3333
html_obj <- httr2::resp_body_html(root)
3434
urlcheck <- root$url
35-
value <- T
36-
if (identical(grep("^https?://n.news.naver.com",
37-
urlcheck),
38-
integer(0))) {
35+
36+
if (
37+
identical(
38+
grep("^https?://n.news.naver.com", urlcheck), integer(0)
39+
)
40+
) {
3941
original_url <- "page is not news section."
4042
title <- "page is not news section."
4143
datetime <- "page is not news section."
4244
edittime <- "page is not news section."
4345
press <- "page is not news section."
4446
body <- "page is not news section."
4547
section <- "page is not news section."
46-
value <- F
48+
4749
} else {
48-
# TODO: 이거 동작하는지 확인해야 함.
49-
chk <- rvest::html_nodes(html_obj, "div#main_content div div")
50-
chk <- rvest::html_attr(chk, "class")
51-
chk <- chk[1]
52-
if (is.na(chk)) {
53-
chk <- "not error"
54-
}
55-
if ("error_msg 404" == chk & value) {
56-
original_url <- "page is moved."
57-
title <- "page is moved."
58-
datetime <- "page is moved."
59-
edittime <- "page is moved."
60-
press <- "page is moved."
61-
body <- "page is moved."
62-
section <- "page is moved."
63-
value <- F
64-
}
65-
}
66-
if (value) {
6750
original_url <- getOriginalUrl(html_obj)
6851
title <- getContentTitle(html_obj)
6952
datetime <- getContentDatetime(html_obj)
@@ -84,8 +67,7 @@ getContent <-
8467
press = press,
8568
title = title,
8669
body = body,
87-
section = section,
88-
value = value
70+
section = section
8971
)
9072
return(newsInfo[, col])
9173
}
@@ -141,7 +123,7 @@ getContentPress <-
141123

142124
getContentBody <-
143125
function(html_obj,
144-
body_node_info = "div#dic_area",
126+
body_node_info = "article#dic_area",
145127
body_attr = "") {
146128
node <- rvest::html_nodes(html_obj, body_node_info)
147129
body <- rvest::html_text(node)
@@ -171,4 +153,3 @@ getSection <- function(turl) {
171153
}
172154
return(httr2::url_parse(turl)$query$sid)
173155
}
174-

0 commit comments

Comments
 (0)