Skip to content

Commit 93f2c01

Browse files
committed
Add Yule (1944), linting
1 parent 75d279d commit 93f2c01

File tree

3 files changed

+89
-14
lines changed

3 files changed

+89
-14
lines changed

R/textstat_lexdiv.R

+9-5
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@
3636
#' \left[ -\frac{1}{N} + \sum_{i=1}^{V} f_v(i, N) \left( \frac{i}{N} \right)^2 \right] }}
3737
#'
3838
#' \item{\code{"I"}:}{Yule's \emph{I} (Yule, 1944) is calculated by: \deqn{I = \frac{V^2}{M_2 - V}}
39-
#' \deqn{M_2 = \sum_{i=1}^{V} i^2 * f_v(i, N)}
39+
#' \deqn{M_2 = \sum_{i=1}^{V} i^2 f_v(i, N)}}
4040
#'
4141
#' \item{\code{"D"}:}{Simpson's \emph{D} (Simpson 1949, as presented in
4242
#' Tweedie & Baayen, 1998, Eq. 17) is calculated by:
@@ -61,8 +61,9 @@
6161
#' halves of the text.}
6262
#'
6363
#' \item{\code{"MATTR"}:}{The Moving-Average Type-Token Ratio (Covington &
64-
#' McFall, 2010) calculates TTRs for a moving window of tokens from the first to the last token, computing a TTR for each window.
65-
#' The MATTR is the mean of the TTRs of each window.}
64+
#' McFall, 2010) calculates TTRs for a moving window of tokens from the first
65+
#' to the last token, computing a TTR for each window. The MATTR is the mean
66+
#' of the TTRs of each window.}
6667
#'
6768
#' \item{\code{"MSTTR"}:}{Mean Segmental Type-Token Ratio (sometimes referred
6869
#' to as \emph{Split TTR}) splits the tokens into segments of the given size,
@@ -127,6 +128,9 @@
127128
#' Variable May a Constant Be? Measures of Lexical Richness in Perspective}. \emph{Computers and the
128129
#' Humanities}, 32(5), 323--352.
129130
#'
131+
#' Yule, G. U. (1944) \emph{The Statistical Study of Literary Vocabulary.}
132+
#' Cambridge: Cambridge University Press.
133+
#'
130134
#' @return A data.frame of documents and their lexical diversity scores.
131135
#' @export
132136
#' @examples
@@ -145,7 +149,7 @@
145149
#' toks <- tokens(corpus_subset(data_corpus_inaugural, Year > 2000))
146150
#' textstat_lexdiv(toks, c("CTTR", "TTR", "MATTR"), MATTR_window = 100)
147151
textstat_lexdiv <- function(x,
148-
measure = c("TTR", "C", "R", "CTTR", "U", "S", "K", "I","D",
152+
measure = c("TTR", "C", "R", "CTTR", "U", "S", "K", "I", "D",
149153
"Vm", "Maas", "MATTR", "MSTTR", "all"),
150154
remove_numbers = TRUE, remove_punct = TRUE,
151155
remove_symbols = TRUE, remove_hyphens = FALSE,
@@ -324,7 +328,7 @@ compute_lexdiv_dfm_stats <- function(x, measure = NULL, log.base = 10) {
324328
M_2 <- vapply(ViN, function(y) sum(y$ViN * y$i^2), numeric(1))
325329
M_1 <- temp$n_types
326330
yule_i <- (M_1 ^ 2) / (M_2 - M_1)
327-
yule_i[yule_i== Inf] <- 0
331+
yule_i[is.infinite(yule_i)] <- 0
328332
temp[, I := yule_i]
329333
}
330334

man/textstat_lexdiv.Rd

+71
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/testthat/test-textstat_lexdiv.R

+9-9
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ test_that("textstat_lexdiv computation is correct", {
1010
)
1111
})
1212

13-
test_that("textstat_lexdiv CTTR works correct", {
13+
test_that("textstat_lexdiv CTTR works correctly", {
1414
mydfm <- dfm(c(d1 = "b a b a b a b a",
1515
d2 = "a a b b"))
1616

@@ -21,7 +21,7 @@ test_that("textstat_lexdiv CTTR works correct", {
2121
)
2222
})
2323

24-
test_that("textstat_lexdiv R works correct", {
24+
test_that("textstat_lexdiv R works correctly", {
2525
mydfm <- dfm(c(d1 = "b a b a b a b a",
2626
d2 = "a a b b"))
2727

@@ -32,7 +32,7 @@ test_that("textstat_lexdiv R works correct", {
3232
)
3333
})
3434

35-
test_that("textstat_lexdiv C works correct", {
35+
test_that("textstat_lexdiv C works correctly", {
3636
mydfm <- dfm(c(d1 = "b a b a b a b a",
3737
d2 = "a a b b"))
3838

@@ -43,7 +43,7 @@ test_that("textstat_lexdiv C works correct", {
4343
)
4444
})
4545

46-
test_that("textstat_lexdiv Maas works correct", {
46+
test_that("textstat_lexdiv Maas works correctly", {
4747
mydfm <- dfm(c(d1 = "b a b a b a b a",
4848
d2 = "a a b b"))
4949

@@ -54,7 +54,7 @@ test_that("textstat_lexdiv Maas works correct", {
5454
)
5555
})
5656

57-
test_that("textstat_lexdiv I works correct", {
57+
test_that("textstat_lexdiv Yule's I works correctly", {
5858
mydfm <- dfm(c(d1 = "a b c",
5959
d2 = "a a b b c"))
6060
expect_equivalent(
@@ -269,7 +269,7 @@ test_that("textstat_lexdiv.tokens raises errors if parameters for moving measure
269269
# )
270270
})
271271

272-
test_that("textstat_lexdiv.tokens MATTR works correct on its own", {
272+
test_that("textstat_lexdiv.tokens MATTR works correctly on its own", {
273273
mytxt <- "one one two one one two one"
274274
mytoken <- tokens(mytxt)
275275
wsize2_MATTR <- (1/2 + 1 + 1 + 1/2 + 1 + 1) / 6
@@ -290,7 +290,7 @@ test_that("textstat_lexdiv.tokens MATTR works correct on its own", {
290290
)
291291
})
292292

293-
test_that("textstat_lexdiv.tokens MATTR works correct in conjunction with static measures", {
293+
test_that("textstat_lexdiv.tokens MATTR works correctly in conjunction with static measures", {
294294
mytxt <- "one one two one one two one"
295295
mytoken <- tokens(mytxt)
296296
wsize2_MATTR <- (1/2 + 1 + 1 + 1/2 + 1 + 1) / 6
@@ -301,7 +301,7 @@ test_that("textstat_lexdiv.tokens MATTR works correct in conjunction with static
301301
)
302302
})
303303

304-
test_that("textstat_lexdiv.tokens MSTTR works correct on its own", {
304+
test_that("textstat_lexdiv.tokens MSTTR works correctly on its own", {
305305
mytxt <- "apple orange apple orange pear pear apple orange"
306306
mytoken <- tokens(mytxt)
307307
wsize2_MSTTR <- (2/2 + 2/2 + 1/2 + 2/2) / 4
@@ -329,7 +329,7 @@ test_that("textstat_lexdiv.tokens MSTTR works correct on its own", {
329329
textstat_lexdiv(mytoken, measure = "TTR")[[2]])
330330
})
331331

332-
test_that("textstat_lexdiv.tokens MSTTR works correct in conjunction with static measures", {
332+
test_that("textstat_lexdiv.tokens MSTTR works correctly in conjunction with static measures", {
333333
mytxt <- "apple orange apple orange pear pear apple orange"
334334
mytoken <- tokens(mytxt)
335335
wsize2_MSTTR <- (2/2 + 2/2 + 1/2 + 2/2) / 4

0 commit comments

Comments
 (0)