forked from quanteda/quanteda
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtokens_wordstem.Rd
64 lines (55 loc) · 1.96 KB
/
tokens_wordstem.Rd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/wordstem.R
\name{tokens_wordstem}
\alias{tokens_wordstem}
\alias{char_wordstem}
\alias{dfm_wordstem}
\title{Stem the terms in an object}
\usage{
tokens_wordstem(x, language = quanteda_options("language_stemmer"))
char_wordstem(x, language = quanteda_options("language_stemmer"))
dfm_wordstem(x, language = quanteda_options("language_stemmer"))
}
\arguments{
\item{x}{a character, tokens, or dfm object whose words are to be
stemmed. If tokenized texts, the tokenization must be word-based.}
\item{language}{the name of a recognized language, as returned by
\link[SnowballC]{getStemLanguages}, or a two- or three-letter ISO-639 code
corresponding to one of these languages (see references for the list of
codes)}
}
\value{
\code{tokens_wordstem} returns a \link{tokens} object whose word
types have been stemmed.
\code{char_wordstem} returns a \link{character} object whose word
types have been stemmed.
\code{dfm_wordstem} returns a \link{dfm} object whose word
types (features) have been stemmed, and recombined to consolidate features made
equivalent because of stemming.
}
\description{
Apply a stemmer to words. This is a wrapper around \link[SnowballC]{wordStem},
designed so that the stemmer can be called without attaching the entire
\pkg{SnowballC} package. \link[SnowballC]{wordStem} uses Martin Porter's
stemming algorithm and the C libstemmer library generated by Snowball.
}
\examples{
# example applied to tokens
txt <- c(one = "eating eater eaters eats ate",
two = "taxing taxes taxed my tax return")
th <- tokens(txt)
tokens_wordstem(th)
# simple example
char_wordstem(c("win", "winning", "wins", "won", "winner"))
# example applied to a dfm
(origdfm <- dfm(txt))
dfm_wordstem(origdfm)
}
\references{
\url{http://snowball.tartarus.org/}
\url{http://www.iso.org/iso/home/standards/language_codes.htm} for the
ISO-639 language codes
}
\seealso{
\link[SnowballC]{wordStem}
}