-
Notifications
You must be signed in to change notification settings - Fork 200
/
Copy pathggscatterstats.Rd
223 lines (186 loc) · 8.89 KB
/
ggscatterstats.Rd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/ggscatterstats.R
\name{ggscatterstats}
\alias{ggscatterstats}
\title{Scatterplot with marginal distributions and statistical results}
\usage{
ggscatterstats(
data,
x,
y,
type = "parametric",
conf.level = 0.95,
bf.prior = 0.707,
bf.message = TRUE,
tr = 0.2,
digits = 2L,
results.subtitle = TRUE,
label.var = NULL,
label.expression = NULL,
marginal = TRUE,
point.args = list(size = 3, alpha = 0.4, stroke = 0),
point.width.jitter = 0,
point.height.jitter = 0,
point.label.args = list(size = 3, max.overlaps = 1e+06),
smooth.line.args = list(linewidth = 1.5, color = "blue", method = "lm", formula = y ~
x),
xsidehistogram.args = list(fill = "#009E73", color = "black", na.rm = TRUE),
ysidehistogram.args = list(fill = "#D55E00", color = "black", na.rm = TRUE),
xlab = NULL,
ylab = NULL,
title = NULL,
subtitle = NULL,
caption = NULL,
ggtheme = ggstatsplot::theme_ggstatsplot(),
ggplot.component = NULL,
...
)
}
\arguments{
\item{data}{A data frame (or a tibble) from which variables specified are to
be taken. Other data types (e.g., matrix, table, array, etc.) will \strong{not}
be accepted. Additionally, grouped data frames from \code{{dplyr}} should be
ungrouped before they are entered as \code{data}.}
\item{x}{The column in \code{data} containing the explanatory variable to be
plotted on the \code{x}-axis.}
\item{y}{The column in \code{data} containing the response (outcome) variable to
be plotted on the \code{y}-axis.}
\item{type}{A character specifying the type of statistical approach:
\itemize{
\item \code{"parametric"}
\item \code{"nonparametric"}
\item \code{"robust"}
\item \code{"bayes"}
}
You can specify just the initial letter.}
\item{conf.level}{Scalar between \code{0} and \code{1} (default: \verb{95\%}
confidence/credible intervals, \code{0.95}). If \code{NULL}, no confidence intervals
will be computed.}
\item{bf.prior}{A number between \code{0.5} and \code{2} (default \code{0.707}), the prior
width to use in calculating Bayes factors and posterior estimates. In
addition to numeric arguments, several named values are also recognized:
\code{"medium"}, \code{"wide"}, and \code{"ultrawide"}, corresponding to \emph{r} scale values
of \code{1/2}, \code{sqrt(2)/2}, and \code{1}, respectively. In case of an ANOVA, this
value corresponds to scale for fixed effects.}
\item{bf.message}{Logical that decides whether to display Bayes Factor in
favor of the \emph{null} hypothesis. This argument is relevant only \strong{for
parametric tests} (Default: \code{TRUE}).}
\item{tr}{Trim level for the mean when carrying out \code{robust} tests. In case
of an error, try reducing the value of \code{tr}, which is by default set to
\code{0.2}. Lowering the value might help.}
\item{digits}{Number of digits for rounding or significant figures. May also
be \code{"signif"} to return significant figures or \code{"scientific"}
to return scientific notation. Control the number of digits by adding the
value as suffix, e.g. \code{digits = "scientific4"} to have scientific
notation with 4 decimal places, or \code{digits = "signif5"} for 5
significant figures (see also \code{\link[=signif]{signif()}}).}
\item{results.subtitle}{Decides whether the results of statistical tests are
to be displayed as a subtitle (Default: \code{TRUE}). If set to \code{FALSE}, only
the plot will be returned.}
\item{label.var}{Variable to use for points labels entered as a symbol (e.g.
\code{var1}).}
\item{label.expression}{An expression evaluating to a logical vector that
determines the subset of data points to label (e.g. \code{y < 4 & z < 20}).
While using this argument with \code{\link[purrr:pmap]{purrr::pmap()}}, you will have to provide
a quoted expression (e.g. \code{quote(y < 4 & z < 20)}).}
\item{marginal}{Decides whether marginal distributions will be plotted on
axes using \code{{ggside}} functions. The default is \code{TRUE}. The package
\code{{ggside}} must already be installed by the user.}
\item{point.args}{A list of additional aesthetic arguments to be passed to
the \code{\link[ggplot2:geom_point]{ggplot2::geom_point()}}.}
\item{point.width.jitter, point.height.jitter}{Degree of jitter in \code{x} and \code{y}
direction, respectively. Defaults to \code{0} (0\%) of the resolution of the
data. Note that the jitter should not be specified in the \code{point.args}
because this information will be passed to two different \code{geom}s: one
displaying the \strong{points} and the other displaying the \strong{labels} for
these points.}
\item{point.label.args}{A list of additional aesthetic arguments to be passed
to \code{\link[ggrepel:geom_text_repel]{ggrepel::geom_label_repel()}} geom used to display the labels.}
\item{smooth.line.args}{A list of additional aesthetic arguments to be passed
to \code{geom_smooth} geom used to display the regression line.}
\item{xsidehistogram.args, ysidehistogram.args}{A list of arguments passed to
respective \code{geom_}s from the \code{{ggside}} package to change the marginal
distribution histogram plots.}
\item{xlab}{Label for \code{x} axis variable. If \code{NULL} (default),
variable name for \code{x} will be used.}
\item{ylab}{Label for \code{y} axis variable. If \code{NULL} (default),
variable name for \code{y} will be used.}
\item{title}{The text for the plot title.}
\item{subtitle}{The text for the plot subtitle. Will work only if
\code{results.subtitle = FALSE}.}
\item{caption}{The text for the plot caption. This argument is relevant only
if \code{bf.message = FALSE}.}
\item{ggtheme}{A \code{{ggplot2}} theme. Default value is
\code{\link[=theme_ggstatsplot]{theme_ggstatsplot()}}. Any of the \code{{ggplot2}} themes (e.g.,
\code{\link[ggplot2:ggtheme]{ggplot2::theme_bw()}}), or themes from extension packages are allowed
(e.g., \code{ggthemes::theme_fivethirtyeight()}, \code{hrbrthemes::theme_ipsum_ps()},
etc.). But note that sometimes these themes will remove some of the details
that \code{{ggstatsplot}} plots typically contain. For example, if relevant,
\code{\link[=ggbetweenstats]{ggbetweenstats()}} shows details about multiple comparison test as a
label on the secondary Y-axis. Some themes (e.g.
\code{ggthemes::theme_fivethirtyeight()}) will remove the secondary Y-axis and
thus the details as well.}
\item{ggplot.component}{A \code{ggplot} component to be added to the plot prepared
by \code{{ggstatsplot}}. This argument is primarily helpful for \code{grouped_}
variants of all primary functions. Default is \code{NULL}. The argument should
be entered as a \code{{ggplot2}} function or a list of \code{{ggplot2}} functions.}
\item{...}{Currently ignored.}
}
\description{
Scatterplots from \code{{ggplot2}} combined with marginal distribution plots
with statistical details.
}
\details{
For details, see:
\url{https://indrajeetpatil.github.io/ggstatsplot/articles/web_only/ggscatterstats.html}
}
\note{
The plot uses \code{\link[ggrepel:geom_text_repel]{ggrepel::geom_label_repel()}} to attempt to keep labels
from overlapping to the largest degree possible. As a consequence, plot
times will slow down massively (and the plot file will grow in size) if you
have a lot of labels that overlap.
}
\section{Summary of graphics}{
\tabular{lll}{
graphical element \tab \code{geom} used \tab argument for further modification \cr
raw data \tab \code{ggplot2::geom_point()} \tab \code{point.args} \cr
labels for raw data \tab \code{ggrepel::geom_label_repel()} \tab \code{point.label.args} \cr
smooth line \tab \code{ggplot2::geom_smooth()} \tab \code{smooth.line.args} \cr
marginal histograms \tab \code{ggside::geom_xsidehistogram()}, \code{ggside::geom_ysidehistogram()} \tab \code{xsidehistogram.args}, \code{ysidehistogram.args} \cr
}
}
\section{Correlation analyses}{
The table below provides a summary of:
\itemize{
\item statistical test carried out for inferential statistics
\item type of effect size estimate and a measure of uncertainty for this estimate
\item functions used internally to compute these details
}
\strong{Hypothesis testing} and \strong{Effect size estimation}\tabular{llll}{
Type \tab Test \tab CI available? \tab Function used \cr
Parametric \tab Pearson's correlation coefficient \tab Yes \tab \code{correlation::correlation()} \cr
Non-parametric \tab Spearman's rank correlation coefficient \tab Yes \tab \code{correlation::correlation()} \cr
Robust \tab Winsorized Pearson's correlation coefficient \tab Yes \tab \code{correlation::correlation()} \cr
Bayesian \tab Bayesian Pearson's correlation coefficient \tab Yes \tab \code{correlation::correlation()} \cr
}
}
\examples{
set.seed(123)
# creating a plot
p <- ggscatterstats(
iris,
x = Sepal.Width,
y = Petal.Length,
label.var = Species,
label.expression = Sepal.Length > 7.6
) +
ggplot2::geom_rug(sides = "b")
# looking at the plot
p
# extracting details from statistical tests
extract_stats(p)
}
\seealso{
\code{\link{grouped_ggscatterstats}}, \code{\link{ggcorrmat}},
\code{\link{grouped_ggcorrmat}}
}