% man/ggscatterstats.Rd

% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/ggscatterstats.R
\name{ggscatterstats}
\alias{ggscatterstats}
\title{Scatterplot with marginal distributions and statistical results}
\usage{
ggscatterstats(
  data,
  x,
  y,
  type = "parametric",
  conf.level = 0.95,
  bf.prior = 0.707,
  bf.message = TRUE,
  tr = 0.2,
  digits = 2L,
  results.subtitle = TRUE,
  label.var = NULL,
  label.expression = NULL,
  marginal = TRUE,
  point.args = list(size = 3, alpha = 0.4, stroke = 0),
  point.width.jitter = 0,
  point.height.jitter = 0,
  point.label.args = list(size = 3, max.overlaps = 1e+06),
  smooth.line.args = list(linewidth = 1.5, color = "blue", method = "lm", formula = y ~
    x),
  xsidehistogram.args = list(fill = "#009E73", color = "black", na.rm = TRUE),
  ysidehistogram.args = list(fill = "#D55E00", color = "black", na.rm = TRUE),
  xlab = NULL,
  ylab = NULL,
  title = NULL,
  subtitle = NULL,
  caption = NULL,
  ggtheme = ggstatsplot::theme_ggstatsplot(),
  ggplot.component = NULL,
  ...
)
}
\arguments{
\item{data}{A data frame (or a tibble) from which variables specified are to
be taken. Other data types (e.g., matrix, table, array, etc.) will \strong{not}
be accepted. Additionally, grouped data frames from \code{{dplyr}} should be
ungrouped before they are entered as \code{data}.}

\item{x}{The column in \code{data} containing the explanatory variable to be
plotted on the \code{x}-axis.}

\item{y}{The column in \code{data} containing the response (outcome) variable to
be plotted on the \code{y}-axis.}

\item{type}{A character specifying the type of statistical approach:
\itemize{
\item \code{"parametric"}
\item \code{"nonparametric"}
\item \code{"robust"}
\item \code{"bayes"}
}

You can specify just the initial letter.}

\item{conf.level}{Scalar between \code{0} and \code{1} (default: \verb{95\%}
confidence/credible intervals, \code{0.95}). If \code{NULL}, no confidence intervals
will be computed.}

\item{bf.prior}{A number between \code{0.5} and \code{2} (default \code{0.707}), the prior
width to use in calculating Bayes factors and posterior estimates. In
addition to numeric arguments, several named values are also recognized:
\code{"medium"}, \code{"wide"}, and \code{"ultrawide"}, corresponding to \emph{r} scale values
of \code{1/2}, \code{sqrt(2)/2}, and \code{1}, respectively. In case of an ANOVA, this
value corresponds to scale for fixed effects.}

\item{bf.message}{Logical that decides whether to display Bayes Factor in
favor of the \emph{null} hypothesis. This argument is relevant only \strong{for
parametric tests} (Default: \code{TRUE}).}

\item{tr}{Trim level for the mean when carrying out \code{robust} tests. In case
of an error, try reducing the value of \code{tr}, which is by default set to
\code{0.2}. Lowering the value might help.}

\item{digits}{Number of digits for rounding or significant figures. May also
be \code{"signif"} to return significant figures or \code{"scientific"}
to return scientific notation. Control the number of digits by adding the
value as suffix, e.g. \code{digits = "scientific4"} to have scientific
notation with 4 decimal places, or \code{digits = "signif5"} for 5
significant figures (see also \code{\link[=signif]{signif()}}).}

\item{results.subtitle}{Decides whether the results of statistical tests are
to be displayed as a subtitle (Default: \code{TRUE}). If set to \code{FALSE}, only
the plot will be returned.}

\item{label.var}{Variable to use for point labels entered as a symbol (e.g.
\code{var1}).}

\item{label.expression}{An expression evaluating to a logical vector that
determines the subset of data points to label (e.g. \code{y < 4 & z < 20}).
While using this argument with \code{\link[purrr:pmap]{purrr::pmap()}}, you will have to provide
a quoted expression (e.g. \code{quote(y < 4 & z < 20)}).}

\item{marginal}{Decides whether marginal distributions will be plotted on
axes using \code{{ggside}} functions. The default is \code{TRUE}. The package
\code{{ggside}} must already be installed by the user.}

\item{point.args}{A list of additional aesthetic arguments to be passed to
the \code{\link[ggplot2:geom_point]{ggplot2::geom_point()}}.}

\item{point.width.jitter, point.height.jitter}{Degree of jitter in \code{x} and \code{y}
direction, respectively. Defaults to \code{0} (0\%) of the resolution of the
data. Note that the jitter should not be specified in the \code{point.args}
because this information will be passed to two different \code{geom}s: one
displaying the \strong{points} and the other displaying the \strong{labels} for
these points.}

\item{point.label.args}{A list of additional aesthetic arguments to be passed
to \code{\link[ggrepel:geom_text_repel]{ggrepel::geom_label_repel()}} geom used to display the labels.}

\item{smooth.line.args}{A list of additional aesthetic arguments to be passed
to \code{geom_smooth} geom used to display the regression line.}

\item{xsidehistogram.args, ysidehistogram.args}{A list of arguments passed to
respective \code{geom_}s from the \code{{ggside}} package to change the marginal
distribution histogram plots.}

\item{xlab}{Label for \code{x} axis variable. If \code{NULL} (default),
variable name for \code{x} will be used.}

\item{ylab}{Label for \code{y} axis variable. If \code{NULL} (default),
variable name for \code{y} will be used.}

\item{title}{The text for the plot title.}

\item{subtitle}{The text for the plot subtitle. Will work only if
\code{results.subtitle = FALSE}.}

\item{caption}{The text for the plot caption. This argument is relevant only
if \code{bf.message = FALSE}.}

\item{ggtheme}{A \code{{ggplot2}} theme. Default value is
\code{\link[=theme_ggstatsplot]{theme_ggstatsplot()}}. Any of the \code{{ggplot2}} themes (e.g.,
\code{\link[ggplot2:ggtheme]{ggplot2::theme_bw()}}), or themes from extension packages are allowed
(e.g., \code{ggthemes::theme_fivethirtyeight()}, \code{hrbrthemes::theme_ipsum_ps()},
etc.). But note that sometimes these themes will remove some of the details
that \code{{ggstatsplot}} plots typically contain. For example, if relevant,
\code{\link[=ggbetweenstats]{ggbetweenstats()}} shows details about multiple comparison tests as a
label on the secondary Y-axis. Some themes (e.g.
\code{ggthemes::theme_fivethirtyeight()}) will remove the secondary Y-axis and
thus the details as well.}

\item{ggplot.component}{A \code{ggplot} component to be added to the plot prepared
by \code{{ggstatsplot}}. This argument is primarily helpful for \code{grouped_}
variants of all primary functions. Default is \code{NULL}. The argument should
be entered as a \code{{ggplot2}} function or a list of \code{{ggplot2}} functions.}

\item{...}{Currently ignored.}
}
\description{
Scatterplots from \code{{ggplot2}} combined with marginal distribution plots
and statistical details.
}
\details{
For details, see:
\url{https://indrajeetpatil.github.io/ggstatsplot/articles/web_only/ggscatterstats.html}
}
\note{
The plot uses \code{\link[ggrepel:geom_text_repel]{ggrepel::geom_label_repel()}} to attempt to keep labels
from overlapping to the largest degree possible. As a consequence, plotting
will slow down massively (and the plot file will grow in size) if you
have a lot of labels that overlap.
}
\section{Summary of graphics}{
\tabular{lll}{
   graphical element \tab \code{geom} used \tab argument for further modification \cr
   raw data \tab \code{ggplot2::geom_point()} \tab \code{point.args} \cr
   labels for raw data \tab \code{ggrepel::geom_label_repel()} \tab \code{point.label.args} \cr
   smooth line \tab \code{ggplot2::geom_smooth()} \tab \code{smooth.line.args} \cr
   marginal histograms \tab \code{ggside::geom_xsidehistogram()}, \code{ggside::geom_ysidehistogram()} \tab \code{xsidehistogram.args}, \code{ysidehistogram.args} \cr
}
}

\section{Correlation analyses}{


The table below provides a summary of:
\itemize{
\item statistical test carried out for inferential statistics
\item type of effect size estimate and a measure of uncertainty for this estimate
\item functions used internally to compute these details
}

\strong{Hypothesis testing} and \strong{Effect size estimation}\tabular{llll}{
   Type \tab Test \tab CI available? \tab Function used \cr
   Parametric \tab Pearson's correlation coefficient \tab Yes \tab \code{correlation::correlation()} \cr
   Non-parametric \tab Spearman's rank correlation coefficient \tab Yes \tab \code{correlation::correlation()} \cr
   Robust \tab Winsorized Pearson's correlation coefficient \tab Yes \tab \code{correlation::correlation()} \cr
   Bayesian \tab Bayesian Pearson's correlation coefficient \tab Yes \tab \code{correlation::correlation()} \cr
}

}

\examples{
set.seed(123)

# creating a plot
p <- ggscatterstats(
  iris,
  x = Sepal.Width,
  y = Petal.Length,
  label.var = Species,
  label.expression = Sepal.Length > 7.6
) +
  ggplot2::geom_rug(sides = "b")

# looking at the plot
p

# extracting details from statistical tests
extract_stats(p)

}
\seealso{
\code{\link{grouped_ggscatterstats}}, \code{\link{ggcorrmat}},
\code{\link{grouped_ggcorrmat}}
}