From 4072badab16df3d54e5813aab17b97dcd16964c8 Mon Sep 17 00:00:00 2001
From: "Russell V. Lenth" <russell-lenth@uiowa.edu>
Date: Fri, 29 Nov 2024 12:29:50 -0600
Subject: [PATCH] Touch-ups on eff_size and add_submodels

---
 DESCRIPTION           |  4 ++--
 NEWS.md               |  7 ++++++-
 R/eff-size.R          | 19 +++++++++++++------
 R/emmGrid-methods.R   |  5 +++++
 R/factors.R           |  6 ++++--
 man/eff_size.Rd       | 11 +++++------
 man/manip-factors.Rd  |  7 +++++--
 man/update.emmGrid.Rd |  5 +++++
 8 files changed, 45 insertions(+), 19 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 1e8d066a..647cff65 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,8 +1,8 @@
 Package: emmeans
 Type: Package
 Title: Estimated Marginal Means, aka Least-Squares Means
-Version: 1.10.5-0900001
-Date: 2024-11-21
+Version: 1.10.5-0900003
+Date: 2024-11-29
 Authors@R: c(person("Russell V.", "Lenth", role = c("aut", "cre", "cph"), 
     email = "russell-lenth@uiowa.edu"),
     person("Balazs", "Banfai", role = "ctb"),
diff --git a/NEWS.md b/NEWS.md
index 8fdab89e..01108024 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -2,9 +2,14 @@
 title: "NEWS for the emmeans package"
 ---
 
-## emmeans 1.10.5.900xxx
+## emmeans 1.10.5-090xxx
   * Added new `add_submodels()` function that allows for comparison od estimates
     from different submodels (when supported)
+  * Additional notes for `eff_size()`. Also, a questionable example was deleted.
+    It is so easy to misuse this function, and I don't even buy into the idea
+    of standardized effect sizes except in the simplest of cases. So I am
+    considering deprecating `eff_size()` and letting some other package
+    be to blame for unsuitable or misleading results.
     
  
 ## emmeans 1.10.5
diff --git a/R/eff-size.R b/R/eff-size.R
index fb758e66..5e5eb830 100644
--- a/R/eff-size.R
+++ b/R/eff-size.R
@@ -1,7 +1,7 @@
 # Cohen's effect sizes
 
 
-#' Calculate effect sizes and confidence bounds thereof
+#' Calculate Cohen effect sizes and confidence bounds thereof
 #' 
 #' Standardized effect sizes are typically calculated using pairwise differences of estimates,
 #' divided by the SD of the population providing the context for those effects.
@@ -73,6 +73,7 @@
 #' the SD of the \emph{paired differences} rather than the \emph{residual} SD.
 #' You may need to enlarge \code{sigma} by a factor of \code{sqrt(2)} to obtain
 #' comparable results with other software.
+#' 
 #' @note
 #' \strong{Disclaimer:} There is substantial disagreement among practitioners on
 #' what is the appropriate \code{sigma} to use in computing effect sizes; or,
@@ -80,6 +81,9 @@
 #' situations. The user is completely responsible for specifying 
 #' appropriate parameters (or for failing to do so).
 #' 
+#' Cohen effect sizes do not even exist for generalized linear models or other
+#' models lacking an additive residual error term. 
+#' 
 #' @export
 #' @note 
 #' The examples here illustrate a sobering message that effect sizes are often not nearly as accurate as you may think.
@@ -108,12 +112,15 @@
 #'   eff_size(emmV, sigma = totSD, edf = 51)
 #' }, spaced = TRUE)
 #' 
-#' # Multivariate model for the same data:
-#'  MOats.lm <- lm(yield ~ Variety, data = MOats)
-#'  eff_size(emmeans(MOats.lm, "Variety"), 
-#'           sigma = sqrt(mean(sigma(MOats.lm)^2)),   # RMS of sigma()
-#'           edf = df.residual(MOats.lm))
+#' 
+
 eff_size = function(object, sigma, edf, method = "pairwise", ...) {
+    ### Lame attempt to warn if unsuitable...
+    # obj.sig = object@misc$sigma
+    # if(is.null(obj.sig) || is.na(obj.sig[1]))
+    #     warning("'eff_size()' results are highly suspect for many models.\n",
+    #             " Cohen effect sizes are not even defined for generalized linear models.\n",
+    #             " See documentation notes.", call. = FALSE)
     if (inherits(object, "emm_list") && ("contrasts" %in% names(object))) {
         message("Since 'object' is a list, we are using the contrasts already present.")
         object = object$contrasts
diff --git a/R/emmGrid-methods.R b/R/emmGrid-methods.R
index 492da0e4..f5212903 100644
--- a/R/emmGrid-methods.R
+++ b/R/emmGrid-methods.R
@@ -332,6 +332,11 @@ vcov.emmGrid = function(object, ..., sep = get_emm_option("sep")) {
 #' situation such as an \code{lm} model, the joint test
 #' of the modified object is in essence a type-2 test as in \code{car::Anova}.
 #' 
+#' Please note that it is possible (or even likely) that there will be disparity
+#' between the \code{grid} and \code{linfct} slots when a submodel is used. This is
+#' because \code{grid} contains the \emph{claimed} values of the predictors and
+#' \code{linfct} contains \emph{aliases} of them computed from the submodel.
+#' 
 #' For some objects such as generalized linear models, specifying \code{submodel}
 #' will typically not produce the same estimates or type-2 tests as would be
 #' obtained by actually fitting a separate model with those specifications.
diff --git a/R/factors.R b/R/factors.R
index 8b2f5739..be2d9e7a 100644
--- a/R/factors.R
+++ b/R/factors.R
@@ -319,8 +319,10 @@ add_grouping = function(object, newname, refname, newlevs, ...) {
 #' 
 #' @examples
 #' ## Using 'add_submodels' to compare adjusted and unadjusted means
-#' fibs <- add_submodels(frg, adj = ~ ., unadj = ~ machine)
-#' emmeans(fibs, consec ~ model | machine)
+#' fibint.lm <- lm(strength ~ machine * diameter, data = fiber)
+#' fibsub <- add_submodels(emmeans(fibint.lm, "machine"), 
+#'     full = ~ ., additive = ~ . - machine:diameter, unadj = ~ machine)
+#' emmeans(fibsub, pairwise ~ model | machine, adjust = "none")
 #' 
 add_submodels = function(object, ..., newname = "model") {
     all = lapply(list(...), \(s) update(object, submodel = s))
diff --git a/man/eff_size.Rd b/man/eff_size.Rd
index 6d5d9c95..a052c0f4 100644
--- a/man/eff_size.Rd
+++ b/man/eff_size.Rd
@@ -2,7 +2,7 @@
 % Please edit documentation in R/eff-size.R
 \name{eff_size}
 \alias{eff_size}
-\title{Calculate effect sizes and confidence bounds thereof}
+\title{Calculate Cohen effect sizes and confidence bounds thereof}
 \usage{
 eff_size(object, sigma, edf, method = "pairwise", ...)
 }
@@ -78,6 +78,9 @@ indeed, whether \emph{any} effect-size measure is appropriate for some
 situations. The user is completely responsible for specifying 
 appropriate parameters (or for failing to do so).
 
+Cohen effect sizes do not even exist for generalized linear models or other
+models lacking an additive residual error term.
+
 The examples here illustrate a sobering message that effect sizes are often not nearly as accurate as you may think.
 }
 \section{Computation}{
@@ -117,9 +120,5 @@ if (require(nlme)) withAutoprint({
   eff_size(emmV, sigma = totSD, edf = 51)
 }, spaced = TRUE)
 
-# Multivariate model for the same data:
- MOats.lm <- lm(yield ~ Variety, data = MOats)
- eff_size(emmeans(MOats.lm, "Variety"), 
-          sigma = sqrt(mean(sigma(MOats.lm)^2)),   # RMS of sigma()
-          edf = df.residual(MOats.lm))
+
 }
diff --git a/man/manip-factors.Rd b/man/manip-factors.Rd
index 63a95440..5a4dd797 100644
--- a/man/manip-factors.Rd
+++ b/man/manip-factors.Rd
@@ -181,9 +181,12 @@ emmeans(gwrg, ~ wool * tension)   # some NAs due to impossible combinations
 emmeans(gwrg, "prod")
 
 ## Using 'add_submodels' to compare adjusted and unadjusted means
-fibs <- add_submodels(frg, adj = ~ ., unadj = ~ machine)
-emmeans(fibs, consec ~ model | machine)
+fibint.lm <- lm(strength ~ machine * diameter, data = fiber)
+fibsub <- add_submodels(emmeans(fibint.lm, "machine"), 
+    full = ~ ., additive = ~ . - machine:diameter, unadj = ~ machine)
+emmeans(fibsub, pairwise ~ model | machine, adjust = "none")
 
+# Permuting factor levels...
 str(v.c.g)
 str(permute_levels(v.c.g, "cyl", c(2,3,1)))
 
diff --git a/man/update.emmGrid.Rd b/man/update.emmGrid.Rd
index 139fa9cf..58f7a7eb 100644
--- a/man/update.emmGrid.Rd
+++ b/man/update.emmGrid.Rd
@@ -200,6 +200,11 @@ and all effects not containing it are orthogonalized-out. Thus, in a purely line
 situation such as an \code{lm} model, the joint test
 of the modified object is in essence a type-2 test as in \code{car::Anova}.
 
+Please note that it is possible (or even likely) that there will be disparity
+between the \code{grid} and \code{linfct} slots when a submodel is used. This is
+because \code{grid} contains the \emph{claimed} values of the predictors and
+\code{linfct} contains \emph{aliases} of them computed from the submodel.
+
 For some objects such as generalized linear models, specifying \code{submodel}
 will typically not produce the same estimates or type-2 tests as would be
 obtained by actually fitting a separate model with those specifications.