From 254488ef021ccb9cb5ce655868573ed6f758fa12 Mon Sep 17 00:00:00 2001
From: "Russell V. Lenth" <russell-lenth@uiowa.edu>
Date: Sat, 28 Dec 2024 18:48:14 -0600
Subject: [PATCH] Overhauled counterfactuals design and code

---
 DESCRIPTION         |   4 +-
 NEWS.md             |  10 +++
 R/emmGrid-methods.R |   5 ++
 R/emmeans.R         |  46 +++++++++-
 R/ref-grid.R        | 215 +++++++++++++++++++++++++++++++++-----------
 man/emmeans.Rd      |  16 ++++
 man/ref_grid.Rd     |  48 +++++-----
 vignettes/FAQs.Rmd  |  14 +++
 8 files changed, 279 insertions(+), 79 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 6f8db22d..1b5504e4 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,8 +1,8 @@
 Package: emmeans
 Type: Package
 Title: Estimated Marginal Means, aka Least-Squares Means
-Version: 1.10.6-090001
-Date: 2024-12-18
+Version: 1.10.6-090002
+Date: 2024-12-28
 Authors@R: c(person("Russell V.", "Lenth", role = c("aut", "cre", "cph"), 
     email = "russell-lenth@uiowa.edu"),
     person("Balazs", "Banfai", role = "ctb"),
diff --git a/NEWS.md b/NEWS.md
index feb1c8b4..fb6debbe 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -4,6 +4,16 @@ title: "NEWS for the emmeans package"
 
 ## emmeans 1.10-6-090xxx
   * Spelling changes in several vignettes
+  * We have completely revamped the design of reference grids involving
+    counterfactuals. Now, if we specify counterfactuals `A` and `B`, the
+    reference grid comprises combinations of `A`, `B`, `actual_A`, and `actual_B`
+    the latter two used to track the original settings of `A` and `B` in the dataset.
+    We always average over combinations of these factors. The previous code was
+    a memory hog, and we have made it much more efficient for large datasets.
+  * `emmeans()` has also been revised to do special handling of counterfactual
+    reference grids. Whenever we average over a counterfactual `B`, we only
+    use the cases where `B == actual_B`, thus obtaining the same results as 
+    would be obtained when `B` is not regarded as a counterfactual.
   
 
 ## emmeans 1.10.6
diff --git a/R/emmGrid-methods.R b/R/emmGrid-methods.R
index f5212903..80d2bf86 100644
--- a/R/emmGrid-methods.R
+++ b/R/emmGrid-methods.R
@@ -85,6 +85,11 @@ str.emmGrid <- function(object, ...) {
         cat(paste(tmp, collapse = ", "))
         cat("\n")
     }
+    if (length(cf <- object@roles$counterfactuals) > 0) {
+        cat("Counterfactuals\n    ")
+        cat(paste(cf, collapse = ", "))
+        cat("\n")
+    }
     if(!is.null(object@model.info$nesting)) {
         cat("Nesting structure:  ")
         cat(.fmt.nest(object@model.info$nesting))
diff --git a/R/emmeans.R b/R/emmeans.R
index c4c9b230..afaa2e78 100644
--- a/R/emmeans.R
+++ b/R/emmeans.R
@@ -211,6 +211,20 @@ emmeans.list = function(object, specs, ...) {
 #' \code{"cells"}, except nonempty cells are weighted equally and empty cells
 #' are ignored.
 #' 
+#' @section Counterfactuals:
+#' Counterfactual reference grids (see the documentation for \code{\link{ref_grid}})
+#' contain pairs of imputed and actual factor levels, and are handled in a special way.
+#' For starters, the \code{weights} argument is ignored and we always use 
+#' \code{"cells"} weights.
+#' Our understanding is that if factors \code{A, B} are specified as counterfactuals, 
+#' the marginal means for \code{A} should still be the same as if \code{A} were the only 
+#' counterfactual. Accordingly, in computing these marginal means, we
+#' exclude all cases where \code{B != actual_B}, because if \code{A} were the only
+#' counterfactual, \code{B} will stay at its actual level.
+#' We also take special pains to "remember" information about actual and 
+#' imputed levels of counterfactuals so that appropriate results are obtained when
+#' \code{emmeans} is applied to a previous \code{emmeans} result.
+#' 
 #' 
 #' @section Offsets:
 #' Unlike in \code{ref_grid}, an offset need not be scalar. If not enough values
@@ -315,6 +329,32 @@ emmeans = function(object, specs, by = NULL,
             warning("emmeans() results may be corrupted by removal of a nesting structure")
         }
         
+        ## Handle counterfactuals...
+        # cf.grid is either NULL, logical, or a "parent" grid that should replace RG
+        if(!is.null(cf.grid <- RG@misc$cf.grid)) {
+            weights = "cells"  # always use cells weights with counterfactuals
+            if (inherits(cf.grid, "emmGrid"))
+                RG = cf.grid
+            # which counterfactuals do we average over?
+            cf.ao = intersect(setdiff(names(RG@levels), facs), RG@roles$counterfactuals)
+            # zero-out weights for any cases where cf.ao levels differ from actual levels
+            for(f in cf.ao) {
+                fc = paste0("actual_", f)
+                excl = (RG@grid[ , f] != RG@grid[, fc])
+                RG@grid[excl, ".wgt."] = 0
+            }
+            # Fix up the returned grid to play along
+            cf.surv = intersect(facs, RG@roles$counterfactuals)
+            if(length(cf.surv) == 0) # no cfs remain
+                RG@misc$cf.grid = RG@roles$counterfactuals = NULL
+            else {
+                if(length(intersect(facs, paste0("actual_", cf.surv))) == length(cf.surv))
+                    RG@misc$cf.grid = TRUE
+                else
+                    RG@misc$cf.grid = RG   # save this as "parent" grid
+            }
+        }
+        
         # Ensure object is in standard order
         ord = .std.order(RG@grid, RG@levels) ###do.call(order, unname(RG@grid[rev(names(RG@levels))]))
         if(any(ord != seq_along(ord)))
@@ -445,8 +485,10 @@ emmeans = function(object, specs, by = NULL,
         RG@misc$by.vars = by
         RG@misc$avgd.over = union(RG@misc$avgd.over, avgd.over)
         RG@misc$methDesc = "emmeans"
-        RG@roles$predictors = names(levs)
-### Pass up 'new' as we're not changing its class  result = new("emmGrid", RG, linfct = linfct, levels = levs, grid = combs)
+        RG@roles$predictors = setdiff(names(levs), RG@roles$multresp)
+        if ((length(RG@roles$multresp) > 0) && !(RG@roles$multresp %in% names(levs))) 
+            RG@roles$multresp = character(0)
+
         result = as.emmGrid(RG)
         result@linfct = linfct
         result@levels = levs
diff --git a/R/ref-grid.R b/R/ref-grid.R
index 808057bb..b0a44f27 100644
--- a/R/ref-grid.R
+++ b/R/ref-grid.R
@@ -107,16 +107,13 @@
 #'   \code{sigma(object)}, if available, and \code{NULL} otherwise.
 #'   Note: This applies only when the family is \code{"gaussian"}; for other families,
 #'   \code{sigma} is set to \code{NA} and cannot be overridden.
-#' @param counterfactuals,wt.counter,avg.counter \code{counterfactuals} specifies character
+#' @param counterfactuals \code{counterfactuals} specifies character
 #'   names of counterfactual factors. If this is non-missing, a reference grid
-#'   is created consisting of combinations of counterfactual levels and a constructed
-#'   factor \code{.obs.no.} having a level for each observation in the dataset.
-#'   By default, this grid is re-gridded with the response transformation
-#'   and averaged over \code{.obs.no.} (by default, with equal weights, but
-#'   a vector of weights may be specified in \code{wt.counter}; it must be
-#'   of length equal to the number of observations in the dataset).
-#'   If \code{avg.counter} is set to \code{FALSE}, this averaging is disabled.
-#'   See the section below on counterfactuals.
+#'   is created consisting of combinations of counterfactual levels 
+#'   and the actual levels of those same factors.
+#'   This grid is always converted to the response transformation scale
+#'   and averaged over the actual factor levels. See the section below 
+#'   on counterfactuals. 
 #' @param nuisance,non.nuisance,wt.nuis If \code{nuisance} is a vector of predictor names,
 #'   those predictors are omitted from the reference grid. Instead, the result 
 #'   will be as if we had averaged over the levels of those factors, with either 
@@ -318,23 +315,29 @@
 #'
 #' @section Counterfactuals:
 #'   If \code{counterfactuals} is specified, the rows of the entire dataset
-#'   become a factor in the reference grid, and the other reference levels are
+#'   become part of the reference grid, and the other reference levels are
 #'   confined to those named in \code{counterfactuals}. In this type of analysis
-#'   (called G-computation), we substitute each combination of counterfactual
+#'   (called G-computation), we substitute (or impute) each combination of counterfactual
 #'   levels into the entire dataset. Thus, predictions from this grid are those
 #'   of each observation under each of the counterfactual levels. For this to
 #'   make sense, we require an assumption of exchangeability of these levels.
 #'   
-#'   By default, this grid is converted to the response scale (unless otherwise
-#'   specified in \code{regrid}) and averaged over the observations in the dataset.
-#'   Averaging can be disabled by setting \code{avg.counter = FALSE}, but
-#'   be warned that the resulting reference grid is potentially huge -- the
-#'   number of observations in the dataset times the number of counterfactual 
-#'   combinations, times the number of multivariate levels.
+#'   This grid is always converted to the response scale, as G-computation on
+#'   the linear-predictor scale produces the same results as ordinary weighted EMMs.
+#'   If we have counterfactual factors \code{A, B}, the reference grid also includes
+#'   factors \code{actual_A, actual_B} which are used to track which observations
+#'   originally had the \code{A, B} levels before they were changed by the 
+#'   counterfactuals code. We average the response-scale predictions for each
+#'   combination of actual levels and imputed levels (and multivariate levels, 
+#'   if any). See additional discussion of how \code{\link{emmeans}} handles
+#'   counterfactuals under that documentation.
 #'   
-#'   The counterfactuals code is still fairly rudimentary and we can't guarantee
-#'   it will always work, such as in cases of nested models. Sometimes, an error
-#'   can be averted by specifying \code{avg.counter = FALSE}.
+#'   Currently, counterfactuals are not supported when the reference grid
+#'   requires post-processing (e.g., ordinal models with \code{mode = "prob"}).
+#'   Cases where we have nested factor levels can be complicated if mixed-in with counterfactuals, 
+#'   and we make no guarantees. 
+#'   Note that past implementations included arguments \code{wt.counter} and \code{avg.counter},
+#'   which are now deprecated and are just ignored if specified.
 #' 
 #' @section Optional side effect: If the \code{save.ref_grid} option is set to
 #'   \code{TRUE} (see \code{\link{emm_options}}),
@@ -440,10 +443,16 @@ ref_grid <- function(object, at, cov.reduce = mean, cov.keep = get_emm_option("c
                      mult.names, mult.levs, 
                      options = get_emm_option("ref_grid"), data, df, type, 
                      regrid, nesting, offset, sigma, 
-                     counterfactuals, wt.counter, avg.counter = TRUE,
+                     counterfactuals, ## wt.counter, avg.counter = TRUE,
                      nuisance = character(0), non.nuisance, wt.nuis = "equal",
                       rg.limit = get_emm_option("rg.limit"), ...) 
 {
+    if(!missing(counterfactuals)) { # route this to a different routine
+        cl = match.call()
+        cl[[1]] = as.name(".cf.refgrid") # internal function for counterfactuals
+        return(eval(cl))
+    }
+    
     # hack to ignore 'tran' in dots arguments and interpret 'transform' as `regrid` :
     .foo = function(t,tr,tra,tran, transform = NULL, ...) transform
     .bar = .foo(...)
@@ -598,17 +607,17 @@ ref_grid <- function(object, at, cov.reduce = mean, cov.keep = get_emm_option("c
 
     # Now create the reference grid
     if(no.nuis <- (length(nuisance) == 0)) {
-        if (!missing(counterfactuals)) {
-            cfac = intersect(counterfactuals, names(ref.levels))
-            ref.levels = ref.levels[cfac]
-            ref.levels$.obs.no. = seq_len(nrow(data))
-            .check.grid(ref.levels, rg.limit)
-            grid = .setup.cf(ref.levels, data)
-        }
-        else {
-            .check.grid(ref.levels, rg.limit)
-            grid = do.call(expand.grid, ref.levels)
-        }
+        # if (!missing(counterfactuals)) {
+        #     cfac = intersect(counterfactuals, names(ref.levels))
+        #     ref.levels = ref.levels[cfac]
+        #     ref.levels$.obs.no. = seq_len(nrow(data))
+        #     .check.grid(ref.levels, rg.limit)
+        #     grid = .setup.cf(ref.levels, data)
+        # }
+        # ## else {
+        .check.grid(ref.levels, rg.limit)
+        grid = do.call(expand.grid, ref.levels)
+        ##}
     }
     else {
         nuis.info = .setup.nuis(nuisance, ref.levels, trms, rg.limit)
@@ -671,6 +680,7 @@ ref_grid <- function(object, at, cov.reduce = mean, cov.keep = get_emm_option("c
     
     # we've added args `misc` and `options` so emm_basis methods can access and use these if they want
     basis = emm_basis(object, trms, xl, grid, misc = attr(data, "misc"), options = options, ...)
+
     environment(basis$dffun) = baseenv()   # releases unnecessary storage
     if(length(basis$bhat) != ncol(basis$X))
         stop("Something went wrong:\n",
@@ -678,12 +688,12 @@ ref_grid <- function(object, at, cov.reduce = mean, cov.keep = get_emm_option("c
              call. = TRUE)
     
     collapse = NULL
-    if (!missing(counterfactuals)) {
-        grid = do.call(expand.grid, ref.levels)
-        if (missing(regrid)) 
-            regrid = "response"
-        if (avg.counter) collapse = ".obs.no."
-    }
+    # if (!missing(counterfactuals)) {
+    #     grid = do.call(expand.grid, ref.levels)
+    #     if (missing(regrid)) 
+    #         regrid = "response"
+    #     if (avg.counter) collapse = ".obs.no."
+    # }
     
     if(!no.nuis) {
         basis = .basis.nuis(basis, nuis.info, wt.nuis, ref.levels, data, grid, ref.levels)
@@ -904,7 +914,7 @@ ref_grid <- function(object, at, cov.reduce = mean, cov.keep = get_emm_option("c
         post.beta = matrix(NA)
     
     predictors = intersect(attr(data, "predictors"), names(grid))
-    if(!missing(counterfactuals)) predictors = c(predictors, ".obs.no.")
+    # if(!missing(counterfactuals)) predictors = c(predictors, ".obs.no.")
     
     simp.tbl = environment(trms)$.simplify.names.
     if (! is.null(simp.tbl)) {
@@ -962,7 +972,7 @@ ref_grid <- function(object, at, cov.reduce = mean, cov.keep = get_emm_option("c
         result = hook(result, ...)
     }
     if(!missing(regrid)) {
-        if(missing(wt.counter)) wt.counter = 1
+        # if(missing(wt.counter)) wt.counter = 1
         result = regrid(result, transform = regrid, sigma = sigma, 
                         .collapse = collapse, wt.counter = wt.counter, ...)
         if(!is.null(collapse))
@@ -1145,18 +1155,119 @@ ref_grid <- function(object, at, cov.reduce = mean, cov.keep = get_emm_option("c
     basis
 }
 
+# Internal function to do reference grid for counterfactuals
+.cf.refgrid = function(object, counterfactuals, data, options = list(), ...) {
+    if(missing(data))
+        data = recover_data(object, ...)
+    # Start with just the ordinary reference grid
+    rg = ref_grid(object, data = data, ...)
+    cfac = intersect(counterfactuals, names(rg@levels))
+    clevs = rg@levels[cfac]
+    cgrid = do.call(expand.grid, clevs)
+    
+    # Get the stuff we need for each main dataset step
+    link = .get.link(rg@misc)
+    if(is.null(link))
+        link = make.link("identity")
+    trms = rg@model.info$terms
+    xlev = rg@model.info$xlev
+    misc = list()
+    offset = .get.offset(object, grid = data)
+    k = ifelse(length(mr <- rg@roles$multresp) == 0, 1, length(rg@levels[[mr]])) # grid expansion factor
+    
+    # Index sets for combinations of factors
+    cidx = apply(cgrid, 1, function(x) {
+        flag = data[[cfac[1]]] == x[1]
+        if(length(x) > 1)
+            for (col in 2:length(x))
+                flag = flag & data[[cfac[col]]] == x[col]
+        which(flag)
+    }, simplify = FALSE)
+    # account for any NAs in bhat
+    nonNA = !is.na(rg@bhat)
+    # ensure we include all levels of cfacs with data
+    all.active = sort(unlist(cidx))
+    deadrows = sapply(cidx, function(x) x[1])
+    offset = c(offset, rep(mean(offset), length(deadrows)))
+    data = rbind(data, data[deadrows, ])
+    n = nrow(data)
+    mymean = function(x) ifelse(is.null(x), NA, mean(x))
+    
+    ## Compile the averaged results for delta method
+    DL = matrix(nrow = 0, ncol = sum(nonNA))
+    bh = numeric(0)
+    for (i in seq_len(nrow(cgrid))) {
+        g = data
+        for(j in cfac)
+            g[all.active, j] = cgrid[i, j]
+        bas = emm_basis(object, trms, xlev, g, ...)
+        if(!is.null(bas$misc$postGridHook))
+            stop("Sorry, we do not support counterfactuals for this situation.")
+        X = bas$X[, nonNA, drop = FALSE]
+        eta = X %*% bas$bhat[nonNA]
+        yhat = link$linkinv(eta + offset)
+        d = link$mu.eta(eta)
+        X = sweep(X, 1, d, "*")
+        
+        pos = 0
+        XX = matrix(nrow = 0, ncol = ncol(X))
+        for(I in 1:k) {
+            XX = sapply(cidx, \(i) apply(X[pos + i, , drop = FALSE], 2, mymean))
+            DL = rbind(DL, t(XX))
+            yy = sapply(cidx, \(i) ifelse(length(i) == 0, NA, mean(yhat[i + pos])))
+            bh = c(bh, yy)
+            pos = pos + n
+        }
+    }
+    RG = rg
+    RG@bhat = bh
+    nonNA = !is.na(bh)
+    RG@linfct = diag(nrow(DL))
+    RG@V = DL %*% rg@V %*% t(DL)
+    levs = rg@levels
+    levs[cfac] = NULL
+    alevs = clevs
+    names(alevs) = paste0("actual_", cfac)
+    levs = c(alevs, clevs)
+    if (k > 1)
+        levs = c(levs, rg@levels[length(rg@levels)])
+    RG@levels = levs
+    wts = sapply(cidx, length)
+    RG@grid = do.call("expand.grid", levs)
+    RG@grid$.wgt. = rep(wts, length(bh)/length(wts))
+    misc = rg@misc
+    if(!is.null(misc$inv.lbl))
+        misc$estName = misc$inv.lbl
+    misc[c("tran", "inv.lbl", "sigma")] = NULL
+    RG@misc = c(misc, famSize = length(bh), cf.grid = TRUE)
+    RG@model.info$model.matrix = NULL
+    RG@roles$predictors = c(names(alevs), names(clevs))
+    RG@roles$counterfactuals = cfac
+    if (all(nonNA))
+        RG@nbasis = estimability::all.estble
+    else {
+        RG@nbasis = matrix(0, ncol = sum(!nonNA), nrow = length(bh))
+        idx = which(!nonNA)
+        for (j in seq_len(ncol(RG@nbasis)))
+            RG@nbasis[idx[j], j] = 1
+        RG@V = RG@V[nonNA, nonNA]
+    }
+    RG
+}
+
 
+## OLD CODE...
 # Set up grid for counterfactuals - i.e., copies of whole dataset with
 # counterfactual levels substituted, with obs index varying the slowest
-.setup.cf = function(levs, data) {
-    lv = arg = levs[-length(levs)] # omit .obs.no.
-    arg$stringsAsFactors = FALSE
-    g = do.call(expand.grid, arg)
-    idx = rep(seq_len(nrow(data)), each = nrow(g))
-    xdata = data[idx, , drop = FALSE]
-    idx = matrix(seq_len(nrow(xdata)), nrow = nrow(g))
-    for (i in seq_along(g[, 1]))
-        for (j in names(lv))
-            xdata[idx[i, ], j] = g[i, j]
-    xdata
-}
+# .setup.cf = function(levs, data) {
+#     lv = arg = levs[-length(levs)] # omit .obs.no.
+#     arg$stringsAsFactors = FALSE
+#     g = do.call(expand.grid, arg)
+#     idx = rep(seq_len(nrow(data)), each = nrow(g))
+#     xdata = data[idx, , drop = FALSE]
+#     idx = matrix(seq_len(nrow(xdata)), nrow = nrow(g))
+#     for (i in seq_along(g[, 1]))
+#         for (j in names(lv))
+#             xdata[idx[i, ], j] = g[i, j]
+#     xdata
+# }
diff --git a/man/emmeans.Rd b/man/emmeans.Rd
index 5f196b23..f8dbe338 100644
--- a/man/emmeans.Rd
+++ b/man/emmeans.Rd
@@ -158,6 +158,22 @@ data for the factors involved. Using \code{"flat"} is similar to
 are ignored.
 }
 
+\section{Counterfactuals}{
+
+Counterfactual reference grids (see the documentation for \code{\link{ref_grid}})
+contain pairs of imputed and actual factor levels, and are handled in a special way.
+For starters, the \code{weights} argument is ignored and we always use 
+\code{"cells"} weights.
+Our understanding is that if factors \code{A, B} are specified as counterfactuals, 
+the marginal means for \code{A} should still be the same as if \code{A} were the only 
+counterfactual. Accordingly, in computing these marginal means, we
+exclude all cases where \code{B != actual_B}, because if \code{A} were the only
+counterfactual, \code{B} will stay at its actual level.
+We also take special pains to "remember" information about actual and 
+imputed levels of counterfactuals so that appropriate results are obtained when
+\code{emmeans} is applied to a previous \code{emmeans} result.
+}
+
 \section{Offsets}{
 
 Unlike in \code{ref_grid}, an offset need not be scalar. If not enough values
diff --git a/man/ref_grid.Rd b/man/ref_grid.Rd
index c04a1e03..d048ebef 100644
--- a/man/ref_grid.Rd
+++ b/man/ref_grid.Rd
@@ -7,9 +7,8 @@
 ref_grid(object, at, cov.reduce = mean,
   cov.keep = get_emm_option("cov.keep"), mult.names, mult.levs,
   options = get_emm_option("ref_grid"), data, df, type, regrid, nesting,
-  offset, sigma, counterfactuals, wt.counter, avg.counter = TRUE,
-  nuisance = character(0), non.nuisance, wt.nuis = "equal",
-  rg.limit = get_emm_option("rg.limit"), ...)
+  offset, sigma, counterfactuals, nuisance = character(0), non.nuisance,
+  wt.nuis = "equal", rg.limit = get_emm_option("rg.limit"), ...)
 }
 \arguments{
 \item{object}{An object produced by a supported model-fitting function, such
@@ -82,16 +81,13 @@ back-transformation bias adjustments. If not specified, we use
 Note: This applies only when the family is \code{"gaussian"}; for other families,
 \code{sigma} is set to \code{NA} and cannot be overridden.}
 
-\item{counterfactuals, wt.counter, avg.counter}{\code{counterfactuals} specifies character
+\item{counterfactuals}{\code{counterfactuals} specifies character
 names of counterfactual factors. If this is non-missing, a reference grid
-is created consisting of combinations of counterfactual levels and a constructed
-factor \code{.obs.no.} having a level for each observation in the dataset.
-By default, this grid is re-gridded with the response transformation
-and averaged over \code{.obs.no.} (by default, with equal weights, but
-a vector of weights may be specified in \code{wt.counter}; it must be
-of length equal to the number of observations in the dataset).
-If \code{avg.counter} is set to \code{FALSE}, this averaging is disabled.
-See the section below on counterfactuals.}
+is created consisting of combinations of counterfactual levels 
+and the actual levels of those same factors.
+This grid is always converted to the response transformation scale
+and averaged over the actual factor levels. See the section below 
+on counterfactuals.}
 
 \item{nuisance, non.nuisance, wt.nuis}{If \code{nuisance} is a vector of predictor names,
 those predictors are omitted from the reference grid. Instead, the result 
@@ -346,23 +342,29 @@ Some earlier versions of \pkg{emmeans} offer a \code{covnest} argument.
 \section{Counterfactuals}{
 
   If \code{counterfactuals} is specified, the rows of the entire dataset
-  become a factor in the reference grid, and the other reference levels are
+  become part of the reference grid, and the other reference levels are
   confined to those named in \code{counterfactuals}. In this type of analysis
-  (called G-computation), we substitute each combination of counterfactual
+  (called G-computation), we substitute (or impute) each combination of counterfactual
   levels into the entire dataset. Thus, predictions from this grid are those
   of each observation under each of the counterfactual levels. For this to
   make sense, we require an assumption of exchangeability of these levels.
   
-  By default, this grid is converted to the response scale (unless otherwise
-  specified in \code{regrid}) and averaged over the observations in the dataset.
-  Averaging can be disabled by setting \code{avg.counter = FALSE}, but
-  be warned that the resulting reference grid is potentially huge -- the
-  number of observations in the dataset times the number of counterfactual 
-  combinations, times the number of multivariate levels.
+  This grid is always converted to the response scale, as G-computation on
+  the linear-predictor scale produces the same results as ordinary weighted EMMs.
+  If we have counterfactual factors \code{A, B}, the reference grid also includes
+  factors \code{actual_A, actual_B} which are used to track which observations
+  originally had the \code{A, B} levels before they were changed by the 
+  counterfactuals code. We average the response-scale predictions for each
+  combination of actual levels and imputed levels (and multivariate levels, 
+  if any). See additional discussion of how \code{\link{emmeans}} handles
+  counterfactuals under that documentation.
   
-  The counterfactuals code is still fairly rudimentary and we can't guarantee
-  it will always work, such as in cases of nested models. Sometimes, an error
-  can be averted by specifying \code{avg.counter = FALSE}.
+  Currently, counterfactuals are not supported when the reference grid
+  requires post-processing (e.g., ordinal models with \code{mode = "prob"}).
+  Cases where we have nested factor levels can be complicated if mixed-in with counterfactuals, 
+  and we make no guarantees. 
+  Note that past implementations included arguments \code{wt.counter} and \code{avg.counter},
+  which are now deprecated and are just ignored if specified.
 }
 
 \section{Optional side effect}{
diff --git a/vignettes/FAQs.Rmd b/vignettes/FAQs.Rmd
index 564594f6..de98ada2 100644
--- a/vignettes/FAQs.Rmd
+++ b/vignettes/FAQs.Rmd
@@ -40,6 +40,7 @@ This vignette contains answers to questions received from users or posted on dis
  12. [`emmeans()` completely ignores my P-value adjustments](#noadjust)
  13. [`emmeans()` gives me pooled *t* tests, but I expected Welch's *t*](#nowelch)
  14. [I want to see more digits](#rounding)
+ 15. [I want to reproduce results from Stata's `margins` command](#stata)
 
 
 [Index of all vignette topics](vignette-topics.html)
@@ -428,5 +429,18 @@ digits, add `|> as.data.frame()` to the code line. In addition, piping
 `|> data.frame()` will disable formatting of test statistics and P values (but 
 unfortunately will also suppress messages below the tabular output).
 
+
+## I want to reproduce results from Stata's `margins` command {#stata}
+**Stata** is oriented towards observational data, and its `margins` command
+takes a counterfactuals approach. Thus, try adding a `counterfactuals`
+argument (see help for `ref_grid`). For example, to reproduce the results
+in Stata's Example 6 for `margins`, do:
+```r
+margex <- haven::read_dta("https://www.stata-press.com/data/r18/margex.dta")
+margex.glm <- glm(outcome ~ sex * factor(group) + age, data = margex)
+emmeans(margex.glm, "sex", counterfactuals = "sex")
+```
+
+
 [Index of all vignette topics](vignette-topics.html)