From 86783724836ca3ed17f1b99deeea270b12afc4f4 Mon Sep 17 00:00:00 2001 From: "Russell V. Lenth" Date: Wed, 8 Jan 2025 11:23:35 -0600 Subject: [PATCH] Fixes to regrid for non-est cases Also removed disabled code in ref_grid, regrid and corrected an error in FAQs --- DESCRIPTION | 4 +- NEWS.md | 2 + R/emmGrid-methods.R | 132 ++++++++++++++++---------------------------- R/ref-grid.R | 16 ------ vignettes/FAQs.Rmd | 3 +- 5 files changed, 54 insertions(+), 103 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 1b5504e..43cfc1f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: emmeans Type: Package Title: Estimated Marginal Means, aka Least-Squares Means -Version: 1.10.6-090002 -Date: 2024-12-28 +Version: 1.10.6-090003 +Date: 2025-01-08 Authors@R: c(person("Russell V.", "Lenth", role = c("aut", "cre", "cph"), email = "russell-lenth@uiowa.edu"), person("Balazs", "Banfai", role = "ctb"), diff --git a/NEWS.md b/NEWS.md index fb6debb..c535ab0 100644 --- a/NEWS.md +++ b/NEWS.md @@ -14,6 +14,8 @@ title: "NEWS for the emmeans package" reference grids. Whenever we average over a counterfactual `B`, we only use the cases where `B == actual_B`, thus obtaining the same results as would be obtained when `B` is not regarded as a counterfactual. + * Tweaks to `regrid()` to create `@post.beta` slot correctly when there are + non-estimable cases. ## emmeans 1.10.6 diff --git a/R/emmGrid-methods.R b/R/emmGrid-methods.R index 80d2bf8..082437c 100644 --- a/R/emmGrid-methods.R +++ b/R/emmGrid-methods.R @@ -916,7 +916,8 @@ regrid = function(object, transform = c("response", "mu", "unlink", "none", "pas if (is.na(object@post.beta[1]) && !missing(N.sim)) { message("Simulating a sample of size ", N.sim, " of regression coefficients.") - object@post.beta = sim(N.sim, object@bhat, object@V) + bh = object@bhat[!is.na(object@bhat)] + object@post.beta = sim(N.sim, bh, object@V) } if (transform == "pass") @@ -926,65 +927,58 @@ regrid = function(object, transform = c("response", "mu", "unlink", "none", "pas if ((transform == "response") && (!is.null(object@misc$tran2))) object = regrid(object, transform = "mu") - .collapse = (\(.collapse = NULL, ...) .collapse)(...) # check counterfactuals hook - # Save post.beta stuff PB = object@post.beta NC = attr(PB, "n.chains") if (!is.na(PB[1])) { # fix up post.beta BEFORE we overwrite parameters - PB = PB %*% t(object@linfct) + PB = PB %*% t(object@linfct[ , !is.na(object@bhat), drop = FALSE]) if (".offset." %in% names(object@grid)) PB = t(apply(PB, 1, function(.) . + object@grid[[".offset."]])) } est = .est.se.df(object, do.se = TRUE) ###FALSE) - if(is.null(.collapse)) { - estble = !(is.na(est[[1]])) - object@V = vcov(object)[estble, estble, drop = FALSE] - object@bhat = est[[1]] - object@linfct = diag(1, length(estble)) - object@misc$regrid.flag = TRUE - pargs = object@grid[names(object@levels)] - lbls = do.call(paste, c(pargs, sep = ".")) - if (!is.null(disp <- object@misc$display)) { # fix up for the bookkeeping in nested models - object@V = object@V[estble, estble, drop = FALSE] - object@linfct = matrix(0, nrow = length(disp), ncol = length(estble)) - object@linfct[disp, ] = diag(1, length(estble)) - lbls = lbls[disp] - } - colnames(object@linfct) = lbls - if(all(estble)) - object@nbasis = estimability::all.estble - else - object@nbasis = object@linfct[, !estble, drop = FALSE] - - # override the df function - df = est$df - edf = df[estble] - if (length(edf) == 0) edf = NA - # note both NA/NA and Inf/Inf test is.na() = TRUE - prev.df.msg = attr(object@dffun, "mesg") - if (any(is.na(edf/edf)) || (diff(range(edf)) < .01)) { # use common value - object@dfargs = list(df = mean(edf, na.rm = TRUE)) - object@dffun = function(k, dfargs) dfargs$df - } - else { # use containment df - object@dfargs = list(df = df) - object@dffun = function(k, dfargs) { - idx = which(zapsmall(k) != 0) - ifelse(length(idx) == 0, NA, min(dfargs$df[idx], na.rm = TRUE)) - } - } - if(!is.null(prev.df.msg)) - attr(object@dffun, "mesg") = ifelse( - startsWith(prev.df.msg, "inherited"), prev.df.msg, - paste("inherited from", prev.df.msg, "when re-gridding")) + estble = !(is.na(est[[1]])) + object@V = vcov(object)[estble, estble, drop = FALSE] + object@bhat = est[[1]] + object@linfct = diag(1, length(estble)) + object@misc$regrid.flag = TRUE + pargs = object@grid[names(object@levels)] + lbls = do.call(paste, c(pargs, sep = ".")) + if (!is.null(disp <- object@misc$display)) { # fix up for the bookkeeping in nested models + object@V = object@V[estble, estble, drop = FALSE] + object@linfct = matrix(0, nrow = length(disp), ncol = length(estble)) + object@linfct[disp, ] = diag(1, length(estble)) + lbls = lbls[disp] } + colnames(object@linfct) = lbls + if(all(estble)) + object@nbasis = estimability::all.estble + else + object@nbasis = object@linfct[, !estble, drop = FALSE] - if(!is.null(.collapse) && is.null(object@misc$tran)) # need explicit link so we can collapse - object@misc$tran = attr(est, "link") = make.link("identity") + # override the df function + df = est$df + edf = df[estble] + if (length(edf) == 0) edf = NA + # note both NA/NA and Inf/Inf test is.na() = TRUE + prev.df.msg = attr(object@dffun, "mesg") + if (any(is.na(edf/edf)) || (diff(range(edf)) < .01)) { # use common value + object@dfargs = list(df = mean(edf, na.rm = TRUE)) + object@dffun = function(k, dfargs) dfargs$df + } + else { # use containment df + object@dfargs = list(df = df) + object@dffun = function(k, dfargs) { + idx = which(zapsmall(k) != 0) + ifelse(length(idx) == 0, NA, min(dfargs$df[idx], na.rm = TRUE)) + } + } + if(!is.null(prev.df.msg)) + attr(object@dffun, "mesg") = ifelse( + startsWith(prev.df.msg, "inherited"), prev.df.msg, + paste("inherited from", prev.df.msg, "when re-gridding")) if(transform %in% c("response", "mu", "unlink", links, "user") && !is.null(object@misc$tran)) { flink = link = attr(est, "link") @@ -1000,36 +994,11 @@ regrid = function(object, transform = c("response", "mu", "unlink", "none", "pas } if (!is.na(PB[1])) PB = matrix(flink$linkinv(PB), ncol = ncol(PB)) - if(is.null(.collapse)) { - D = flink$mu.eta(object@bhat[estble]) - object@bhat = flink$linkinv(object@bhat) - # efficient repl for D'VD with D <- diag(D) - object@V = sweep(sweep(object@V, 1, D, "*"), 2, D, "*") - } - else { # we'll average over the levels of .collapse (assume it varies slowest) - est = est[[1]] - nobs = length(object@levels[[.collapse]]) - idx = sapply(seq_len(nobs), \(i) which(object@grid[[.collapse]] == i)) - X = sweep(object@linfct, 1, flink$mu.eta(est), "*") - wt.counter = (\(wt.counter, ...) wt.counter)(...) - if (length(wt.counter) == 1) wt.counter = rep(1, nobs) - wmn = function(x) sum(wt.counter*x) / sum(wt.counter) - est = flink$linkinv(est) - object@bhat = sapply(seq_len(nrow(idx)), \(i) wmn(est[idx[i,]])) - L = matrix(0, nrow = nrow(idx), ncol = ncol(X)) - for (i in seq_len(nrow(idx))) - L[i, ] = apply(X[idx[i,], , drop = FALSE], 2, wmn) - object@V = L %*% tcrossprod(object@V, L) - object@linfct = diag(1, nrow(L)) - object@misc$regrid.flag = TRUE - if(!is.na(PB[1])) { - pb = matrix(0, ncol = nrow(L), nrow = nrow(PB)) - for (i in seq_len(nrow(idx))) - pb[, i] = apply(PB[, idx[i,], drop = FALSE], 1, wmn) - PB = pb - } - } - + D = flink$mu.eta(object@bhat[estble]) + object@bhat = flink$linkinv(object@bhat) + # efficient repl for D'VD with D <- diag(D) + object@V = sweep(sweep(object@V, 1, D, "*"), 2, D, "*") + inm = object@misc$inv.lbl if (!is.null(inm)) { object@misc$estName = inm @@ -1077,6 +1046,7 @@ regrid = function(object, transform = c("response", "mu", "unlink", "none", "pas } if(!is.na(PB[1])) { + PB = PB[ , !is.na(object@bhat), drop = FALSE] attr(PB, "n.chains") = NC object@post.beta = PB } @@ -1087,13 +1057,7 @@ regrid = function(object, transform = c("response", "mu", "unlink", "none", "pas object@model.info$model.matrix = "Submodels are not available with regridded objects" if(!missing(predict.type)) object = update(object, predict.type = predict.type) - if(!is.null(.collapse)) { - object@grid = object@grid[idx[,1], , drop = TRUE] - object@grid[[.collapse]] = object@levels[[.collapse]] = NULL - object@roles$predictors = setdiff(object@roles$predictors, .collapse) - object@misc$famSize = nrow(object@linfct) - } - + object } diff --git a/R/ref-grid.R b/R/ref-grid.R index b0a44f2..2477c68 100644 --- a/R/ref-grid.R +++ b/R/ref-grid.R @@ -607,17 +607,8 @@ ref_grid <- function(object, at, cov.reduce = mean, cov.keep = get_emm_option("c # Now create the reference grid if(no.nuis <- (length(nuisance) == 0)) { - # if (!missing(counterfactuals)) { - # cfac = intersect(counterfactuals, names(ref.levels)) - # ref.levels = ref.levels[cfac] - # ref.levels$.obs.no. = seq_len(nrow(data)) - # .check.grid(ref.levels, rg.limit) - # grid = .setup.cf(ref.levels, data) - # } - # ## else { .check.grid(ref.levels, rg.limit) grid = do.call(expand.grid, ref.levels) - ##} } else { nuis.info = .setup.nuis(nuisance, ref.levels, trms, rg.limit) @@ -688,13 +679,6 @@ ref_grid <- function(object, at, cov.reduce = mean, cov.keep = get_emm_option("c call. = TRUE) collapse = NULL - # if (!missing(counterfactuals)) { - # grid = do.call(expand.grid, ref.levels) - # if (missing(regrid)) - # regrid = "response" - # if (avg.counter) collapse = ".obs.no." - # } - if(!no.nuis) { basis = .basis.nuis(basis, nuis.info, wt.nuis, ref.levels, data, grid, ref.levels) grid = basis$grid diff --git a/vignettes/FAQs.Rmd b/vignettes/FAQs.Rmd index de98ada..a46d9c9 100644 --- a/vignettes/FAQs.Rmd +++ b/vignettes/FAQs.Rmd @@ -437,7 +437,8 @@ argument (see help for `ref_grid`). For example, to reproduce the results in Stata's Example 6 for `margins`, do: ```r margex <- haven::read_dta("https://www.stata-press.com/data/r18/margex.dta") -margex.glm <- glm(outcome ~ sex * factor(group) + age, data = margex) +margex.glm <- glm(outcome ~ sex * factor(group) + age, data = margex, + family = binomial) emmeans(margex.glm, "sex", counterfactuals = "sex") ```