Skip to content

Commit

Permalink
Merge pull request #14 from TsaiLintung/ddidinf
Browse files Browse the repository at this point in the history
Fix Double DiD inference
  • Loading branch information
TsaiLintung authored Nov 25, 2024
2 parents b3a501e + 3572225 commit 7778301
Show file tree
Hide file tree
Showing 26 changed files with 1,871 additions and 75 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: fastdid
Type: Package
Title: Fast Staggered Difference-in-Difference Estimators
Version: 1.0.2
Version: 1.0.3
Date: 2024-10-25
Authors@R: c(
person("Lin-Tung","Tsai",
Expand Down
8 changes: 8 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
# fastdid 1.0.4

- Fixed Double DiD inference: account for stolastic part of the double DiD weight

# fastdid 1.0.3

- Fixed Check for CRAN

# fastdid 1.0.2

- Fixed Typo for CRAN
Expand Down
51 changes: 27 additions & 24 deletions R/aggregate_gt.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,22 +13,24 @@ aggregate_gt_outcome <- function(gt_result, aux, p){

#get aggregation scheme from g-t to target parameters
agg_sch <- get_agg_sch(gt_result, aux, p)

if(!p$event_specific | is.na(p$cohortvar2)){
att <- gt_result$att
inf_func <- gt_result$inf_func %*% t(agg_sch$agg_weights)
} else {
att <- agg_sch$es_weight %*% gt_result$att
inf_func <- gt_result$inf_func %*% t(agg_sch$agg_weights %*% agg_sch$es_weight)

att <- gt_result$att
inf_func <- gt_result$inf_func

if(p$event_specific & !is.na(p$cohortvar2)){
es_weight <- agg_sch$es_sto_weight+agg_sch$es_det_weight
es_inf_weights <- get_weight_influence(att, agg_sch$pre_es_group_time, agg_sch$es_sto_weight, aux, p)
att <- (es_weight) %*% att
inf_func <- (inf_func %*% t(es_weight)) + es_inf_weights
}

#get att
agg_att <- agg_sch$agg_weights %*% att

#get influence function matrix

inf_weights <- get_weight_influence(att, agg_sch, aux, p)
inf_matrix <- inf_func + inf_weights
inf_weights <- get_weight_influence(att, agg_sch$group_time, agg_sch$agg_weights, aux, p)
inf_matrix <- (inf_func %*% t(agg_sch$agg_weights)) + inf_weights

#get se
agg_se <- get_se(inf_matrix, aux, p)
Expand Down Expand Up @@ -63,36 +65,40 @@ get_agg_sch <- function(gt_result, aux, p){
#get the event-specific matrix, and available ggts
if(p$event_specific & !is.na(p$cohortvar2)){
es <- get_es_scheme(group_time, aux, p)
pre_es_group_time <- group_time
pre_es_group_time[, pg := NULL]
group_time <- es$group_time #some gt may not have availble effect (ex: g1 == g2)
es_weight <- as.matrix(es$es_weight)
es_det_weight <- as.matrix(es$es_det_weight)
es_sto_weight <- as.matrix(es$es_sto_weight)
} else {
es_weight <- NULL
es_det_weight <- NULL
es_sto_weight <- NULL
pre_es_group_time <- NULL
}

#choose the target based on aggregation type
tg <- get_agg_targets(group_time, p)
group_time <- tg$group_time
targets <- tg$targets


#get aggregation weights
agg_weights <- data.table()
for(tar in targets){ #the order matters

group_time[, weight := 0] #weight is 0 if not a target
group_time[target == tar & used, weight := pg/sum(pg)]
target_weights <- group_time[, .(weight)] |> transpose()

agg_weights <- rbind(agg_weights, target_weights)
}
group_time[, pg := NULL]

agg_weights <- as.matrix(agg_weights)

return(list(agg_weights = agg_weights, #a matrix of each target and gt's weight in it
targets = targets,
group_time = group_time,
es_weight = es_weight))
pre_es_group_time = pre_es_group_time,
es_det_weight = es_det_weight,
es_sto_weight = es_sto_weight))
}

#get the target parameters
Expand Down Expand Up @@ -135,9 +141,7 @@ get_agg_targets <- function(group_time, p){

# influence function ------------------------------------------------------------

get_weight_influence <- function(att, agg_sch, aux, p){

group <- agg_sch$group_time
get_weight_influence <- function(att, group, agg_weights, aux, p){

id_dt <- data.table(weight = aux$weights/sum(aux$weights), G = aux$dt_inv[, G])
pg_dt <- id_dt[, .(pg = sum(weight)), by = "G"]
Expand All @@ -156,25 +160,25 @@ get_weight_influence <- function(att, agg_sch, aux, p){
}

if(!p$parallel){
inf_weights <- sapply(asplit(agg_sch$agg_weights, 1), function (x){
inf_weights <- sapply(asplit(agg_weights, 1), function (x){
get_weight_influence_param(x, group, att, aux, p)
})
} else {
inf_weights <- matrix(unlist(mclapply(asplit(agg_sch$agg_weights, 1), function (x){
inf_weights <- matrix(unlist(mclapply(asplit(agg_weights, 1), function (x){
get_weight_influence_param(x, group, att, aux, p)
})), ncol = length(agg_sch$targets))
})), ncol = dim(agg_weights)[1])
}


return(inf_weights)

}

#influence from weight calculation
get_weight_influence_param <- function(agg_weights, group, gt_att, aux, p) {

keepers <- which(agg_weights != 0)
group <- group[keepers,]
if(nrow(group) == 0){return(rep(0, length(aux$weights)))} #for direct double did

#moving this outside will create a g*t*id matrix, not really worth the memory
keepers_matrix <- as.matrix(aux$weights*sapply(1:nrow(group), function(g){as.integer(aux$dt_inv[, G] == group[g,G]) - group[g,pg]}))

Expand All @@ -185,7 +189,6 @@ get_weight_influence_param <- function(agg_weights, group, gt_att, aux, p) {
# return the influence function for the weights
inf_weight <- (if1 - if2) %*% as.vector(gt_att[keepers])
inf_weight[abs(inf_weight) < sqrt(.Machine$double.eps)*10] <- 0 #fill zero

return(inf_weight)
}

Expand Down
28 changes: 15 additions & 13 deletions R/double_did.R
Original file line number Diff line number Diff line change
Expand Up @@ -104,9 +104,10 @@ get_es_scheme <- function(group_time, aux, p){
valid_ggt <- which(!sapply(es_weight_list, is.null))
es_group_time <- es_group_time[valid_ggt] #remove the ones without
es_weight_list <- es_weight_list[valid_ggt]
es_weight <- do.call(rbind, es_weight_list) #not sure if the dim is right
es_det_weight <- do.call(rbind, lapply(es_weight_list, \(x){x$det}))
es_sto_weight <- do.call(rbind, lapply(es_weight_list, \(x){x$sto}))

return(list(group_time = es_group_time, es_weight = es_weight))
return(list(group_time = es_group_time, es_det_weight = es_det_weight, es_sto_weight = es_sto_weight))

}

Expand All @@ -115,7 +116,8 @@ get_es_ggt_weight <- function(ggt, group_time, aux, p){

group_time <- copy(group_time) #avoid accidental modification

group_time[, weight := 0] #reset
group_time[, det_weight := 0] #reset
group_time[, sto_weight := 0] #reset
t <- group_time[ggt, time]
g1 <- group_time[ggt, G1]
g2 <- group_time[ggt, G2]
Expand All @@ -125,7 +127,7 @@ get_es_ggt_weight <- function(ggt, group_time, aux, p){

if(t < g2){ #direct pure effect

group_time[ggt, weight := 1] #just use the observed effect
group_time[ggt, det_weight := 1] #just use the observed effect

} else if(g1 < g2) { #imputation = treat-pre + (control-post - control-pre)

Expand All @@ -146,9 +148,9 @@ get_es_ggt_weight <- function(ggt, group_time, aux, p){
if(sum(tb) == 0 | sum(cp) == 0 | sum(cb) == 0){return(NULL)}

#assign the weights
group_time[tb, weight := pg/sum(pg)]
group_time[cp, weight := pg/sum(pg)]
group_time[cb, weight := -pg/sum(pg)]
group_time[tb, det_weight := 1]
group_time[cp, sto_weight := pg/sum(pg)]
group_time[cb, sto_weight := -pg/sum(pg)]


} else if (g1 > g2) { #double did = (treat-post - treat-base) - (control-post - control-pre)
Expand All @@ -171,14 +173,14 @@ get_es_ggt_weight <- function(ggt, group_time, aux, p){
if(sum(tp) == 0 | sum(tb) == 0 | sum(cp) == 0 | sum(cb) == 0){return(NULL)}

#assign the weights
group_time[tp, weight := pg/sum(pg)]
group_time[tb, weight := -pg/sum(pg)]
group_time[cp, weight := -pg/sum(pg)]
group_time[cb, weight := pg/sum(pg)]
group_time[tp, det_weight := 1]
group_time[tb, det_weight := -1]
group_time[cp, sto_weight := -pg/sum(pg)]
group_time[cb, sto_weight := pg/sum(pg)]

}

if(all(group_time[, weight] == 0)){return(NULL)}
return(group_time[, weight])
if(all(group_time[, det_weight+sto_weight] == 0)){return(NULL)} #not redundant!
return(list(det = group_time[, det_weight], sto = group_time[, sto_weight]))

}
3 changes: 2 additions & 1 deletion R/global.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,6 @@ utils::globalVariables(c('.','agg_weight','att','att_cont','att_treat','attgt','
"copy", "validate", "max_control_cohort_diff", "anticipation", "min_control_cohort_diff", "base_period", "post", "att_ciub", "att_cilb", "cband", "alpha",
"G2", "G1", "mg", "cohort1", "cohort2", "event_time_1", "event_time_2",
"D2", "attgt2", "event", "atu2", "y01", "y10", "y11", "tau2", "parallel",
"tp", "cp", "tb", "cb", "no_na", "event_stagger", "double_control_option"))
"tp", "cp", "tb", "cb", "no_na", "event_stagger", "double_control_option",
"det_weight", "sto_weight"))

13 changes: 4 additions & 9 deletions cran-comments.md
Original file line number Diff line number Diff line change
@@ -1,12 +1,7 @@
## Resubmission
This is a resubmission. In this version I have:

* Update the package name in DESCRIPTION.

* Change the titles of vignette to be more meaningful.

* Re-scale the image in vignette misc.

## R CMD check results

0 errors | 0 warnings | 0 note

## Fix CRAN Check

Remove tracemem() call in tests to avoid failing CRAN check on linux.
Binary file added development/fastdid_1.0.3.tar.gz
Binary file not shown.
Loading

0 comments on commit 7778301

Please sign in to comment.