Skip to content

Commit 2d8a94d

Browse files
committed
rough first cut of dgp for metric agg
1 parent e67bff7 commit 2d8a94d

File tree

1 file changed

+180
-0
lines changed

1 file changed

+180
-0
lines changed

econml/data/dgps.py

Lines changed: 180 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import pandas as pd
44
from sklearn.preprocessing import StandardScaler
55
from sklearn.utils import check_random_state
6+
from scipy.special import expit
67

78
_ihdp_sim_file = os.path.join(os.path.dirname(__file__), "ihdp", "sim.csv")
89
_ihdp_sim_data = pd.read_csv(_ihdp_sim_file)
@@ -93,3 +94,182 @@ def _process_ihdp_sim_data():
9394
# Append a column of ones as intercept
9495
X = np.insert(X, 0, np.ones(X.shape[0]), axis=1)
9596
return T, X
97+
98+
99+
class StandardDGP():
100+
def __init__(self,
101+
n=1000,
102+
d_t=1,
103+
d_y=1,
104+
d_x=5,
105+
d_z=None,
106+
discrete_treatment=False,
107+
discrete_instrument=False,
108+
squeeze_T=False,
109+
squeeze_Y=False,
110+
nuisance_Y=None,
111+
nuisance_T=None,
112+
nuisance_TZ=None,
113+
theta=None,
114+
y_of_t=None,
115+
x_eps=1,
116+
y_eps=1,
117+
t_eps=1
118+
):
119+
self.n = n
120+
self.d_t = d_t
121+
self.d_y = d_y
122+
self.d_x = d_x
123+
self.d_z = d_z
124+
125+
self.discrete_treatment = discrete_treatment
126+
self.discrete_instrument = discrete_instrument
127+
self.squeeze_T = squeeze_T
128+
self.squeeze_Y = squeeze_Y
129+
130+
if callable(nuisance_Y):
131+
self.nuisance_Y = nuisance_Y
132+
else: # else must be dict
133+
if nuisance_Y is None:
134+
nuisance_Y = {'support': self.d_x, 'degree': 1}
135+
nuisance_Y['k'] = self.d_x
136+
self.nuisance_Y, self.nuisance_Y_coefs = self.gen_nuisance(**nuisance_Y)
137+
138+
if callable(nuisance_T):
139+
self.nuisance_T = nuisance_T
140+
else: # else must be dict
141+
if nuisance_T is None:
142+
nuisance_T = {'support': self.d_x, 'degree': 1}
143+
nuisance_T['k'] = self.d_x
144+
self.nuisance_T, self.nuisance_T_coefs = self.gen_nuisance(**nuisance_T)
145+
146+
if self.d_z:
147+
if callable(nuisance_TZ):
148+
self.nuisance_TZ = nuisance_TZ
149+
else: # else must be dict
150+
if nuisance_TZ is None:
151+
nuisance_TZ = {'support': self.d_z, 'degree': 1}
152+
nuisance_TZ['k'] = self.d_z
153+
self.nuisance_TZ, self.nuisance_TZ_coefs = self.gen_nuisance(**nuisance_TZ)
154+
else:
155+
self.nuisance_TZ = lambda x: 0
156+
157+
if callable(theta):
158+
self.theta = theta
159+
else: # else must be dict
160+
if theta is None:
161+
theta = {'support': self.d_x, 'degree': 1, 'bounds': [1, 2], 'intercept': True}
162+
theta['k'] = self.d_x
163+
self.theta, self.theta_coefs = self.gen_nuisance(**theta)
164+
165+
if callable(y_of_t):
166+
self.y_of_t = y_of_t
167+
else: # else must be dict
168+
if y_of_t is None:
169+
y_of_t = {'support': self.d_t, 'degree': 1, 'bounds': [1, 1]}
170+
y_of_t['k'] = self.d_t
171+
self.y_of_t, self.y_of_t_coefs = self.gen_nuisance(**y_of_t)
172+
173+
self.x_eps = x_eps
174+
self.y_eps = y_eps
175+
self.t_eps = t_eps
176+
177+
def gen_Y(self):
178+
self.y_noise = np.random.normal(size=(self.n, self.d_y), scale=self.y_eps)
179+
self.Y = self.theta(self.X) * self.y_of_t(self.T) + self.nuisance_Y(self.X) + self.y_noise
180+
return self.Y
181+
182+
def gen_X(self):
183+
self.X = np.random.normal(size=(self.n, self.d_x), scale=self.x_eps)
184+
return self.X
185+
186+
def gen_T(self):
187+
noise = np.random.normal(size=(self.n, self.d_t), scale=self.t_eps)
188+
self.T_noise = noise
189+
190+
if self.discrete_treatment:
191+
prob_T = expit(self.nuisance_T(self.X) + self.nuisance_TZ(self.Z) + self.T_noise)
192+
self.T = np.random.binomial(1, prob_T)
193+
return self.T
194+
195+
else:
196+
self.T = self.nuisance_T(self.X) + self.nuisance_TZ(self.Z) + self.T_noise
197+
return self.T
198+
199+
def gen_Z(self):
200+
if self.d_z:
201+
if self.discrete_instrument:
202+
# prob_Z = expit(np.random.normal(size=(self.n, self.d_z)))
203+
# self.Z = np.random.binomial(1, prob_Z, size=(self.n, 1))
204+
# self.Z = np.random.binomial(1, prob_Z)
205+
self.Z = np.random.binomial(1, 0.5, size=(self.n, self.d_z))
206+
return self.Z
207+
208+
else:
209+
Z_noise = np.random.normal(size=(self.n, self.d_z), loc=3, scale=3)
210+
self.Z = Z_noise
211+
return self.Z
212+
213+
else:
214+
self.Z = None
215+
return self.Z
216+
217+
def gen_nuisance(self, k=None, support=1, bounds=[-1, 1], degree=1, intercept=False):
218+
if not k:
219+
k = self.d_x
220+
221+
coefs = np.random.uniform(low=bounds[0], high=bounds[1], size=k)
222+
supports = np.random.choice(k, size=support, replace=False)
223+
mask = np.zeros(shape=k)
224+
mask[supports] = 1
225+
coefs = coefs * mask
226+
227+
# orders = np.random.randint(1, degree, k) if degree!=1 else np.ones(shape=(k,))
228+
orders = np.ones(shape=(k,)) * degree # enforce all to be the degree for now
229+
230+
if intercept:
231+
intercept = np.random.uniform(low=1, high=2)
232+
else:
233+
intercept = 0
234+
235+
def calculate_nuisance(W):
236+
W2 = np.copy(W)
237+
for i in range(0, k):
238+
W2[:, i] = W[:, i]**orders[i]
239+
out = W2.dot(coefs)
240+
return out.reshape(-1, 1) + intercept
241+
242+
return calculate_nuisance, coefs
243+
244+
def effect(self, X, T0, T1):
245+
if T0 is None or T0 == 0:
246+
T0 = np.zeros(shape=(T1.shape[0], self.d_t))
247+
248+
effect_t1 = self.theta(X) * self.y_of_t(T1)
249+
effect_t0 = self.theta(X) * self.y_of_t(T0)
250+
return effect_t1 - effect_t0
251+
252+
def const_marginal_effect(self, X):
253+
return self.theta(X)
254+
255+
def gen_data(self):
256+
X = self.gen_X()
257+
Z = self.gen_Z()
258+
T = self.gen_T()
259+
Y = self.gen_Y()
260+
261+
if self.squeeze_T:
262+
T = T.squeeze()
263+
if self.squeeze_Y:
264+
Y = Y.squeeze()
265+
266+
data_dict = {
267+
'Y': Y,
268+
'T': T,
269+
'X': X
270+
}
271+
272+
if self.d_z:
273+
data_dict['Z'] = Z
274+
275+
return data_dict

0 commit comments

Comments
 (0)