|
3 | 3 | import pandas as pd
|
4 | 4 | from sklearn.preprocessing import StandardScaler
|
5 | 5 | from sklearn.utils import check_random_state
|
| 6 | +from scipy.special import expit |
6 | 7 |
|
7 | 8 | _ihdp_sim_file = os.path.join(os.path.dirname(__file__), "ihdp", "sim.csv")
|
8 | 9 | _ihdp_sim_data = pd.read_csv(_ihdp_sim_file)
|
@@ -93,3 +94,182 @@ def _process_ihdp_sim_data():
|
93 | 94 | # Append a column of ones as intercept
|
94 | 95 | X = np.insert(X, 0, np.ones(X.shape[0]), axis=1)
|
95 | 96 | return T, X
|
| 97 | + |
| 98 | + |
| 99 | +class StandardDGP(): |
| 100 | + def __init__(self, |
| 101 | + n=1000, |
| 102 | + d_t=1, |
| 103 | + d_y=1, |
| 104 | + d_x=5, |
| 105 | + d_z=None, |
| 106 | + discrete_treatment=False, |
| 107 | + discrete_instrument=False, |
| 108 | + squeeze_T=False, |
| 109 | + squeeze_Y=False, |
| 110 | + nuisance_Y=None, |
| 111 | + nuisance_T=None, |
| 112 | + nuisance_TZ=None, |
| 113 | + theta=None, |
| 114 | + y_of_t=None, |
| 115 | + x_eps=1, |
| 116 | + y_eps=1, |
| 117 | + t_eps=1 |
| 118 | + ): |
| 119 | + self.n = n |
| 120 | + self.d_t = d_t |
| 121 | + self.d_y = d_y |
| 122 | + self.d_x = d_x |
| 123 | + self.d_z = d_z |
| 124 | + |
| 125 | + self.discrete_treatment = discrete_treatment |
| 126 | + self.discrete_instrument = discrete_instrument |
| 127 | + self.squeeze_T = squeeze_T |
| 128 | + self.squeeze_Y = squeeze_Y |
| 129 | + |
| 130 | + if callable(nuisance_Y): |
| 131 | + self.nuisance_Y = nuisance_Y |
| 132 | + else: # else must be dict |
| 133 | + if nuisance_Y is None: |
| 134 | + nuisance_Y = {'support': self.d_x, 'degree': 1} |
| 135 | + nuisance_Y['k'] = self.d_x |
| 136 | + self.nuisance_Y, self.nuisance_Y_coefs = self.gen_nuisance(**nuisance_Y) |
| 137 | + |
| 138 | + if callable(nuisance_T): |
| 139 | + self.nuisance_T = nuisance_T |
| 140 | + else: # else must be dict |
| 141 | + if nuisance_T is None: |
| 142 | + nuisance_T = {'support': self.d_x, 'degree': 1} |
| 143 | + nuisance_T['k'] = self.d_x |
| 144 | + self.nuisance_T, self.nuisance_T_coefs = self.gen_nuisance(**nuisance_T) |
| 145 | + |
| 146 | + if self.d_z: |
| 147 | + if callable(nuisance_TZ): |
| 148 | + self.nuisance_TZ = nuisance_TZ |
| 149 | + else: # else must be dict |
| 150 | + if nuisance_TZ is None: |
| 151 | + nuisance_TZ = {'support': self.d_z, 'degree': 1} |
| 152 | + nuisance_TZ['k'] = self.d_z |
| 153 | + self.nuisance_TZ, self.nuisance_TZ_coefs = self.gen_nuisance(**nuisance_TZ) |
| 154 | + else: |
| 155 | + self.nuisance_TZ = lambda x: 0 |
| 156 | + |
| 157 | + if callable(theta): |
| 158 | + self.theta = theta |
| 159 | + else: # else must be dict |
| 160 | + if theta is None: |
| 161 | + theta = {'support': self.d_x, 'degree': 1, 'bounds': [1, 2], 'intercept': True} |
| 162 | + theta['k'] = self.d_x |
| 163 | + self.theta, self.theta_coefs = self.gen_nuisance(**theta) |
| 164 | + |
| 165 | + if callable(y_of_t): |
| 166 | + self.y_of_t = y_of_t |
| 167 | + else: # else must be dict |
| 168 | + if y_of_t is None: |
| 169 | + y_of_t = {'support': self.d_t, 'degree': 1, 'bounds': [1, 1]} |
| 170 | + y_of_t['k'] = self.d_t |
| 171 | + self.y_of_t, self.y_of_t_coefs = self.gen_nuisance(**y_of_t) |
| 172 | + |
| 173 | + self.x_eps = x_eps |
| 174 | + self.y_eps = y_eps |
| 175 | + self.t_eps = t_eps |
| 176 | + |
| 177 | + def gen_Y(self): |
| 178 | + self.y_noise = np.random.normal(size=(self.n, self.d_y), scale=self.y_eps) |
| 179 | + self.Y = self.theta(self.X) * self.y_of_t(self.T) + self.nuisance_Y(self.X) + self.y_noise |
| 180 | + return self.Y |
| 181 | + |
| 182 | + def gen_X(self): |
| 183 | + self.X = np.random.normal(size=(self.n, self.d_x), scale=self.x_eps) |
| 184 | + return self.X |
| 185 | + |
| 186 | + def gen_T(self): |
| 187 | + noise = np.random.normal(size=(self.n, self.d_t), scale=self.t_eps) |
| 188 | + self.T_noise = noise |
| 189 | + |
| 190 | + if self.discrete_treatment: |
| 191 | + prob_T = expit(self.nuisance_T(self.X) + self.nuisance_TZ(self.Z) + self.T_noise) |
| 192 | + self.T = np.random.binomial(1, prob_T) |
| 193 | + return self.T |
| 194 | + |
| 195 | + else: |
| 196 | + self.T = self.nuisance_T(self.X) + self.nuisance_TZ(self.Z) + self.T_noise |
| 197 | + return self.T |
| 198 | + |
| 199 | + def gen_Z(self): |
| 200 | + if self.d_z: |
| 201 | + if self.discrete_instrument: |
| 202 | + # prob_Z = expit(np.random.normal(size=(self.n, self.d_z))) |
| 203 | + # self.Z = np.random.binomial(1, prob_Z, size=(self.n, 1)) |
| 204 | + # self.Z = np.random.binomial(1, prob_Z) |
| 205 | + self.Z = np.random.binomial(1, 0.5, size=(self.n, self.d_z)) |
| 206 | + return self.Z |
| 207 | + |
| 208 | + else: |
| 209 | + Z_noise = np.random.normal(size=(self.n, self.d_z), loc=3, scale=3) |
| 210 | + self.Z = Z_noise |
| 211 | + return self.Z |
| 212 | + |
| 213 | + else: |
| 214 | + self.Z = None |
| 215 | + return self.Z |
| 216 | + |
| 217 | + def gen_nuisance(self, k=None, support=1, bounds=[-1, 1], degree=1, intercept=False): |
| 218 | + if not k: |
| 219 | + k = self.d_x |
| 220 | + |
| 221 | + coefs = np.random.uniform(low=bounds[0], high=bounds[1], size=k) |
| 222 | + supports = np.random.choice(k, size=support, replace=False) |
| 223 | + mask = np.zeros(shape=k) |
| 224 | + mask[supports] = 1 |
| 225 | + coefs = coefs * mask |
| 226 | + |
| 227 | + # orders = np.random.randint(1, degree, k) if degree!=1 else np.ones(shape=(k,)) |
| 228 | + orders = np.ones(shape=(k,)) * degree # enforce all to be the degree for now |
| 229 | + |
| 230 | + if intercept: |
| 231 | + intercept = np.random.uniform(low=1, high=2) |
| 232 | + else: |
| 233 | + intercept = 0 |
| 234 | + |
| 235 | + def calculate_nuisance(W): |
| 236 | + W2 = np.copy(W) |
| 237 | + for i in range(0, k): |
| 238 | + W2[:, i] = W[:, i]**orders[i] |
| 239 | + out = W2.dot(coefs) |
| 240 | + return out.reshape(-1, 1) + intercept |
| 241 | + |
| 242 | + return calculate_nuisance, coefs |
| 243 | + |
| 244 | + def effect(self, X, T0, T1): |
| 245 | + if T0 is None or T0 == 0: |
| 246 | + T0 = np.zeros(shape=(T1.shape[0], self.d_t)) |
| 247 | + |
| 248 | + effect_t1 = self.theta(X) * self.y_of_t(T1) |
| 249 | + effect_t0 = self.theta(X) * self.y_of_t(T0) |
| 250 | + return effect_t1 - effect_t0 |
| 251 | + |
| 252 | + def const_marginal_effect(self, X): |
| 253 | + return self.theta(X) |
| 254 | + |
| 255 | + def gen_data(self): |
| 256 | + X = self.gen_X() |
| 257 | + Z = self.gen_Z() |
| 258 | + T = self.gen_T() |
| 259 | + Y = self.gen_Y() |
| 260 | + |
| 261 | + if self.squeeze_T: |
| 262 | + T = T.squeeze() |
| 263 | + if self.squeeze_Y: |
| 264 | + Y = Y.squeeze() |
| 265 | + |
| 266 | + data_dict = { |
| 267 | + 'Y': Y, |
| 268 | + 'T': T, |
| 269 | + 'X': X |
| 270 | + } |
| 271 | + |
| 272 | + if self.d_z: |
| 273 | + data_dict['Z'] = Z |
| 274 | + |
| 275 | + return data_dict |
0 commit comments