# Simulation parameters for IIVM LATE Coverage

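# Settings for a Monte Carlo study of confidence-interval coverage for the local
# average treatment effect (LATE) in an interactive IV model (IIVM). Section and
# key names below (dml_parameters, ml_g / ml_m / ml_r) follow the DoubleML naming
# convention, which is assumed to be the estimation backend.
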
simulation_parameters:
  repetitions: 200      # Number of Monte Carlo repetitions
  max_runtime: 19800    # Wall-clock budget: 5.5 hours in seconds
  random_seed: 42       # Seed for reproducibility
  n_jobs: -2            # Parallel workers; -2 typically means all cores but one (joblib convention)

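# Data-generating process. The parameter names (theta, n_obs, dim_x, alpha_x) match
# the arguments of doubleml.datasets.make_iivm_data, which is presumably the DGP
# invoked by the simulation runner.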
dgp_parameters:
  theta: [0.5]      # Treatment effect
  n_obs: [500]      # Sample size
  dim_x: [20]       # Number of covariates
  alpha_x: [1.0]    # Covariate effect

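# The learner name strings below are assumed to be resolved by the simulation runner
# to concrete estimators: "LassoCV" and "Logistic" to the scikit-learn linear models,
# and "LGBM Regr." / "LGBM Clas." to LightGBM's regressor and classifier, configured
# with the params given here.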
# Define reusable learner configurations (YAML anchors, referenced below via *name)
learner_definitions:
  lasso: &lasso
    name: "LassoCV"

  logit: &logit
    name: "Logistic"

  lgbmr: &lgbmr
    name: "LGBM Regr."
    params:
      n_estimators: 100       # Fewer trees; with small data, fewer is often better
      learning_rate: 0.05     # Reasonable speed without sacrificing much accuracy
      num_leaves: 7           # Smaller trees reduce overfitting risk
      max_depth: 3            # Shallow trees generalize better on tiny datasets
      min_child_samples: 20   # Avoids splitting on noise
      subsample: 1.0          # Use all rows; subsampling adds variance with small data
      colsample_bytree: 0.8   # Still good to randomly drop some features per tree
      reg_alpha: 0.1          # L1 regularization helps when there are many features
      reg_lambda: 1.0         # Stronger L2 regularization improves generalization
      random_state: 42        # Reproducibility

  lgbmc: &lgbmc
    name: "LGBM Clas."
    params:
      n_estimators: 100       # Fewer trees; with small data, fewer is often better
      learning_rate: 0.05     # Reasonable speed without sacrificing much accuracy
      num_leaves: 7           # Smaller trees reduce overfitting risk
      max_depth: 3            # Shallow trees generalize better on tiny datasets
      min_child_samples: 20   # Avoids splitting on noise
      subsample: 1.0          # Use all rows; subsampling adds variance with small data
      colsample_bytree: 0.8   # Still good to randomly drop some features per tree
      reg_alpha: 0.1          # L1 regularization helps when there are many features
      reg_lambda: 1.0         # Stronger L2 regularization improves generalization
      random_state: 42        # Reproducibility

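# Nuisance learner combinations. In DoubleML's interactive IV model the three
# nuisance functions are:
#   ml_g: outcome regression E[Y | X, Z]
#   ml_m: instrument propensity E[Z | X]      (classification)
#   ml_r: treatment propensity E[D | X, Z]    (classification)
# The eight entries below enumerate all combinations of {lasso, lgbmr} for ml_g
# with {logit, lgbmc} for ml_m and ml_r.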
dml_parameters:
  learners:
    - ml_g: *lasso
      ml_m: *logit
      ml_r: *logit
    - ml_g: *lasso
      ml_m: *logit
      ml_r: *lgbmc
    - ml_g: *lasso
      ml_m: *lgbmc
      ml_r: *logit
    - ml_g: *lasso
      ml_m: *lgbmc
      ml_r: *lgbmc
    - ml_g: *lgbmr
      ml_m: *logit
      ml_r: *logit
    - ml_g: *lgbmr
      ml_m: *logit
      ml_r: *lgbmc
    - ml_g: *lgbmr
      ml_m: *lgbmc
      ml_r: *logit
    - ml_g: *lgbmr
      ml_m: *lgbmc
      ml_r: *lgbmc

confidence_parameters:
  level: [0.95, 0.90]   # Nominal confidence levels at which coverage is evaluated
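
# A minimal sketch of a single repetition that this config describes, kept as a
# comment so the file remains valid YAML. It assumes the DoubleML Python API and
# the (lasso, logit, logit) learner combination; the actual runner script that
# consumes this file is not shown here.
#
#   from doubleml import DoubleMLData, DoubleMLIIVM
#   from doubleml.datasets import make_iivm_data
#   from sklearn.linear_model import LassoCV, LogisticRegression
#
#   df = make_iivm_data(theta=0.5, n_obs=500, dim_x=20, alpha_x=1.0, return_type="DataFrame")
#   dml_data = DoubleMLData(df, y_col="y", d_cols="d", z_cols="z")
#   dml_iivm = DoubleMLIIVM(dml_data, ml_g=LassoCV(), ml_m=LogisticRegression(), ml_r=LogisticRegression())
#   dml_iivm.fit()
#   ci = dml_iivm.confint(level=0.95)
#   # Coverage = share of repetitions in which the interval contains theta = 0.5
#   covers = ci.loc["d", "2.5 %"] <= 0.5 <= ci.loc["d", "97.5 %"]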