Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Drop events with no L1/L2/L3 impacts (or NULL impacts) #195

Merged
merged 6 commits into from
Nov 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
The diff you're trying to view is too large. We only load the first 3000 changed files.
42 changes: 41 additions & 1 deletion Database/fill_data_gap.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@
logger.info(
f"Dropping any records of {e} in {name} for impact {impact}. Shape before: {level[impact].shape}"
)
level[impact] = level[impact][~(level[impact]["Event_ID"] == e)]
level[impact] = level[impact][~(level[impact][dg_utils.event_id] == e)]
logger.info(
f"Dropped any records of {e} in {name} for impact {impact}. Shape after: {level[impact].shape}"
)
Expand Down Expand Up @@ -322,6 +322,46 @@
for impact in level.keys():
level[impact].replace(float("nan"), None, inplace=True)

null_mask_total = l1[[x for x in l1.columns if "Total_" in x]].isnull().all(axis=1)
e_ids_missing_l1_impacts: list[str] = list(l1[null_mask_total].Event_ID.unique())
missing_event_ids_to_drop: list[str] = []
logger.info(f"Found {len(e_ids_missing_l1_impacts)} Event IDs without impacts")
for e_id in e_ids_missing_l1_impacts:
drop_l2, drop_l3 = True, True
for impact in l2.keys():
null_mask_l2 = l2[impact][[dg_utils.num_min, dg_utils.num_max]].isnull().all(axis=1)
l2_df = l2[impact][(~null_mask_l2) & (l2[impact][dg_utils.event_id] == e_id)]
if not l2_df.empty:
logger.warning(f"L2 {e_id} contains impacts not propagated to L1!\n{l2_df}")
drop_l2 = False
del l2_df

null_mask_l3 = l3[impact][[dg_utils.num_min, dg_utils.num_max]].isnull().all(axis=1)
l3_df = l3[impact][(~null_mask_l3) & (l3[impact][dg_utils.event_id] == e_id)]
if not l3_df.empty:
logger.warning(f"L3 {e_id} contains impacts not propagated to L1!\n{l3_df}")
drop_l3 = False
del l3_df

if drop_l3 and drop_l2:
missing_event_ids_to_drop.append(e_id)

missing_event_ids_to_drop = list(set(missing_event_ids_to_drop))

logger.warning(
f"Dropping the following {len(missing_event_ids_to_drop)} IDs from L1/L2/L3: {missing_event_ids_to_drop}"
)
l1 = l1[~l1[dg_utils.event_id].isin(missing_event_ids_to_drop)]
for level in [l2, l3]:
for impact in level.keys():
for e in missing_event_ids_to_drop:
level[impact] = level[impact][~(level[impact][dg_utils.event_id] == e)]

# Replace NaNs with None
for level in [l2, l3]:
for impact in level.keys():
level[impact].replace(float("nan"), None, inplace=True)

logger.info(f"Storing results in {args.output_dir}")
pathlib.Path(args.output_dir).mkdir(parents=True, exist_ok=True)

Expand Down
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Loading