Skip to content

Commit

Permalink
Full run raw (#188)
Browse files Browse the repository at this point in the history
  • Loading branch information
i-be-snek authored Nov 9, 2024
1 parent 2e3bcb9 commit cbaf4d0
Show file tree
Hide file tree
Showing 5 changed files with 13 additions and 8 deletions.
6 changes: 0 additions & 6 deletions Database/insert_events.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import argparse
- import ast
import os
import pathlib
import sqlite3
Expand Down Expand Up @@ -144,11 +143,6 @@
for col in event_levels[args.event_level]["location_columns"].keys():
logger.info(f"Processing GeoJson column {col}_GeoJson in {args.event_level}; File: {f}")

- for i in ["GeoJson", "Norm"]:
- data[f"{col}_{i}"] = data[f"{col}_{i}"].parallel_apply(
- lambda x: ast.literal_eval(x) if isinstance(x, str) else []
- )

data[f"{col}_GeoJson"] = data.parallel_apply(
lambda row: (
[
Expand Down
4 changes: 3 additions & 1 deletion Database/parse_events.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,8 @@ def parse_main_events(df: pd.DataFrame, target_columns: list):
if value and not isinstance(value, bool) and re.match(_yes, value)
else (False if value and not isinstance(value, bool) and re.match(_no, value) else value)
)
if not pd.isna(value)
else None
)
logger.info("STEP: Normalizing nulls")
events = utils.replace_nulls(events)
Expand Down Expand Up @@ -658,7 +660,7 @@ def get_target_cols() -> tuple[list]:


if __name__ == "__main__":
- logger = Logging.get_logger("parse_events", level="INFO")
+ logger = Logging.get_logger("parse_events", level="INFO", filename="parse_events.log")
available_event_levels = ["l1", "l2", "l3"]
l1_target_columns, l2_target_columns, l3_target_columns = get_target_cols()

Expand Down
2 changes: 2 additions & 0 deletions Database/scr/normalize_locations.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,8 @@ def normalize_locations(
# TODO: add geojson for unsd regions
return [unsd_search_output.title(), "UNSD region", None]

# corner case
area = "China" if area == "Mainland China" else area
area = area.lower().strip()
if "_" in area:
area = area.replace("_", " ")
Expand Down
6 changes: 5 additions & 1 deletion Database/scr/normalize_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,8 +171,8 @@ def simple_country_check(c: str):
return False
return True if exists else False

- @staticmethod
def df_to_parquet(
+ self,
df: pd.DataFrame,
target_dir: str,
chunk_size: int = 2000,
Expand All @@ -194,7 +194,9 @@ def df_to_parquet(
slc = df.iloc[i : i + chunk_size]
chunk = int(i / chunk_size) + begin_at
fname = os.path.join(target_dir, f"{chunk:04d}.parquet")

slc.to_parquet(fname, engine="fastparquet", **parquet_wargs)
self.logger.info(f"Output file stored in {fname}")

@staticmethod
def df_to_json(
Expand Down Expand Up @@ -679,6 +681,8 @@ def validate_currency_monetary_impact(self, row: dict) -> dict:
cols = ["Total_{}_Min", "Total_{}_Max", "Total_{}_Approx", "Total_{}_Unit", "Total_{}_Inflation_Adjusted"]

for category in ["Damage", "Insured_Damage"]:
if row[f"Total_{category}_Unit"] is None:
return row
try:
Currency(row[f"Total_{category}_Unit"])
except ValueError as err:
Expand Down
3 changes: 3 additions & 0 deletions impactdb.v1.1.raw.db
Git LFS file not shown

0 comments on commit cbaf4d0

Please sign in to comment.