Skip to content

Commit

Permalink
Currency Conversion and Inflation Adjustment (db release) (#191)
Browse files Browse the repository at this point in the history
Co-authored-by: liniiiiii <[email protected]>
  • Loading branch information
i-be-snek and liniiiiii authored Nov 26, 2024
1 parent 12ceb4a commit d7b4427
Show file tree
Hide file tree
Showing 4,347 changed files with 6,784,500 additions and 66 deletions.
The diff you're trying to view is too large. We only load the first 3000 changed files.
19 changes: 19 additions & 0 deletions Database/data/currency/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
In the Wikimpacts 1.0 database, we adjusted all monetary damage values for inflation to 2024. The inflation index was taken from [this source](https://www.minneapolisfed.org/about-us/monetary-policy/inflation-calculator/consumer-price-index-1800-). The file `inflation_Index.csv` contains the data exactly as published on that website, while `inflation_Index_2024.csv` contains the same index rebased to 2024; the rebased file is the one we applied in our process.

For currency conversion, we converted the majority of non-USD currencies to USD. The table below lists the number of records for each currency in the Wikimpacts 1.0 database. We obtained the original monthly conversion rates from [this source](https://fx.sauder.ubc.ca/fxdata.php), stored in `Database/data/Currency_Conversion.xlsx`, and aggregated them to yearly averages in `Database/data/Currency_conversion_yearly_averages.xlsx`. For conversion, we used the yearly rate; for records dated earlier than the first year available in our table, we applied the rate of the earliest available year as a constant.

| Currency | Count |
|----------|-------|
| EUR | 151 |
| AUD | 186 |
| GBP | 98 |
| INR | 84 |
| CAD | 144 |
| NZD | 43 |
| JPY | 252 |
| PHP | 158 |
| CNY | 118 |
| VND | 51 |
| KRW | 13 |
| MXN | 57 |
| NTD | 20 |
| BRL | 10 |
3 changes: 3 additions & 0 deletions Database/data/currency/currency_conversion/AUD-USD.csv
Git LFS file not shown
3 changes: 3 additions & 0 deletions Database/data/currency/currency_conversion/BRL-USD.csv
Git LFS file not shown
3 changes: 3 additions & 0 deletions Database/data/currency/currency_conversion/CAD-USD.csv
Git LFS file not shown
3 changes: 3 additions & 0 deletions Database/data/currency/currency_conversion/CNY-USD.csv
Git LFS file not shown
3 changes: 3 additions & 0 deletions Database/data/currency/currency_conversion/EUR-USD.csv
Git LFS file not shown
3 changes: 3 additions & 0 deletions Database/data/currency/currency_conversion/GBP-USD.csv
Git LFS file not shown
3 changes: 3 additions & 0 deletions Database/data/currency/currency_conversion/INR-USD.csv
Git LFS file not shown
3 changes: 3 additions & 0 deletions Database/data/currency/currency_conversion/JPY-USD.csv
Git LFS file not shown
3 changes: 3 additions & 0 deletions Database/data/currency/currency_conversion/KRW-USD.csv
Git LFS file not shown
3 changes: 3 additions & 0 deletions Database/data/currency/currency_conversion/MXN-USD.csv
Git LFS file not shown
3 changes: 3 additions & 0 deletions Database/data/currency/currency_conversion/NZD-USD.csv
Git LFS file not shown
3 changes: 3 additions & 0 deletions Database/data/currency/currency_conversion/PHP-USD.csv
Git LFS file not shown
3 changes: 3 additions & 0 deletions Database/data/currency/currency_conversion/VND-USD.csv
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
3 changes: 3 additions & 0 deletions Database/data/currency/inflation_Index.csv
Git LFS file not shown
3 changes: 3 additions & 0 deletions Database/data/currency/inflation_Index_2024.csv
Git LFS file not shown
116 changes: 99 additions & 17 deletions Database/fill_data_gap.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@

import pandas as pd

from Database.scr.normalize_data import DataGapUtils
from Database.scr.normalize_currency import CurrencyConversion, InflationAdjustment
from Database.scr.normalize_data import DataGapUtils, DataUtils
from Database.scr.normalize_utils import Logging, NormalizeUtils

if __name__ == "__main__":
Expand All @@ -26,19 +27,28 @@
)

timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
logger = Logging.get_logger("fill-data-gap", "INFO", f"data_gap_{timestamp}.log")
logger = Logging.get_logger("fill-data-gap", "DEBUG", f"data_gap_{timestamp}.log")
args = parser.parse_args()
dg_utils = DataGapUtils()
data_utils = DataUtils()
norm_utils = NormalizeUtils()
ia_utils = InflationAdjustment()
cc_utils = CurrencyConversion()

l1, l2, l3 = dg_utils.load_data(input_dir=args.input_dir)
l1, l2, l3 = data_utils.load_data(input_dir=args.input_dir)
logger.info("Data loaded!")

# Dropping all records with no Event_ID or no Main_Event and purging the records from L2/L3 (assumption: irrelevant events have no Main_Event value)
logger.info(f"Dropping all records with no Event_ID or no Main_Event in L1. Shape before: {l1.shape}")
logger.info(
f"Dropping all rows with no {dg_utils.event_id} or no {dg_utils.main_event} in L1. Shape before: {l1.shape}"
)
event_ids_to_drop = l1[l1[dg_utils.main_event].isna()][dg_utils.event_id].tolist()
l1 = l1.dropna(how="any", subset=[dg_utils.event_id, dg_utils.main_event])
logger.info(f"Dropped all records with no Event_ID or no Main_Event records in L1. Shape after: {l1.shape}")
logger.info(
f"Dropped all rows with no {dg_utils.event_id} or no {dg_utils.main_event} records in L1. Shape after: {l1.shape}"
)
l1 = l1.dropna(how="all", subset=[dg_utils.s_y, dg_utils.e_y])
logger.info(f"Dropped all row with no {dg_utils.s_y} and no {dg_utils.e_y} records in L1. Shape after: {l1.shape}")

for name, level in {"L2": l2, "L3": l3}.items():
for impact in level.keys():
Expand Down Expand Up @@ -69,6 +79,71 @@
axis=1,
)

# Replace NaNs with NoneType
l1 = l1.replace(float("nan"), None)
for level in [l2, l3]:
for impact in level.keys():
level[impact].replace(float("nan"), None, inplace=True)

for name, level in {"L2": l2, "L3": l3}.items():
logger.info(f"Dropping all records with no {dg_utils.s_y} and no {dg_utils.e_y} in {name}.")
for impact in level.keys():
level[impact] = level[impact].dropna(how="all", subset=[dg_utils.s_y, dg_utils.e_y])

logger.info("Converting currencies to USD")
for cat in ia_utils.monetary_categories:
logger.info(f"Converting currencies in L1 for category {cat}")
l1 = l1.apply(lambda x: cc_utils.normalize_row_USD(x, l1_impact=cat, level="l1", impact=cat), axis=1)
logger.info(f"Converting currencies in L2 for category {cat}")
l2[cat] = l2[cat].apply(lambda x: cc_utils.normalize_row_USD(x, l1_impact=None, level="l2", impact=cat), axis=1)
logger.info(f"Converting currencies in L3 for category {cat}")
l3[cat] = l3[cat].apply(lambda x: cc_utils.normalize_row_USD(x, l1_impact=None, level="l3", impact=cat), axis=1)

# Replace NaNs with NoneType
l1 = l1.replace(float("nan"), None)
for level in [l2, l3]:
for impact in level.keys():
level[impact].replace(float("nan"), None, inplace=True)

for cat in ia_utils.monetary_categories:
try:
assert l1[(l1[cc_utils.t_num_unit.format(cat)] != cc_utils.usd)][
~l1[cc_utils.t_num_unit.format(cat)].isnull()
].empty
except AssertionError as err:
logger.error(f"Unconverted currencies for {cat} for L1")
logger.error(
f"\n{l1[(l1[cc_utils.t_num_unit.format(cat)] != cc_utils.usd)][~l1[cc_utils.t_num_unit.format(cat)].isnull()][[x for x in l1.columns if f'Total_{cat}_' in x]]}"
)
try:
assert l2[cat][(l2[cat][cc_utils.num_unit] != cc_utils.usd) & (~l2[cat][cc_utils.num_unit].isnull())].empty
except AssertionError as err:
logger.error(f"Unconverted currencies for {cat} for L2")
logger.error(
f"\n{l2[cat][(l2[cat][cc_utils.num_unit] != cc_utils.usd) & (~l2[cat][cc_utils.num_unit].isnull())][[x for x in l2[cat].columns if 'Areas' not in x]]}"
)
try:
assert l3[cat][(l3[cat][cc_utils.num_unit] != cc_utils.usd) & (~l3[cat][cc_utils.num_unit].isnull())].empty
except AssertionError as err:
logger.error(f"Unconverted currencies for {cat} for L3")
logger.error(
f"\n{l3[cat][(l3[cat][cc_utils.num_unit] != cc_utils.usd) & (~l3[cat][cc_utils.num_unit].isnull())][[x for x in l3[cat].columns if 'Area' not in x and 'Locations' not in x]]}"
)

for cat in ia_utils.monetary_categories:
logger.info(f"Adjusting inflation for USD values in L1 to 2024 for category {cat}")
l1 = l1.apply(
lambda x: ia_utils.adjust_inflation_row_USD_2024(x, l1_impact=cat, level="l1", impact=cat), axis=1
)
logger.info(f"Adjusting inflation for USD values in L2 to 2024 for category {cat}")
l2[cat] = l2[cat].apply(
lambda x: ia_utils.adjust_inflation_row_USD_2024(x, l1_impact=None, level="l2", impact=cat), axis=1
)
logger.info(f"Adjusting inflation for USD values in L3 to 2024 for category {cat}")
l3[cat] = l3[cat].apply(
lambda x: ia_utils.adjust_inflation_row_USD_2024(x, l1_impact=None, level="l3", impact=cat), axis=1
)

# Replace NaNs with NoneType
for level in [l2, l3]:
for impact in level.keys():
Expand All @@ -90,12 +165,12 @@
for impact in l3.keys():
l3_areas = l3[impact][l3[impact][dg_utils.event_id] == e_id][f"{dg_utils.admin_area}_Norm"].tolist()

l2_areas = dg_utils.flatten(
l2_areas_list: list = dg_utils.flatten(
l2[impact][l2[impact][dg_utils.event_id] == e_id][f"{dg_utils.admin_areas}_Norm"].tolist()
)

# check l3 impacts not found in l2
areas_not_in_l2 = [x for x in l3_areas if x not in l2_areas]
areas_not_in_l2 = [x for x in l3_areas if x not in l2_areas_list]
if areas_not_in_l2:
for area in areas_not_in_l2:
logger.info(
Expand All @@ -118,21 +193,27 @@
cols.extend([dg_utils.s_d, dg_utils.s_m, dg_utils.s_y, dg_utils.e_d, dg_utils.e_m, dg_utils.e_y])
if impact.lower() in dg_utils.monetary_categories:
cols.extend([dg_utils.num_unit, dg_utils.num_inflation_adjusted, dg_utils.num_inflation_adjusted_year])
group_by_cols = [
f"{dg_utils.admin_area}_Norm",
dg_utils.s_d,
dg_utils.s_m,
dg_utils.s_y,
dg_utils.e_d,
dg_utils.e_m,
dg_utils.e_y,
]
# assuming all currencies have been converted to USD, and all USD values have been adjusted to 2024
if impact.lower() in dg_utils.monetary_categories:
group_by_cols.extend(
[dg_utils.num_unit, dg_utils.num_inflation_adjusted, dg_utils.num_inflation_adjusted_year]
)

# check if l3 impact values > l2 impact values
l3[impact][cols] = l3[impact][cols].replace({None: float("nan")})
l3_impacts = (
l3[impact][(l3[impact][dg_utils.event_id] == e_id) & (~l3[impact][dg_utils.num_min].isna())][cols]
.groupby(
[
f"{dg_utils.admin_area}_Norm",
dg_utils.s_d,
dg_utils.s_m,
dg_utils.s_y,
dg_utils.e_d,
dg_utils.e_m,
dg_utils.e_y,
],
group_by_cols,
as_index=False,
dropna=False,
)
Expand All @@ -159,7 +240,8 @@

if not l2_series.empty:
for n in range(len(l2_series)):
l2_areas, l2_idx = {}, []
l2_areas: dict = {}
l2_idx: list = []

l2_list = l2_series.iloc[n]
l2_idx = [l2_list.index(area) for area in l2_list if area not in l1_areas]
Expand Down
4 changes: 2 additions & 2 deletions Database/insert_events.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@
x for x in data.columns if "Area" in x or "Location" in x or "Event_Names" in x or x == "Hazards"
]:
data[c] = data[c].astype(str)
data[c] = data[c].replace("nan", float("nan"))
data[c] = data[c].apply(lambda x: x.replace("nan", "None"))

for i in tqdm(range(len(data)), desc=f"Inserting {f} into {args.database_name}"):
try:
Expand Down Expand Up @@ -260,7 +260,7 @@
if not args.dry_run:
for c in [x for x in data.columns if "Area" in x or "Location" in x]:
data[c] = data[c].astype(str)
data[c] = data[c].replace("nan", float("nan"))
data[c] = data[c].apply(lambda x: x.replace("nan", "None"))
for i in tqdm(range(len(data)), desc=f"Inserting {f} into {args.database_name}"):
try:
data.iloc[i : i + 1].to_sql(
Expand Down
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Loading

0 comments on commit d7b4427

Please sign in to comment.