add nbstrip git-hook

elbeejay committed May 27, 2024
1 parent 081bf9a commit 398b7f2
Showing 8 changed files with 361 additions and 265 deletions.
3 changes: 3 additions & 0 deletions .gitignore
@@ -137,3 +137,6 @@ docs/notebooks/
# ignore the generated sphinx conf.py file
# we generate it during the doc build using jupyter-book from _conf.py
docs/conf.py

# ignore implementations output
notebooks/Implementations/output/
4 changes: 4 additions & 0 deletions .pre-commit-config.yaml
@@ -37,3 +37,7 @@ repos:
args: [--fix]
- id: ruff-format
types_or: [python, pyi, jupyter]
- repo: https://github.com/kynan/nbstripout
rev: 0.7.1
hooks:
- id: nbstripout
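
For context: the nbstripout hook added above strips cell outputs and execution counts from notebooks at commit time, so they never reach the repository; the quote and line-wrapping changes in the notebook diff below come from the existing ruff-format hook. A minimal sketch of what the stripping does, assuming an nbformat-4 notebook on disk (strip_notebook is a hypothetical helper for illustration, not nbstripout's actual API, and nbstripout itself also handles metadata filtering and more edge cases):

    import json

    def strip_notebook(path):
        """Clear outputs and execution counts from a notebook, in place."""
        with open(path, "r") as f:
            nb = json.load(f)
        for cell in nb.get("cells", []):
            if cell.get("cell_type") == "code":
                cell["outputs"] = []            # drop rendered outputs
                cell["execution_count"] = None  # reset the In[n] counter
        with open(path, "w") as f:
            json.dump(nb, f, indent=1)
            f.write("\n")

Once contributors run pre-commit install, the hook fires on every commit; pre-commit run nbstripout --all-files applies it to the whole tree in one pass.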
@@ -6,8 +6,10 @@
"metadata": {},
"outputs": [],
"source": [
"import sys, os, json\n",
"import rasterio, geopy\n",
"import sys\n",
"import os\n",
"import json\n",
"import rasterio\n",
"\n",
"import pandas as pd\n",
"import geopandas as gpd\n",
@@ -28,7 +30,7 @@
"\n",
"# read in local important parameters\n",
"local_json = \"/home/wb411133/Code/urbanParameters.json\"\n",
"with open(local_json, 'r') as inJ:\n",
"with open(local_json, \"r\") as inJ:\n",
" important_vars = json.load(inJ)"
]
},
@@ -42,7 +44,7 @@
"output_dir = f\"/home/wb411133/data/Projects/{iso3}_Urbanization\"\n",
"if not os.path.exists(output_dir):\n",
" os.makedirs(output_dir)\n",
" \n",
"\n",
"population_file = f\"/home/public/Data/GLOBAL/Population/WorldPop_PPP_2020/MOSAIC_ppp_prj_2020/ppp_prj_2020_{iso3}.tif\"\n",
"admin_bounds = \"/home/public/Data/COUNTRY/UKR/ADMIN/geoBoundaries-UKR-ADM3.geojson\"\n",
"GHSL_file = \"/home/public/Data/GLOBAL/GHSL/ghsl.vrt\"\n",
@@ -58,7 +60,7 @@
"final_folder = os.path.join(output_dir, \"Mapping_Data\")\n",
"if not os.path.exists(final_folder):\n",
" os.makedirs(final_folder)\n",
" \n",
"\n",
"admin_final = os.path.join(final_folder, \"admin_summarized.shp\")\n",
"urban_final = os.path.join(final_folder, \"urban_summarized.shp\")\n",
"urban_hd_final = os.path.join(final_folder, \"urban_hd_summarized.shp\")\n",
@@ -96,14 +98,24 @@
"# 1. Create urban extents\n",
"if not os.path.exists(urban_extents_file):\n",
" urban_calculator = urban.urbanGriddedPop(inP)\n",
" urban_extents = urban_calculator.calculateUrban(densVal=3, totalPopThresh=5000, \n",
" smooth=False, queen=False,\n",
" verbose=True, raster=urban_extents_raster_file)\n",
" urban_extents_hd = urban_calculator.calculateUrban(densVal=15, totalPopThresh=50000, \n",
" smooth=True, queen=False,\n",
" verbose=True, raster=, raster=urban_extents_raster_file)\n",
" urban_extents = urban_calculator.calculateUrban(\n",
" densVal=3,\n",
" totalPopThresh=5000,\n",
" smooth=False,\n",
" queen=False,\n",
" verbose=True,\n",
" raster=urban_extents_raster_file,\n",
" )\n",
" urban_extents_hd = urban_calculator.calculateUrban(\n",
" densVal=15,\n",
" totalPopThresh=50000,\n",
" smooth=True,\n",
" queen=False,\n",
" verbose=True,\n",
" raster=urban_extents_raster_file,\n",
" )\n",
" urban_extents.to_file(urban_extents_file, driver=\"GeoJSON\")\n",
" urban_extents_hd.to_file(urban_extents_hd_file, driver=\"GeoJSON\")\n"
" urban_extents_hd.to_file(urban_extents_hd_file, driver=\"GeoJSON\")"
]
},
{
@@ -113,12 +125,17 @@
"outputs": [],
"source": [
"# 2. Calculate urban population in admin areas\n",
"pop_worker = clippy.summarize_population(population_file, gpd.read_file(admin_bounds), urban_extents_raster_file, urban_extents_hd_raster_file)\n",
"pop_worker = clippy.summarize_population(\n",
" population_file,\n",
" gpd.read_file(admin_bounds),\n",
" urban_extents_raster_file,\n",
" urban_extents_hd_raster_file,\n",
")\n",
"summarized_urban = pop_worker.calculate_zonal()\n",
"urban_res = summarized_urban.loc[:,[x for x in summarized_urban.columns if \"SUM\" in x]]\n",
"urban_res.columns = ['TOTAL_POP', \"URBAN_POP\", \"URBAN_HD_POP\"]\n",
"urban_res['shapeID'] = inAdmin['shapeID']\n",
"urban_res['shapeName'] = inAdmin['shapeName']\n",
"urban_res = summarized_urban.loc[:, [x for x in summarized_urban.columns if \"SUM\" in x]]\n",
"urban_res.columns = [\"TOTAL_POP\", \"URBAN_POP\", \"URBAN_HD_POP\"]\n",
"urban_res[\"shapeID\"] = inAdmin[\"shapeID\"]\n",
"urban_res[\"shapeName\"] = inAdmin[\"shapeName\"]\n",
"urban_res.to_csv(urban_admin_summary)"
]
},
@@ -152,26 +169,25 @@
" urban_res_file = os.path.join(viirs_folder, f\"URBAN_{name}.csv\")\n",
" urban_hd_res_file = os.path.join(viirs_folder, f\"HD_URBAN_{name}.csv\")\n",
" admin_res_file = os.path.join(viirs_folder, f\"ADMIN_{name}.csv\")\n",
" \n",
"\n",
" # Urban Summary\n",
" if not os.path.exists(urban_res_file):\n",
" urban_res = rMisc.zonalStats(urbanD, inR, minVal=0.1)\n",
" col_names = [f'URBAN_{name}_{x}' for x in ['SUM','MIN','MAX','MEAN']]\n",
" col_names = [f\"URBAN_{name}_{x}\" for x in [\"SUM\", \"MIN\", \"MAX\", \"MEAN\"]]\n",
" urban_df = pd.DataFrame(urban_res, columns=col_names)\n",
" urban_df.to_csv(urban_res_file)\n",
" # HD Urban Summary\n",
" if not os.path.exists(urban_hd_res_file):\n",
" hd_urban_res = rMisc.zonalStats(urbanHD, inR, minVal=0.1)\n",
" col_names = [f'HD_URBAN_{name}_{x}' for x in ['SUM','MIN','MAX','MEAN']]\n",
" col_names = [f\"HD_URBAN_{name}_{x}\" for x in [\"SUM\", \"MIN\", \"MAX\", \"MEAN\"]]\n",
" hd_urban_df = pd.DataFrame(hd_urban_res, columns=col_names)\n",
" hd_urban_df.to_csv(urban_hd_res_file)\n",
" # admin Summary\n",
" if not os.path.exists(admin_res_file):\n",
" admin_res = rMisc.zonalStats(inAdmin, inR, minVal=0.1)\n",
" col_names = [f'ADM_URBAN_{name}_{x}' for x in ['SUM','MIN','MAX','MEAN']]\n",
" col_names = [f\"ADM_URBAN_{name}_{x}\" for x in [\"SUM\", \"MIN\", \"MAX\", \"MEAN\"]]\n",
" admin_df = pd.DataFrame(admin_res, columns=col_names)\n",
" admin_df.to_csv(admin_res_file)\n",
" "
" admin_df.to_csv(admin_res_file)"
]
},
{
@@ -184,17 +200,17 @@
"urb_files = [x for x in os.listdir(viirs_folder) if x.startswith(\"URBAN\")]\n",
"for x in urb_files:\n",
" tempD = pd.read_csv(os.path.join(viirs_folder, x), index_col=0)\n",
" urbanD[x[:-4]] = tempD.iloc[:,0]\n",
" urbanD[x[:-4]] = tempD.iloc[:, 0]\n",
"\n",
"hd_urb_files = [x for x in os.listdir(viirs_folder) if x.startswith(\"HD_URBAN\")]\n",
"for x in hd_urb_files:\n",
" tempD = pd.read_csv(os.path.join(viirs_folder, x), index_col=0)\n",
" urbanHD[x[:-4]] = tempD.iloc[:,0]\n",
" \n",
" urbanHD[x[:-4]] = tempD.iloc[:, 0]\n",
"\n",
"admin_urb_files = [x for x in os.listdir(viirs_folder) if x.startswith(\"ADMIN\")]\n",
"for x in admin_urb_files:\n",
" tempD = pd.read_csv(os.path.join(viirs_folder, x), index_col=0)\n",
" inAdmin[x[:-4]] = tempD.iloc[:,0]\n",
" inAdmin[x[:-4]] = tempD.iloc[:, 0]\n",
"\n",
"urbanD.to_csv(urban_viirs_summary)\n",
"urbanHD.to_csv(urban_hd_viirs_summary)\n",
@@ -208,45 +224,54 @@
"outputs": [],
"source": [
"# 4. Summarize GHSL in extents and admin\n",
"ghsl_cols = [f'c_{x}' for x in [1,2,3,4,5,6]]\n",
"ghsl_cols = [f\"c_{x}\" for x in [1, 2, 3, 4, 5, 6]]\n",
"admin_ghsl_summary = os.path.join(output_dir, \"admin_GHSL_summary.csv\")\n",
"urban_ghsl_summary = os.path.join(output_dir, \"urban_GHSL_summary.csv\")\n",
"urbanHD_ghsl_summary = os.path.join(output_dir, \"urbanhd_GHSL_summary.csv\")\n",
"\n",
"if not os.path.exists(admin_ghsl_summary):\n",
" res = rMisc.zonalStats(inAdmin, inG, rastType='C', unqVals = [1,2,3,4,5,6], reProj=True)\n",
" res = pd.DataFrame(res, columns = ghsl_cols)\n",
" res['gID'] = inAdmin['shapeID']\n",
" res = rMisc.zonalStats(\n",
" inAdmin, inG, rastType=\"C\", unqVals=[1, 2, 3, 4, 5, 6], reProj=True\n",
" )\n",
" res = pd.DataFrame(res, columns=ghsl_cols)\n",
" res[\"gID\"] = inAdmin[\"shapeID\"]\n",
" res.to_csv(admin_ghsl_summary)\n",
" \n",
"\n",
"if not os.path.exists(urban_ghsl_summary):\n",
" res = rMisc.zonalStats(urbanD, inG, rastType='C', unqVals = [1,2,3,4,5,6], reProj=True)\n",
" res = pd.DataFrame(res, columns = ghsl_cols)\n",
" res['gID'] = urbanD['ID']\n",
" res = rMisc.zonalStats(\n",
" urbanD, inG, rastType=\"C\", unqVals=[1, 2, 3, 4, 5, 6], reProj=True\n",
" )\n",
" res = pd.DataFrame(res, columns=ghsl_cols)\n",
" res[\"gID\"] = urbanD[\"ID\"]\n",
" res.to_csv(urban_ghsl_summary)\n",
" \n",
"\n",
"if not os.path.exists(urbanHD_ghsl_summary):\n",
" res = rMisc.zonalStats(urbanHD, inG, rastType='C', unqVals = [1,2,3,4,5,6], reProj=True)\n",
" res = pd.DataFrame(res, columns = ghsl_cols)\n",
" res['gID'] = urbanHD['ID']\n",
" res = rMisc.zonalStats(\n",
" urbanHD, inG, rastType=\"C\", unqVals=[1, 2, 3, 4, 5, 6], reProj=True\n",
" )\n",
" res = pd.DataFrame(res, columns=ghsl_cols)\n",
" res[\"gID\"] = urbanHD[\"ID\"]\n",
" res.to_csv(urbanHD_ghsl_summary)\n",
" \n",
"\n",
"for ghsl_file in [admin_ghsl_summary, urban_ghsl_summary, urbanHD_ghsl_summary]:\n",
" adm_ghsl = pd.read_csv(ghsl_file, index_col=0)\n",
" adm_ghsl['b2014'] = adm_ghsl.apply(lambda x: x['c_3'] + x['c_4'] + x['c_5'] + x['c_6'], axis=1)\n",
" adm_ghsl['b2000'] = adm_ghsl.apply(lambda x: x['c_4'] + x['c_5'] + x['c_6'], axis=1)\n",
" adm_ghsl['b1990'] = adm_ghsl.apply(lambda x: x['c_5'] + x['c_6'], axis=1)\n",
" \n",
" adm_ghsl[\"b2014\"] = adm_ghsl.apply(\n",
" lambda x: x[\"c_3\"] + x[\"c_4\"] + x[\"c_5\"] + x[\"c_6\"], axis=1\n",
" )\n",
" adm_ghsl[\"b2000\"] = adm_ghsl.apply(lambda x: x[\"c_4\"] + x[\"c_5\"] + x[\"c_6\"], axis=1)\n",
" adm_ghsl[\"b1990\"] = adm_ghsl.apply(lambda x: x[\"c_5\"] + x[\"c_6\"], axis=1)\n",
"\n",
" def get_built(x):\n",
" cur_built = x['b2014']\n",
" base_built = x['b1990']\n",
" cur_built = x[\"b2014\"]\n",
" base_built = x[\"b1990\"]\n",
" if base_built == 0:\n",
" base_built = x['b2000']\n",
" base_built = x[\"b2000\"]\n",
" try:\n",
" return((cur_built - base_built)/base_built)\n",
" return (cur_built - base_built) / base_built\n",
" except:\n",
" return(-1)\n",
" adm_ghsl['g_14_90'] = adm_ghsl.apply(get_built, axis=1)\n",
" return -1\n",
"\n",
" adm_ghsl[\"g_14_90\"] = adm_ghsl.apply(get_built, axis=1)\n",
" adm_ghsl.to_csv(ghsl_file)"
]
},
Expand All @@ -266,26 +291,42 @@
"# Compile data\n",
"# [shapefile, population_summary, viirs_summary, ghsl_summary, out_file]\n",
"for cur_def in [\n",
" [admin_bounds, urban_admin_summary, admin_viirs_summary, admin_ghsl_summary, admin_final],\n",
" [urban_extents_file, '', urban_viirs_summary, urban_ghsl_summary, urban_final],\n",
" [urban_extents_hd_file, '', urban_hd_viirs_summary, urbanHD_ghsl_summary, urban_hd_final]\n",
" ]:\n",
" [\n",
" admin_bounds,\n",
" urban_admin_summary,\n",
" admin_viirs_summary,\n",
" admin_ghsl_summary,\n",
" admin_final,\n",
" ],\n",
" [urban_extents_file, \"\", urban_viirs_summary, urban_ghsl_summary, urban_final],\n",
" [\n",
" urban_extents_hd_file,\n",
" \"\",\n",
" urban_hd_viirs_summary,\n",
" urbanHD_ghsl_summary,\n",
" urban_hd_final,\n",
" ],\n",
"]:\n",
" curD = gpd.read_file(cur_def[0])\n",
" if cur_def[1] != '':\n",
" curPop = pd.read_csv(cur_def[1], index_col=0) \n",
" curD['Pop'] = curPop['TOTAL_POP']\n",
" curD['urbanPop'] = curPop.apply(lambda x: x['URBAN_POP']/x['TOTAL_POP'], axis=1)\n",
" curD['urbanPopHD'] = curPop.apply(lambda x: x['URBAN_HD_POP']/x['TOTAL_POP'], axis=1)\n",
" if cur_def[1] != \"\":\n",
" curPop = pd.read_csv(cur_def[1], index_col=0)\n",
" curD[\"Pop\"] = curPop[\"TOTAL_POP\"]\n",
" curD[\"urbanPop\"] = curPop.apply(\n",
" lambda x: x[\"URBAN_POP\"] / x[\"TOTAL_POP\"], axis=1\n",
" )\n",
" curD[\"urbanPopHD\"] = curPop.apply(\n",
" lambda x: x[\"URBAN_HD_POP\"] / x[\"TOTAL_POP\"], axis=1\n",
" )\n",
" viirsD = pd.read_csv(cur_def[2], index_col=0)\n",
" curD['NTL2013'] = viirsD.iloc[:,-8]\n",
" curD['NTL2020'] = viirsD.iloc[:,-1]\n",
" curD['NTL_g'] = curD.apply(lambda x: (x['NTL2020'] - x['NTL2013'])/x['NTL2013'], axis=1)\n",
" curD[\"NTL2013\"] = viirsD.iloc[:, -8]\n",
" curD[\"NTL2020\"] = viirsD.iloc[:, -1]\n",
" curD[\"NTL_g\"] = curD.apply(\n",
" lambda x: (x[\"NTL2020\"] - x[\"NTL2013\"]) / x[\"NTL2013\"], axis=1\n",
" )\n",
" ghslD = pd.read_csv(cur_def[3], index_col=0)\n",
" curD['b2014'] = ghslD['b2014']\n",
" curD['g_14_90'] = ghslD['g_14_90']\n",
" curD.to_file(cur_def[4])\n",
" \n",
" "
" curD[\"b2014\"] = ghslD[\"b2014\"]\n",
" curD[\"g_14_90\"] = ghslD[\"g_14_90\"]\n",
" curD.to_file(cur_def[4])"
]
},
{
@@ -352,10 +393,10 @@
"source": [
"if not os.path.exists(out_ntl_2013):\n",
" rMisc.clipRaster(rasterio.open(ntl_files[1]), inAdmin, out_ntl_2013)\n",
" \n",
"\n",
"if not os.path.exists(out_ntl_2014):\n",
" rMisc.clipRaster(rasterio.open(ntl_files[2]), inAdmin, out_ntl_2014)\n",
" \n",
"\n",
"if not os.path.exists(out_ntl_2020):\n",
" rMisc.clipRaster(rasterio.open(ntl_files[-1]), inAdmin, out_ntl_2020)"
]
@@ -382,7 +423,13 @@
"for idx, row in in_cities.iterrows():\n",
" out_file = os.path.join(final_folder, f\"ghsl_{row['Name']}.tif\")\n",
" if not os.path.exists(out_file):\n",
" rMisc.clipRaster(inG, gpd.GeoDataFrame(pd.DataFrame(row).transpose(), geometry='geometry', crs=in_cities.crs), out_file)\n",
" rMisc.clipRaster(\n",
" inG,\n",
" gpd.GeoDataFrame(\n",
" pd.DataFrame(row).transpose(), geometry=\"geometry\", crs=in_cities.crs\n",
" ),\n",
" out_file,\n",
" )\n",
" cnt += 1\n",
" if cnt >= max_cnt:\n",
" break"
@@ -397,8 +444,14 @@
}
],
"metadata": {
"kernelspec": {
"display_name": "worldbank",
"language": "python",
"name": "python3"
},
"language_info": {
"name": "python"
"name": "python",
"version": "3.10.13"
}
},
"nbformat": 4,