diff --git a/.gitignore b/.gitignore index 6355091..cc501b8 100644 --- a/.gitignore +++ b/.gitignore @@ -137,3 +137,6 @@ docs/notebooks/ # ignore the generated sphinx conf.py file # we generate it during the doc build using jupyter-book from _conf.py docs/conf.py + +# ignore implementations output +notebooks/Implementations/output/ \ No newline at end of file diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ddb6da7..79a2964 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -37,3 +37,7 @@ repos: args: [--fix] - id: ruff-format types_or: [python, pyi, jupyter] + - repo: https://github.com/kynan/nbstripout + rev: 0.7.1 + hooks: + - id: nbstripout diff --git a/notebooks/Implementations/URB_SCAUR_UKR_B_I_urbanizationReview/URB_SCAUR_UKR_B_I_urbanizationReview.ipynb b/notebooks/Implementations/URB_SCAUR_UKR_B_I_urbanizationReview/URB_SCAUR_UKR_B_I_urbanizationReview.ipynb index 529be2a..4d6ab6d 100644 --- a/notebooks/Implementations/URB_SCAUR_UKR_B_I_urbanizationReview/URB_SCAUR_UKR_B_I_urbanizationReview.ipynb +++ b/notebooks/Implementations/URB_SCAUR_UKR_B_I_urbanizationReview/URB_SCAUR_UKR_B_I_urbanizationReview.ipynb @@ -6,8 +6,10 @@ "metadata": {}, "outputs": [], "source": [ - "import sys, os, json\n", - "import rasterio, geopy\n", + "import sys\n", + "import os\n", + "import json\n", + "import rasterio\n", "\n", "import pandas as pd\n", "import geopandas as gpd\n", @@ -28,7 +30,7 @@ "\n", "# read in local important parameters\n", "local_json = \"/home/wb411133/Code/urbanParameters.json\"\n", - "with open(local_json, 'r') as inJ:\n", + "with open(local_json, \"r\") as inJ:\n", " important_vars = json.load(inJ)" ] }, @@ -42,7 +44,7 @@ "output_dir = f\"/home/wb411133/data/Projects/{iso3}_Urbanization\"\n", "if not os.path.exists(output_dir):\n", " os.makedirs(output_dir)\n", - " \n", + "\n", "population_file = f\"/home/public/Data/GLOBAL/Population/WorldPop_PPP_2020/MOSAIC_ppp_prj_2020/ppp_prj_2020_{iso3}.tif\"\n", "admin_bounds = \"/home/public/Data/COUNTRY/UKR/ADMIN/geoBoundaries-UKR-ADM3.geojson\"\n", "GHSL_file = \"/home/public/Data/GLOBAL/GHSL/ghsl.vrt\"\n", @@ -58,7 +60,7 @@ "final_folder = os.path.join(output_dir, \"Mapping_Data\")\n", "if not os.path.exists(final_folder):\n", " os.makedirs(final_folder)\n", - " \n", + "\n", "admin_final = os.path.join(final_folder, \"admin_summarized.shp\")\n", "urban_final = os.path.join(final_folder, \"urban_summarized.shp\")\n", "urban_hd_final = os.path.join(final_folder, \"urban_hd_summarized.shp\")\n", @@ -96,14 +98,24 @@ "# 1. 
Create urban extents\n", "if not os.path.exists(urban_extents_file):\n", " urban_calculator = urban.urbanGriddedPop(inP)\n", - " urban_extents = urban_calculator.calculateUrban(densVal=3, totalPopThresh=5000, \n", - " smooth=False, queen=False,\n", - " verbose=True, raster=urban_extents_raster_file)\n", - " urban_extents_hd = urban_calculator.calculateUrban(densVal=15, totalPopThresh=50000, \n", - " smooth=True, queen=False,\n", - " verbose=True, raster=urban_extents_raster_file)\n", + " urban_extents = urban_calculator.calculateUrban(\n", + " densVal=3,\n", + " totalPopThresh=5000,\n", + " smooth=False,\n", + " queen=False,\n", + " verbose=True,\n", + " raster=urban_extents_raster_file,\n", + " )\n", + " urban_extents_hd = urban_calculator.calculateUrban(\n", + " densVal=15,\n", + " totalPopThresh=50000,\n", + " smooth=True,\n", + " queen=False,\n", + " verbose=True,\n", + " raster=urban_extents_raster_file,\n", + " )\n", " urban_extents.to_file(urban_extents_file, driver=\"GeoJSON\")\n", - " urban_extents_hd.to_file(urban_extents_hd_file, driver=\"GeoJSON\")\n" + " urban_extents_hd.to_file(urban_extents_hd_file, driver=\"GeoJSON\")" ] }, { @@ -113,12 +125,17 @@ "outputs": [], "source": [ "# 2. Calculate urban population in admin areas\n", - "pop_worker = clippy.summarize_population(population_file, gpd.read_file(admin_bounds), urban_extents_raster_file, urban_extents_hd_raster_file)\n", + "pop_worker = clippy.summarize_population(\n", + " population_file,\n", + " gpd.read_file(admin_bounds),\n", + " urban_extents_raster_file,\n", + " urban_extents_hd_raster_file,\n", + ")\n", "summarized_urban = pop_worker.calculate_zonal()\n", - "urban_res = summarized_urban.loc[:,[x for x in summarized_urban.columns if \"SUM\" in x]]\n", - "urban_res.columns = ['TOTAL_POP', \"URBAN_POP\", \"URBAN_HD_POP\"]\n", - "urban_res['shapeID'] = inAdmin['shapeID']\n", - "urban_res['shapeName'] = inAdmin['shapeName']\n", + "urban_res = summarized_urban.loc[:, [x for x in summarized_urban.columns if \"SUM\" in x]]\n", + "urban_res.columns = [\"TOTAL_POP\", \"URBAN_POP\", \"URBAN_HD_POP\"]\n", + "urban_res[\"shapeID\"] = inAdmin[\"shapeID\"]\n", + "urban_res[\"shapeName\"] = inAdmin[\"shapeName\"]\n", "urban_res.to_csv(urban_admin_summary)" ] }, @@ -152,26 +169,25 @@ " urban_res_file = os.path.join(viirs_folder, f\"URBAN_{name}.csv\")\n", " urban_hd_res_file = os.path.join(viirs_folder, f\"HD_URBAN_{name}.csv\")\n", " admin_res_file = os.path.join(viirs_folder, f\"ADMIN_{name}.csv\")\n", - " \n", + "\n", " # Urban Summary\n", " if not os.path.exists(urban_res_file):\n", " urban_res = rMisc.zonalStats(urbanD, inR, minVal=0.1)\n", - " col_names = [f'URBAN_{name}_{x}' for x in ['SUM','MIN','MAX','MEAN']]\n", + " col_names = [f\"URBAN_{name}_{x}\" for x in [\"SUM\", \"MIN\", \"MAX\", \"MEAN\"]]\n", " urban_df = pd.DataFrame(urban_res, columns=col_names)\n", " urban_df.to_csv(urban_res_file)\n", " # HD Urban Summary\n", " if not os.path.exists(urban_hd_res_file):\n", " hd_urban_res = rMisc.zonalStats(urbanHD, inR, minVal=0.1)\n", - " col_names = [f'HD_URBAN_{name}_{x}' for x in ['SUM','MIN','MAX','MEAN']]\n", + " col_names = [f\"HD_URBAN_{name}_{x}\" for x in [\"SUM\", \"MIN\", \"MAX\", \"MEAN\"]]\n", " hd_urban_df = pd.DataFrame(hd_urban_res, columns=col_names)\n", " hd_urban_df.to_csv(urban_hd_res_file)\n", " # admin Summary\n", " if not os.path.exists(admin_res_file):\n", " admin_res = rMisc.zonalStats(inAdmin, inR, minVal=0.1)\n", - " col_names = [f'ADM_URBAN_{name}_{x}' for x in 
['SUM','MIN','MAX','MEAN']]\n", + " col_names = [f\"ADM_URBAN_{name}_{x}\" for x in [\"SUM\", \"MIN\", \"MAX\", \"MEAN\"]]\n", " admin_df = pd.DataFrame(admin_res, columns=col_names)\n", - " admin_df.to_csv(admin_res_file)\n", - " " + " admin_df.to_csv(admin_res_file)" ] }, { @@ -184,17 +200,17 @@ "urb_files = [x for x in os.listdir(viirs_folder) if x.startswith(\"URBAN\")]\n", "for x in urb_files:\n", " tempD = pd.read_csv(os.path.join(viirs_folder, x), index_col=0)\n", - " urbanD[x[:-4]] = tempD.iloc[:,0]\n", + " urbanD[x[:-4]] = tempD.iloc[:, 0]\n", "\n", "hd_urb_files = [x for x in os.listdir(viirs_folder) if x.startswith(\"HD_URBAN\")]\n", "for x in hd_urb_files:\n", " tempD = pd.read_csv(os.path.join(viirs_folder, x), index_col=0)\n", - " urbanHD[x[:-4]] = tempD.iloc[:,0]\n", - " \n", + " urbanHD[x[:-4]] = tempD.iloc[:, 0]\n", + "\n", "admin_urb_files = [x for x in os.listdir(viirs_folder) if x.startswith(\"ADMIN\")]\n", "for x in admin_urb_files:\n", " tempD = pd.read_csv(os.path.join(viirs_folder, x), index_col=0)\n", - " inAdmin[x[:-4]] = tempD.iloc[:,0]\n", + " inAdmin[x[:-4]] = tempD.iloc[:, 0]\n", "\n", "urbanD.to_csv(urban_viirs_summary)\n", "urbanHD.to_csv(urban_hd_viirs_summary)\n", @@ -208,45 +224,54 @@ "outputs": [], "source": [ "# 4. Summarize GHSL in extents and admin\n", - "ghsl_cols = [f'c_{x}' for x in [1,2,3,4,5,6]]\n", + "ghsl_cols = [f\"c_{x}\" for x in [1, 2, 3, 4, 5, 6]]\n", "admin_ghsl_summary = os.path.join(output_dir, \"admin_GHSL_summary.csv\")\n", "urban_ghsl_summary = os.path.join(output_dir, \"urban_GHSL_summary.csv\")\n", "urbanHD_ghsl_summary = os.path.join(output_dir, \"urbanhd_GHSL_summary.csv\")\n", "\n", "if not os.path.exists(admin_ghsl_summary):\n", - " res = rMisc.zonalStats(inAdmin, inG, rastType='C', unqVals = [1,2,3,4,5,6], reProj=True)\n", - " res = pd.DataFrame(res, columns = ghsl_cols)\n", - " res['gID'] = inAdmin['shapeID']\n", + " res = rMisc.zonalStats(\n", + " inAdmin, inG, rastType=\"C\", unqVals=[1, 2, 3, 4, 5, 6], reProj=True\n", + " )\n", + " res = pd.DataFrame(res, columns=ghsl_cols)\n", + " res[\"gID\"] = inAdmin[\"shapeID\"]\n", " res.to_csv(admin_ghsl_summary)\n", - " \n", + "\n", "if not os.path.exists(urban_ghsl_summary):\n", - " res = rMisc.zonalStats(urbanD, inG, rastType='C', unqVals = [1,2,3,4,5,6], reProj=True)\n", - " res = pd.DataFrame(res, columns = ghsl_cols)\n", - " res['gID'] = urbanD['ID']\n", + " res = rMisc.zonalStats(\n", + " urbanD, inG, rastType=\"C\", unqVals=[1, 2, 3, 4, 5, 6], reProj=True\n", + " )\n", + " res = pd.DataFrame(res, columns=ghsl_cols)\n", + " res[\"gID\"] = urbanD[\"ID\"]\n", " res.to_csv(urban_ghsl_summary)\n", - " \n", + "\n", "if not os.path.exists(urbanHD_ghsl_summary):\n", - " res = rMisc.zonalStats(urbanHD, inG, rastType='C', unqVals = [1,2,3,4,5,6], reProj=True)\n", - " res = pd.DataFrame(res, columns = ghsl_cols)\n", - " res['gID'] = urbanHD['ID']\n", + " res = rMisc.zonalStats(\n", + " urbanHD, inG, rastType=\"C\", unqVals=[1, 2, 3, 4, 5, 6], reProj=True\n", + " )\n", + " res = pd.DataFrame(res, columns=ghsl_cols)\n", + " res[\"gID\"] = urbanHD[\"ID\"]\n", " res.to_csv(urbanHD_ghsl_summary)\n", - " \n", + "\n", "for ghsl_file in [admin_ghsl_summary, urban_ghsl_summary, urbanHD_ghsl_summary]:\n", " adm_ghsl = pd.read_csv(ghsl_file, index_col=0)\n", - " adm_ghsl['b2014'] = adm_ghsl.apply(lambda x: x['c_3'] + x['c_4'] + x['c_5'] + x['c_6'], axis=1)\n", - " adm_ghsl['b2000'] = adm_ghsl.apply(lambda x: x['c_4'] + x['c_5'] + x['c_6'], axis=1)\n", - " adm_ghsl['b1990'] = adm_ghsl.apply(lambda 
x: x['c_5'] + x['c_6'], axis=1)\n", - " \n", + " adm_ghsl[\"b2014\"] = adm_ghsl.apply(\n", + " lambda x: x[\"c_3\"] + x[\"c_4\"] + x[\"c_5\"] + x[\"c_6\"], axis=1\n", + " )\n", + " adm_ghsl[\"b2000\"] = adm_ghsl.apply(lambda x: x[\"c_4\"] + x[\"c_5\"] + x[\"c_6\"], axis=1)\n", + " adm_ghsl[\"b1990\"] = adm_ghsl.apply(lambda x: x[\"c_5\"] + x[\"c_6\"], axis=1)\n", + "\n", " def get_built(x):\n", - " cur_built = x['b2014']\n", - " base_built = x['b1990']\n", + " cur_built = x[\"b2014\"]\n", + " base_built = x[\"b1990\"]\n", " if base_built == 0:\n", - " base_built = x['b2000']\n", + " base_built = x[\"b2000\"]\n", " try:\n", - " return((cur_built - base_built)/base_built)\n", + " return (cur_built - base_built) / base_built\n", " except:\n", - " return(-1)\n", - " adm_ghsl['g_14_90'] = adm_ghsl.apply(get_built, axis=1)\n", + " return -1\n", + "\n", + " adm_ghsl[\"g_14_90\"] = adm_ghsl.apply(get_built, axis=1)\n", " adm_ghsl.to_csv(ghsl_file)" ] }, @@ -266,26 +291,42 @@ "# Compile data\n", "# [shapefile, population_summary, viirs_summary, ghsl_summary, out_file]\n", "for cur_def in [\n", - " [admin_bounds, urban_admin_summary, admin_viirs_summary, admin_ghsl_summary, admin_final],\n", - " [urban_extents_file, '', urban_viirs_summary, urban_ghsl_summary, urban_final],\n", - " [urban_extents_hd_file, '', urban_hd_viirs_summary, urbanHD_ghsl_summary, urban_hd_final]\n", - " ]:\n", + " [\n", + " admin_bounds,\n", + " urban_admin_summary,\n", + " admin_viirs_summary,\n", + " admin_ghsl_summary,\n", + " admin_final,\n", + " ],\n", + " [urban_extents_file, \"\", urban_viirs_summary, urban_ghsl_summary, urban_final],\n", + " [\n", + " urban_extents_hd_file,\n", + " \"\",\n", + " urban_hd_viirs_summary,\n", + " urbanHD_ghsl_summary,\n", + " urban_hd_final,\n", + " ],\n", + "]:\n", " curD = gpd.read_file(cur_def[0])\n", - " if cur_def[1] != '':\n", - " curPop = pd.read_csv(cur_def[1], index_col=0) \n", - " curD['Pop'] = curPop['TOTAL_POP']\n", - " curD['urbanPop'] = curPop.apply(lambda x: x['URBAN_POP']/x['TOTAL_POP'], axis=1)\n", - " curD['urbanPopHD'] = curPop.apply(lambda x: x['URBAN_HD_POP']/x['TOTAL_POP'], axis=1)\n", + " if cur_def[1] != \"\":\n", + " curPop = pd.read_csv(cur_def[1], index_col=0)\n", + " curD[\"Pop\"] = curPop[\"TOTAL_POP\"]\n", + " curD[\"urbanPop\"] = curPop.apply(\n", + " lambda x: x[\"URBAN_POP\"] / x[\"TOTAL_POP\"], axis=1\n", + " )\n", + " curD[\"urbanPopHD\"] = curPop.apply(\n", + " lambda x: x[\"URBAN_HD_POP\"] / x[\"TOTAL_POP\"], axis=1\n", + " )\n", " viirsD = pd.read_csv(cur_def[2], index_col=0)\n", - " curD['NTL2013'] = viirsD.iloc[:,-8]\n", - " curD['NTL2020'] = viirsD.iloc[:,-1]\n", - " curD['NTL_g'] = curD.apply(lambda x: (x['NTL2020'] - x['NTL2013'])/x['NTL2013'], axis=1)\n", + " curD[\"NTL2013\"] = viirsD.iloc[:, -8]\n", + " curD[\"NTL2020\"] = viirsD.iloc[:, -1]\n", + " curD[\"NTL_g\"] = curD.apply(\n", + " lambda x: (x[\"NTL2020\"] - x[\"NTL2013\"]) / x[\"NTL2013\"], axis=1\n", + " )\n", " ghslD = pd.read_csv(cur_def[3], index_col=0)\n", - " curD['b2014'] = ghslD['b2014']\n", - " curD['g_14_90'] = ghslD['g_14_90']\n", - " curD.to_file(cur_def[4])\n", - " \n", - " " + " curD[\"b2014\"] = ghslD[\"b2014\"]\n", + " curD[\"g_14_90\"] = ghslD[\"g_14_90\"]\n", + " curD.to_file(cur_def[4])" ] }, { @@ -352,10 +393,10 @@ "source": [ "if not os.path.exists(out_ntl_2013):\n", " rMisc.clipRaster(rasterio.open(ntl_files[1]), inAdmin, out_ntl_2013)\n", - " \n", + "\n", "if not os.path.exists(out_ntl_2014):\n", " rMisc.clipRaster(rasterio.open(ntl_files[2]), inAdmin, 
out_ntl_2014)\n", - " \n", + "\n", "if not os.path.exists(out_ntl_2020):\n", " rMisc.clipRaster(rasterio.open(ntl_files[-1]), inAdmin, out_ntl_2020)" ] @@ -382,7 +423,13 @@ "for idx, row in in_cities.iterrows():\n", " out_file = os.path.join(final_folder, f\"ghsl_{row['Name']}.tif\")\n", " if not os.path.exists(out_file):\n", - " rMisc.clipRaster(inG, gpd.GeoDataFrame(pd.DataFrame(row).transpose(), geometry='geometry', crs=in_cities.crs), out_file)\n", + " rMisc.clipRaster(\n", + " inG,\n", + " gpd.GeoDataFrame(\n", + " pd.DataFrame(row).transpose(), geometry=\"geometry\", crs=in_cities.crs\n", + " ),\n", + " out_file,\n", + " )\n", " cnt += 1\n", " if cnt >= max_cnt:\n", " break" @@ -397,8 +444,14 @@ } ], "metadata": { + "kernelspec": { + "display_name": "worldbank", + "language": "python", + "name": "python3" + }, "language_info": { - "name": "python" + "name": "python", + "version": "3.10.13" } }, "nbformat": 4, diff --git a/notebooks/Implementations/URB_SEAU1_NovelUrbanization/Create_Mosaick_Datasets.ipynb b/notebooks/Implementations/URB_SEAU1_NovelUrbanization/Create_Mosaick_Datasets.ipynb index 9d07232..606b965 100644 --- a/notebooks/Implementations/URB_SEAU1_NovelUrbanization/Create_Mosaick_Datasets.ipynb +++ b/notebooks/Implementations/URB_SEAU1_NovelUrbanization/Create_Mosaick_Datasets.ipynb @@ -3,7 +3,7 @@ { "cell_type": "code", "execution_count": null, - "id": "3d5639a9", + "id": "0", "metadata": {}, "outputs": [], "source": [ @@ -35,7 +35,7 @@ }, { "cell_type": "markdown", - "id": "79b8cf0a", + "id": "1", "metadata": {}, "source": [ "# Mosaic the DoU layers" @@ -44,7 +44,7 @@ { "cell_type": "code", "execution_count": null, - "id": "b7e89905", + "id": "2", "metadata": {}, "outputs": [], "source": [ @@ -84,7 +84,7 @@ { "cell_type": "code", "execution_count": null, - "id": "e1c94276", + "id": "3", "metadata": {}, "outputs": [], "source": [ @@ -103,7 +103,7 @@ { "cell_type": "code", "execution_count": null, - "id": "5ad5ea55", + "id": "4", "metadata": {}, "outputs": [], "source": [ @@ -129,7 +129,7 @@ }, { "cell_type": "markdown", - "id": "25e95213", + "id": "5", "metadata": {}, "source": [ "# Mosaick data from PP" @@ -138,7 +138,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cd0122d2", + "id": "6", "metadata": {}, "outputs": [], "source": [ @@ -177,7 +177,7 @@ { "cell_type": "code", "execution_count": null, - "id": "135aeb41", + "id": "7", "metadata": {}, "outputs": [], "source": [ @@ -196,7 +196,7 @@ { "cell_type": "code", "execution_count": null, - "id": "4b7350ea", + "id": "8", "metadata": {}, "outputs": [], "source": [] diff --git a/notebooks/Implementations/URB_SEAU1_NovelUrbanization/NGA_specific_results.ipynb b/notebooks/Implementations/URB_SEAU1_NovelUrbanization/NGA_specific_results.ipynb index 024053c..dcd624a 100644 --- a/notebooks/Implementations/URB_SEAU1_NovelUrbanization/NGA_specific_results.ipynb +++ b/notebooks/Implementations/URB_SEAU1_NovelUrbanization/NGA_specific_results.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "713958e3", + "id": "0", "metadata": {}, "source": [ "# Summarizing Urbanization in Nigeria\n", @@ -13,40 +13,33 @@ { "cell_type": "code", "execution_count": null, - "id": "5f74a6af", + "id": "1", "metadata": {}, "outputs": [], "source": [ - "import sys, os, importlib, shutil, pathlib, datetime, math\n", - "import requests\n", - "import rasterio, elevation, richdem\n", + "import sys\n", + "import os\n", + "import rasterio\n", "import rasterio.warp\n", "\n", "import pandas as pd\n", "import geopandas as gpd\n", - 
"import numpy as np\n", "\n", - "from shapely.geometry import MultiPolygon, Polygon, box, Point\n", - "from rasterio import features\n", - "from datetime import datetime\n", + "from shapely.geometry import Point\n", "\n", "from tqdm.notebook import tqdm\n", "\n", - "#Import raster helpers\n", + "# Import raster helpers\n", "sys.path.insert(0, \"/home/wb411133/Code/gostrocks/src\")\n", "\n", - "import GOSTRocks.rasterMisc as rMisc\n", "import GOSTRocks.dataMisc as dataMisc\n", - "import GOSTRocks.metadataMisc as meta\n", "from GOSTRocks.misc import tPrint\n", "\n", - "#Import GOST urban functions\n", + "# Import GOST urban functions\n", "sys.path.append(\"../../../src\")\n", - "import GOST_Urban.UrbanRaster as urban\n", "import GOST_Urban.urban_helper as helper\n", "\n", - "#Import local functions\n", - "import novelUrbanization as nu\n", + "# Import local functions\n", "from novelUrbanization import *\n", "\n", "%load_ext autoreload\n", @@ -56,7 +49,7 @@ { "cell_type": "code", "execution_count": null, - "id": "1435e666", + "id": "2", "metadata": {}, "outputs": [], "source": [ @@ -66,7 +59,7 @@ }, { "cell_type": "markdown", - "id": "222a9dfe", + "id": "3", "metadata": {}, "source": [ "The urbanization files were downloaded from the GOST AWS bucket; but the whole thing could work directly off that." @@ -75,7 +68,7 @@ { "cell_type": "code", "execution_count": null, - "id": "d692b6bc", + "id": "4", "metadata": {}, "outputs": [], "source": [ @@ -88,25 +81,29 @@ "\n", "for root, dirs, files in os.walk(urban_folder):\n", " for f in files:\n", - " if f.startswith('nga_'): # grab all the 250m resolution files\n", + " if f.startswith(\"nga_\"): # grab all the 250m resolution files\n", " if f.endswith(\"_urban.tif\") or f.endswith(\"_urban_hd.tif\"):\n", " dou_urban_files.append(os.path.join(root, f))\n", " if f.endswith(\"_cc.tif\") or f.endswith(\"_co.tif\") or f.endswith(\"_ur.tif\"):\n", " db_urban_files.append(os.path.join(root, f))\n", - " if f.startswith('nga1k_'): # grab all the 1km resolution files\n", + " if f.startswith(\"nga1k_\"): # grab all the 1km resolution files\n", " if f.endswith(\"_urban.tif\") or f.endswith(\"_urban_hd.tif\"):\n", " dou_urban_1k_files.append(os.path.join(root, f))\n", " if f.endswith(\"_cc.tif\") or f.endswith(\"_co.tif\") or f.endswith(\"_ur.tif\"):\n", " db_urban_1k_files.append(os.path.join(root, f))\n", - " \n", - "pop_files = list(set([\"_\".join(os.path.basename(x).split(\"_\")[:2]) + \".tif\" for x in dou_urban_files]))\n", + "\n", + "pop_files = list(\n", + " set(\n", + " [\"_\".join(os.path.basename(x).split(\"_\")[:2]) + \".tif\" for x in dou_urban_files]\n", + " )\n", + ")\n", "pop_files = [os.path.join(urban_folder, x) for x in pop_files]" ] }, { "cell_type": "code", "execution_count": null, - "id": "24559503", + "id": "5", "metadata": {}, "outputs": [], "source": [ @@ -116,7 +113,7 @@ { "cell_type": "code", "execution_count": null, - "id": "b43d968f", + "id": "6", "metadata": {}, "outputs": [], "source": [ @@ -127,35 +124,40 @@ { "cell_type": "code", "execution_count": null, - "id": "12198c1c", + "id": "7", "metadata": {}, "outputs": [], "source": [ - "hh_2018 = os.path.join(hh_folder, 'NGA_2018_to_GIS.csv')\n", - "hh_2022 = os.path.join(hh_folder, 'NGA_2022_to_GIS.csv')\n", + "hh_2018 = os.path.join(hh_folder, \"NGA_2018_to_GIS.csv\")\n", + "hh_2022 = os.path.join(hh_folder, \"NGA_2022_to_GIS.csv\")\n", + "\n", "\n", "def read_file(in_file):\n", " curD = pd.read_csv(in_file)\n", - " geoms = [Point(x) for x in zip(curD['hh_gps_longitude'], 
curD['hh_gps_latitude'])]\n", + " geoms = [Point(x) for x in zip(curD[\"hh_gps_longitude\"], curD[\"hh_gps_latitude\"])]\n", " curD = gpd.GeoDataFrame(curD, geometry=geoms, crs=4326)\n", - " return(curD)\n", + " return curD\n", + "\n", "\n", "hh_2018_data = read_file(hh_2018).to_crs(template_r.crs)\n", "hh_2022_data = read_file(hh_2022).to_crs(template_r.crs)\n", "\n", + "\n", "def get_pair(t):\n", " try:\n", - " return([t.x, t.y])\n", + " return [t.x, t.y]\n", " except:\n", - " return([0,0])\n", - "hh_2018_pairs = [get_pair(t) for t in hh_2018_data['geometry']]\n", - "hh_2022_pairs = [get_pair(t) for t in hh_2022_data['geometry']]" + " return [0, 0]\n", + "\n", + "\n", + "hh_2018_pairs = [get_pair(t) for t in hh_2018_data[\"geometry\"]]\n", + "hh_2022_pairs = [get_pair(t) for t in hh_2022_data[\"geometry\"]]" ] }, { "cell_type": "code", "execution_count": null, - "id": "394a87ba", + "id": "8", "metadata": {}, "outputs": [], "source": [ @@ -168,39 +170,42 @@ " cur_name = os.path.basename(urban_file).replace(\".tif\", \"\")\n", " cur_res_2018 = [x[0] for x in list(curR.sample(hh_2018_pairs))]\n", " out_hh_2018[cur_name] = cur_res_2018\n", - " \n", + "\n", " cur_res_2022 = [x[0] for x in list(curR.sample(hh_2022_pairs))]\n", - " out_hh_2022[cur_name] = cur_res_2022\n", - " " + " out_hh_2022[cur_name] = cur_res_2022" ] }, { "cell_type": "code", "execution_count": null, - "id": "a1bf419b", + "id": "9", "metadata": {}, "outputs": [], "source": [ "out_hh_2018 = out_hh_2018.to_crs(4326)\n", - "out_hh_2018.to_file(os.path.join(hh_folder, 'hh_2018.geojson'), driver='GeoJSON')\n", - "pd.DataFrame(out_hh_2018.drop(['geometry'], axis=1)).to_csv(os.path.join(hh_folder, 'hh_2018_urban_attributed.csv'))\n", + "out_hh_2018.to_file(os.path.join(hh_folder, \"hh_2018.geojson\"), driver=\"GeoJSON\")\n", + "pd.DataFrame(out_hh_2018.drop([\"geometry\"], axis=1)).to_csv(\n", + " os.path.join(hh_folder, \"hh_2018_urban_attributed.csv\")\n", + ")\n", "\n", "out_hh_2022 = out_hh_2022.to_crs(4326)\n", - "out_hh_2022.to_file(os.path.join(hh_folder, 'hh_2022.geojson'), driver='GeoJSON')\n", - "pd.DataFrame(out_hh_2022.drop(['geometry'], axis=1)).to_csv(os.path.join(hh_folder, 'hh_2022_urban_attributed.csv'))" + "out_hh_2022.to_file(os.path.join(hh_folder, \"hh_2022.geojson\"), driver=\"GeoJSON\")\n", + "pd.DataFrame(out_hh_2022.drop([\"geometry\"], axis=1)).to_csv(\n", + " os.path.join(hh_folder, \"hh_2022_urban_attributed.csv\")\n", + ")" ] }, { "cell_type": "code", "execution_count": null, - "id": "56c98b58", + "id": "10", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", - "id": "475dc925", + "id": "11", "metadata": {}, "source": [ "# Zonal stats at administrative level 2" @@ -209,20 +214,20 @@ { "cell_type": "code", "execution_count": null, - "id": "3a3eedec", + "id": "12", "metadata": {}, "outputs": [], "source": [ "# Run zonal stats at admin 2\n", - "adm2_bounds = dataMisc.get_geoboundaries('NGA', 'ADM2')\n", - "#adm1_bounds = dataMisc.get_geoboundaries('NGA', 'ADM1')\n", + "adm2_bounds = dataMisc.get_geoboundaries(\"NGA\", \"ADM2\")\n", + "# adm1_bounds = dataMisc.get_geoboundaries('NGA', 'ADM1')\n", "adm1_bounds = gpd.read_file(os.path.join(urban_folder, \"new_lga_nigeria_2003.shp\"))" ] }, { "cell_type": "code", "execution_count": null, - "id": "aed59725", + "id": "13", "metadata": {}, "outputs": [], "source": [ @@ -232,7 +237,7 @@ { "cell_type": "code", "execution_count": null, - "id": "88989155", + "id": "14", "metadata": {}, "outputs": [], "source": [ @@ -242,7 +247,7 @@ { 
"cell_type": "code", "execution_count": null, - "id": "d95c1f11", + "id": "15", "metadata": {}, "outputs": [], "source": [ @@ -252,7 +257,7 @@ { "cell_type": "code", "execution_count": null, - "id": "0a512913", + "id": "16", "metadata": {}, "outputs": [], "source": [ @@ -262,70 +267,78 @@ { "cell_type": "code", "execution_count": null, - "id": "645ddb44", + "id": "17", "metadata": {}, "outputs": [], "source": [ "final_res = adm1_bounds.copy()\n", "for pop_layer in pop_files:\n", " # zonal stats on DOU filess\n", - " pop_name = os.path.basename(pop_layer)[:-4] \n", - " dou_urban_file = os.path.join(urban_folder, f'{pop_name}_urban.tif')\n", - " dou_hd_urban_file = os.path.join(urban_folder, f'{pop_name}_urban_hd.tif')\n", - " \n", - " help_xx = helper.summarize_population(pop_layer, adm1_bounds, dou_urban_file, dou_hd_urban_file)\n", + " pop_name = os.path.basename(pop_layer)[:-4]\n", + " dou_urban_file = os.path.join(urban_folder, f\"{pop_name}_urban.tif\")\n", + " dou_hd_urban_file = os.path.join(urban_folder, f\"{pop_name}_urban_hd.tif\")\n", + "\n", + " help_xx = helper.summarize_population(\n", + " pop_layer, adm1_bounds, dou_urban_file, dou_hd_urban_file\n", + " )\n", " zonal_res = help_xx.calculate_zonal()\n", - " zonal_res = zonal_res.loc[:,[x for x in zonal_res.columns if \"SUM\" in x]]\n", + " zonal_res = zonal_res.loc[:, [x for x in zonal_res.columns if \"SUM\" in x]]\n", " for col in zonal_res.columns:\n", " final_res[col] = zonal_res[col]\n", - " \n", + "\n", " # zonal stats on DB files\n", - " db_cc_file = os.path.join(urban_folder, f'{pop_name}d10b3000_cc.tif')\n", - " db_co_file = os.path.join(urban_folder, f'{pop_name}d10b3000_co.tif')\n", - " db_ur_file = os.path.join(urban_folder, f'{pop_name}d10b3000_ur.tif')\n", + " db_cc_file = os.path.join(urban_folder, f\"{pop_name}d10b3000_cc.tif\")\n", + " db_co_file = os.path.join(urban_folder, f\"{pop_name}d10b3000_co.tif\")\n", + " db_ur_file = os.path.join(urban_folder, f\"{pop_name}d10b3000_ur.tif\")\n", " if os.path.exists(db_cc_file):\n", - " help_xx = helper.summarize_population(pop_layer, adm1_bounds, db_cc_file, db_co_file)\n", + " help_xx = helper.summarize_population(\n", + " pop_layer, adm1_bounds, db_cc_file, db_co_file\n", + " )\n", " zonal_res = help_xx.calculate_zonal()\n", - " zonal_res = zonal_res.loc[:,[x for x in zonal_res.columns if \"SUM\" in x]]\n", + " zonal_res = zonal_res.loc[:, [x for x in zonal_res.columns if \"SUM\" in x]]\n", " for col in zonal_res.columns:\n", " final_res[col] = zonal_res[col]\n", "\n", - " help_xx = helper.summarize_population(pop_layer, adm1_bounds, db_ur_file, db_co_file)\n", + " help_xx = helper.summarize_population(\n", + " pop_layer, adm1_bounds, db_ur_file, db_co_file\n", + " )\n", " zonal_res = help_xx.calculate_zonal()\n", - " zonal_res = zonal_res.loc[:,[x for x in zonal_res.columns if \"SUM\" in x]]\n", + " zonal_res = zonal_res.loc[:, [x for x in zonal_res.columns if \"SUM\" in x]]\n", " for col in zonal_res.columns:\n", " final_res[col] = zonal_res[col]\n", " else:\n", " tPrint(f\"Cannot process {pop_name} for DB\")\n", - " \n", + "\n", " tPrint(pop_name)" ] }, { "cell_type": "code", "execution_count": null, - "id": "16eb2887", + "id": "18", "metadata": {}, "outputs": [], "source": [ - "final_res.to_file(os.path.join(urban_folder, \"new_lga_nigeria_2003_URBAN_POP.shp\" ))\n", - "pd.DataFrame(final_res.drop([\"geometry\"], axis=1)).to_csv(os.path.join(urban_folder, \"new_lga_nigeria_2003_URBAN_POP.csv\"))" + "final_res.to_file(os.path.join(urban_folder, 
\"new_lga_nigeria_2003_URBAN_POP.shp\"))\n", + "pd.DataFrame(final_res.drop([\"geometry\"], axis=1)).to_csv(\n", + " os.path.join(urban_folder, \"new_lga_nigeria_2003_URBAN_POP.csv\")\n", + ")" ] }, { "cell_type": "code", "execution_count": null, - "id": "a33f2340", + "id": "19", "metadata": {}, "outputs": [], "source": [ - "[x for x in final_res.columns]attack" + "[x for x in final_res.columns]" ] }, { "cell_type": "code", "execution_count": null, - "id": "fb81ab3e", + "id": "20", "metadata": {}, "outputs": [], "source": [ @@ -335,71 +348,75 @@ { "cell_type": "code", "execution_count": null, - "id": "be7af16d", + "id": "21", "metadata": {}, "outputs": [], "source": [ - "res_prefix = 'nga1k'\n", - "pop_layer = os.path.join(urban_folder, f'{res_prefix}_cpo15.tif')\n", - "urban_layer = os.path.join(urban_folder, f'{res_prefix}_cpo15_urban.tif')\n", - "hd_layer = os.path.join(urban_folder, f'{res_prefix}_cpo15_urban_hd.tif')\n", + "res_prefix = \"nga1k\"\n", + "pop_layer = os.path.join(urban_folder, f\"{res_prefix}_cpo15.tif\")\n", + "urban_layer = os.path.join(urban_folder, f\"{res_prefix}_cpo15_urban.tif\")\n", + "hd_layer = os.path.join(urban_folder, f\"{res_prefix}_cpo15_urban_hd.tif\")\n", "\n", "help_xx = helper.summarize_population(pop_layer, adm2_bounds, urban_layer, hd_layer)\n", "zonal_res = help_xx.calculate_zonal()\n", - "zonal_res = zonal_res.loc[:,[x for x in zonal_res.columns if \"SUM\" in x]]\n", - "zonal_res['shapeID'] = adm2_bounds['shapeID']\n", - "zonal_res['shapeName'] = adm2_bounds['shapeName']\n", + "zonal_res = zonal_res.loc[:, [x for x in zonal_res.columns if \"SUM\" in x]]\n", + "zonal_res[\"shapeID\"] = adm2_bounds[\"shapeID\"]\n", + "zonal_res[\"shapeName\"] = adm2_bounds[\"shapeName\"]\n", "\n", - "pop_layer = os.path.join(urban_folder, f'{res_prefix}_cpo20.tif')\n", - "urban_layer = os.path.join(urban_folder, f'{res_prefix}_cpo20_urban.tif')\n", - "hd_layer = os.path.join(urban_folder, f'{res_prefix}_cpo20_urban_hd.tif')\n", + "pop_layer = os.path.join(urban_folder, f\"{res_prefix}_cpo20.tif\")\n", + "urban_layer = os.path.join(urban_folder, f\"{res_prefix}_cpo20_urban.tif\")\n", + "hd_layer = os.path.join(urban_folder, f\"{res_prefix}_cpo20_urban_hd.tif\")\n", "\n", "help_xx = helper.summarize_population(pop_layer, adm2_bounds, urban_layer, hd_layer)\n", "zonal_res2 = help_xx.calculate_zonal()\n", - "zonal_res2 = zonal_res2.loc[:,[x for x in zonal_res2.columns if \"SUM\" in x]]\n", - "zonal_res2['shapeID'] = adm2_bounds['shapeID']\n", - "zonal_res2['shapeName'] = adm2_bounds['shapeName']" + "zonal_res2 = zonal_res2.loc[:, [x for x in zonal_res2.columns if \"SUM\" in x]]\n", + "zonal_res2[\"shapeID\"] = adm2_bounds[\"shapeID\"]\n", + "zonal_res2[\"shapeName\"] = adm2_bounds[\"shapeName\"]" ] }, { "cell_type": "code", "execution_count": null, - "id": "609d4e18", + "id": "22", "metadata": {}, "outputs": [], "source": [ - "zonal_res.merge(zonal_res2).to_csv(os.path.join(hh_folder, f'DOU_zonal_stats_{res_prefix}.csv'))" + "zonal_res.merge(zonal_res2).to_csv(\n", + " os.path.join(hh_folder, f\"DOU_zonal_stats_{res_prefix}.csv\")\n", + ")" ] }, { "cell_type": "code", "execution_count": null, - "id": "7177da3a", + "id": "23", "metadata": {}, "outputs": [], "source": [ - "res_prefix = 'nga'\n", - "pop_layer = os.path.join(urban_folder, f'{res_prefix}_cpo15.tif')\n", - "co_layer = f'/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/NGA_1K_res/{res_prefix}_cpo15d10b3000_co.tif'\n", - "ur_layer = 
f'/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/NGA_1K_res/{res_prefix}_cpo15d10b3000_ur.tif'\n", - "cc_layer = f'/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/NGA_1K_res/{res_prefix}_cpo15d10b3000_cc.tif'\n", + "res_prefix = \"nga\"\n", + "pop_layer = os.path.join(urban_folder, f\"{res_prefix}_cpo15.tif\")\n", + "co_layer = f\"/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/NGA_1K_res/{res_prefix}_cpo15d10b3000_co.tif\"\n", + "ur_layer = f\"/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/NGA_1K_res/{res_prefix}_cpo15d10b3000_ur.tif\"\n", + "cc_layer = f\"/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/NGA_1K_res/{res_prefix}_cpo15d10b3000_cc.tif\"\n", "\n", "help_xx = helper.summarize_population(pop_layer, adm2_bounds, co_layer, ur_layer)\n", "zonal_res = help_xx.calculate_zonal(convert_urban_binary=True)\n", - "zonal_res = zonal_res.loc[:,[x for x in zonal_res.columns if \"SUM\" in x]]\n", + "zonal_res = zonal_res.loc[:, [x for x in zonal_res.columns if \"SUM\" in x]]\n", "\n", "help_xx2 = helper.summarize_population(pop_layer, adm2_bounds, cc_layer)\n", "zonal_res2 = help_xx2.calculate_zonal(convert_urban_binary=True)\n", - "zonal_res2 = zonal_res2.loc[:,[x for x in zonal_res2.columns if \"SUM\" in x]]\n", + "zonal_res2 = zonal_res2.loc[:, [x for x in zonal_res2.columns if \"SUM\" in x]]\n", "\n", - "zonal_res[f'_{res_prefix}_cpo15d10b3000_cc_SUM'] = zonal_res2[f'_{res_prefix}_cpo15d10b3000_cc_SUM']\n", - "zonal_res['shapeID'] = adm2_bounds['shapeID']\n" + "zonal_res[f\"_{res_prefix}_cpo15d10b3000_cc_SUM\"] = zonal_res2[\n", + " f\"_{res_prefix}_cpo15d10b3000_cc_SUM\"\n", + "]\n", + "zonal_res[\"shapeID\"] = adm2_bounds[\"shapeID\"]" ] }, { "cell_type": "code", "execution_count": null, - "id": "dc804404", + "id": "24", "metadata": {}, "outputs": [], "source": [ @@ -409,27 +426,27 @@ { "cell_type": "code", "execution_count": null, - "id": "6e0d03b0", + "id": "25", "metadata": {}, "outputs": [], "source": [ - "zonal_res.to_csv(os.path.join(hh_folder, f'DB_zonal_stats_{res_prefix}.csv'))" + "zonal_res.to_csv(os.path.join(hh_folder, f\"DB_zonal_stats_{res_prefix}.csv\"))" ] }, { "cell_type": "code", "execution_count": null, - "id": "4d55fbde", + "id": "26", "metadata": {}, "outputs": [], "source": [ - "adm2_bounds.to_file(os.path.join(hh_folder, 'adm2_geobounds.geojson'), driver='GeoJSON')" + "adm2_bounds.to_file(os.path.join(hh_folder, \"adm2_geobounds.geojson\"), driver=\"GeoJSON\")" ] }, { "cell_type": "code", "execution_count": null, - "id": "724527e4", + "id": "27", "metadata": {}, "outputs": [], "source": [] diff --git a/notebooks/Implementations/URB_SURDR_ZAF_Energy_Transition/Zonal_statistics.ipynb b/notebooks/Implementations/URB_SURDR_ZAF_Energy_Transition/Zonal_statistics.ipynb index c0a35d2..bc1e41c 100644 --- a/notebooks/Implementations/URB_SURDR_ZAF_Energy_Transition/Zonal_statistics.ipynb +++ b/notebooks/Implementations/URB_SURDR_ZAF_Energy_Transition/Zonal_statistics.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "c39b4ad3", + "id": "0", "metadata": {}, "source": [ "# Mpumalanga spatial analysis\n", @@ -29,7 +29,7 @@ { "cell_type": "code", "execution_count": null, - "id": "58679d41", + "id": "1", "metadata": {}, "outputs": [], "source": [ @@ -59,7 +59,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cc27ef0a", + "id": "2", "metadata": {}, "outputs": [], "source": [ @@ -118,7 +118,7 @@ { "cell_type": "code", "execution_count": null, - "id": "8ceaa32b", + "id": "3", "metadata": {}, "outputs": [], 
"source": [ @@ -129,7 +129,7 @@ { "cell_type": "code", "execution_count": null, - "id": "0d1a596b", + "id": "4", "metadata": {}, "outputs": [], "source": [ @@ -154,7 +154,7 @@ { "cell_type": "code", "execution_count": null, - "id": "0bc5cc95", + "id": "5", "metadata": {}, "outputs": [], "source": [ @@ -173,7 +173,7 @@ { "cell_type": "code", "execution_count": null, - "id": "b9aea8cc", + "id": "6", "metadata": {}, "outputs": [], "source": [ @@ -200,7 +200,7 @@ { "cell_type": "code", "execution_count": null, - "id": "0973fdaa", + "id": "7", "metadata": {}, "outputs": [], "source": [ @@ -214,7 +214,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cc610caf", + "id": "8", "metadata": {}, "outputs": [], "source": [ @@ -230,7 +230,7 @@ { "cell_type": "code", "execution_count": null, - "id": "afbed7f8", + "id": "9", "metadata": {}, "outputs": [], "source": [ @@ -248,7 +248,7 @@ { "cell_type": "code", "execution_count": null, - "id": "1cfa99c8", + "id": "10", "metadata": {}, "outputs": [], "source": [ @@ -271,7 +271,7 @@ { "cell_type": "code", "execution_count": null, - "id": "c38664ec", + "id": "11", "metadata": {}, "outputs": [], "source": [ @@ -285,7 +285,7 @@ { "cell_type": "code", "execution_count": null, - "id": "b067e728", + "id": "12", "metadata": {}, "outputs": [], "source": [ @@ -327,7 +327,7 @@ }, { "cell_type": "markdown", - "id": "2aff3729", + "id": "13", "metadata": {}, "source": [ "# Combine results" @@ -336,7 +336,7 @@ { "cell_type": "code", "execution_count": null, - "id": "747803c4", + "id": "14", "metadata": {}, "outputs": [], "source": [ @@ -347,7 +347,7 @@ { "cell_type": "code", "execution_count": null, - "id": "0e9befe3", + "id": "15", "metadata": {}, "outputs": [], "source": [ @@ -363,7 +363,7 @@ { "cell_type": "code", "execution_count": null, - "id": "e19f4112", + "id": "16", "metadata": {}, "outputs": [], "source": [ @@ -376,7 +376,7 @@ { "cell_type": "code", "execution_count": null, - "id": "a49a3105", + "id": "17", "metadata": {}, "outputs": [], "source": [ @@ -388,7 +388,7 @@ { "cell_type": "code", "execution_count": null, - "id": "6c9140cd", + "id": "18", "metadata": {}, "outputs": [], "source": [ @@ -399,7 +399,7 @@ }, { "cell_type": "markdown", - "id": "eb8eaf7c", + "id": "19", "metadata": {}, "source": [ "# Calculate gravity of all MPs in Mpumalanga to major cities" @@ -408,7 +408,7 @@ { "cell_type": "code", "execution_count": null, - "id": "8fc0426a", + "id": "20", "metadata": {}, "outputs": [], "source": [ @@ -418,7 +418,7 @@ { "cell_type": "code", "execution_count": null, - "id": "a7c97b92", + "id": "21", "metadata": {}, "outputs": [], "source": [ @@ -435,7 +435,7 @@ { "cell_type": "code", "execution_count": null, - "id": "24e7c092", + "id": "22", "metadata": {}, "outputs": [], "source": [ @@ -449,7 +449,7 @@ { "cell_type": "code", "execution_count": null, - "id": "2a52bdd2", + "id": "23", "metadata": {}, "outputs": [], "source": [ @@ -463,7 +463,7 @@ { "cell_type": "code", "execution_count": null, - "id": "5bdb3e4f", + "id": "24", "metadata": {}, "outputs": [], "source": [] @@ -471,7 +471,7 @@ { "cell_type": "code", "execution_count": null, - "id": "8ffc696e", + "id": "25", "metadata": {}, "outputs": [], "source": [ @@ -481,7 +481,7 @@ { "cell_type": "code", "execution_count": null, - "id": "7dbe0ef8", + "id": "26", "metadata": {}, "outputs": [], "source": [ @@ -498,7 +498,7 @@ { "cell_type": "code", "execution_count": null, - "id": "ea125d01", + "id": "27", "metadata": {}, "outputs": [], "source": [ @@ -515,7 +515,7 @@ { "cell_type": "code", 
"execution_count": null, - "id": "a6a008b9", + "id": "28", "metadata": {}, "outputs": [], "source": [ @@ -532,7 +532,7 @@ { "cell_type": "code", "execution_count": null, - "id": "8b4d39ac", + "id": "29", "metadata": {}, "outputs": [], "source": [ @@ -549,7 +549,7 @@ { "cell_type": "code", "execution_count": null, - "id": "c95e435d", + "id": "30", "metadata": {}, "outputs": [], "source": [ @@ -565,7 +565,7 @@ }, { "cell_type": "markdown", - "id": "6ed65fcb", + "id": "31", "metadata": {}, "source": [ "# Debugging" @@ -574,7 +574,7 @@ { "cell_type": "code", "execution_count": null, - "id": "faa69d61", + "id": "32", "metadata": {}, "outputs": [], "source": [ @@ -591,7 +591,7 @@ { "cell_type": "code", "execution_count": null, - "id": "932d8f5f", + "id": "33", "metadata": {}, "outputs": [], "source": [ @@ -604,7 +604,7 @@ { "cell_type": "code", "execution_count": null, - "id": "bff375f3", + "id": "34", "metadata": {}, "outputs": [], "source": [ @@ -614,7 +614,7 @@ { "cell_type": "code", "execution_count": null, - "id": "9ea00aaa", + "id": "35", "metadata": {}, "outputs": [], "source": [] @@ -622,7 +622,7 @@ { "cell_type": "code", "execution_count": null, - "id": "d03a91e8", + "id": "36", "metadata": {}, "outputs": [], "source": [ @@ -635,7 +635,7 @@ { "cell_type": "code", "execution_count": null, - "id": "3f02942c", + "id": "37", "metadata": {}, "outputs": [], "source": [ @@ -646,7 +646,7 @@ { "cell_type": "code", "execution_count": null, - "id": "890024ac", + "id": "38", "metadata": {}, "outputs": [], "source": [ @@ -657,7 +657,7 @@ { "cell_type": "code", "execution_count": null, - "id": "fca21314", + "id": "39", "metadata": {}, "outputs": [], "source": [ @@ -667,7 +667,7 @@ { "cell_type": "code", "execution_count": null, - "id": "f5e6573a", + "id": "40", "metadata": {}, "outputs": [], "source": [ @@ -677,7 +677,7 @@ { "cell_type": "code", "execution_count": null, - "id": "c2b20c5d", + "id": "41", "metadata": {}, "outputs": [], "source": [] diff --git a/notebooks/Implementations/WSF_DECAT_B_ExploringIDCerrors.ipynb b/notebooks/Implementations/WSF_DECAT_B_ExploringIDCerrors.ipynb index ce537ce..b9c9d06 100755 --- a/notebooks/Implementations/WSF_DECAT_B_ExploringIDCerrors.ipynb +++ b/notebooks/Implementations/WSF_DECAT_B_ExploringIDCerrors.ipynb @@ -30,19 +30,20 @@ "metadata": {}, "outputs": [], "source": [ - "import os, sys, json, logging, importlib\n", + "import os\n", + "import sys\n", + "import json\n", + "import importlib\n", "\n", "import rasterio\n", "import pandas as pd\n", "import geopandas as gpd\n", "import numpy as np\n", - "import matplotlib.pyplot as plt\n", "\n", - "#Get reference to GOSTRocks\n", + "# Get reference to GOSTRocks\n", "sys.path.append(\"../../../gostrocks/src\")\n", - "sys.path.append('../../')\n", + "sys.path.append(\"../../\")\n", "\n", - "import GOSTRocks.rasterMisc as rMisc\n", "import src.WSF.wsfdata as wsfdata" ] }, @@ -64,13 +65,13 @@ "inWSF_folder = \"/home/public/Data/PROJECTS/LEI/DLR_V2\"\n", "outWSF_folder = \"/home/wb411133/data/Global/WSF\"\n", "\n", - "#Get a list of cities\n", + "# Get a list of cities\n", "allCities = {}\n", "for root, dirs, files in os.walk(inWSF_folder):\n", " for f in files:\n", " if f[-4:] == \".tif\":\n", " cityName = f.split(\"_\")[0]\n", - " if not cityName in allCities.keys():\n", + " if cityName not in allCities.keys():\n", " allCities[cityName] = [os.path.join(root, f)]\n", " else:\n", " allCities[cityName].append(os.path.join(root, f))" @@ -139,7 +140,7 @@ "outputs": [], "source": [ "builtData = 
wsfD.generate_evolution_plot()\n", - "builtData['idcQuality'] = wsfD.summarize_idc(thresh=thresh)\n", + "builtData[\"idcQuality\"] = wsfD.summarize_idc(thresh=thresh)\n", "\n", "outFolder = os.path.join(outWSF_folder, curCity)\n", "if not os.path.exists(outFolder):\n", @@ -148,13 +149,15 @@ "correctedEvolution = os.path.join(outFolder, \"corrected_evolution_%s.tif\" % thresh)\n", "\n", "if not os.path.exists(outQuality):\n", - " qualityRes = wsfD.analyze_idc(outFile=outQuality, badThreshold=thresh) \n", + " qualityRes = wsfD.analyze_idc(outFile=outQuality, badThreshold=thresh)\n", "\n", - "correctedRes = wsfD.correct_evolution_idc(outfile=correctedEvolution, badThreshold=thresh)\n", + "correctedRes = wsfD.correct_evolution_idc(\n", + " outfile=correctedEvolution, badThreshold=thresh\n", + ")\n", "\n", "basePlot = wsfD.generate_evolution_plot()\n", "correctedPlot = wsfD.generate_evolution_plot(dataset=correctedRes)\n", - "basePlot['corrected'] = correctedPlot['cumBuilt']" + "basePlot[\"corrected\"] = correctedPlot[\"cumBuilt\"]" ] }, { @@ -163,7 +166,7 @@ "metadata": {}, "outputs": [], "source": [ - "basePlot.drop('built', axis=1).plot()" + "basePlot.drop(\"built\", axis=1).plot()" ] }, { @@ -172,7 +175,7 @@ "metadata": {}, "outputs": [], "source": [ - "basePlot['cumBuilt'].plot()" + "basePlot[\"cumBuilt\"].plot()" ] }, { @@ -181,22 +184,26 @@ "metadata": {}, "outputs": [], "source": [ - "#Loop through all the cities to process the results\n", + "# Loop through all the cities to process the results\n", "importlib.reload(wsfdata)\n", "allRes = []\n", - "for thresh in [2,3,4,5,6]:\n", - " for curCity in allCities.keys(): \n", + "for thresh in [2, 3, 4, 5, 6]:\n", + " for curCity in allCities.keys():\n", " if len(allCities[curCity]) > 1:\n", " try:\n", " wsfD = wsfdata.wsf_dataset(allCities[curCity])\n", " builtData = wsfD.generate_evolution_plot()\n", - " builtData['idcQuality'] = wsfD.summarize_idc(thresh=thresh)\n", - " allData = list(builtData[builtData.columns[1]].append(builtData[builtData.columns[2]]))\n", + " builtData[\"idcQuality\"] = wsfD.summarize_idc(thresh=thresh)\n", + " allData = list(\n", + " builtData[builtData.columns[1]].append(\n", + " builtData[builtData.columns[2]]\n", + " )\n", + " )\n", " allData.append(curCity)\n", " allRes.append(allData)\n", " except:\n", - " print('Error processing: %s' % curCity)\n", - " ''' UNCOMMENT THIS to GENERATE PLOTS\n", + " print(\"Error processing: %s\" % curCity)\n", + " \"\"\" UNCOMMENT THIS to GENERATE PLOTS\n", " #Plotting and correcting below\n", " outFolder = os.path.join(outWSF_folder, curCity)\n", " if not os.path.exists(outFolder):\n", @@ -205,22 +212,24 @@ " correctedEvolution = os.path.join(outFolder, \"corrected_evolution_%s.tif\" % thresh)\n", "\n", " if not os.path.exists(outQuality):\n", - " qualityRes = wsfD.analyze_idc(outFile=outQuality, badThreshold=thresh) \n", + " qualityRes = wsfD.analyze_idc(outFile=outQuality, badThreshold=thresh)\n", "\n", " correctedRes = wsfD.correct_evolution_idc(outfile=correctedEvolution, badThreshold=thresh)\n", "\n", " basePlot = wsfD.generate_evolution_plot()\n", " correctedPlot = wsfD.generate_evolution_plot(dataset=correctedRes)\n", " basePlot['corrected'] = correctedPlot['cumBuilt']\n", - " '''\n", - " builtCols = [\"%s_%s\" % (builtData.columns[1], x) for x in builtData.index]\n", + " \"\"\"\n", + " builtCols = [\"%s_%s\" % (builtData.columns[1], x) for x in builtData.index]\n", " qualityCols = [\"%s_%s\" % (builtData.columns[2], x) for x in builtData.index]\n", - " columns = 
(builtCols + qualityCols)\n", + " columns = builtCols + qualityCols\n", " columns.append(\"CITY\")\n", "\n", " finalRes = pd.DataFrame(allRes)\n", " finalRes.columns = columns\n", - " finalRes.to_csv(os.path.join(outWSF_folder, \"compiled_WSF_quality_%s_v2.csv\" % thresh))" + " finalRes.to_csv(\n", + " os.path.join(outWSF_folder, \"compiled_WSF_quality_%s_v2.csv\" % thresh)\n", + " )" ] }, { @@ -265,9 +274,7 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [ - "\n" - ] + "source": [] }, { "cell_type": "code", @@ -275,10 +282,10 @@ "metadata": {}, "outputs": [], "source": [ - "totalCells = (evolution[0,:,:] > 0).sum()\n", + "totalCells = (evolution[0, :, :] > 0).sum()\n", "allRes = []\n", "for idx in range(0, evolution_masked.shape[0]):\n", - " allRes.append((evolution_masked[idx,:,:] < thresh).sum() / totalCells)" + " allRes.append((evolution_masked[idx, :, :] < thresh).sum() / totalCells)" ] }, { @@ -287,7 +294,7 @@ "metadata": {}, "outputs": [], "source": [ - "allCities['Houston'][3]" + "allCities[\"Houston\"][3]" ] }, { @@ -298,9 +305,10 @@ "source": [ "from rasterio.features import shapes\n", "from shapely.geometry import shape\n", - "import geojson, json\n", + "import geojson\n", + "\n", "### Testing new WSF work using rasterio\n", - "inFile = allCities['Houston'][3]\n", + "inFile = allCities[\"Houston\"][3]\n", "inD = rasterio.open(inFile)\n", "x = inD.read()" ] @@ -318,9 +326,13 @@ "oldD = (x < oldDate) * (x > 0)\n", "newD = (x > oldDate) * (x < newDate)\n", "\n", - "newBuilt_shapes = shapes(newD.astype(np.int16), transform = inD.transform)\n", - "allGeoms = [shape(geojson.loads(json.dumps(xx[0]))) for xx in newBuilt_shapes if xx[1] != 0]\n", - "curDF = gpd.GeoDataFrame(pd.DataFrame({\"Date\":[newDate] * len(allGeoms)}), geometry=allGeoms) " + "newBuilt_shapes = shapes(newD.astype(np.int16), transform=inD.transform)\n", + "allGeoms = [\n", + " shape(geojson.loads(json.dumps(xx[0]))) for xx in newBuilt_shapes if xx[1] != 0\n", + "]\n", + "curDF = gpd.GeoDataFrame(\n", + " pd.DataFrame({\"Date\": [newDate] * len(allGeoms)}), geometry=allGeoms\n", + ")" ] }, { @@ -346,8 +358,14 @@ } ], "metadata": { + "kernelspec": { + "display_name": "worldbank", + "language": "python", + "name": "python3" + }, "language_info": { - "name": "python" + "name": "python", + "version": "3.10.13" } }, "nbformat": 4, diff --git a/pyproject.toml b/pyproject.toml index 54c6e2b..10e49ee 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -43,6 +43,7 @@ docs = [ "sphinxcontrib-apidoc>=0.4.0,<1", "jupyter-cache>=0.6.0", "nbconvert", + "nbstripout" ] [project.urls]