Logging improvements, linting
mvaaltola committed Jan 20, 2022
1 parent f95ca00 commit 0f25506
Showing 10 changed files with 210 additions and 204 deletions.
37 changes: 14 additions & 23 deletions export.py
@@ -1,9 +1,7 @@
#!/usr/bin/env python

import argparse
import logging
import os
import sys
from datasets import DATASETS
from ipygis import get_connection_url, QueryResult, generate_map
from slugify import slugify
@@ -13,11 +11,12 @@
from notebooks.kepler_h3_config import config # we may use our own custom visualization config
from osm_tags import tag_filter

IMPORT_LOG_PATH = 'logs'
from util import create_logger

MAPS_PATH = "server/maps"

parser = argparse.ArgumentParser(description="Create result map for a given city")
parser.add_argument("city", default="Helsinki", help="City to import")
parser.add_argument("city_slug", default="helsinki", help="Slug of city to import")
parser.add_argument("--datasets",
default=" ".join([dataset for dataset in DATASETS]),
help="Datasets to include in analysis. Default is to use all imported data. E.g. \"osm access kontur\""
@@ -29,31 +28,23 @@
" The result map is independent from the analysis database, so you may save a lot of disk space"
" by deleting the data if you don't expect to create the map again.")
args = vars(parser.parse_args())

# slugify city name just in case export was called with non-slug
city = slugify(args["city"])
slug = slugify(args["city_slug"])
datasets_to_export = args["datasets"].split()
delete = args.get("delete", False)

# log each city separately
log_file = os.path.join(os.path.dirname(__loader__.path), IMPORT_LOG_PATH, f"{city}.log")
logging.basicConfig(
format='%(asctime)s %(levelname)-8s %(message)s',
level=logging.INFO,
datefmt='%Y-%m-%d %H:%M:%S')
logger = logging.getLogger()
stdout_handler = logging.StreamHandler(sys.stdout)
file_handler = logging.FileHandler(log_file)
logger.addFilter(stdout_handler)
logger.addHandler(file_handler)
logger = create_logger("export", slug)

sql_url = get_connection_url(dbname='geoviz')
engine = create_engine(sql_url)
schema_engine = engine.execution_options(
schema_translate_map={'schema': city}
schema_translate_map={'schema': slug}
)
session = sessionmaker(bind=schema_engine)()

logger.info(f"Collecting results for {city} with {datasets_to_export}...")
logger.info(f"Collecting results for {slug} with {datasets_to_export}...")

queries = {
dataset: session.query(DATASETS[dataset]['model'])
@@ -63,7 +54,7 @@
if 'osm' in queries:
queries['osm'] = queries['osm'].filter(tag_filter)

logger.info(f"Running queries for {city} with {datasets_to_export}...")
logger.info(f"Running queries for {slug} with {datasets_to_export}...")
results = [
QueryResult.create(
query,
@@ -76,7 +67,7 @@
for dataset, query in queries.items()
]

logger.info(f"Creating map for {city} with {datasets_to_export}...")
logger.info(f"Creating map for {slug} with {datasets_to_export}...")
weights = [
DATASETS[dataset]['weight']
for dataset in datasets_to_export
@@ -90,11 +81,11 @@
map_path = os.path.join(os.path.dirname(__loader__.path), MAPS_PATH)
if not os.path.exists(map_path):
os.mkdir(map_path)
filename = os.path.join(map_path, f"{city}.html")
filename = os.path.join(map_path, f"{slug}.html")
result_map.save_to_html(file_name=filename)
# delete interim database at the end, we have all the data we need on the map
if delete:
logger.info(f"Deleting analysis database for {city}...")
engine.execute(DropSchema(city, cascade=True))
logger.info(f"Deleting analysis database for {slug}...")
engine.execute(DropSchema(slug, cascade=True))

logger.info(f"--- Datasets {datasets_to_export} for {city} exported to Kepler.gl ---")
logger.info(f"--- Datasets {datasets_to_export} for {slug} exported to Kepler.gl ---")
47 changes: 16 additions & 31 deletions import.py
@@ -3,10 +3,8 @@
import argparse
import copy
import datetime
import logging
import os
import requests
import sys
from dotenv import load_dotenv
from datasets import DATASETS
from geoalchemy2.shape import from_shape
@@ -26,8 +24,7 @@
from sqlalchemy_utils.functions import database_exists, create_database

from models import Analysis

IMPORT_LOG_PATH = 'logs'
from util import create_logger

load_dotenv()
osm_extracts_api_key = os.getenv("OSM_EXTRACTS_API_KEY")
@@ -64,27 +61,12 @@
delete = args.get("delete", False)

# log each city separately
log_path = os.path.join(os.path.dirname(__loader__.path), IMPORT_LOG_PATH)
if not os.path.exists(log_path):
os.mkdir(log_path)
log_file = os.path.join(log_path, f"{slug}.log")
logging.basicConfig(
format='%(asctime)s %(levelname)-8s %(message)s',
level=logging.INFO,
datefmt='%Y-%m-%d %H:%M:%S')
logger = logging.getLogger()
stdout_handler = logging.StreamHandler(sys.stdout)
file_handler = logging.FileHandler(log_file)
logger.addHandler(stdout_handler)
logger.addHandler(file_handler)

logger = create_logger("import", slug)
logger.info(f"--- Importing datasets {datasets} for {city} ---")

if osmnames_url:
logger.info("Geocode using OSMNames...")
# Use our own geocoding service. It provides bbox and country for city.
logger.info(osmnames_url)
logger.info(city)
logger.info(f"Geocoding {city} using OSMNames service at {osmnames_url}...")
city_data = requests.get(
f"{osmnames_url}/q/{city}.js"
).json()["results"][0]
@@ -93,9 +75,9 @@
else:
bbox = city_data["boundingbox"]
else:
logger.info("Geocode using Nominatim...")
# Fall back to Nominatim. Their API doesn't always respond tho.
# Get bbox, centroid and country for the city
logger.info(f"Geocoding {city} using Nominatim...")
city_params = {"q": args["city"], "limit": 1, "format": "json"}
city_data = requests.get(
"https://nominatim.openstreetmap.org/search", params=city_params
@@ -107,7 +89,7 @@
# we want minx, miny, maxx, maxy
bbox = [city_data["boundingbox"][i] for i in [2, 0, 3, 1]]
centroid = [city_data["lon"], city_data["lat"]]
logger.info(f"{city} centroid {centroid}")
logger.info(f"{city} centroid: {centroid}")

# bbox must always be float
bbox = [float(coord) for coord in bbox]
@@ -130,6 +112,7 @@
parameters={'gtfs': {'url': gtfs_url}}
)
session.add(analysis)

try:
session.commit()
except IntegrityError:
@@ -145,6 +128,7 @@
analysis.datasets = copy.deepcopy(analysis.datasets)
analysis.datasets["selected"] = datasets
session.commit()

# create schema for the analysis
try:
engine.execute(CreateSchema(slug))
@@ -165,47 +149,48 @@ def mark_imported(dataset: str):

if "osm" in datasets:
logger.info(f"--- Importing OSM data for {city} ---")
osm_importer = OsmImporter({"slug": slug, "bbox": ", ".join([str(coord) for coord in bbox]),
"logger": logger})
osm_importer = OsmImporter({"slug": slug,
"bbox": ", ".join([str(coord) for coord in bbox])})
osm_importer.run()
mark_imported("osm")

if "flickr" in datasets:
logger.info(f"--- Importing Flickr data for {city} ---")
flick_importer = FlickrImporter(slug=slug, bbox=bbox, logger=logger)
flick_importer = FlickrImporter(slug=slug, bbox=bbox)
flick_importer.run()
mark_imported("flickr")

if "gtfs" in datasets:
# GTFS importer uses the provided URL or, failing that, default values for some cities
if gtfs_url:
logger.info(f"--- Importing GTFS data from {gtfs_url} ---")
gtfs_importer = GTFSImporter(slug=slug, url=gtfs_url, city=city, bbox=bbox, logger=logger)
gtfs_importer = GTFSImporter(slug=slug, url=gtfs_url, city=city, bbox=bbox)
else:
logger.info(f"--- Importing GTFS data for {city} ---")
gtfs_importer = GTFSImporter(slug=slug, city=city, bbox=bbox, logger=logger)
gtfs_importer = GTFSImporter(slug=slug, city=city, bbox=bbox)
gtfs_importer.run()
mark_imported("gtfs")

if "access" in datasets:
logger.info(f"--- Importing OSM walkability & accessibility data for {city} ---")
accessibility_importer = AccessibilityImporter(slug=slug, bbox=bbox, logger=logger)
accessibility_importer = AccessibilityImporter(slug=slug, bbox=bbox)
accessibility_importer.run()
mark_imported("access")

if "ookla" in datasets:
logger.info(f"--- Importing Ookla speedtest data for {city} ---")
ookla_importer = OoklaImporter(slug=slug, city=city, bbox=bbox, logger=logger)
ookla_importer = OoklaImporter(slug=slug, city=city, bbox=bbox)
ookla_importer.run()
mark_imported("ookla")

if "kontur" in datasets:
logger.info(f"--- Importing Kontur population data for {city} ---")
kontur_importer = KonturImporter(slug=slug, city=city, bbox=bbox, logger=logger)
kontur_importer = KonturImporter(slug=slug, city=city, bbox=bbox)
kontur_importer.run()
mark_imported("kontur")

logger.info(f"--- Datasets {datasets} for {city} imported to PostGIS ---")

if export:
logger.info(f"--- Creating result map for {city} ---")
export_string = f"export.py {slug} --datasets \'{dataset_string}\'"
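The logger keyword argument is also dropped from every importer constructor in import.py, and the importer modules are among the files not expanded on this page. One plausible reading, assumed rather than shown by this diff, is that each importer now looks the shared logger up by name instead of receiving it as a parameter; a minimal sketch along those lines, with illustrative attributes and run body only:

# hypothetical importer-side lookup, not taken from this commit
import logging

class OsmImporter:
    def __init__(self, params: dict):
        self.slug = params["slug"]
        self.bbox = params["bbox"]
        # reuse the logger configured by create_logger("import", slug) in import.py
        self.logger = logging.getLogger("import")

    def run(self) -> None:
        self.logger.info(f"Importing OSM data for schema {self.slug}, bbox {self.bbox}...")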
