Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

173 filter invalid area names #184

Merged
merged 11 commits into from
Oct 31, 2024
13 changes: 8 additions & 5 deletions Database/scr/normalize_locations.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,17 +146,20 @@ def normalize_locations(
try:
try:
if area:
assert isinstance(area, str), f"Area is not a string: {area}"
assert isinstance(area, str), f"Area `{area}` is not a string; type: {type(area)}"
if re.match(
"(country|location|area|adminarea|admin|admin_area|administrative area|administrative_area|none|null)(\s)*(s)*[0-9]*$",
area,
flags=re.IGNORECASE,
):
self.logger.error(f"Input `{area}` of type {type(area)} is not a valid area name")
return (None, None, None)
if in_country:
assert isinstance(in_country, str), f"Country is not a string: {in_country}"
assert not (
is_country and in_country
), f"An area cannot be a country (is_country={is_country}) and in a country (in_country={in_country}) simultaneously"

# if area is None, replace by country name
area = in_country if not area and in_country else area
assert isinstance(area, str), f"Area is {area}; in_country: {in_country}"

except BaseException as err:
self.logger.error(err)
return (None, None, None)
Expand Down