Skip to content

Commit 94dd970

Browse files
committed
Bug fix and performance improvements:
1. Fixed bug in city migration where if there were two cities in the same country with the same name only the first would be imported. This was due to not taking into account the region checking for the records existence before creating a new one. 2. Increase performance by caching region_id and country_id in migration function variables. 3. Added debugging flag '--verify-city-import' which cerifies city import and print out missing cities. For debug purposes only. 4. Fixed bug in city migration where if the name_ascii field is null it would set the slug name as 'city.' It now sets it to the geoname_id.
1 parent 4829a0d commit 94dd970

File tree

1 file changed

+85
-39
lines changed

1 file changed

+85
-39
lines changed

cities_light/management/commands/cities_light.py

+85-39
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,11 @@ class Command(BaseCommand):
7878
default=False,
7979
help='Set this if you intend to import translations a lot'
8080
),
81-
)
81+
optparse.make_option('--verify-city-import', action='store_true',
82+
default=False,
83+
help='Verify city import and print out missing cities. For debug purposes only.'
84+
),
85+
)
8286

8387
@transaction.commit_on_success
8488
def handle(self, *args, **options):
@@ -97,7 +101,7 @@ def handle(self, *args, **options):
97101
' Done: ',
98102
progressbar.Percentage(),
99103
progressbar.Bar(),
100-
]
104+
]
101105

102106
for url in SOURCES:
103107
destination_file_name = url.split('/')[-1]
@@ -156,10 +160,14 @@ def handle(self, *args, **options):
156160
progress.finish()
157161

158162
if url in TRANSLATION_SOURCES and options.get(
159-
'hack_translations', False):
163+
'hack_translations', False):
160164
with open(translation_hack_path, 'w+') as f:
161165
pickle.dump(self.translation_data, f)
162166

167+
if options.get('verify_city_import', False) and url in CITY_SOURCES:
168+
self.logger.info('Verifying city import')
169+
self.verify_city_import(geonames)
170+
163171
if options.get('hack_translations', False):
164172
with open(translation_hack_path, 'r') as f:
165173
self.translation_data = pickle.load(f)
@@ -197,12 +205,14 @@ def _get_region_id(self, country_code2, region_id):
197205
return self._region_codes[country_id][region_id]
198206

199207
def country_import(self, items):
208+
code2 = items[0]
209+
200210
try:
201-
country = Country.objects.get(code2=items[0])
211+
country = Country.objects.get(code2=code2)
202212
except Country.DoesNotExist:
203213
if self.noinsert:
204214
return
205-
country = Country(code2=items[0])
215+
country = Country(code2=code2)
206216

207217
country.name = force_unicode(items[4])
208218
country.code3 = items[1]
@@ -223,93 +233,129 @@ def region_import(self, items):
223233
name = items[1]
224234
if not items[1]:
225235
name = items[2]
236+
ascii_name = items[2]
237+
geoname_id = items[3]
226238

227-
code2, geoname_code = items[0].split('.')
239+
country_code2, geoname_code = items[0].split('.')
228240

229-
country_id = self._get_country_id(code2)
241+
try:
242+
country_id = self._get_country_id(country_code2)
243+
except Country.DoesNotExist:
244+
if self.noinsert:
245+
return
246+
else:
247+
raise
230248

231-
if items[3]:
232-
kwargs = dict(geoname_id=items[3])
249+
if geoname_id:
250+
kwargs = dict(geoname_id=geoname_id)
233251
else:
234-
try:
235-
kwargs = dict(name=name,
236-
country_id=country_id)
237-
except Country.DoesNotExist:
238-
if self.noinsert:
239-
return
240-
else:
241-
raise
252+
kwargs = dict(name=name,
253+
country_id=country_id)
242254

255+
save = False
243256
try:
244257
region = Region.objects.get(**kwargs)
245258
except Region.DoesNotExist:
246259
if self.noinsert:
247260
return
248-
region = Region(**kwargs)
261+
else:
262+
region = Region(**kwargs)
263+
save = True
249264

250265
if not region.name:
251266
region.name = name
267+
save = True
252268

253269
if not region.country_id:
254270
region.country_id = country_id
271+
save = True
255272

256273
if not region.geoname_code:
257274
region.geoname_code = geoname_code
275+
save = True
258276

259277
if not region.name_ascii:
260-
region.name_ascii = items[2]
278+
region.name_ascii = ascii_name
279+
save = True
280+
281+
region.geoname_id = geoname_id
282+
283+
if save:
284+
region.save()
285+
286+
def verify_city_import(self, geonames):
287+
for items in geonames.parse():
288+
try:
289+
City.objects.get(geoname_id=items[0])
290+
except City.DoesNotExist:
291+
self.logger.info(items)
292+
for duplicate_items in geonames.parse():
293+
if items[0] != duplicate_items[0] and items[1] == duplicate_items[1] and items[8] == duplicate_items[8] and items[10] == duplicate_items[10]:
294+
self.logger.info(duplicate_items)
261295

262-
region.geoname_id = items[3]
263-
region.save()
264296

265297
def city_import(self, items):
266298
try:
267299
city_items_pre_import.send(sender=self, items=items)
268300
except InvalidItems:
269301
return
270302

303+
geoname_id = items[0]
304+
country_code2 = items[8]
305+
region_geoname_code = items[10]
306+
name = items[1]
307+
271308
try:
272-
country_id = self._get_country_id(items[8])
309+
country_id = self._get_country_id(country_code2)
273310
except Country.DoesNotExist:
274311
if self.noinsert:
275312
return
276313
else:
277314
raise
278315

279316
try:
280-
kwargs = dict(name=force_unicode(items[1]),
281-
country_id=self._get_country_id(items[8]))
317+
region_id=self._get_region_id(country_code2, region_geoname_code)
318+
except Region.DoesNotExist:
319+
if self.noinsert:
320+
return
321+
else:
322+
region_id=None
323+
324+
try:
325+
kwargs = dict(name=force_unicode(name),
326+
region_id=region_id,
327+
country_id=country_id)
282328
except Country.DoesNotExist:
283329
if self.noinsert:
284330
return
285331
else:
286332
raise
287333

334+
save = False
288335
try:
289336
city = City.objects.get(**kwargs)
290337
except City.DoesNotExist:
291338
try:
292-
city = City.objects.get(geoname_id=items[0])
293-
city.name = force_unicode(items[1])
294-
city.country_id = self._get_country_id(items[8])
339+
city = City.objects.get(geoname_id=geoname_id)
340+
city.name = force_unicode(name)
341+
city.country_id = country_id
342+
city.region_id = region_id
343+
save = True
295344
except City.DoesNotExist:
296345
if self.noinsert:
297346
return
298347

299348
city = City(**kwargs)
300349

301-
save = False
302-
if not city.region_id:
303-
try:
304-
city.region_id = self._get_region_id(items[8], items[10])
305-
except Region.DoesNotExist:
306-
pass
307-
else:
308-
save = True
309-
310350
if not city.name_ascii:
311351
# useful for cities with chinese names
312-
city.name_ascii = items[2]
352+
name_ascii = items[2]
353+
354+
if name_ascii:
355+
city.name_ascii = name_ascii
356+
else:
357+
city.slug = geoname_id
358+
save = True
313359

314360
if not city.latitude:
315361
city.latitude = items[4]
@@ -325,7 +371,7 @@ def city_import(self, items):
325371

326372
if not city.geoname_id:
327373
# city may have been added manually
328-
city.geoname_id = items[0]
374+
city.geoname_id = geoname_id
329375
save = True
330376

331377
if save:
@@ -343,7 +389,7 @@ def translation_parse(self, items):
343389
Country: {},
344390
Region: {},
345391
City: {},
346-
}
392+
}
347393

348394
if len(items) > 4:
349395
# avoid shortnames, colloquial, and historic

0 commit comments

Comments
 (0)