Skip to content

A very roughly drawn up couple of possible approaches for simplifying… #60

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions apps/baseline/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

import common.models as common_models
from common.fields import TranslatedField
from common.importers import LivelihoodStrategyImporter
from common.models import (
ClassifiedProduct,
Country,
Expand Down Expand Up @@ -204,8 +205,11 @@ class LivelihoodZoneBaseline(common_models.Model):
verbose_name=_("Population Estimate"),
help_text=_("The estimated population of the Livelihood Zone during the reference year"),
)
# Regardless of how we import, we may as well store back references to the original cell/s
spreadsheet_reference = models.ForeignKey("common.SpreadsheetReference", on_delete=models.DO_NOTHING)

objects = LivelihoodZoneBaselineManager()
importer = LivelihoodStrategyImporter

def natural_key(self):
try:
Expand All @@ -224,6 +228,20 @@ class Meta:
)
]

def bss_to_db(self):
    """
    Import the objects that depend on this baseline, one model at a time.

    A ``{model class: imported object(s)}`` mapping is seeded with this baseline, its source organization
    and its livelihood zone. Each model in the list below is then imported by its ``importer``, which
    receives the mapping built so far; the importer's result is stored under that model, so importers
    later in the list can see the results of earlier ones.

    Returns the completed mapping (previously it was built and then discarded).
    """
    instances = {
        SourceOrganization: self.source_organization,
        LivelihoodZone: self.livelihood_zone,
        LivelihoodZoneBaseline: self,
    }
    for model in [
        LivelihoodStrategy,
        Community,
        WealthGroup,
        # ...
    ]:
        importer = model.importer
        # ``importer`` is assigned as a class on the model, so calling an instance method straight off it
        # would bind ``instances`` as ``self``. Instantiating first works for instance methods and is
        # harmless for classmethods/staticmethods.
        # NOTE(review): assumes importer classes take no constructor arguments and expose
        # ``save_from_bss`` - confirm against the importer implementations.
        if isinstance(importer, type):
            importer = importer()
        instances[model] = importer.save_from_bss(instances)
    return instances


# @TODO Can we have a better name?
class LivelihoodProductCategory(common_models.Model):
Expand Down
82 changes: 82 additions & 0 deletions apps/common/importers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
from collections import defaultdict

from baseline.models import (
LivelihoodStrategy,
LivelihoodZone,
LivelihoodZoneBaseline,
SourceOrganization,
)
from metadata.models import ReferenceData


class LivelihoodStrategyImporter:
    """
    Sketch of two alternative ways of building model instances from the spreadsheet references of a BSS.

    Both methods read the ``SpreadsheetReference`` rows attached to the ``LivelihoodZoneBaseline`` found in
    the ``instances`` mapping and return the list of objects they saved.

    NOTE(review): ``apps/baseline/models.py`` imports this class while this module imports from
    ``baseline.models`` at module level - that looks like a circular import; confirm and resolve.
    """

    def strategy_type_lookup(self, val):
        """Translate a spreadsheet value into a strategy type; currently a pass-through placeholder."""
        # some complexity
        return val

    def save_from_bss_option_a_bit_more_hacky(self, instances):
        """
        Contains the business logic for extracting and saving a number of livelihood strategy objects from a BSS range.

        It assumes we just add a sprinkling of spreadsheet locations where needed, then do very bespoke code here
        making use of them.

        ``instances`` maps model classes to already-imported objects; the ``LivelihoodZoneBaseline``,
        ``SourceOrganization`` and ``LivelihoodZone`` entries are read.

        Returns list of objects saved to db.
        """
        lzb = instances[LivelihoodZoneBaseline]

        # We could do this in a very bespoke way for each and every model and field like so:
        # The name lives on the related point of interest, not on the reference itself, and
        # select_related() takes the relation's field name rather than the model class name.
        refs_by_name = {
            ref.spreadsheet_point_of_interest.name: ref
            for ref in lzb.spreadsheetreference_set.order_by("sequence").select_related(
                "spreadsheet_point_of_interest"
            )
        }
        first = refs_by_name["FirstLSStrategyType"]
        last = refs_by_name["LastLSStrategyType"]

        saved = []
        # Bespoke logic very specific to each model, eg, we hardcode that these are every other row on sheet Data, every row on Data2, etc
        offset = 0
        while True:
            # below() returns a cell reference string, so keep it separate from the reference objects.
            cell = first.below(offset)
            # NOTE(review): plain string comparison - only orders cells correctly while both references
            # share a prefix and have row numbers of equal width. Confirm or compare row numbers instead.
            if cell > last.reference:
                break
            ls = LivelihoodStrategy(
                source_organization=instances[SourceOrganization],
                livelihood_zone=instances[LivelihoodZone],
            )
            ls.strategy_type = self.strategy_type_lookup(cell)
            # The contract is "objects saved to db", so persist before collecting.
            ls.save()
            saved.append(ls)
            offset += 1
        return saved

    def save_from_bss_option_exhaustive_but_generic(self, instances):
        """
        Contains the business logic for extracting and saving a number of livelihood strategy objects.

        This approach assumes every field of every instance has a location (and optional regex) stored in
        SpreadsheetPointOfInterest. But the code is entirely generic.

        Whichever option we choose, we start by prepopulating these, and gradually automate their derivation.
        Roger's code can probably already prepopulate a significant portion of them.

        ``instances`` maps model classes to already-imported objects; only the ``LivelihoodZoneBaseline``
        entry is read.

        Returns list of objects saved to db.
        """
        # Local import so this block does not depend on the module's import section.
        from django.apps import apps

        lzb = instances[LivelihoodZoneBaseline]
        # Or we could do it generically - I like this idea although it is a lot of ref data (much identical between BSSes)
        # Shape: {model: {instance_number: {field: reference}}}
        refs_by_model = defaultdict(lambda: defaultdict(dict))
        for ref in lzb.spreadsheetreference_set.order_by("sequence", "instance_number").select_related(
            "spreadsheet_point_of_interest"
        ):
            poi = ref.spreadsheet_point_of_interest
            if not poi.model or not poi.field:
                # model/field are nullable; such points of interest do not address a model field.
                continue
            refs_by_model[poi.model][ref.instance_number][poi.field] = ref

        saved = []
        for model_name, refs_by_instance in refs_by_model.items():
            # SpreadsheetPointOfInterest.model is a CharField, so it has to be resolved to a class.
            # NOTE(review): assumes the stored value is an "app_label.ModelName" string - confirm.
            model_class = apps.get_model(model_name) if isinstance(model_name, str) else model_name
            for field_refs in refs_by_instance.values():
                instance = model_class()
                for field, ref in field_refs.items():
                    val = ref.get()
                    lookup_model = ReferenceData.get_model(model_class, field)
                    if lookup_model:
                        val = lookup_model.search(val, instance)
                    setattr(instance, field, val)
                instance.save()
                saved.append(instance)
        return saved
38 changes: 38 additions & 0 deletions apps/common/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,10 @@
import inspect
import logging
import operator
import re
from functools import reduce

import pandas as pd
from django.core import validators
from django.core.cache import cache
from django.core.exceptions import ObjectDoesNotExist, ValidationError
Expand Down Expand Up @@ -1004,3 +1006,39 @@ class Meta:
fields=["country", "product"], name="common_countryclassified_country_code_product_code_uniq"
)
]


# These would go into a dedicated app


class SpreadsheetPointOfInterest(Model):
    """
    A named location of interest in a spreadsheet, optionally tied to one field of one model.
    """

    # ``on_delete`` only applies to relational fields, so it is not passed to the name field.
    # NOTE(review): assumes NameField is a non-relational field - confirm against common.fields.
    name = NameField(max_length=200, unique=True)

    # Or we could address every field in every instance - I like this as the import becomes completely generic
    # Just iterate over the instances and fields and ref.get()
    # Roger's existing code probably identifies most of these already
    model = models.CharField(max_length=100, blank=True, null=True)
    field = models.CharField(max_length=100, blank=True, null=True)


class SpreadsheetReference(Model):
    """
    The location of one SpreadsheetPointOfInterest within the spreadsheet of one baseline.
    """

    bss = models.ForeignKey("baseline.LivelihoodZoneBaseline", on_delete=models.DO_NOTHING)
    spreadsheet_point_of_interest = models.ForeignKey(SpreadsheetPointOfInterest, on_delete=models.DO_NOTHING)
    sequence = models.PositiveIntegerField()  # possibly not useful
    reference = models.CharField(max_length=800)
    # NOTE(review): get() treats an empty regex as "no regex", but the field does not allow blank - confirm.
    regex = models.CharField(max_length=800)  # only applicable if reference is a single cell

    # if using exhaustive approach of addressing every field
    instance_number = models.PositiveIntegerField()

    def get(self):
        """
        Read the referenced value from the baseline's spreadsheet.

        Returns the value found at ``self.reference``, or the list of ``re.findall`` matches of
        ``self.regex`` against it when a regex is set.
        """
        # NOTE(review): the workbook is re-read on every call, and indexing the result of read_excel by
        # ``reference`` selects by label - confirm this matches the intended reference format.
        ss = pd.read_excel(self.bss.bss)
        val = ss[self.reference]
        if self.regex:
            val = re.findall(self.regex, val)
        return val

    def below(self, i):
        """
        Return the reference ``i`` rows below ``self.reference`` as a string.

        The trailing run of digits in the reference is taken as the row number, so row numbers of any
        width are handled.

        Raises ValueError if the reference does not end in a row number.
        """
        # but this is an example of locating this functionality somewhere intuitive
        match = re.search(r"\d+$", self.reference)
        if match is None:
            raise ValueError(f"Reference {self.reference!r} does not end in a row number")
        prefix = self.reference[: match.start()]
        # Convert the row to int before adding; concatenating str and int raises TypeError.
        return prefix + str(int(match.group()) + i)
4 changes: 4 additions & 0 deletions apps/metadata/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,10 @@ class Meta:
class ExtraMeta:
identifier = ["name_en"]

@classmethod
def get_model(cls, model, field):
    """
    Look up the reference-data model that ``field`` on ``model`` points to.

    ``model`` is a model class, or a string accepted by ``apps.get_model``; ``field`` is a field name
    on that model.

    Returns the related model class when the field is a relation to a subclass of ``cls``, otherwise
    ``None`` so that callers testing the result for truthiness skip the lookup. Previously a placeholder
    string was returned, which is truthy but is not a model.

    Raises ``FieldDoesNotExist`` (from ``_meta.get_field``) if ``model`` has no such field.
    """
    if isinstance(model, str):
        # Local import so this block does not depend on the module's import section.
        from django.apps import apps

        model = apps.get_model(model)
    related = getattr(model._meta.get_field(field), "related_model", None)
    # related_model is None for non-relational fields; the type check also guards against a relation
    # that is not a resolved class.
    if isinstance(related, type) and issubclass(related, cls):
        return related
    return None


class LivelihoodCategory(ReferenceData):
"""
Expand Down