FEWS-NET · chrispreee · Jan 30, 2024
diff --git a/apps/baseline/models.py b/apps/baseline/models.py
@@ -12,6 +12,7 @@
 
 import common.models as common_models
 from common.fields import TranslatedField
+from common.importers import LivelihoodStrategyImporter
 from common.models import (
     ClassifiedProduct,
     Country,
@@ -204,8 +205,11 @@ class LivelihoodZoneBaseline(common_models.Model):
         verbose_name=_("Population Estimate"),
         help_text=_("The estimated population of the Livelihood Zone during the reference year"),
     )
+    # Regardless of how we import, we may as well store back references to the original cell/s
+    spreadsheet_reference = models.ForeignKey("common.SpreadsheetReference", on_delete=models.DO_NOTHING)
 
     objects = LivelihoodZoneBaselineManager()
+    importer = LivelihoodStrategyImporter
 
     def natural_key(self):
         try:
@@ -224,6 +228,20 @@ class Meta:
             )
         ]
 
+    def bss_to_db(self):
+        instances = {
+            SourceOrganization: self.source_organization,
+            LivelihoodZone: self.livelihood_zone,
+            LivelihoodZoneBaseline: self,
+        }
+        for model in [
+            LivelihoodStrategy,
+            Community,
+            WealthGroup,
+            # ...
+        ]:
+            instances[model] = model.importer.save_from_bss(instances)
+
 
 # @TODO Can we have a better name.
 class LivelihoodProductCategory(common_models.Model):

diff --git a/apps/common/importers.py b/apps/common/importers.py
@@ -0,0 +1,82 @@
+from collections import defaultdict
+
+from baseline.models import (
+    LivelihoodStrategy,
+    LivelihoodZone,
+    LivelihoodZoneBaseline,
+    SourceOrganization,
+)
+from metadata.models import ReferenceData
+
+
+class LivelihoodStrategyImporter:
+    def strategy_type_lookup(self, val):
+        # some complexity
+        return val
+
+    def save_from_bss_option_a_bit_more_hacky(self, instances):
+        """
+        Contains the business logic for extracting and saving a number of livelihood strategy objects from a BSS range.
+
+        It assumes we just add a sprinkling of spreadsheet locations where needed, then do very bespoke code here
+        making use of them.
+
+        Returns list of objects saved to db.
+        """
+        lzb = instances[LivelihoodZoneBaseline]
+
+        # We could do this in a very bespoke way for each and every model and field like so:
+        map = {
+            ref.name: ref
+            for ref in lzb.spreadsheetreference_set.order_by("sequence").select_related("SpreadsheetPointOfInterest")
+        }
+        saved = []
+        # Bespoke logic very specific to each model, eg, we hardcode that these are every other row on sheet Data, every row on Data2, etc
+        i = 0
+        ref = map["FirstLSStrategyType"]
+        while ref.reference < map["LastLSStrategyType"]:
+            ref = map["FirstLSStrategyType"].below(i)
+            ls = LivelihoodStrategy(
+                source_organizaton=instances[SourceOrganization],
+                livelihood_zone=instances[LivelihoodZone],
+            )
+            ls.strategy_type = self.strategy_type_lookup(ref)
+            i += 1
+            saved.append(ls)
+        return saved
+
+    def save_from_bss_option_exhaustive_but_generic(self, instances):
+        """
+        Contains the business logic for extracting and saving a number of livelihood strategy objects.
+
+        This approach assumes every field of every instance has a location (and optional regex) stored in
+        SpreadsheetPointOfInterest. But the code is entirely generic.
+
+        Whichever option we choose, we start by prepopulating these, and gradually automate their derivation.
+        Roger's code can probably already prepopulate a significant portion of them.
+
+        Returns list of objects saved to db.
+        """
+        lzb = instances[LivelihoodZoneBaseline]
+        # Or we could do it generically - I like this idea although it is a lot of ref data (much identical between BSSes)
+        map = defaultdict(lambda: defaultdict(lambda: defaultdict))
+        for ref in lzb.spreadsheetreference_set.order_by("sequence", "instance_number").select_related(
+            "SpreadsheetPointOfInterest"
+        ):
+            map[ref.spreadsheet_point_of_interest.model][ref.instance_number][
+                ref.spreadsheet_point_of_interest.field
+            ] = ref
+
+        saved = []
+        for model in map.keys():
+            for instance_number in model.keys():
+                instance = model()
+                for field in instance_number.keys():
+                    val = instance_number[field].get()
+                    lookup_model = ReferenceData.get_model(model, field)
+                    if lookup_model:
+                        val = lookup_model.search(val, instance)
+                    setattr(instance, field, val)
+                instance.save()
+                saved.append(instance)
+        return saved
diff --git a/apps/common/models.py b/apps/common/models.py
@@ -6,8 +6,10 @@
 import inspect
 import logging
 import operator
+import re
 from functools import reduce
 
+import pandas as pd
 from django.core import validators
 from django.core.cache import cache
 from django.core.exceptions import ObjectDoesNotExist, ValidationError
@@ -1004,3 +1006,39 @@ class Meta:
                 fields=["country", "product"], name="common_countryclassified_country_code_product_code_uniq"
             )
         ]
+
+
+# These would go into a dedicated app
+
+
+class SpreadsheetPointOfInterest(Model):
+    name = NameField(max_length=200, unique=True, on_delete=models.RESTRICT)
+
+    # Or we could address every field in every instance - I like this as the import becomes completely generic
+    # Just iterate over the instances and fields and ref.get()
+    # Roger's existing code probably identifies most of these already
+    model = models.CharField(max_length=100, blank=True, null=True)
+    field = models.CharField(max_length=100, blank=True, null=True)
+
+
+class SpreadsheetReference(Model):
+    bss = models.ForeignKey("baseline.LivelihoodZoneBaseline", on_delete=models.DO_NOTHING)
+    spreadsheet_point_of_interest = models.ForeignKey(SpreadsheetPointOfInterest, on_delete=models.DO_NOTHING)
+    sequence = models.PositiveIntegerField()  # possibly not useful
+    reference = models.CharField(max_length=800)
+    regex = models.CharField(max_length=800)  # only applicable if reference is a single cell
+
+    # if using exhaustive approach of addressing every field
+    instance_number = models.PositiveIntegerField()
+
+    def get(self):
+        ss = pd.read_excel(self.bss.bss)
+        val = ss[self.reference]
+        if self.regex:
+            val = re.findall(self.regex, val)
+        return val
+
+    def below(self, i):
+        # return a reference i rows below self.reference (currently assumes row index is 2 chars which obv needs fixing)
+        # but this is an example of locating this functionality somewhere intuitive
+        return self.reference[:-2] + str(self.reference[-2:] + i)
diff --git a/apps/metadata/models.py b/apps/metadata/models.py
@@ -61,6 +61,10 @@ class Meta:
     class ExtraMeta:
         identifier = ["name_en"]
 
+    @classmethod
+    def get_model(cls, model, field):
+        return "look up appropriate model from foreign key field def"
+
 
 class LivelihoodCategory(ReferenceData):
     """