ServiceNow
diff --git a/azimuth/app.py b/azimuth/app.py
Lines changed: 27 additions & 3 deletions
diff --git a/azimuth/config.py b/azimuth/config.py
Lines changed: 9 additions & 0 deletions
diff --git a/azimuth/modules/base_classes/artifact_manager.py b/azimuth/modules/base_classes/artifact_manager.py
Lines changed: 60 additions & 26 deletions
diff --git a/azimuth/modules/base_classes/module.py b/azimuth/modules/base_classes/module.py
Lines changed: 1 addition & 4 deletions
diff --git a/azimuth/routers/config.py b/azimuth/routers/config.py
Lines changed: 0 additions & 2 deletions
diff --git a/azimuth/routers/export.py b/azimuth/routers/export.py
Lines changed: 10 additions & 4 deletions
diff --git a/azimuth/routers/utterances.py b/azimuth/routers/utterances.py
Lines changed: 0 additions & 2 deletions
diff --git a/azimuth/startup.py b/azimuth/startup.py
Lines changed: 0 additions & 2 deletions
diff --git a/azimuth/task_manager.py b/azimuth/task_manager.py
Lines changed: 1 addition & 5 deletions
diff --git a/azimuth/utils/project.py b/azimuth/utils/project.py
Lines changed: 1 addition & 35 deletions
@@ -25,15 +25,14 @@
 
 from azimuth.config import AzimuthConfig, load_azimuth_config
 from azimuth.dataset_split_manager import DatasetSplitManager
-from azimuth.modules.base_classes import DaskModule
+from azimuth.modules.base_classes import ArtifactManager, DaskModule
 from azimuth.modules.utilities.validation import ValidationModule
 from azimuth.startup import startup_tasks
 from azimuth.task_manager import TaskManager
 from azimuth.types import DatasetSplitName, ModuleOptions
 from azimuth.utils.cluster import default_cluster
 from azimuth.utils.conversion import JSONResponseIgnoreNan
 from azimuth.utils.logs import set_logger_config
-from azimuth.utils.project import load_dataset_split_managers_from_config
 from azimuth.utils.validation import assert_not_none
 
 _dataset_split_managers: Dict[DatasetSplitName, Optional[DatasetSplitManager]] = {}
@@ -297,6 +296,32 @@ def create_app() -> FastAPI:
     return app
 
 
+def load_dataset_split_managers_from_config(
+    azimuth_config: AzimuthConfig,
+) -> Dict[DatasetSplitName, Optional[DatasetSplitManager]]:
+    """
+    Load the eval and train dataset split managers for the application.
+
+    The dataset and the managers are both obtained through the
+    `ArtifactManager` singleton rather than being built here.
+
+    Args:
+        azimuth_config: Azimuth Configuration.
+
+    Returns:
+        A dict with one entry for `DatasetSplitName.eval` and one for
+        `DatasetSplitName.train`. The value is None when the split is not in
+        the loaded dataset, otherwise the dataset split manager returned by
+        the ArtifactManager.
+
+    """
+    artifact_manager = ArtifactManager.instance()
+    dataset = artifact_manager.get_dataset_dict(azimuth_config)
+
+    # Splits absent from the dataset are still listed in the result, mapped to None.
+    return {
+        dataset_split_name: None
+        if dataset_split_name not in dataset
+        else artifact_manager.get_dataset_split_manager(
+            azimuth_config, DatasetSplitName[dataset_split_name]
+        )
+        for dataset_split_name in [DatasetSplitName.eval, DatasetSplitName.train]
+    }
+
+
 def initialize_managers(azimuth_config: AzimuthConfig, cluster: SpecCluster):
     """Initialize DatasetSplitManagers and TaskManagers.
 
@@ -348,7 +373,6 @@ def run_validation_module(pipeline_index=None):
     else:
         for pipeline_index in range(len(config.pipelines)):
             run_validation_module(pipeline_index)
-    task_manager.clear_worker_cache()
 
 
 def run_startup_tasks(azimuth_config: AzimuthConfig, cluster: SpecCluster):
 
@@ -355,6 +355,15 @@ def check_pipeline_names(cls, pipeline_definitions):
             raise ValueError(f"Duplicated pipeline names {pipeline_names}.")
         return pipeline_definitions
 
+    def get_model_contract_hash(self):
+        """Hash the model-contract part of this config.
+
+        Only the fields declared on `ModelContractConfig` that are not also
+        fields of `CommonFieldsConfig` are included, dumped by alias, and the
+        resulting dict is passed to `md5_hash`.
+
+        Returns:
+            The value returned by `md5_hash` for that subset of the config.
+        """
+        return md5_hash(
+            self.dict(
+                # Set difference on the field names: keep the ModelContractConfig
+                # fields, drop the ones that come from CommonFieldsConfig.
+                include=ModelContractConfig.__fields__.keys()
+                - CommonFieldsConfig.__fields__.keys(),
+                by_alias=True,
+            )
+        )
+
 
 class MetricsConfig(ModelContractConfig):
     # Custom HuggingFace metrics
 
@@ -1,9 +1,10 @@
 # Copyright ServiceNow, Inc. 2021 – 2022
 # This source code is licensed under the Apache 2.0 license found in the LICENSE file
 # in the root directory of this source tree.
-from multiprocessing import Lock
-from typing import Callable, Dict, Optional
+from collections import defaultdict
+from typing import Callable, Dict
 
+import structlog
 from datasets import DatasetDict
 
 from azimuth.config import AzimuthConfig
@@ -18,31 +19,66 @@
 Hash = int
 
 
+log = structlog.get_logger()
+
+
+class Singleton:
+    """
+    A non-thread-safe helper class to ease implementing singletons.
+    This should be used as a decorator -- not a metaclass -- to the
+    class that should be a singleton.
+
+    To get the singleton instance, use the `instance` method. Trying
+    to use `__call__` will result in a `TypeError` being raised.
+
+    Note that, once decorated, the class name is bound to this `Singleton`
+    wrapper object and no longer to the decorated class itself.
+
+    Args:
+        decorated: Decorated class
+    """
+
+    def __init__(self, decorated):
+        # Only the class is stored here; it is instantiated lazily in `instance`.
+        self._decorated = decorated
+
+    def instance(self):
+        """
+        Returns the singleton instance. Upon its first call, it creates a
+        new instance of the decorated class and calls its `__init__` method.
+        On all subsequent calls, the already created instance is returned.
+
+        Returns:
+            Instance of the decorated class
+        """
+        # `_instance` is only ever set below, so a missing attribute means that
+        # no instance has been created yet (or that it was cleared).
+        try:
+            return self._instance
+        except AttributeError:
+            # The decorated class is instantiated without any argument.
+            self._instance = self._decorated()
+            return self._instance
+
+    def __call__(self):
+        # Direct instantiation is blocked so `instance()` is the only access path.
+        raise TypeError("Singletons must be accessed through `instance()`.")
+
+    def clear_instance(self):
+        """For test purposes only.
+
+        Drops the cached instance, if any, so that the next call to `instance`
+        creates a new one.
+        """
+        if hasattr(self, "_instance"):
+            delattr(self, "_instance")
+
+
+@Singleton
 class ArtifactManager:
     """This class is a singleton which holds different artifacts.
 
     Artifacts include dataset_split_managers, datasets and models for each config, so they don't
     need to be reloaded many times for a same module.
     """
 
-    instance: Optional["ArtifactManager"] = None
-
     def __init__(self):
         # The keys of the dict are a hash of the config.
         self.dataset_dict_mapping: Dict[Hash, DatasetDict] = {}
         self.dataset_split_managers_mapping: Dict[
             Hash, Dict[DatasetSplitName, DatasetSplitManager]
-        ] = {}
-        self.models_mapping: Dict[Hash, Dict[int, Callable]] = {}
-        self.tokenizer = None
+        ] = defaultdict(dict)
+        self.models_mapping: Dict[Hash, Dict[int, Callable]] = defaultdict(dict)
         self.metrics = {}
-
-    @classmethod
-    def get_instance(cls):
-        with Lock():
-            if cls.instance is None:
-                cls.instance = cls()
-            return cls.instance
+        log.debug(f"Creating new Artifact Manager {id(self)}.")
 
     def get_dataset_split_manager(
         self, config: AzimuthConfig, name: DatasetSplitName
@@ -68,8 +104,6 @@ def get_dataset_split_manager(
                 f"Found {tuple(dataset_dict.keys())}."
             )
         project_hash: Hash = config.get_project_hash()
-        if project_hash not in self.dataset_split_managers_mapping:
-            self.dataset_split_managers_mapping[project_hash] = {}
         if name not in self.dataset_split_managers_mapping[project_hash]:
             self.dataset_split_managers_mapping[project_hash][name] = DatasetSplitManager(
                 name=name,
@@ -78,6 +112,7 @@ def get_dataset_split_manager(
                 initial_prediction_tags=ALL_PREDICTION_TAGS,
                 dataset_split=dataset_dict[name],
             )
+            log.debug(f"New {name} DM in Artifact Manager {id(self)}")
         return self.dataset_split_managers_mapping[project_hash][name]
 
     def get_dataset_dict(self, config) -> DatasetDict:
@@ -106,17 +141,16 @@ def get_model(self, config: AzimuthConfig, pipeline_idx: int):
         Returns:
             Loaded model.
         """
-
-        project_hash: Hash = config.get_project_hash()
-        if project_hash not in self.models_mapping:
-            self.models_mapping[project_hash] = {}
-        if pipeline_idx not in self.models_mapping[project_hash]:
+        # We only need to reload the pipeline if the model contract part of the config is changed.
+        model_contract_hash: Hash = config.get_model_contract_hash()
+        if pipeline_idx not in self.models_mapping[model_contract_hash]:
+            log.debug(f"Loading pipeline {pipeline_idx}.")
             pipelines = assert_not_none(config.pipelines)
-            self.models_mapping[project_hash][pipeline_idx] = load_custom_object(
+            self.models_mapping[model_contract_hash][pipeline_idx] = load_custom_object(
                 assert_not_none(pipelines[pipeline_idx].model), azimuth_config=config
             )
 
-        return self.models_mapping[project_hash][pipeline_idx]
+        return self.models_mapping[model_contract_hash][pipeline_idx]
 
     def get_metric(self, config, name: str, **kwargs):
         hash: Hash = md5_hash({"name": name, **kwargs})
@@ -125,6 +159,6 @@ def get_metric(self, config, name: str, **kwargs):
         return self.metrics[hash]
 
     @classmethod
-    def clear_cache(cls) -> None:
-        with Lock():
-            cls.instance = None
+    def instance(cls):
+        # Implemented in decorator
+        raise NotImplementedError
@@ -79,7 +79,7 @@ def get_indices(self, name: Optional[DatasetSplitName] = None) -> List[int]:
     def artifact_manager(self):
         """This is set as a property so the Module always have access to the current version of
         the ArtifactManager on the worker."""
-        return ArtifactManager.get_instance()
+        return ArtifactManager.instance()
 
     @property
     def available_dataset_splits(self) -> Set[DatasetSplitName]:
@@ -215,6 +215,3 @@ def get_pipeline_definition(self) -> PipelineDefinition:
         pipeline_index = assert_not_none(self.mod_options.pipeline_index)
         current_pipeline = pipelines[pipeline_index]
         return current_pipeline
-
-    def clear_cache(self):
-        self.artifact_manager.clear_cache()
@@ -98,8 +98,6 @@ def patch_config(
                 HTTP_500_INTERNAL_SERVER_ERROR, detail="Error when loading the new config."
             )
 
-    # Clear workers so that they load the correct config.
-    task_manager.clear_worker_cache()
     return new_config
 
 
 
@@ -5,7 +5,7 @@
 import os
 import time
 from os.path import join as pjoin
-from typing import Dict, Generator, List, Optional, cast
+from typing import Dict, Generator, List, Optional
 
 import pandas as pd
 from fastapi import APIRouter, Depends, HTTPException
@@ -155,7 +155,10 @@ def get_export_perturbed_set(
 
     output = list(
         make_utterance_level_result(
-            dataset_split_manager, task_result, pipeline_index=pipeline_index_not_null
+            dataset_split_manager,
+            task_result,
+            pipeline_index=pipeline_index_not_null,
+            config=config,
         )
     )
     with open(path, "w") as f:
@@ -164,20 +167,23 @@ def get_export_perturbed_set(
 
 
 def make_utterance_level_result(
-    dm: DatasetSplitManager, results: List[List[PerturbedUtteranceResult]], pipeline_index: int
+    dm: DatasetSplitManager,
+    results: List[List[PerturbedUtteranceResult]],
+    pipeline_index: int,
+    config: AzimuthConfig,
 ) -> Generator[Dict, None, None]:
     """Massage perturbation testing results for the frontend.
 
     Args:
         dm: Current DatasetSplitManager.
         results: Output of Perturbation Testing.
         pipeline_index: Index of the pipeline that made the results.
+        config: Azimuth config
 
     Returns:
         Generator that yield json-able object for the frontend.
 
     """
-    config = cast(AzimuthConfig, dm.config)
     for idx, (utterance, test_results) in enumerate(
         zip(
             dm.get_dataset_split(
 
@@ -229,7 +229,6 @@ def patch_utterances(
     utterances: List[UtterancePatch] = Body(...),
     config: AzimuthConfig = Depends(get_config),
     dataset_split_manager: DatasetSplitManager = Depends(get_dataset_split_manager),
-    task_manager: TaskManager = Depends(get_task_manager),
     ignore_not_found: bool = Query(False),
 ) -> List[UtterancePatch]:
     if ignore_not_found:
@@ -250,7 +249,6 @@ def patch_utterances(
 
     dataset_split_manager.add_tags(data_actions)
 
-    task_manager.clear_worker_cache()
     updated_tags = dataset_split_manager.get_tags(row_indices)
 
     return [
 
@@ -150,8 +150,6 @@ def on_end(fut: Future, module: DaskModule, dm: DatasetSplitManager, task_manage
         # Task is done, save the result.
         if isinstance(module, DatasetResultModule):
             module.save_result(module.result(), dm)
-            # We only need to clear cache when the dataset is modified.
-            task_manager.clear_worker_cache()
     else:
         log.exception("Error in", module=module, fut=fut, exc_info=fut.exception())
 
 
@@ -8,7 +8,7 @@
 from distributed import Client, SpecCluster
 
 from azimuth.config import AzimuthConfig
-from azimuth.modules.base_classes import ArtifactManager, DaskModule, ExpirableMixin
+from azimuth.modules.base_classes import DaskModule, ExpirableMixin
 from azimuth.modules.task_mapping import model_contract_methods, modules
 from azimuth.types import (
     DatasetSplitName,
@@ -67,7 +67,6 @@ def close(self):
                     mod.future.cancel()
                 except Exception:
                     pass
-        self.clear_worker_cache()
         self.client.close()
 
     def register_task(self, name, cls):
@@ -214,9 +213,6 @@ def status(self):
             **self.get_all_tasks_status(task=None),
         }
 
-    def clear_worker_cache(self):
-        self.client.run(ArtifactManager.clear_cache)
-
     def restart(self):
         log.info("Cluster restarted to free memory.")
         for task_name, module in self.current_tasks.items():
 
@@ -1,7 +1,7 @@
 # Copyright ServiceNow, Inc. 2021 – 2022
 # This source code is licensed under the Apache 2.0 license found in the LICENSE file
 # in the root directory of this source tree.
-from typing import Dict, Optional
+from typing import Dict
 
 import structlog
 from datasets import DatasetDict
@@ -12,9 +12,7 @@
     PerturbationTestingConfig,
     SimilarityConfig,
 )
-from azimuth.dataset_split_manager import DatasetSplitManager
 from azimuth.types import DatasetSplitName, SupportedModelContract
-from azimuth.types.tag import ALL_PREDICTION_TAGS, ALL_STANDARD_TAGS
 from azimuth.utils.object_loader import load_custom_object
 
 log = structlog.get_logger()
@@ -59,38 +57,6 @@ def update_config(old_config: AzimuthConfig, partial_config: Dict) -> AzimuthCon
     return old_config.copy(update=partial_config, deep=True)
 
 
-def load_dataset_split_managers_from_config(
-    azimuth_config: AzimuthConfig,
-) -> Dict[DatasetSplitName, Optional[DatasetSplitManager]]:
-    """
-    Load all dataset splits for the application.
-
-    Args:
-        azimuth_config: Azimuth Configuration.
-
-    Returns:
-        For all DatasetSplitName, None or a dataset_split manager.
-
-    """
-    dataset = load_dataset_from_config(azimuth_config)
-
-    def make_dataset_split_manager(name: DatasetSplitName):
-        return DatasetSplitManager(
-            name=name,
-            config=azimuth_config,
-            initial_tags=ALL_STANDARD_TAGS,
-            initial_prediction_tags=ALL_PREDICTION_TAGS,
-            dataset_split=dataset[name],
-        )
-
-    return {
-        dataset_split_name: None
-        if dataset_split_name not in dataset
-        else make_dataset_split_manager(DatasetSplitName[dataset_split_name])
-        for dataset_split_name in [DatasetSplitName.eval, DatasetSplitName.train]
-    }
-
-
 def predictions_available(config: ModelContractConfig) -> bool:
     return config.pipelines is not None
Original file line number	Diff line number	Diff line change
`@@ -98,8 +98,6 @@ def patch_config(`
`98`	`98`	`HTTP_500_INTERNAL_SERVER_ERROR, detail="Error when loading the new config."`
`99`	`99`	`)`
`100`	`100`
`101`		`- # Clear workers so that they load the correct config.`
`102`		`- task_manager.clear_worker_cache()`
`103`	`101`	`return new_config`
`104`	`102`
`105`	`103`