new condition for enabling don't-care action

randriu · Jan 15, 2025 · 8a62c5f · 8a62c5f
1 parent 81a9d8f
commit 8a62c5f
Show file tree

Hide file tree

Showing 12 changed files with 276 additions and 95 deletions.
diff --git a/paynt/quotient/mdp.py b/paynt/quotient/mdp.py
@@ -273,10 +273,12 @@ def to_graphviz(self):
 
 class MdpQuotient(paynt.quotient.quotient.Quotient):
 
+    # label for action executing a random action selection
+    DONT_CARE_ACTION_LABEL = "__random__"
     # if true, an explicit action executing a random choice of an available action will be added to each state
     add_dont_care_action = False
     # if true, irrelevant states will not be considered for tree mapping
-    filter_irrelevant_states = True
+    filter_deterministic_states = False
 
     @classmethod
     def get_state_valuations(cls, model):
@@ -304,30 +306,32 @@ def __init__(self, mdp, specification):
 
         # list of relevant variables: variables having at least two different options on relevant states
         self.variables = None
-        # for every state, a valuation of relevant variables; contains empty list for irrelevant states
+        # for every state, a valuation of relevant variables
         self.relevant_state_valuations = None
         # decision tree obtained after reset_tree
         self.decision_tree = None
 
         # deprecated
         # updated = payntbind.synthesis.restoreActionsInAbsorbingStates(mdp)
         # if updated is not None: mdp = updated
-        action_labels,_ = payntbind.synthesis.extractActionLabels(mdp)
-        if "__random__" not in action_labels and MdpQuotient.add_dont_care_action:
-            logger.debug("adding explicit don't-care action to every state...")
-            mdp = payntbind.synthesis.addDontCareAction(mdp)
 
         # identify relevant states
         self.state_is_relevant = [True for state in range(mdp.nr_states)]
-        if MdpQuotient.filter_irrelevant_states:
-            state_is_absorbing = self.identify_absorbing_states(mdp)
-            self.state_is_relevant = [self.state_is_relevant[state] and not absorbing for state,absorbing in enumerate(state_is_absorbing)]
+        state_is_absorbing = self.identify_absorbing_states(mdp)
+        self.state_is_relevant = [relevant and not state_is_absorbing[state] for state,relevant in enumerate(self.state_is_relevant)]
+
+        if MdpQuotient.filter_deterministic_states:
             state_has_actions = self.identify_states_with_actions(mdp)
-            self.state_is_relevant = [self.state_is_relevant[state] and has_actions for state,has_actions in enumerate(state_has_actions)]
+            self.state_is_relevant = [relevant and state_has_actions[state] for state,relevant in enumerate(self.state_is_relevant)]
         self.state_is_relevant_bv = stormpy.BitVector(mdp.nr_states)
         [self.state_is_relevant_bv.set(state,value) for state,value in enumerate(self.state_is_relevant)]
         logger.debug(f"MDP has {self.state_is_relevant_bv.number_of_set_bits()}/{self.state_is_relevant_bv.size()} relevant states")
 
+        action_labels,_ = payntbind.synthesis.extractActionLabels(mdp)
+        if MdpQuotient.DONT_CARE_ACTION_LABEL not in action_labels and MdpQuotient.add_dont_care_action:
+            logger.debug("adding explicit don't-care action to relevant states...")
+            mdp = payntbind.synthesis.addDontCareAction(mdp,self.state_is_relevant_bv)
+
         self.quotient_mdp = mdp
         self.choice_destinations = payntbind.synthesis.computeChoiceDestinations(mdp)
         self.action_labels,self.choice_to_action = payntbind.synthesis.extractActionLabels(mdp)
@@ -359,9 +363,9 @@ def __init__(self, mdp, specification):
         logger.debug(f"found the following {len(self.variables)} variables: {[str(v) for v in self.variables]}")
 
 
-    def scheduler_json_to_choices(self, scheduler_json):
+    def scheduler_json_to_choices(self, scheduler_json, discard_unreachable_states=False):
         variable_name,state_valuations = self.get_state_valuations(self.quotient_mdp)
-        ndi = self.quotient_mdp.nondeterministic_choice_indices.copy()
+        nci = self.quotient_mdp.nondeterministic_choice_indices.copy()
         assert self.quotient_mdp.nr_states == len(scheduler_json)
         state_to_choice = self.empty_scheduler()
         for state_decision in scheduler_json:
@@ -377,25 +381,52 @@ def scheduler_json_to_choices(self, scheduler_json):
             action_labels = actions[0]["labels"]
             assert len(action_labels) <= 1
             if len(action_labels) == 0:
-                state_to_choice[state] = ndi[state]
+                state_to_choice[state] = nci[state]
                 continue
             action = self.action_labels.index(action_labels[0])
             # find a choice that executes this action
-            for choice in range(ndi[state],ndi[state+1]):
+            for choice in range(nci[state],nci[state+1]):
                 if self.choice_to_action[choice] == action:
                     state_to_choice[state] = choice
                     break
             else:
                 assert False, "action is not available in the state"
-        state_to_choice = self.discard_unreachable_choices(state_to_choice)
+        # enable implicit actions
+        for state,choice in enumerate(state_to_choice):
+            if choice is None:
+                logger.warning(f"WARNING: scheduler has no action for state {state}")
+                state_to_choice[state] = nci[state]
+
+        if discard_unreachable_states:
+            state_to_choice = self.discard_unreachable_choices(state_to_choice)
+        # keep only relevant states
+        state_to_choice = [choice if self.state_is_relevant[state] else None for state,choice in enumerate(state_to_choice)]
         choices = self.state_to_choice_to_choices(state_to_choice)
-        return choices
+
+        scheduler_json_relevant = []
+        for state_decision in scheduler_json:
+            valuation = [state_decision["s"][name] for name in variable_name]
+            for state,state_valuation in enumerate(state_valuations):
+                if valuation == state_valuation:
+                    break
+            if state_to_choice[state] is None:
+                continue
+            scheduler_json_relevant.append(state_decision)
+
+        return choices,scheduler_json_relevant
+
 
     def reset_tree(self, depth, enable_harmonization=True):
         '''
         Rebuild the decision tree template, the design space and the coloring.
         '''
         logger.debug(f"building tree of depth {depth}")
+
+        num_actions = len(self.action_labels)
+        dont_care_action = num_actions
+        if MdpQuotient.DONT_CARE_ACTION_LABEL in self.action_labels:
+            dont_care_action = self.action_labels.index(MdpQuotient.DONT_CARE_ACTION_LABEL)
+
         self.decision_tree = DecisionTree(self,self.variables)
         self.decision_tree.set_depth(depth)
 
@@ -405,6 +436,7 @@ def reset_tree(self, depth, enable_harmonization=True):
         tree_list = self.decision_tree.to_list()
         self.coloring = payntbind.synthesis.ColoringSmt(
             self.quotient_mdp.nondeterministic_choice_indices, self.choice_to_action,
+            num_actions, dont_care_action,
             self.quotient_mdp.state_valuations, self.state_is_relevant_bv,
             variable_name, variable_domain, tree_list, enable_harmonization
         )
@@ -453,10 +485,7 @@ def build(self, family):
             choices = self.coloring.selectCompatibleChoices(family.family)
         else:
             choices = self.coloring.selectCompatibleChoices(family.family, family.parent_info.selected_choices)
-        if choices.number_of_set_bits() == 0:
-            family.mdp = None
-            family.analysis_result = self.build_unsat_result()
-            return
+        assert choices.number_of_set_bits() > 0
 
         # proceed as before
         family.selected_choices = choices

diff --git a/paynt/quotient/quotient.py b/paynt/quotient/quotient.py
@@ -96,6 +96,7 @@ def mdp_to_dtmc(mdp):
     def build_assignment(self, family):
         assert family.size == 1, "expecting family of size 1"
         choices = self.coloring.selectCompatibleChoices(family.family)
+        assert choices.number_of_set_bits() > 0
         mdp,state_map,choice_map = self.restrict_quotient(choices)
         model = Quotient.mdp_to_dtmc(mdp)
         return paynt.models.models.SubMdp(model,state_map,choice_map)

diff --git a/paynt/synthesizer/decision_tree.py b/paynt/synthesizer/decision_tree.py
@@ -64,10 +64,8 @@ def harmonize_inconsistent_scheduler(self, family):
     def verify_family(self, family):
         self.num_families_considered += 1
         self.quotient.build(family)
-        if family.mdp is None:
-            self.num_families_skipped += 1
-            return
 
+        self.stat.iteration(family.mdp)
         if family.parent_info is not None:
             for choice in family.parent_info.scheduler_choices:
                 if not family.selected_choices[choice]:
@@ -86,8 +84,6 @@ def verify_family(self, family):
         self.check_specification(family)
         if not family.analysis_result.can_improve:
             return
-        if SynthesizerDecisionTree.scheduler_path is not None:
-            return
         self.harmonize_inconsistent_scheduler(family)
 
 
@@ -204,10 +200,7 @@ def map_scheduler(self, scheduler_choices):
             if self.resource_limit_reached():
                 break
 
-        # self.counters_print()
-
     def run(self, optimum_threshold=None):
-
         scheduler_choices = None
         if SynthesizerDecisionTree.scheduler_path is None:
             paynt_mdp = paynt.models.models.Mdp(self.quotient.quotient_mdp)
@@ -216,7 +209,18 @@ def run(self, optimum_threshold=None):
             opt_result_value = None
             with open(SynthesizerDecisionTree.scheduler_path, 'r') as f:
                 scheduler_json = json.load(f)
-            scheduler_choices = self.quotient.scheduler_json_to_choices(scheduler_json)
+            scheduler_choices,scheduler_json_relevant = self.quotient.scheduler_json_to_choices(scheduler_json, discard_unreachable_states=True)
+
+            # export transformed scheduler
+            # import os
+            # directory = os.path.dirname(SynthesizerDecisionTree.scheduler_path)
+            # transformed_name = f"scheduler-reachable.storm.json"
+            # scheduler_relevant_path = os.path.join(directory, transformed_name)
+            # with open(scheduler_relevant_path, 'w') as f:
+            #     json.dump(scheduler_json_relevant, f, indent=4)
+            # logger.debug(f"stored transformed scheduler to {scheduler_relevant_path}")
+            # exit()
+
             submdp = self.quotient.build_from_choice_mask(scheduler_choices)
             mc_result = submdp.model_check_property(self.quotient.get_property())
         opt_result_value = mc_result.value
@@ -261,9 +265,9 @@ def run(self, optimum_threshold=None):
         time_total = round(paynt.utils.timer.GlobalTimer.read(),2)
         logger.info(f"synthesis finished after {time_total} seconds")
 
-        # print()
-        # for name,time in self.quotient.coloring.getProfilingInfo():
-        #     time_percent = round(time/time_total*100,1)
-        #     print(f"{name} = {time} s ({time_percent} %)")
+        print()
+        for name,time in self.quotient.coloring.getProfilingInfo():
+            time_percent = round(time/time_total*100,1)
+            print(f"{name} = {time} s ({time_percent} %)")
 
         return self.best_tree
diff --git a/paynt/synthesizer/synthesizer_ar.py b/paynt/synthesizer/synthesizer_ar.py
@@ -115,7 +115,6 @@ def update_optimum(self, family):
         if isinstance(self.quotient, paynt.quotient.pomdp.PomdpQuotient):
             self.stat.new_fsc_found(family.analysis_result.improving_value, ia, self.quotient.policy_size(ia))
 
-
     def synthesize_one(self, family):
         families = [family]
         while families:

diff --git a/paynt/verification/property_result.py b/paynt/verification/property_result.py
@@ -132,7 +132,7 @@ def evaluate(self, family=None, admissible_assignment=None):
             else:
                 self.improving_assignment = opt.improving_assignment
                 self.improving_value = opt.improving_value
-                self.can_improve = opt.can_improve        
+                self.can_improve = opt.can_improve
             return
 
         # constraints undecided