diff --git a/paynt/quotient/pomdp.py b/paynt/quotient/pomdp.py index 19bbc15fb..a90d127e6 100644 --- a/paynt/quotient/pomdp.py +++ b/paynt/quotient/pomdp.py @@ -373,28 +373,26 @@ def unfold_memory(self): - def estimate_scheduler_difference(self, mdp, inconsistent_assignments, choice_values, expected_visits=None): + def estimate_scheduler_difference(self, mdp, quotient_choice_map, inconsistent_assignments, choice_values, expected_visits=None): if PomdpQuotient.posterior_aware: - return super().estimate_scheduler_difference(mdp,inconsistent_assignments,choice_values,expected_visits) + return super().estimate_scheduler_difference(mdp,quotient_choice_map,inconsistent_assignments,choice_values,expected_visits) - # note: the code below is optimized for a priori unfolding + # note: the code below is optimized for posterior-unaware unfolding - # create inverse map + # create inverse quotient-choice-to-mdp-choice map # TODO optimize this for multiple properties - if mdp.quotient_to_restricted_action_map is None: - quotient_to_restricted_action_map = [None] * self.quotient_mdp.nr_choices - for action in range(mdp.choices): - quotient_to_restricted_action_map[mdp.quotient_choice_map[action]] = action + quotient_to_restricted_action_map = [None] * self.quotient_mdp.nr_choices + for choice in range(mdp.nr_choices): + quotient_to_restricted_action_map[quotient_choice_map[choice]] = choice # map choices to their origin states choice_to_state = [] - tm = mdp.model.transition_matrix - for state in range(mdp.model.nr_states): + tm = mdp.transition_matrix + for state in range(mdp.nr_states): for choice in tm.get_rows_for_group(state): choice_to_state.append(state) - # for each hole, compute its difference sum and a number of affected states inconsistent_differences = {} for hole_index,options in inconsistent_assignments.items(): diff --git a/paynt/quotient/quotient.py b/paynt/quotient/quotient.py index a5c35e85c..783e3dc80 100644 --- a/paynt/quotient/quotient.py +++ b/paynt/quotient/quotient.py @@ -232,34 +232,15 @@ def expected_visits(self, mdp, prop, choices): return expected_visits - def estimate_scheduler_difference(self, mdp, inconsistent_assignments, choice_values, expected_visits=None): + def estimate_scheduler_difference(self, mdp, quotient_choice_map, inconsistent_assignments, choice_values, expected_visits=None): if expected_visits is None: - expected_visits = [1] * mdp.model.nr_states + expected_visits = [1] * mdp.nr_states hole_variance = payntbind.synthesis.computeInconsistentHoleVariance( - self.design_space.family, mdp.model.nondeterministic_choice_indices, mdp.quotient_choice_map, choice_values, + self.design_space.family, mdp.nondeterministic_choice_indices, quotient_choice_map, choice_values, self.coloring, inconsistent_assignments, expected_visits) return hole_variance - def scheduler_selection_quantitative(self, mdp, prop, result): - ''' - Get hole options involved in the scheduler selection. - ''' - # get qualitative scheduler selection, filter inconsistent assignments - selection = self.scheduler_selection(mdp, result.scheduler) - inconsistent_assignments = {hole_index:options for hole_index,options in enumerate(selection) if len(options) > 1 } - if len(inconsistent_assignments) == 0: - return selection,None,None,None - - # extract choice values, compute expected visits and estimate scheduler difference - choice_values = self.choice_values(mdp.model, prop, result.get_values()) - choices = result.scheduler.compute_action_support(mdp.model.nondeterministic_choice_indices) - expected_visits = self.expected_visits(mdp.model, prop, choices) - inconsistent_differences = self.estimate_scheduler_difference(mdp, inconsistent_assignments, choice_values, expected_visits) - - return selection,choice_values,expected_visits,inconsistent_differences - - def scheduler_consistent(self, mdp, prop, result): ''' Get hole assignment induced by this scheduler and fill undefined @@ -272,16 +253,26 @@ def scheduler_consistent(self, mdp, prop, result): selection = [[mdp.design_space.hole_options(hole)[0]] for hole in range(mdp.design_space.num_holes)] return selection, None, None, None, True - selection,choice_values,expected_visits,scores = self.scheduler_selection_quantitative(mdp, prop, result) - consistent = True - for hole in range(mdp.design_space.num_holes): - options = selection[hole] - if len(options) > 1: - consistent = False + # get qualitative scheduler selection, filter inconsistent assignments + selection = self.scheduler_selection(mdp, result.scheduler) + inconsistent_assignments = {hole:options for hole,options in enumerate(selection) if len(options) > 1 } + scheduler_is_consistent = len(inconsistent_assignments) == 0 + choice_values = None + expected_visits = None + inconsistent_differences = None + if not scheduler_is_consistent: + # extract choice values, compute expected visits and estimate scheduler difference + choice_values = self.choice_values(mdp.model, prop, result.get_values()) + choices = result.scheduler.compute_action_support(mdp.model.nondeterministic_choice_indices) + expected_visits = self.expected_visits(mdp.model, prop, choices) + inconsistent_differences = self.estimate_scheduler_difference(mdp.model, mdp.quotient_choice_map, inconsistent_assignments, choice_values, expected_visits) + + for hole,options in enumerate(selection): if len(options) == 0: + # TODO why is this necessary? selection[hole] = [mdp.design_space.hole_options(hole)[0]] - return selection,choice_values,expected_visits,scores,consistent + return selection, choice_values, expected_visits, inconsistent_differences, scheduler_is_consistent def suboptions_half(self, mdp, splitter): diff --git a/paynt/synthesizer/policy_tree.py b/paynt/synthesizer/policy_tree.py index 6773938ca..71617a404 100644 --- a/paynt/synthesizer/policy_tree.py +++ b/paynt/synthesizer/policy_tree.py @@ -590,8 +590,8 @@ def compute_scores(self, prop, scheduler_choices, state_values, inconsistent_ass expected_visits = None if self.quotient.compute_expected_visits: expected_visits = self.quotient.expected_visits(mdp, prop, scheduler_choices) - quotient_mdp_wrapped = self.quotient.design_space.mdp - scores = self.quotient.estimate_scheduler_difference(quotient_mdp_wrapped, inconsistent_assignments, choice_values, expected_visits) + quotient_choice_map = [choice for choice in self.quotient.quotient_mdp.nr_choices] + scores = self.quotient.estimate_scheduler_difference(self.quotient.quotient_mdp, quotient_choice_map, choice_values, expected_visits) return scores def split(self, family, prop, hole_selection, splitter): diff --git a/paynt/synthesizer/synthesizer.py b/paynt/synthesizer/synthesizer.py index 73eee2fed..f9af1d424 100644 --- a/paynt/synthesizer/synthesizer.py +++ b/paynt/synthesizer/synthesizer.py @@ -129,6 +129,7 @@ def synthesize(self, family=None, optimum_threshold=None, return_all=False, prin logger.debug(f"optimality threshold set to {optimum_threshold}") logger.info("synthesis initiated, design space: {}".format(family.size)) + self.quotient.discarded = 0 self.stat.start(family) assignment = self.synthesize_one(family) if assignment is not None and assignment.size > 1 and not return_all: diff --git a/paynt/synthesizer/synthesizer_ar.py b/paynt/synthesizer/synthesizer_ar.py index 7d454d690..0e1395376 100644 --- a/paynt/synthesizer/synthesizer_ar.py +++ b/paynt/synthesizer/synthesizer_ar.py @@ -31,7 +31,6 @@ def update_optimum(self, family): def synthesize_one(self, family): # return self.synthesize_one_experimental(family) - self.quotient.discarded = 0 satisfying_assignment = None families = [family] diff --git a/paynt/synthesizer/synthesizer_multicore_ar.py b/paynt/synthesizer/synthesizer_multicore_ar.py index 74eb85392..5b9811651 100644 --- a/paynt/synthesizer/synthesizer_multicore_ar.py +++ b/paynt/synthesizer/synthesizer_multicore_ar.py @@ -1,54 +1,64 @@ -from .statistic import Statistic -from .synthesizer import Synthesizer -from .synthesizer_ar import SynthesizerAR +from paynt.synthesizer.synthesizer import Synthesizer +from paynt.synthesizer.synthesizer_ar import SynthesizerAR -import multiprocessing as mp import os import time +import multiprocessing import logging logger = logging.getLogger(__name__) -# global variable containing the quotient -# when a process is spawned (forked), it will inherit this quotient from the parent +import cProfile, pstats + +# global variables +# when a new process is spawned (forked), it will inherit these variables from the parent quotient = None +profiler = None -import cProfile, pstats -profile = None +# helper functions for family serialization +def family_to_hole_options(family): + return [family.hole_options(hole) for hole in range(family.num_holes)] + +def hole_options_to_family(hole_options): + family = quotient.design_space.copy() + for hole,options in enumerate(hole_options): + family.hole_set_options(hole,options) + return family def solve_family(args): ''' Build the quotient, analyze it and, if necessary, split into subfamilies. ''' - try: - family, optimum = args + if args is None: + pstats.Stats(profiler).sort_stats('tottime').print_stats(10) - if family is None: - pstats.Stats(profile).sort_stats('tottime').print_stats(50) - return + hole_options, optimum = args + # re-construct the family + family = hole_options_to_family(hole_options) # synchronize optimum if optimum is not None: quotient.specification.optimality.optimum = optimum quotient.build(family) - # self.stat.iteration_mdp(family.mdp.states) res = family.mdp.check_specification(quotient.specification, constraint_indices = family.constraint_indices, short_evaluation = True) family.analysis_result = res + improving_value = res.improving_value + improving_assignment = res.improving_assignment + if improving_assignment is not None: + improving_assignment = family_to_hole_options(improving_assignment) subfamilies = [] - if family.analysis_result.can_improve: + if res.can_improve: subfamilies = quotient.split(family, Synthesizer.incomplete_search) - # remove parent info since Property is not pickleable - for subfamily in subfamilies: - subfamily.parent_info = None + subfamilies = [ family_to_hole_options(family) for family in subfamilies ] - return ([family.mdp.states], family.analysis_result.improving_value, family.analysis_result.improving_assignment, subfamilies) + return (family.mdp.states, improving_value, improving_assignment, subfamilies) except: logger.error("Worker sub-process encountered an error.") @@ -64,23 +74,21 @@ def method_name(self): def synthesize_one(self, family): - self.quotient.discarded = 0 - satisfying_assignment = None families = [family] - start_time = time.perf_counter() - global quotient quotient = self.quotient - global profile - profile = cProfile.Profile() - # profile.enable() + profiling = False + if profiling: + global profiler + profiler = cProfile.Profile() + profiler.enable() # create a pool of processes # by default, os.cpu_count() processes will be spawned - with mp.Pool( - # processes = 1 + with multiprocessing.Pool( + # processes=1 ) as pool: while families: @@ -97,7 +105,8 @@ def synthesize_one(self, family): input_families = families[-split:] input_families_size = sum([family.size for family in input_families]) remaining_families = families[:-split] - + input_families = [family_to_hole_options(family) for family in input_families] + inputs = zip(input_families, [optimum] * len(input_families)) results = pool.map(solve_family, inputs) @@ -110,16 +119,16 @@ def synthesize_one(self, family): if r is None: logger.error("Worker sub-process encountered an error.") exit() - mdp_states, improving_value, improving_assignment, subfamilies = r - - for entry in mdp_states: - self.stat.iteration_mdp(entry) + mdp_states, improving_value, improving_assignment, subfamilies_hole_options = r + self.stat.iteration_mdp(mdp_states) if improving_value is not None: if self.quotient.specification.optimality.improves_optimum(improving_value): self.quotient.specification.optimality.update_optimum(improving_value) + improving_assignment = hole_options_to_family(improving_assignment) satisfying_assignment = improving_assignment - + + subfamilies = [hole_options_to_family(hole_options) for hole_options in subfamilies_hole_options] new_families += subfamilies new_families_size = sum([family.size for family in new_families]) @@ -127,65 +136,9 @@ def synthesize_one(self, family): families = remaining_families + new_families - # pool.apply(solve_family, ((None,None),)) - - - finish_time = time.perf_counter() - total_time = round(finish_time-start_time, 3) - - logger.info("Synthesis finished in {} s (real time).".format(total_time)) - - # pstats.Stats(profile).sort_stats('tottime').print_stats(10) + if profiling: + pool.apply(solve_family, (None,)) + if profiling: + pstats.Stats(profiler).sort_stats('tottime').print_stats(10) return satisfying_assignment - - - -def solve_batch(args): - ''' - When an undecidable family is encountered, the subfamilies are processed - up to a limit specified below. - ''' - - try: - - family, optimum = args - - # synchronize optimum - if optimum is not None: - quotient.specification.optimality.optimum = optimum - - mdp_states = [] - improving_value = None - improving_assignment = None - subfamilies = [family] - - # analyze the batch - limit = 1 - for x in range(limit): - - if not subfamilies: - break - - family = subfamilies.pop(-1) - quotient.build(family) - res = family.mdp.check_specification(quotient.specification, constraint_indices = family.constraint_indices, short_evaluation = True) - family.analysis_result = res - mdp_states.append(family.mdp.states) - - if family.analysis_result.can_improve: - subfamilies += quotient.split(family, Synthesizer.incomplete_search) - - if family.analysis_result.improving_value is not None: - break - - - # remove parent info since Property is not pickleable - for subfamily in subfamilies: - subfamily.parent_info = None - - return (mdp_states, family.analysis_result.improving_value, family.analysis_result.improving_assignment, subfamilies) - - except: - print("meaningful error message") - return None \ No newline at end of file diff --git a/paynt/utils/profiler.py b/paynt/utils/profiler.py index c74d8bf1d..30abb527d 100644 --- a/paynt/utils/profiler.py +++ b/paynt/utils/profiler.py @@ -10,7 +10,7 @@ def __init__(self): @staticmethod def timestamp(): #return time.process_time() # cpu time - return time.time() + return time.perf_counter() def reset(self): self.__init__()