Skip to content

Commit

Permalink
Removed self.P and self.R since they caused issues with copy.deepcopy()
Browse files Browse the repository at this point in the history
  • Loading branch information
RaghuSpaceRajan committed Dec 17, 2024
1 parent 3411bf7 commit 071dffe
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 7 deletions.
12 changes: 7 additions & 5 deletions mdp_playground/envs/rl_toy_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -557,7 +557,8 @@ def __init__(self, **config):

# ##TODO Move these to the individual env types' defaults section above?
if config["state_space_type"] == "discrete":
self.dtype_s = np.int32 if "dtype_s" not in config else config["dtype_s"]
# I think Gymnasium wants int64, but Dreamer-V3 (~June 2024) code may prefer int32
self.dtype_s = np.int64 if "dtype_s" not in config else config["dtype_s"]
if self.irrelevant_features:
assert (
len(config["action_space_size"]) == 2
Expand Down Expand Up @@ -1226,7 +1227,8 @@ def init_transition_function(self):
# fixed parameterisation for cont. envs. right now.
pass

self.P = lambda s, a: self.transition_function(s, a)
# ####IMP Keep this commented out as it causes problems with deepcopy().
# self.P = lambda s, a: self.transition_function(s, a)

def init_reward_function(self):
"""Initialises the reward function, R, by selecting random sequences to be rewardable for discrete environments. For continuous environments, we have fixed available options for the reward function."""
Expand Down Expand Up @@ -1550,7 +1552,7 @@ def get_rews(rng, r_dict):
elif self.config["state_space_type"] == "grid":
... # ###TODO Make sequences compatible with grid

self.R = lambda s, a: self.reward_function(s, a)
# self.R = lambda s, a: self.reward_function(s, a)

def transition_function(self, state, action):
"""The transition function, P.
Expand Down Expand Up @@ -2009,7 +2011,7 @@ def step(self, action, imaginary_rollout=False):
# ### TODO Decide whether to give reward before or after transition ("after" would mean taking next state into account and seems more logical to me) - make it a dimension? - R(s) or R(s, a) or R(s, a, s')? I'd say give it after and store the old state in the augmented_state to be able to let the R have any of the above possible forms. That would also solve the problem of implicit 1-step delay with giving it before. _And_ would not give any reward for already being in a rewarding state in the 1st step but _would_ give a reward if 1 moved to a rewardable state - even if called with R(s, a) because s' is stored in the augmented_state! #####IMP

# ###TODO P uses last state while R uses augmented state; for cont. env, P does know underlying state_derivatives - we don't want this to be the case for the imaginary rollout scenario;
next_state = self.P(state, action)
next_state = self.transition_function(state, action)

# if imaginary_rollout:
# pass
Expand All @@ -2025,7 +2027,7 @@ def step(self, action, imaginary_rollout=False):

self.total_transitions_episode += 1

self.reward = self.R(self.augmented_state, action)
self.reward = self.reward_function(self.augmented_state, action)

# #irrelevant dimensions part
if self.config["state_space_type"] == "discrete":
Expand Down
4 changes: 2 additions & 2 deletions mdp_playground/spaces/image_continuous.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ def __init__(
"Image observations are " "supported only " "for 1- or 2-D feature spaces."
)

# Shape has 1 appended for Ray Rllib to be compatible IIRC
# Shape needs 3rd dimension for Ray Rllib to be compatible IIRC
super(ImageContinuous, self).__init__(
shape=(width, height, num_channels), dtype=dtype, low=0, high=255
)
Expand Down Expand Up @@ -244,7 +244,7 @@ def contains(self, x):
self.width,
self.height,
self.num_channels,
): # TODO compare each pixel for all possible images?
): # TODO compare each pixel for all possible image observations? Hard to implement.
return True

def to_jsonable(self, sample_n):
Expand Down

0 comments on commit 071dffe

Please sign in to comment.