Skip to content

Commit

Permalink
Removed self.P and self.R since they caused issues with copy.deepcopy()
Browse files Browse the repository at this point in the history
  • Loading branch information
RaghuSpaceRajan committed Dec 17, 2024
1 parent 3411bf7 commit 071dffe
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 7 deletions.
12 changes: 7 additions & 5 deletions mdp_playground/envs/rl_toy_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -557,7 +557,8 @@ def __init__(self, **config):

# ##TODO Move these to the individual env types' defaults section above?
if config["state_space_type"] == "discrete":
self.dtype_s = np.int32 if "dtype_s" not in config else config["dtype_s"]
# I think Gymnasium wants int64, but Dreamer-V3 (~June 2024) code may prefer int32
self.dtype_s = np.int64 if "dtype_s" not in config else config["dtype_s"]
if self.irrelevant_features:
assert (
len(config["action_space_size"]) == 2
Expand Down Expand Up @@ -1226,7 +1227,8 @@ def init_transition_function(self):
# fixed parameterisation for cont. envs. right now.
pass

self.P = lambda s, a: self.transition_function(s, a)
# ####IMP Keep this commented out as it causes problems with deepcopy().
# self.P = lambda s, a: self.transition_function(s, a)

def init_reward_function(self):
"""Initialises the reward function, R, by selecting random sequences to be rewardable for discrete environments. For continuous environments, we have fixed available options for the reward function."""
Expand Down Expand Up @@ -1550,7 +1552,7 @@ def get_rews(rng, r_dict):
elif self.config["state_space_type"] == "grid":
... # ###TODO Make sequences compatible with grid

self.R = lambda s, a: self.reward_function(s, a)
# self.R = lambda s, a: self.reward_function(s, a)

def transition_function(self, state, action):
"""The transition function, P.
Expand Down Expand Up @@ -2009,7 +2011,7 @@ def step(self, action, imaginary_rollout=False):
# ### TODO Decide whether to give reward before or after transition ("after" would mean taking next state into account and seems more logical to me) - make it a dimension? - R(s) or R(s, a) or R(s, a, s')? I'd say give it after and store the old state in the augmented_state to be able to let the R have any of the above possible forms. That would also solve the problem of implicit 1-step delay with giving it before. _And_ would not give any reward for already being in a rewarding state in the 1st step but _would_ give a reward if 1 moved to a rewardable state - even if called with R(s, a) because s' is stored in the augmented_state! #####IMP

# ###TODO P uses last state while R uses augmented state; for cont. env, P does know underlying state_derivatives - we don't want this to be the case for the imaginary rollout scenario;
next_state = self.P(state, action)
next_state = self.transition_function(state, action)

# if imaginary_rollout:
# pass
Expand All @@ -2025,7 +2027,7 @@ def step(self, action, imaginary_rollout=False):

self.total_transitions_episode += 1

self.reward = self.R(self.augmented_state, action)
self.reward = self.reward_function(self.augmented_state, action)

# #irrelevant dimensions part
if self.config["state_space_type"] == "discrete":
Expand Down
4 changes: 2 additions & 2 deletions mdp_playground/spaces/image_continuous.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ def __init__(
"Image observations are " "supported only " "for 1- or 2-D feature spaces."
)

# Shape has 1 appended for Ray Rllib to be compatible IIRC
# Shape needs 3rd dimension for Ray Rllib to be compatible IIRC
super(ImageContinuous, self).__init__(
shape=(width, height, num_channels), dtype=dtype, low=0, high=255
)
Expand Down Expand Up @@ -244,7 +244,7 @@ def contains(self, x):
self.width,
self.height,
self.num_channels,
): # TODO compare each pixel for all possible images?
): # TODO compare each pixel for all possible image observations? Hard to implement.
return True

def to_jsonable(self, sample_n):
Expand Down

0 comments on commit 071dffe

Please sign in to comment.