
Commit 4ee76c1

MAJOR: Version changed to 1.0.0 with breaking API changes: change to using gymnasium instead of gym, with the associated changes to step() and reset() return values, etc.; upgrade numpy and the random number generation. Tests still need to be updated, and the minigrid example in example.py is failing.
RaghuSpaceRajan committed Jun 19, 2024
1 parent 2502532 commit 4ee76c1
Showing 25 changed files with 340 additions and 387 deletions.
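Nearly all of the changes below follow from the two Gymnasium API breaks named in the commit message. A minimal sketch of the new calling convention, assuming standard Gymnasium (>= 0.26) semantics; CartPole-v1 is only an illustrative registered env, not part of this repo:

    import gymnasium as gym

    env = gym.make("CartPole-v1")

    # gym: obs = env.reset()  ->  gymnasium: reset() returns (obs, info)
    obs, info = env.reset(seed=0)

    # gym: (obs, reward, done, info)  ->  gymnasium: a 5-tuple in which 'done'
    # is split into terminated (MDP termination) and truncated (time limit)
    action = env.action_space.sample()
    obs, reward, terminated, truncated, info = env.step(action)
    done = terminated or truncated  # recover the old single flag if needed
    env.close()

This is why the hunks below consistently turn `done, info` into `done, trunc, info` and `env.reset()` into `env.reset()[0]`.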
docs: Source code for mdp_playground.envs.gym_env_wrapper

@@ -602,7 +602,7 @@
     # print("Setting Mujoco self.frame_skip, self._ctrl_cost_weight, self._forward_reward_weight to", self.frame_skip, self._ctrl_cost_weight, self._forward_reward_weight, "corresponding to time_unit in config.")

     def step(self, action):
-        # next_state, reward, done, info = super(GymEnvWrapper, self).step(action)
+        # next_state, reward, done, trunc, info = super(GymEnvWrapper, self).step(action)
         self.total_transitions_episode += 1

         if self.config["state_space_type"] == "discrete" and self.transition_noise > 0.0:

@@ -689,7 +689,7 @@
         int
             The seed returned by Gym
         """
-        # If seed is None, you get a randomly generated seed from gym.utils...
+        # If seed is None, you get a randomly generated seed from gymnasium.utils...
         self.np_random, self.seed_ = gym.utils.seeding.np_random(seed)  # random
         print("Env SEED set to: " + str(seed) + ". Returned seed from Gym: " + str(self.seed_))

@@ -701,8 +701,8 @@

 # from mdp_playground.envs.gym_env_wrapper import get_gym_wrapper
-# from gym.envs.atari import AtariEnv
-# from gym.wrappers import AtariPreprocessing
+# from gymnasium.envs.atari import AtariEnv
+# from gymnasium.wrappers import AtariPreprocessing
 # AtariPreprocessing()
 # AtariEnvWrapper = get_gym_wrapper(AtariEnv)
 # from ray.tune.registry import register_env

@@ -711,7 +711,7 @@
 # ob = aew.reset()

 # from mdp_playground.envs.gym_env_wrapper import GymEnvWrapper
-# from gym.envs.atari import AtariEnv
+# from gymnasium.envs.atari import AtariEnv
 # ae = AtariEnv(**{'game': 'beam_rider', 'obs_type': 'image', 'frameskip': 1})
 # aew = GymEnvWrapper(ae, **{'reward_noise': lambda a: a.normal(0, 0.1), 'transition_noise': 0.1, 'delay': 1, 'frame_skip': 4, "atari_preprocessing": True, "state_space_type": "discrete", 'seed': 0})
 # ob = aew.reset()

@@ -720,7 +720,7 @@
 # total_reward = 0.0
 # for i in range(200):
 #     act = aew.action_space.sample()
-#     next_state, reward, done, info = aew.step(act)
+#     next_state, reward, done, trunc, info = aew.step(act)
 #     print(reward, done, act)
 #     if reward > 10:
 #         print("reward in step:", i, reward)
docs: Source code for mdp_playground.envs.mujoco_env_wrapper

@@ -438,7 +438,7 @@
-# from gym.envs.mujoco.mujoco_env import MujocoEnv
+# from gymnasium.envs.mujoco.mujoco_env import MujocoEnv
 from gym.envs.mujoco.half_cheetah_v3 import HalfCheetahEnv
 from gym.envs.mujoco.pusher import PusherEnv
 from gym.envs.mujoco.reacher import ReacherEnv

@@ -516,7 +516,7 @@
 # from mdp_playground.envs.mujoco_env_wrapper import get_mujoco_wrapper #hack
 #
-# from gym.envs.mujoco.reacher import ReacherEnv
+# from gymnasium.envs.mujoco.reacher import ReacherEnv
 # ReacherWrapperV2 = get_mujoco_wrapper(ReacherEnv)
 # config = {"time_unit": 0.2}
 # rw2 = ReacherWrapperV2(**config)
docs: Source code for mdp_playground.envs.rl_toy_env

@@ -1967,7 +1967,7 @@
         int
             The seed returned by Gym
         """
-        # If seed is None, you get a randomly generated seed from gym.utils...
+        # If seed is None, you get a randomly generated seed from gymnasium.utils...
         self.np_random, self.seed_ = gym.utils.seeding.np_random(seed)  # random
         print("Env SEED set to: " + str(seed) + ". Returned seed from Gym: " + str(self.seed_))
         return self.seed_
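Both wrappers seed through the same helper, which also covers the "random number generation" upgrade in the commit message: Gymnasium's seeding utility hands back a numpy `Generator` rather than the legacy `RandomState`. A short standalone sketch of its contract:

    from gymnasium.utils import seeding

    # With seed=None a seed is drawn from OS entropy; with an int, that seed
    # is used. The seed actually used is returned alongside the Generator,
    # which is why the wrappers store it as self.seed_ above.
    np_random, seed_ = seeding.np_random(None)
    print(seed_, np_random.integers(10))  # Generator API: integers(), not randint()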
docs: Source code for mdp_playground.spaces.test_image_multi_discrete

@@ -442,8 +442,8 @@
 import numpy as np
 from mdp_playground.spaces.image_multi_discrete import ImageMultiDiscrete
 from gym.spaces import Discrete, MultiDiscrete
-# import gym
-# from gym.spaces import MultiDiscrete
+# import gymnasium as gym
+# from gymnasium.spaces import MultiDiscrete
 # # from .space import Space
 # import PIL.ImageDraw as ImageDraw
 # import PIL.Image as Image
58 changes: 29 additions & 29 deletions example.py
@@ -59,7 +59,7 @@ def discrete_environment_example():
     config["repeats_in_sequences"] = False

     config["generate_random_mdp"] = True
-    env = RLToyEnv(**config)  # Calls env.reset() automatically. So, in general,
+    env = RLToyEnv(**config)  # Calls env.reset()[0] automatically. So, in general,
     # there is no need to call it after this.

     # The environment maintains an augmented state which contains the underlying
@@ -73,7 +73,7 @@ def discrete_environment_example():
         "the transition:"
     )
     action = env.action_space.sample()
-    next_state, reward, done, info = env.step(action)
+    next_state, reward, done, trunc, info = env.step(action)
     print("sars', done =", state, action, reward, next_state, done)

     env.close()
@@ -113,7 +113,7 @@ def discrete_environment_image_representations_example():
         "the transition:"
     )
     action = env.action_space.sample()
-    next_state_image, reward, done, info = env.step(action)
+    next_state_image, reward, done, trunc, info = env.step(action)
     augmented_state_dict = env.get_augmented_state()
     next_state = augmented_state_dict["curr_state"]  # Underlying MDP state holds
     # the current discrete state.
@@ -159,7 +159,7 @@ def discrete_environment_diameter_image_representations_example():
         "the transition:"
     )
     action = env.action_space.sample()
-    next_state_image, reward, done, info = env.step(action)
+    next_state_image, reward, done, trunc, info = env.step(action)
     augmented_state_dict = env.get_augmented_state()
     next_state = augmented_state_dict["curr_state"]  # Underlying MDP state holds
     # the current discrete state.
@@ -192,14 +192,14 @@ def continuous_environment_example_move_to_a_point():
     config["reward_function"] = "move_to_a_point"

     env = RLToyEnv(**config)
-    state = env.reset().copy()
+    state = env.reset()[0].copy()

     print(
         "Taking a step in the environment with a random action and printing "
         "the transition:"
     )
     action = env.action_space.sample()
-    next_state, reward, done, info = env.step(action)
+    next_state, reward, done, trunc, info = env.step(action)
     print("sars', done =", state, action, reward, next_state, done)

     env.close()
@@ -231,7 +231,7 @@ def continuous_environment_example_move_to_a_point_irrelevant_image():
     config["relevant_indices"] = [0, 1]

     env = RLToyEnv(**config)
-    state = env.reset()
+    state = env.reset()[0]
     augmented_state_dict = env.get_augmented_state()
     state = augmented_state_dict["curr_state"].copy()  # Underlying MDP state holds
     # the current continuous state.
@@ -241,7 +241,7 @@ def continuous_environment_example_move_to_a_point_irrelevant_image():
         "the transition:"
     )
     action = env.action_space.sample()
-    next_state_image, reward, done, info = env.step(action)
+    next_state_image, reward, done, trunc, info = env.step(action)
     augmented_state_dict = env.get_augmented_state()
     next_state = augmented_state_dict["curr_state"].copy()  # Underlying MDP state holds
     # the current continuous state.
@@ -274,14 +274,14 @@ def continuous_environment_example_move_along_a_line():
     config["reward_function"] = "move_along_a_line"

     env = RLToyEnv(**config)
-    state = env.reset().copy()
+    state = env.reset()[0].copy()

     print(
         "Taking a step in the environment with a random action and printing "
         "the transition:"
     )
     action = env.action_space.sample()
-    next_state, reward, done, info = env.step(action)
+    next_state, reward, done, trunc, info = env.step(action)
     print("sars', done =", state, action, reward, next_state, done)

     env.close()
@@ -305,12 +305,12 @@ def grid_environment_example():

     for i in range(len(actions)):
         action = actions[i]
-        next_obs, reward, done, info = env.step(action)
+        next_obs, reward, done, trunc, info = env.step(action)
         next_state = env.get_augmented_state()["augmented_state"][-1]
         print("sars', done =", state, action, reward, next_state, done)
         state = next_state

-    env.reset()
+    env.reset()[0]
     env.close()


@@ -334,12 +334,12 @@ def grid_environment_image_representations_example():

     for i in range(len(actions)):
         action = actions[i]
-        next_obs, reward, done, info = env.step(action)
+        next_obs, reward, done, trunc, info = env.step(action)
         next_state = env.get_augmented_state()["augmented_state"][-1]
         print("sars', done =", state, action, reward, next_state, done)
         state = next_state

-    env.reset()
+    env.reset()[0]
     env.close()

     display_image(next_obs)
@@ -356,18 +356,18 @@ def atari_wrapper_example():
     }

     from mdp_playground.envs import GymEnvWrapper
-    import gym
+    import gymnasium as gym

     ae = gym.make("QbertNoFrameskip-v4")
     env = GymEnvWrapper(ae, **config)
-    state = env.reset()
+    state = env.reset()[0]

     print(
         "Taking 10 steps in the environment with a random action and printing the transition:"
     )
     for i in range(10):
         action = env.action_space.sample()
-        next_state, reward, done, info = env.step(action)
+        next_state, reward, done, trunc, info = env.step(action)
         print(
             "s.shape a r s'.shape, done =",
             state.shape,
@@ -403,18 +403,18 @@ def mujoco_wrapper_example():
     # of the Mujoco base_class.
     try:
         from mdp_playground.envs import get_mujoco_wrapper
-        from gym.envs.mujoco.half_cheetah_v3 import HalfCheetahEnv
+        from gymnasium.envs.mujoco.half_cheetah_v3 import HalfCheetahEnv

         wrapped_mujoco_env = get_mujoco_wrapper(HalfCheetahEnv)

         env = wrapped_mujoco_env(**config)
-        state = env.reset()
+        state = env.reset()[0]

         print(
             "Taking a step in the environment with a random action and printing the transition:"
         )
         action = env.action_space.sample()
-        next_state, reward, done, info = env.step(action)
+        next_state, reward, done, trunc, info = env.step(action)
         print("sars', done =", state, action, reward, next_state, done)

         env.close()
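One caveat on the hunk above: the module path `gymnasium.envs.mujoco.half_cheetah_v3` is version-sensitive, and newer Gymnasium releases drop the mujoco-py-based v3 modules, so this direct class import may be among the things still to be fixed. A more portable sketch (an alternative under that assumption, not what this commit does) goes through the registry instead:

    import gymnasium as gym

    # Needs the Mujoco extra, e.g.: pip install "gymnasium[mujoco]"
    env = gym.make("HalfCheetah-v4")
    obs, info = env.reset(seed=0)
    env.close()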
@@ -440,22 +440,22 @@ def minigrid_wrapper_example():
     }

     from mdp_playground.envs.gym_env_wrapper import GymEnvWrapper
-    import gym
+    import gymnasium as gym

-    from gym_minigrid.wrappers import RGBImgPartialObsWrapper, ImgObsWrapper
+    from minigrid.wrappers import RGBImgPartialObsWrapper, ImgObsWrapper

     env = gym.make("MiniGrid-Empty-8x8-v0")
     env = RGBImgPartialObsWrapper(env)  # Get pixel observations
     env = ImgObsWrapper(env)  # Get rid of the 'mission' field

     env = GymEnvWrapper(env, **config)
-    obs = env.reset()  # This now produces an RGB tensor only
+    obs = env.reset()[0]  # This now produces an RGB tensor only

     print(
         "Taking a step in the environment with a random action and printing the transition:"
     )
     action = env.action_space.sample()
-    next_obs, reward, done, info = env.step(action)
+    next_obs, reward, done, trunc, info = env.step(action)
     print(
         "s.shape ar s'.shape, done =",
         obs.shape,
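The import change above tracks the renaming of `gym_minigrid` to `minigrid` under the Farama Foundation; per the commit message, this is the example that is still failing. A standalone sketch of the same wrapper chain, assuming the `minigrid` package is installed (importing it registers the MiniGrid envs with Gymnasium):

    import gymnasium as gym
    from minigrid.wrappers import ImgObsWrapper, RGBImgPartialObsWrapper

    env = gym.make("MiniGrid-Empty-8x8-v0")
    env = RGBImgPartialObsWrapper(env)  # pixel observations instead of symbolic ones
    env = ImgObsWrapper(env)            # keep only the image; drop the 'mission' field
    obs, info = env.reset(seed=0)
    print(obs.shape)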
@@ -481,17 +481,17 @@ def procgen_wrapper_example():
     }

     from mdp_playground.envs.gym_env_wrapper import GymEnvWrapper
-    import gym
+    import gymnasium as gym

     env = gym.make("procgen:procgen-coinrun-v0")
     env = GymEnvWrapper(env, **config)
-    obs = env.reset()
+    obs = env.reset()[0]

     print(
         "Taking a step in the environment with a random action and printing the transition:"
     )
     action = env.action_space.sample()
-    next_obs, reward, done, info = env.step(action)
+    next_obs, reward, done, trunc, info = env.step(action)
     print(
         "s.shape ar s'.shape, done =",
         obs.shape,
@@ -577,7 +577,7 @@

 # Using gym.make() example 1
 import mdp_playground
-import gym
+import gymnasium as gym

 gym.make("RLToy-v0")

@@ -591,6 +591,6 @@
         "maximally_connected": True,
     }
 )
-env.reset()
+env.reset()[0]
 for i in range(10):
     print(env.step(env.action_space.sample()))
4 changes: 2 additions & 2 deletions mdp_playground/__init__.py
@@ -1,4 +1,4 @@
-from gym.envs.registration import register
+from gymnasium.envs.registration import register

 register(
     id="RLToy-v0",
@@ -11,4 +11,4 @@
     max_episode_steps=100,
 )

-__version__ = "0.0.2"
+__version__ = "1.0.0"
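With registration now going through gymnasium, the env is constructed via the usual registry. A sketch under the assumption that importing the package runs the register() call above and that the env follows the 5-tuple step() this commit introduces:

    import gymnasium as gym
    import mdp_playground  # side effect: registers RLToy-v0

    env = gym.make("RLToy-v0")
    obs, info = env.reset()  # reset() now returns (obs, info)
    for _ in range(3):
        obs, reward, done, trunc, info = env.step(env.action_space.sample())
    env.close()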