
Commit 4ee76c1

MAJOR: Version changed to 1.0.0 with breaking API changes: change to using gymnasium instead of gym, with the associated changes to step() and reset() return values, etc.; upgrade numpy and the random number generation. Tests still need to be updated, and the minigrid example in example.py is failing.
RaghuSpaceRajan committed Jun 19, 2024
1 parent 2502532 commit 4ee76c1
Showing 25 changed files with 340 additions and 387 deletions.
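Nearly all of the changes below follow from the two Gymnasium API breaks named in the commit message. A minimal sketch of the new calling convention, assuming standard Gymnasium (>= 0.26) semantics; CartPole-v1 is only an illustrative registered env, not part of this repo:

    import gymnasium as gym

    env = gym.make("CartPole-v1")

    # gym: obs = env.reset()  ->  gymnasium: reset() returns (obs, info)
    obs, info = env.reset(seed=0)

    # gym: (obs, reward, done, info)  ->  gymnasium: a 5-tuple in which 'done'
    # is split into terminated (MDP termination) and truncated (time limit)
    action = env.action_space.sample()
    obs, reward, terminated, truncated, info = env.step(action)
    done = terminated or truncated  # recover the old single flag if needed
    env.close()

This is why the hunks below consistently turn `done, info` into `done, trunc, info` and `env.reset()` into `env.reset()[0]`.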
docs: Source code for mdp_playground.envs.gym_env_wrapper

@@ -602,7 +602,7 @@
     # print("Setting Mujoco self.frame_skip, self._ctrl_cost_weight, self._forward_reward_weight to", self.frame_skip, self._ctrl_cost_weight, self._forward_reward_weight, "corresponding to time_unit in config.")

     def step(self, action):
-        # next_state, reward, done, info = super(GymEnvWrapper, self).step(action)
+        # next_state, reward, done, trunc, info = super(GymEnvWrapper, self).step(action)
         self.total_transitions_episode += 1

         if self.config["state_space_type"] == "discrete" and self.transition_noise > 0.0:

@@ -689,7 +689,7 @@
         int
             The seed returned by Gym
         """
-        # If seed is None, you get a randomly generated seed from gym.utils...
+        # If seed is None, you get a randomly generated seed from gymnasium.utils...
         self.np_random, self.seed_ = gym.utils.seeding.np_random(seed)  # random
         print("Env SEED set to: " + str(seed) + ". Returned seed from Gym: " + str(self.seed_))

@@ -701,8 +701,8 @@

 # from mdp_playground.envs.gym_env_wrapper import get_gym_wrapper
-# from gym.envs.atari import AtariEnv
-# from gym.wrappers import AtariPreprocessing
+# from gymnasium.envs.atari import AtariEnv
+# from gymnasium.wrappers import AtariPreprocessing
 # AtariPreprocessing()
 # AtariEnvWrapper = get_gym_wrapper(AtariEnv)
 # from ray.tune.registry import register_env

@@ -711,7 +711,7 @@
 # ob = aew.reset()

 # from mdp_playground.envs.gym_env_wrapper import GymEnvWrapper
-# from gym.envs.atari import AtariEnv
+# from gymnasium.envs.atari import AtariEnv
 # ae = AtariEnv(**{'game': 'beam_rider', 'obs_type': 'image', 'frameskip': 1})
 # aew = GymEnvWrapper(ae, **{'reward_noise': lambda a: a.normal(0, 0.1), 'transition_noise': 0.1, 'delay': 1, 'frame_skip': 4, "atari_preprocessing": True, "state_space_type": "discrete", 'seed': 0})
 # ob = aew.reset()

@@ -720,7 +720,7 @@
 # total_reward = 0.0
 # for i in range(200):
 #     act = aew.action_space.sample()
-#     next_state, reward, done, info = aew.step(act)
+#     next_state, reward, done, trunc, info = aew.step(act)
 #     print(reward, done, act)
 #     if reward > 10:
 #         print("reward in step:", i, reward)
docs: Source code for mdp_playground.envs.mujoco_env_wrapper

@@ -438,7 +438,7 @@
-# from gym.envs.mujoco.mujoco_env import MujocoEnv
+# from gymnasium.envs.mujoco.mujoco_env import MujocoEnv
 from gym.envs.mujoco.half_cheetah_v3 import HalfCheetahEnv
 from gym.envs.mujoco.pusher import PusherEnv
 from gym.envs.mujoco.reacher import ReacherEnv

@@ -516,7 +516,7 @@
 # from mdp_playground.envs.mujoco_env_wrapper import get_mujoco_wrapper #hack
 #
-# from gym.envs.mujoco.reacher import ReacherEnv
+# from gymnasium.envs.mujoco.reacher import ReacherEnv
 # ReacherWrapperV2 = get_mujoco_wrapper(ReacherEnv)
 # config = {"time_unit": 0.2}
 # rw2 = ReacherWrapperV2(**config)
docs: Source code for mdp_playground.envs.rl_toy_env

@@ -1967,7 +1967,7 @@
         int
             The seed returned by Gym
         """
-        # If seed is None, you get a randomly generated seed from gym.utils...
+        # If seed is None, you get a randomly generated seed from gymnasium.utils...
         self.np_random, self.seed_ = gym.utils.seeding.np_random(seed)  # random
         print("Env SEED set to: " + str(seed) + ". Returned seed from Gym: " + str(self.seed_))
         return self.seed_
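Both wrappers seed through the same helper, which also covers the "random number generation" upgrade in the commit message: Gymnasium's seeding utility hands back a numpy `Generator` rather than the legacy `RandomState`. A short standalone sketch of its contract:

    from gymnasium.utils import seeding

    # With seed=None a seed is drawn from OS entropy; with an int, that seed
    # is used. The seed actually used is returned alongside the Generator,
    # which is why the wrappers store it as self.seed_ above.
    np_random, seed_ = seeding.np_random(None)
    print(seed_, np_random.integers(10))  # Generator API: integers(), not randint()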
docs: Source code for mdp_playground.spaces.test_image_multi_discrete

@@ -442,8 +442,8 @@
 import numpy as np
 from mdp_playground.spaces.image_multi_discrete import ImageMultiDiscrete
 from gym.spaces import Discrete, MultiDiscrete
-# import gym
-# from gym.spaces import MultiDiscrete
+# import gymnasium as gym
+# from gymnasium.spaces import MultiDiscrete
 # # from .space import Space
 # import PIL.ImageDraw as ImageDraw
 # import PIL.Image as Image
58 changes: 29 additions & 29 deletions example.py
@@ -59,7 +59,7 @@ def discrete_environment_example():
     config["repeats_in_sequences"] = False

     config["generate_random_mdp"] = True
-    env = RLToyEnv(**config)  # Calls env.reset() automatically. So, in general,
+    env = RLToyEnv(**config)  # Calls env.reset()[0] automatically. So, in general,
     # there is no need to call it after this.

     # The environment maintains an augmented state which contains the underlying
@@ -73,7 +73,7 @@ def discrete_environment_example():
         "the transition:"
     )
     action = env.action_space.sample()
-    next_state, reward, done, info = env.step(action)
+    next_state, reward, done, trunc, info = env.step(action)
     print("sars', done =", state, action, reward, next_state, done)

     env.close()
@@ -113,7 +113,7 @@ def discrete_environment_image_representations_example():
         "the transition:"
     )
     action = env.action_space.sample()
-    next_state_image, reward, done, info = env.step(action)
+    next_state_image, reward, done, trunc, info = env.step(action)
     augmented_state_dict = env.get_augmented_state()
     next_state = augmented_state_dict["curr_state"]  # Underlying MDP state holds
     # the current discrete state.
@@ -159,7 +159,7 @@ def discrete_environment_diameter_image_representations_example():
         "the transition:"
     )
     action = env.action_space.sample()
-    next_state_image, reward, done, info = env.step(action)
+    next_state_image, reward, done, trunc, info = env.step(action)
     augmented_state_dict = env.get_augmented_state()
     next_state = augmented_state_dict["curr_state"]  # Underlying MDP state holds
     # the current discrete state.
@@ -192,14 +192,14 @@ def continuous_environment_example_move_to_a_point():
     config["reward_function"] = "move_to_a_point"

     env = RLToyEnv(**config)
-    state = env.reset().copy()
+    state = env.reset()[0].copy()

     print(
         "Taking a step in the environment with a random action and printing "
         "the transition:"
     )
     action = env.action_space.sample()
-    next_state, reward, done, info = env.step(action)
+    next_state, reward, done, trunc, info = env.step(action)
     print("sars', done =", state, action, reward, next_state, done)

     env.close()
@@ -231,7 +231,7 @@ def continuous_environment_example_move_to_a_point_irrelevant_image():
     config["relevant_indices"] = [0, 1]

     env = RLToyEnv(**config)
-    state = env.reset()
+    state = env.reset()[0]
     augmented_state_dict = env.get_augmented_state()
     state = augmented_state_dict["curr_state"].copy()  # Underlying MDP state holds
     # the current continuous state.
@@ -241,7 +241,7 @@ def continuous_environment_example_move_to_a_point_irrelevant_image():
         "the transition:"
     )
     action = env.action_space.sample()
-    next_state_image, reward, done, info = env.step(action)
+    next_state_image, reward, done, trunc, info = env.step(action)
     augmented_state_dict = env.get_augmented_state()
     next_state = augmented_state_dict["curr_state"].copy()  # Underlying MDP state holds
     # the current continuous state.
@@ -274,14 +274,14 @@ def continuous_environment_example_move_along_a_line():
     config["reward_function"] = "move_along_a_line"

     env = RLToyEnv(**config)
-    state = env.reset().copy()
+    state = env.reset()[0].copy()

     print(
         "Taking a step in the environment with a random action and printing "
         "the transition:"
     )
     action = env.action_space.sample()
-    next_state, reward, done, info = env.step(action)
+    next_state, reward, done, trunc, info = env.step(action)
     print("sars', done =", state, action, reward, next_state, done)

     env.close()
@@ -305,12 +305,12 @@ def grid_environment_example():

     for i in range(len(actions)):
         action = actions[i]
-        next_obs, reward, done, info = env.step(action)
+        next_obs, reward, done, trunc, info = env.step(action)
         next_state = env.get_augmented_state()["augmented_state"][-1]
         print("sars', done =", state, action, reward, next_state, done)
         state = next_state

-    env.reset()
+    env.reset()[0]
     env.close()


@@ -334,12 +334,12 @@ def grid_environment_image_representations_example():

     for i in range(len(actions)):
         action = actions[i]
-        next_obs, reward, done, info = env.step(action)
+        next_obs, reward, done, trunc, info = env.step(action)
         next_state = env.get_augmented_state()["augmented_state"][-1]
         print("sars', done =", state, action, reward, next_state, done)
         state = next_state

-    env.reset()
+    env.reset()[0]
     env.close()

     display_image(next_obs)
@@ -356,18 +356,18 @@ def atari_wrapper_example():
     }

     from mdp_playground.envs import GymEnvWrapper
-    import gym
+    import gymnasium as gym

     ae = gym.make("QbertNoFrameskip-v4")
     env = GymEnvWrapper(ae, **config)
-    state = env.reset()
+    state = env.reset()[0]

     print(
         "Taking 10 steps in the environment with a random action and printing the transition:"
     )
     for i in range(10):
         action = env.action_space.sample()
-        next_state, reward, done, info = env.step(action)
+        next_state, reward, done, trunc, info = env.step(action)
         print(
             "s.shape a r s'.shape, done =",
             state.shape,
@@ -403,18 +403,18 @@ def mujoco_wrapper_example():
     # of the Mujoco base_class.
     try:
         from mdp_playground.envs import get_mujoco_wrapper
-        from gym.envs.mujoco.half_cheetah_v3 import HalfCheetahEnv
+        from gymnasium.envs.mujoco.half_cheetah_v3 import HalfCheetahEnv

         wrapped_mujoco_env = get_mujoco_wrapper(HalfCheetahEnv)

         env = wrapped_mujoco_env(**config)
-        state = env.reset()
+        state = env.reset()[0]

         print(
             "Taking a step in the environment with a random action and printing the transition:"
         )
         action = env.action_space.sample()
-        next_state, reward, done, info = env.step(action)
+        next_state, reward, done, trunc, info = env.step(action)
         print("sars', done =", state, action, reward, next_state, done)

         env.close()
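One caveat on the hunk above: the module path `gymnasium.envs.mujoco.half_cheetah_v3` is version-sensitive, and newer Gymnasium releases drop the mujoco-py-based v3 modules, so this direct class import may be among the things still to be fixed. A more portable sketch (an alternative under that assumption, not what this commit does) goes through the registry instead:

    import gymnasium as gym

    # Needs the Mujoco extra, e.g.: pip install "gymnasium[mujoco]"
    env = gym.make("HalfCheetah-v4")
    obs, info = env.reset(seed=0)
    env.close()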
@@ -440,22 +440,22 @@ def minigrid_wrapper_example():
     }

     from mdp_playground.envs.gym_env_wrapper import GymEnvWrapper
-    import gym
+    import gymnasium as gym

-    from gym_minigrid.wrappers import RGBImgPartialObsWrapper, ImgObsWrapper
+    from minigrid.wrappers import RGBImgPartialObsWrapper, ImgObsWrapper

     env = gym.make("MiniGrid-Empty-8x8-v0")
     env = RGBImgPartialObsWrapper(env)  # Get pixel observations
     env = ImgObsWrapper(env)  # Get rid of the 'mission' field

     env = GymEnvWrapper(env, **config)
-    obs = env.reset()  # This now produces an RGB tensor only
+    obs = env.reset()[0]  # This now produces an RGB tensor only

     print(
         "Taking a step in the environment with a random action and printing the transition:"
     )
     action = env.action_space.sample()
-    next_obs, reward, done, info = env.step(action)
+    next_obs, reward, done, trunc, info = env.step(action)
     print(
         "s.shape ar s'.shape, done =",
         obs.shape,
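The import change above tracks the renaming of `gym_minigrid` to `minigrid` under the Farama Foundation; per the commit message, this is the example that is still failing. A standalone sketch of the same wrapper chain, assuming the `minigrid` package is installed (importing it registers the MiniGrid envs with Gymnasium):

    import gymnasium as gym
    from minigrid.wrappers import ImgObsWrapper, RGBImgPartialObsWrapper

    env = gym.make("MiniGrid-Empty-8x8-v0")
    env = RGBImgPartialObsWrapper(env)  # pixel observations instead of symbolic ones
    env = ImgObsWrapper(env)            # keep only the image; drop the 'mission' field
    obs, info = env.reset(seed=0)
    print(obs.shape)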
@@ -481,17 +481,17 @@ def procgen_wrapper_example():
     }

     from mdp_playground.envs.gym_env_wrapper import GymEnvWrapper
-    import gym
+    import gymnasium as gym

     env = gym.make("procgen:procgen-coinrun-v0")
     env = GymEnvWrapper(env, **config)
-    obs = env.reset()
+    obs = env.reset()[0]

     print(
         "Taking a step in the environment with a random action and printing the transition:"
     )
     action = env.action_space.sample()
-    next_obs, reward, done, info = env.step(action)
+    next_obs, reward, done, trunc, info = env.step(action)
     print(
         "s.shape ar s'.shape, done =",
         obs.shape,
@@ -577,7 +577,7 @@

 # Using gym.make() example 1
 import mdp_playground
-import gym
+import gymnasium as gym

 gym.make("RLToy-v0")

@@ -591,6 +591,6 @@
         "maximally_connected": True,
     }
 )
-env.reset()
+env.reset()[0]
 for i in range(10):
     print(env.step(env.action_space.sample()))
4 changes: 2 additions & 2 deletions mdp_playground/__init__.py
@@ -1,4 +1,4 @@
-from gym.envs.registration import register
+from gymnasium.envs.registration import register

 register(
     id="RLToy-v0",
@@ -11,4 +11,4 @@
     max_episode_steps=100,
 )

-__version__ = "0.0.2"
+__version__ = "1.0.0"
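With registration now going through gymnasium, the env is constructed via the usual registry. A sketch under the assumption that importing the package runs the register() call above and that the env follows the 5-tuple step() this commit introduces:

    import gymnasium as gym
    import mdp_playground  # side effect: registers RLToy-v0

    env = gym.make("RLToy-v0")
    obs, info = env.reset()  # reset() now returns (obs, info)
    for _ in range(3):
        obs, reward, done, trunc, info = env.step(env.action_space.sample())
    env.close()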