From 4ee76c177b70c823fd7f79537bfa799edc73e338 Mon Sep 17 00:00:00 2001
From: Raghu Rajan
Date: Wed, 19 Jun 2024 12:43:05 +0200
Subject: [PATCH] MAJOR: Version changed to 1.0.0 with breaking API changes:
 switch to gymnasium instead of gym, with the associated changes to the
 step() and reset() return values, etc.; upgrade numpy and random number
 generation; tests still need to be updated; and the minigrid example in
 example.py is failing.

---
 .../mdp_playground/envs/gym_env_wrapper.html  |  12 +-
 .../envs/mujoco_env_wrapper.html              |   4 +-
 .../mdp_playground/envs/rl_toy_env.html       |   2 +-
 .../spaces/test_image_multi_discrete.html     |   4 +-
 example.py                                    |  58 ++--
 mdp_playground/__init__.py                    |   4 +-
 .../config_processor/config_processor.py      |  14 +-
 mdp_playground/envs/__init__.py               |   2 +-
 mdp_playground/envs/gym_env_wrapper.py        |  66 +++--
 mdp_playground/envs/mujoco_env_wrapper.py     |  14 +-
 mdp_playground/envs/rl_toy_env.py             |  46 +--
 mdp_playground/spaces/box_extended.py         |   4 +-
 mdp_playground/spaces/discrete_extended.py    |   4 +-
 mdp_playground/spaces/grid_action_space.py    |   6 +-
 mdp_playground/spaces/image_continuous.py     |   4 +-
 mdp_playground/spaces/image_multi_discrete.py |  14 +-
 .../spaces/multi_discrete_extended.py         |   4 +-
 .../spaces/test_image_continuous.py           |   2 +-
 .../spaces/test_image_multi_discrete.py       |   6 +-
 mdp_playground/spaces/tuple_extended.py       |   4 +-
 setup.py                                      |   4 +-
 tests/test_gym_env_wrapper.py                 | 177 ++++--------
 tests/test_mdp_playground.py                  | 268 +++++++++---------
 tests/test_run_experiments.py                 |   2 +-
 tests/test_version.py                         |   2 +-
 25 files changed, 340 insertions(+), 387 deletions(-)

diff --git a/docs/_build/html/_modules/mdp_playground/envs/gym_env_wrapper.html b/docs/_build/html/_modules/mdp_playground/envs/gym_env_wrapper.html
index a11160e..916f361 100644
--- a/docs/_build/html/_modules/mdp_playground/envs/gym_env_wrapper.html
+++ b/docs/_build/html/_modules/mdp_playground/envs/gym_env_wrapper.html
@@ -602,7 +602,7 @@

# print("Setting Mujoco self.frame_skip, self._ctrl_cost_weight, self._forward_reward_weight to", self.frame_skip, self._ctrl_cost_weight, self._forward_reward_weight, "corresponding to time_unit in config.")
[docs] def step(self, action): - # next_state, reward, done, info = super(GymEnvWrapper, self).step(action) + # next_state, reward, done, trunc, info = super(GymEnvWrapper, self).step(action) self.total_transitions_episode += 1 if self.config["state_space_type"] == "discrete" and self.transition_noise > 0.0: @@ -689,7 +689,7 @@

         int
             The seed returned by Gym
         """
-        # If seed is None, you get a randomly generated seed from gym.utils...
+        # If seed is None, you get a randomly generated seed from gymnasium.utils...
         self.np_random, self.seed_ = gym.utils.seeding.np_random(seed) #random
         print("Env SEED set to: " + str(seed) + ". Returned seed from Gym: " + str(self.seed_))
@@ -701,8 +701,8 @@

 # from mdp_playground.envs.gym_env_wrapper import get_gym_wrapper
-# from gym.envs.atari import AtariEnv
-# from gym.wrappers import AtariPreprocessing
+# from gymnasium.envs.atari import AtariEnv
+# from gymnasium.wrappers import AtariPreprocessing
 # AtariPreprocessing()
 # AtariEnvWrapper = get_gym_wrapper(AtariEnv)
 # from ray.tune.registry import register_env
@@ -711,7 +711,7 @@

 # ob = aew.reset()
 # from mdp_playground.envs.gym_env_wrapper import GymEnvWrapper
-# from gym.envs.atari import AtariEnv
+# from gymnasium.envs.atari import AtariEnv
 # ae = AtariEnv(**{'game': 'beam_rider', 'obs_type': 'image', 'frameskip': 1})
 # aew = GymEnvWrapper(ae, **{'reward_noise': lambda a: a.normal(0, 0.1), 'transition_noise': 0.1, 'delay': 1, 'frame_skip': 4, "atari_preprocessing": True, "state_space_type": "discrete", 'seed': 0})
 # ob = aew.reset()
@@ -720,7 +720,7 @@

 # total_reward = 0.0
 # for i in range(200):
 # act = aew.action_space.sample()
-# next_state, reward, done, info = aew.step(act)
+# next_state, reward, done, trunc, info = aew.step(act)
 # print(reward, done, act)
 # if reward > 10:
 # print("reward in step:", i, reward)
diff --git a/docs/_build/html/_modules/mdp_playground/envs/mujoco_env_wrapper.html b/docs/_build/html/_modules/mdp_playground/envs/mujoco_env_wrapper.html
index dd9a434..9d5fb80 100644
--- a/docs/_build/html/_modules/mdp_playground/envs/mujoco_env_wrapper.html
+++ b/docs/_build/html/_modules/mdp_playground/envs/mujoco_env_wrapper.html
@@ -438,7 +438,7 @@

-# from gym.envs.mujoco.mujoco_env import MujocoEnv
+# from gymnasium.envs.mujoco.mujoco_env import MujocoEnv
 from gym.envs.mujoco.half_cheetah_v3 import HalfCheetahEnv
 from gym.envs.mujoco.pusher import PusherEnv
 from gym.envs.mujoco.reacher import ReacherEnv
@@ -516,7 +516,7 @@ 

 # from mdp_playground.envs.mujoco_env_wrapper import get_mujoco_wrapper #hack #
-# from gym.envs.mujoco.reacher import ReacherEnv
+# from gymnasium.envs.mujoco.reacher import ReacherEnv
 # ReacherWrapperV2 = get_mujoco_wrapper(ReacherEnv)
 # config = {"time_unit": 0.2}
 # rw2 = ReacherWrapperV2(**config)
diff --git a/docs/_build/html/_modules/mdp_playground/envs/rl_toy_env.html b/docs/_build/html/_modules/mdp_playground/envs/rl_toy_env.html
index 98d7c9f..5aa8b15 100644
--- a/docs/_build/html/_modules/mdp_playground/envs/rl_toy_env.html
+++ b/docs/_build/html/_modules/mdp_playground/envs/rl_toy_env.html
@@ -1967,7 +1967,7 @@

         int
             The seed returned by Gym
         """
-        # If seed is None, you get a randomly generated seed from gym.utils...
+        # If seed is None, you get a randomly generated seed from gymnasium.utils...
         self.np_random, self.seed_ = gym.utils.seeding.np_random(seed) #random
         print("Env SEED set to: " + str(seed) + ". Returned seed from Gym: " + str(self.seed_))
         return self.seed_
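The seeding changes throughout this patch follow one pattern: gymnasium's gym.utils.seeding.np_random(seed) returns a numpy.random.Generator, which has no randint() method, so the legacy RandomState-style randint(...) calls become integers(...), with .item() used where a plain Python int is needed. A minimal sketch of the pattern (illustrative only, not part of the patch):

    import sys
    import gymnasium as gym

    # np_random is a numpy.random.Generator; seed_ is the entropy actually used
    np_random, seed_ = gym.utils.seeding.np_random(0)

    # Old gym/RandomState style (not available on a Generator):
    #   space_seed = np_random.randint(sys.maxsize)
    # New Generator style, as used throughout this patch:
    space_seed = np_random.integers(sys.maxsize).item()
    print(type(np_random).__name__, seed_, space_seed)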

diff --git a/docs/_build/html/_modules/mdp_playground/spaces/test_image_multi_discrete.html b/docs/_build/html/_modules/mdp_playground/spaces/test_image_multi_discrete.html
index e19c1eb..07851ce 100644
--- a/docs/_build/html/_modules/mdp_playground/spaces/test_image_multi_discrete.html
+++ b/docs/_build/html/_modules/mdp_playground/spaces/test_image_multi_discrete.html
@@ -442,8 +442,8 @@

import numpy as np from mdp_playground.spaces.image_multi_discrete import ImageMultiDiscrete from gym.spaces import Discrete, MultiDiscrete -# import gym -# from gym.spaces import MultiDiscrete +# import gymnasium as gym +# from gymnasium.spaces import MultiDiscrete # # from .space import Space # import PIL.ImageDraw as ImageDraw # import PIL.Image as Image diff --git a/example.py b/example.py index 0c140d4..dc3bb01 100644 --- a/example.py +++ b/example.py @@ -59,7 +59,7 @@ def discrete_environment_example(): config["repeats_in_sequences"] = False config["generate_random_mdp"] = True - env = RLToyEnv(**config) # Calls env.reset() automatically. So, in general, + env = RLToyEnv(**config) # Calls env.reset()[0] automatically. So, in general, # there is no need to call it after this. # The environment maintains an augmented state which contains the underlying @@ -73,7 +73,7 @@ def discrete_environment_example(): "the transition:" ) action = env.action_space.sample() - next_state, reward, done, info = env.step(action) + next_state, reward, done, trunc, info = env.step(action) print("sars', done =", state, action, reward, next_state, done) env.close() @@ -113,7 +113,7 @@ def discrete_environment_image_representations_example(): "the transition:" ) action = env.action_space.sample() - next_state_image, reward, done, info = env.step(action) + next_state_image, reward, done, trunc, info = env.step(action) augmented_state_dict = env.get_augmented_state() next_state = augmented_state_dict["curr_state"] # Underlying MDP state holds # the current discrete state. @@ -159,7 +159,7 @@ def discrete_environment_diameter_image_representations_example(): "the transition:" ) action = env.action_space.sample() - next_state_image, reward, done, info = env.step(action) + next_state_image, reward, done, trunc, info = env.step(action) augmented_state_dict = env.get_augmented_state() next_state = augmented_state_dict["curr_state"] # Underlying MDP state holds # the current discrete state. @@ -192,14 +192,14 @@ def continuous_environment_example_move_to_a_point(): config["reward_function"] = "move_to_a_point" env = RLToyEnv(**config) - state = env.reset().copy() + state = env.reset()[0].copy() print( "Taking a step in the environment with a random action and printing " "the transition:" ) action = env.action_space.sample() - next_state, reward, done, info = env.step(action) + next_state, reward, done, trunc, info = env.step(action) print("sars', done =", state, action, reward, next_state, done) env.close() @@ -231,7 +231,7 @@ def continuous_environment_example_move_to_a_point_irrelevant_image(): config["relevant_indices"] = [0, 1] env = RLToyEnv(**config) - state = env.reset() + state = env.reset()[0] augmented_state_dict = env.get_augmented_state() state = augmented_state_dict["curr_state"].copy() # Underlying MDP state holds # the current continuous state. @@ -241,7 +241,7 @@ def continuous_environment_example_move_to_a_point_irrelevant_image(): "the transition:" ) action = env.action_space.sample() - next_state_image, reward, done, info = env.step(action) + next_state_image, reward, done, trunc, info = env.step(action) augmented_state_dict = env.get_augmented_state() next_state = augmented_state_dict["curr_state"].copy() # Underlying MDP state holds # the current continuous state. 
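The example.py changes above all follow from the Gymnasium API: reset() now returns (observation, info) and step() returns a 5-tuple with separate terminated and truncated flags, which the patch unpacks as done, trunc. A minimal sketch of the new calling convention (CartPole-v1 is just a placeholder environment here, not one used by this patch):

    import gymnasium as gym

    env = gym.make("CartPole-v1")
    obs, info = env.reset(seed=0)   # was: obs = env.reset()
    for _ in range(5):
        action = env.action_space.sample()
        # was: obs, reward, done, info = env.step(action)
        obs, reward, done, trunc, info = env.step(action)
        if done or trunc:
            obs, info = env.reset()
    env.close()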
@@ -274,14 +274,14 @@ def continuous_environment_example_move_along_a_line(): config["reward_function"] = "move_along_a_line" env = RLToyEnv(**config) - state = env.reset().copy() + state = env.reset()[0].copy() print( "Taking a step in the environment with a random action and printing " "the transition:" ) action = env.action_space.sample() - next_state, reward, done, info = env.step(action) + next_state, reward, done, trunc, info = env.step(action) print("sars', done =", state, action, reward, next_state, done) env.close() @@ -305,12 +305,12 @@ def grid_environment_example(): for i in range(len(actions)): action = actions[i] - next_obs, reward, done, info = env.step(action) + next_obs, reward, done, trunc, info = env.step(action) next_state = env.get_augmented_state()["augmented_state"][-1] print("sars', done =", state, action, reward, next_state, done) state = next_state - env.reset() + env.reset()[0] env.close() @@ -334,12 +334,12 @@ def grid_environment_image_representations_example(): for i in range(len(actions)): action = actions[i] - next_obs, reward, done, info = env.step(action) + next_obs, reward, done, trunc, info = env.step(action) next_state = env.get_augmented_state()["augmented_state"][-1] print("sars', done =", state, action, reward, next_state, done) state = next_state - env.reset() + env.reset()[0] env.close() display_image(next_obs) @@ -356,18 +356,18 @@ def atari_wrapper_example(): } from mdp_playground.envs import GymEnvWrapper - import gym + import gymnasium as gym ae = gym.make("QbertNoFrameskip-v4") env = GymEnvWrapper(ae, **config) - state = env.reset() + state = env.reset()[0] print( "Taking 10 steps in the environment with a random action and printing the transition:" ) for i in range(10): action = env.action_space.sample() - next_state, reward, done, info = env.step(action) + next_state, reward, done, trunc, info = env.step(action) print( "s.shape a r s'.shape, done =", state.shape, @@ -403,18 +403,18 @@ def mujoco_wrapper_example(): # of the Mujoco base_class. 
try: from mdp_playground.envs import get_mujoco_wrapper - from gym.envs.mujoco.half_cheetah_v3 import HalfCheetahEnv + from gymnasium.envs.mujoco.half_cheetah_v3 import HalfCheetahEnv wrapped_mujoco_env = get_mujoco_wrapper(HalfCheetahEnv) env = wrapped_mujoco_env(**config) - state = env.reset() + state = env.reset()[0] print( "Taking a step in the environment with a random action and printing the transition:" ) action = env.action_space.sample() - next_state, reward, done, info = env.step(action) + next_state, reward, done, trunc, info = env.step(action) print("sars', done =", state, action, reward, next_state, done) env.close() @@ -440,22 +440,22 @@ def minigrid_wrapper_example(): } from mdp_playground.envs.gym_env_wrapper import GymEnvWrapper - import gym + import gymnasium as gym - from gym_minigrid.wrappers import RGBImgPartialObsWrapper, ImgObsWrapper + from minigrid.wrappers import RGBImgPartialObsWrapper, ImgObsWrapper env = gym.make("MiniGrid-Empty-8x8-v0") env = RGBImgPartialObsWrapper(env) # Get pixel observations env = ImgObsWrapper(env) # Get rid of the 'mission' field env = GymEnvWrapper(env, **config) - obs = env.reset() # This now produces an RGB tensor only + obs = env.reset()[0] # This now produces an RGB tensor only print( "Taking a step in the environment with a random action and printing the transition:" ) action = env.action_space.sample() - next_obs, reward, done, info = env.step(action) + next_obs, reward, done, trunc, info = env.step(action) print( "s.shape ar s'.shape, done =", obs.shape, @@ -481,17 +481,17 @@ def procgen_wrapper_example(): } from mdp_playground.envs.gym_env_wrapper import GymEnvWrapper - import gym + import gymnasium as gym env = gym.make("procgen:procgen-coinrun-v0") env = GymEnvWrapper(env, **config) - obs = env.reset() + obs = env.reset()[0] print( "Taking a step in the environment with a random action and printing the transition:" ) action = env.action_space.sample() - next_obs, reward, done, info = env.step(action) + next_obs, reward, done, trunc, info = env.step(action) print( "s.shape ar s'.shape, done =", obs.shape, @@ -577,7 +577,7 @@ def procgen_wrapper_example(): # Using gym.make() example 1 import mdp_playground - import gym + import gymnasium as gym gym.make("RLToy-v0") @@ -591,6 +591,6 @@ def procgen_wrapper_example(): "maximally_connected": True, } ) - env.reset() + env.reset()[0] for i in range(10): print(env.step(env.action_space.sample())) diff --git a/mdp_playground/__init__.py b/mdp_playground/__init__.py index 76d4bbe..63e6414 100644 --- a/mdp_playground/__init__.py +++ b/mdp_playground/__init__.py @@ -1,4 +1,4 @@ -from gym.envs.registration import register +from gymnasium.envs.registration import register register( id="RLToy-v0", @@ -11,4 +11,4 @@ max_episode_steps=100, ) -__version__ = "0.0.2" +__version__ = "1.0.0" diff --git a/mdp_playground/config_processor/config_processor.py b/mdp_playground/config_processor/config_processor.py index 376f1e5..ea10434 100644 --- a/mdp_playground/config_processor/config_processor.py +++ b/mdp_playground/config_processor/config_processor.py @@ -786,7 +786,7 @@ def combined_processing(*static_configs, varying_configs, framework="ray", algor from mdp_playground.envs.mujoco_env_wrapper import ( get_mujoco_wrapper, ) # hack - from gym.envs.mujoco.half_cheetah_v3 import HalfCheetahEnv + from gymnasium.envs.mujoco.half_cheetah_v3 import HalfCheetahEnv wrapped_mujoco_env = get_mujoco_wrapper(HalfCheetahEnv) register_env( @@ -802,7 +802,7 @@ def combined_processing(*static_configs, 
varying_configs, framework="ray", algor from mdp_playground.envs.mujoco_env_wrapper import ( get_mujoco_wrapper, ) # hack - from gym.envs.mujoco.hopper_v3 import HopperEnv + from gymnasium.envs.mujoco.hopper_v3 import HopperEnv wrapped_mujoco_env = get_mujoco_wrapper(HopperEnv) register_env( @@ -818,7 +818,7 @@ def combined_processing(*static_configs, varying_configs, framework="ray", algor from mdp_playground.envs.mujoco_env_wrapper import ( get_mujoco_wrapper, ) # hack - from gym.envs.mujoco.pusher import PusherEnv + from gymnasium.envs.mujoco.pusher import PusherEnv wrapped_mujoco_env = get_mujoco_wrapper(PusherEnv) register_env( @@ -834,7 +834,7 @@ def combined_processing(*static_configs, varying_configs, framework="ray", algor from mdp_playground.envs.mujoco_env_wrapper import ( get_mujoco_wrapper, ) # hack - from gym.envs.mujoco.reacher import ReacherEnv + from gymnasium.envs.mujoco.reacher import ReacherEnv wrapped_mujoco_env = get_mujoco_wrapper(ReacherEnv) register_env( @@ -993,7 +993,7 @@ def post_processing(framework="ray"): def create_gym_env_wrapper_atari(config): - from gym.envs.atari import AtariEnv + from gymnasium.envs.atari import AtariEnv from mdp_playground.envs.gym_env_wrapper import GymEnvWrapper ae = AtariEnv(**config["AtariEnv"]) @@ -1008,9 +1008,9 @@ def create_gym_env_wrapper_frame_stack_atari(config): # hack ###TODO remove? """When using frameStack GymEnvWrapper should wrap AtariEnv using wrap_deepmind_ray and therefore this function sets "wrap_deepmind_ray": True and 'frame_skip': 1 inside config so as to keep config same as for create_gym_env_wrapper_atari above and reduce manual errors when switching between the 2.""" config["wrap_deepmind_ray"] = True # hack config["frame_skip"] = 1 # hack - from gym.envs.atari import AtariEnv + from gymnasium.envs.atari import AtariEnv from mdp_playground.envs.gym_env_wrapper import GymEnvWrapper - import gym + import gymnasium as gym game = config["AtariEnv"]["game"] game = "".join([g.capitalize() for g in game.split("_")]) diff --git a/mdp_playground/envs/__init__.py b/mdp_playground/envs/__init__.py index 42ec031..b393fad 100644 --- a/mdp_playground/envs/__init__.py +++ b/mdp_playground/envs/__init__.py @@ -1,5 +1,5 @@ from mdp_playground.envs.rl_toy_env import RLToyEnv -from gym import error +from gymnasium import error try: from mdp_playground.envs.gym_env_wrapper import GymEnvWrapper diff --git a/mdp_playground/envs/gym_env_wrapper.py b/mdp_playground/envs/gym_env_wrapper.py index 4f8e521..885e19b 100644 --- a/mdp_playground/envs/gym_env_wrapper.py +++ b/mdp_playground/envs/gym_env_wrapper.py @@ -1,9 +1,9 @@ -import gym +import gymnasium as gym import copy import numpy as np import sys -from gym.spaces import Box, Tuple -from gym.wrappers import AtariPreprocessing +from gymnasium.spaces import Box, Tuple +from gymnasium.wrappers import AtariPreprocessing from mdp_playground.envs.rl_toy_env import RLToyEnv import warnings import PIL.ImageDraw as ImageDraw @@ -31,8 +31,12 @@ class GymEnvWrapper(gym.Env): """ - # Should not be a gym.Wrapper because 1) gym.Wrapper has member variables observation_space and action_space while here with irrelevant_features we would have multiple observation_spaces and this could cause conflict with code that assumes any subclass of gym.Wrapper should have these member variables. 
- # However, it _should_ be at least a gym.Env + # Should not be a gym.Wrapper because 1) gym.Wrapper has member variables + # observation_space and action_space while here with irrelevant_features + # we would have multiple observation_spaces and this could cause conflict + # with code that assumes any subclass of gym.Wrapper should have these member + # variables. However, it _should_ be at least a gym.Env. + # Following comment based on the old get_gym_wrapper(base_class) code: # Does it need to be a subclass of base_class because some external code # may check if it's an AtariEnv, for instance, and do further stuff based # on that? @@ -46,16 +50,16 @@ def __init__(self, env, **config): if "seed" in config: seed_int = config["seed"] - self.seed(seed_int) # seed + self.seed(seed_int) # #seed # IMP Move below code from here to seed()? Because if seed is called # during the run of an env, the expectation is that all obs., act. space, - # etc. seeds are set? Only Atari in Gym seems to do something similar, the - # others I saw there don't seem to set seed for obs., act. spaces. + # etc. seeds are set during that call? Only Atari in Gym seems to do something + # similar, the others I saw there don't seem to set seed for obs., act. spaces. self.env.seed( seed_int - ) # seed ###IMP Apparently Atari also has a seed. :/ Without this, for beam_rider(?), about 1 in 5 times I got reward of 88.0 and 44.0 the remaining times with the same action sequence!! With setting this seed, I got the same reward of 44.0 when I ran about 20 times.; ##TODO If this is really a wrapper, should it be modifying the seed of the env? - obs_space_seed = self.np_random.randint(sys.maxsize) # random - act_space_seed = self.np_random.randint(sys.maxsize) # random + ) # #seed ###IMP Apparently Atari also has a seed. :/ Without this, for beam_rider(?), about 1 in 5 times I got reward of 88.0 and 44.0 the remaining times with the same action sequence!! With setting this seed, I got the same reward of 44.0 when I ran about 20 times.; ##TODO If this is really a wrapper, should it be modifying the seed of the env? 
+ obs_space_seed = self.np_random.integers(sys.maxsize).item() # random + act_space_seed = self.np_random.integers(sys.maxsize).item() # random self.env.observation_space.seed(obs_space_seed) # seed self.env.action_space.seed(act_space_seed) # seed @@ -203,7 +207,7 @@ def __init__(self, env, **config): # self.irrelevant_features = config["irrelevant_features"] irr_toy_env_conf = config["irrelevant_features"] if "seed" not in irr_toy_env_conf: - irr_toy_env_conf["seed"] = self.np_random.randint(sys.maxsize) # random + irr_toy_env_conf["seed"] = self.np_random.integers(sys.maxsize).item() # random if config["state_space_type"] == "discrete": pass @@ -323,7 +327,7 @@ def __init__(self, env, **config): # print("Setting Mujoco self.frame_skip, self._ctrl_cost_weight, self._forward_reward_weight to", self.frame_skip, self._ctrl_cost_weight, self._forward_reward_weight, "corresponding to time_unit in config.") def step(self, action): - # next_state, reward, done, info = super(GymEnvWrapper, self).step(action) + # next_state, reward, done, trunc, info = super(GymEnvWrapper, self).step(action) self.total_transitions_episode += 1 if self.config["state_space_type"] == "discrete": @@ -356,22 +360,22 @@ def step(self, action): if "irrelevant_features" in self.config: if self.config["state_space_type"] == "discrete": - next_state, reward, done, info = self.env.step(action[0]) - next_state_irr, _, done_irr, _ = self.irr_toy_env.step(action[1]) + next_state, reward, done, trunc, info = self.env.step(action[0]) + next_state_irr, _, done_irr, trunc_irr, _ = self.irr_toy_env.step(action[1]) next_state = tuple([next_state, next_state_irr]) else: # env_act_shape is the shape of the underlying env's action space and we # sub-select those dimensions from the total action space next and apply # to the underlying env: - next_state, reward, done, info = self.env.step( + next_state, reward, done, trunc, info = self.env.step( action[: self.env_act_shape[0]] ) - next_state_irr, _, done_irr, _ = self.irr_toy_env.step( + next_state_irr, _, done_irr, trunc_irr, _ = self.irr_toy_env.step( action[self.env_act_shape[0] :] ) next_state = np.concatenate((next_state, next_state_irr)) else: - next_state, reward, done, info = self.env.step(action) + next_state, reward, done, trunc, info = self.env.step(action) if self.config["state_space_type"] == "continuous": next_state += noise_in_transition @@ -403,7 +407,7 @@ def step(self, action): reward *= self.reward_scale reward += self.reward_shift - return next_state, reward, done, info + return next_state, reward, done, trunc, info def reset(self): # on episode "end" stuff (to not be invoked when reset() called when @@ -441,15 +445,15 @@ def reset(self): if "irrelevant_features" in self.config: if self.config["state_space_type"] == "discrete": - reset_state = self.env.reset() - reset_state_irr = self.irr_toy_env.reset() + reset_state = self.env.reset()[0] + reset_state_irr = self.irr_toy_env.reset()[0] reset_state = tuple([reset_state, reset_state_irr]) else: - reset_state = self.env.reset() - reset_state_irr = self.irr_toy_env.reset() + reset_state = self.env.reset()[0] + reset_state_irr = self.irr_toy_env.reset()[0] reset_state = np.concatenate((reset_state, reset_state_irr)) else: - reset_state = self.env.reset() + reset_state = self.env.reset()[0] if self.image_transforms: reset_state = self.get_transformed_image(reset_state) @@ -470,7 +474,7 @@ def seed(self, seed=None): int The seed returned by Gym """ - # If seed is None, you get a randomly generated seed from gym.utils... 
+ # If seed is None, you get a randomly generated seed from gymnasium.utils... self.np_random, self.seed_ = gym.utils.seeding.np_random(seed) # random print( "Env SEED set to: " @@ -544,8 +548,8 @@ def get_transformed_image(self, env_img): if "shift" in self.image_transforms: max_shift_w = (tot_width - R) // 2 max_shift_h = (tot_height - R) // 2 - add_shift_w = self.np_random.randint(-max_shift_w + 1, max_shift_w) - add_shift_h = self.np_random.randint(-max_shift_h + 1, max_shift_h) + add_shift_w = self.np_random.integers(-max_shift_w + 1, max_shift_w).item() + add_shift_h = self.np_random.integers(-max_shift_h + 1, max_shift_h).item() # print("add_shift_w, add_shift_h", add_shift_w, add_shift_h) add_shift_w = int(add_shift_w / sh_quant) * sh_quant add_shift_h = int(add_shift_h / sh_quant) * sh_quant @@ -582,8 +586,8 @@ def get_transformed_image(self, env_img): # from mdp_playground.envs.gym_env_wrapper import get_gym_wrapper -# from gym.envs.atari import AtariEnv -# from gym.wrappers import AtariPreprocessing +# from gymnasium.envs.atari import AtariEnv +# from gymnasium.wrappers import AtariPreprocessing # AtariPreprocessing() # AtariEnvWrapper = get_gym_wrapper(AtariEnv) # from ray.tune.registry import register_env @@ -592,7 +596,7 @@ def get_transformed_image(self, env_img): # ob = aew.reset() # from mdp_playground.envs.gym_env_wrapper import GymEnvWrapper -# from gym.envs.atari import AtariEnv +# from gymnasium.envs.atari import AtariEnv # ae = AtariEnv(**{'game': 'beam_rider', 'obs_type': 'image', 'frameskip': 1}) # aew = GymEnvWrapper(ae, **{'reward_noise': lambda a: a.normal(0, 0.1), 'transition_noise': 0.1, 'delay': 1, 'frame_skip': 4, "atari_preprocessing": True, "state_space_type": "discrete", 'seed': 0}) # ob = aew.reset() @@ -601,7 +605,7 @@ def get_transformed_image(self, env_img): # total_reward = 0.0 # for i in range(200): # act = aew.action_space.sample() -# next_state, reward, done, info = aew.step(act) +# next_state, reward, done, trunc, info = aew.step(act) # print(reward, done, act) # if reward > 10: # print("reward in step:", i, reward) diff --git a/mdp_playground/envs/mujoco_env_wrapper.py b/mdp_playground/envs/mujoco_env_wrapper.py index 748db1f..6c9c91c 100644 --- a/mdp_playground/envs/mujoco_env_wrapper.py +++ b/mdp_playground/envs/mujoco_env_wrapper.py @@ -1,7 +1,7 @@ -# from gym.envs.mujoco.mujoco_env import MujocoEnv -from gym.envs.mujoco.half_cheetah_v3 import HalfCheetahEnv -from gym.envs.mujoco.pusher import PusherEnv -from gym.envs.mujoco.reacher import ReacherEnv +# from gymnasium.envs.mujoco.mujoco_env import MujocoEnv +from gymnasium.envs.mujoco.half_cheetah_v3 import HalfCheetahEnv +from gymnasium.envs.mujoco.pusher import PusherEnv +from gymnasium.envs.mujoco.reacher import ReacherEnv import copy @@ -104,20 +104,20 @@ def __init__(self, **config): # Gets passed env_config from run_experiments.py ) def step(self, action): # hack - obs, reward, done, info = super(MujocoEnvWrapper, self).step(action) + obs, reward, done, trunc, info = super(MujocoEnvWrapper, self).step(action) if ( self.base_class in [PusherEnv, ReacherEnv] and "time_unit" in self.config ): reward *= self.time_unit - return obs, reward, done, info + return obs, reward, done, trunc, info return MujocoEnvWrapper # from mdp_playground.envs.mujoco_env_wrapper import get_mujoco_wrapper #hack # -# from gym.envs.mujoco.reacher import ReacherEnv +# from gymnasium.envs.mujoco.reacher import ReacherEnv # ReacherWrapperV2 = get_mujoco_wrapper(ReacherEnv) # config = {"time_unit": 0.2} # rw2 = 
ReacherWrapperV2(**config) diff --git a/mdp_playground/envs/rl_toy_env.py b/mdp_playground/envs/rl_toy_env.py index 7091ec0..9ae01e9 100644 --- a/mdp_playground/envs/rl_toy_env.py +++ b/mdp_playground/envs/rl_toy_env.py @@ -12,7 +12,7 @@ import scipy from scipy import stats from scipy.spatial import distance -import gym +import gymnasium as gym from mdp_playground.spaces import ( BoxExtended, DiscreteExtended, @@ -287,7 +287,7 @@ def __init__(self, **config): self.seed_int = config["seed"] need_to_gen_seeds = True else: - raise TypeError("Unsupported data type for seed: ", type(config["seed"])) + raise TypeError("Unsupported data type for seed, actual config: ", type(config["seed"]), config) # #seed #TODO move to seed() so that obs., act. space, etc. have their # seeds reset too when env seed is reset? @@ -300,25 +300,25 @@ def __init__(self, **config): # separation of the relevant and irrelevant dimensions!! _And_ the seed # remaining the same for the underlying discrete environment makes it # easier to write tests! - self.seed_dict["relevant_state_space"] = self.np_random.randint( + self.seed_dict["relevant_state_space"] = self.np_random.integers( sys.maxsize - ) # #random - self.seed_dict["relevant_action_space"] = self.np_random.randint( + ).item() # #random + self.seed_dict["relevant_action_space"] = self.np_random.integers( sys.maxsize - ) # #random - self.seed_dict["irrelevant_state_space"] = self.np_random.randint( + ).item() # #random + self.seed_dict["irrelevant_state_space"] = self.np_random.integers( sys.maxsize - ) # #random - self.seed_dict["irrelevant_action_space"] = self.np_random.randint( + ).item() # #random + self.seed_dict["irrelevant_action_space"] = self.np_random.integers( sys.maxsize - ) # #random + ).item() # #random # #IMP This is currently used to sample only for continuous spaces and not used for discrete spaces by the Environment. User might want to sample from it for multi-discrete environments. #random - self.seed_dict["state_space"] = self.np_random.randint(sys.maxsize) + self.seed_dict["state_space"] = self.np_random.integers(sys.maxsize).item() # #IMP This IS currently used to sample random actions by the RL agent for both discrete and continuous environments (but not used anywhere by the Environment). 
#random - self.seed_dict["action_space"] = self.np_random.randint(sys.maxsize) - self.seed_dict["image_representations"] = self.np_random.randint( + self.seed_dict["action_space"] = self.np_random.integers(sys.maxsize).item() + self.seed_dict["image_representations"] = self.np_random.integers( sys.maxsize - ) # #random + ).item() # #random # print("Mersenne0, dummy_eval:", self.np_random.get_state()[2], "dummy_eval" in config) else: # if seed dict was passed self.seed(self.seed_dict["env"]) @@ -1928,8 +1928,8 @@ def step(self, action, imaginary_rollout=False): Returns ------- - int or np.array, double, boolean, dict - The next state, reward, whether the episode terminated and additional info dict at the end of the current transition + int or np.array, double, boolean, boolean, dict + The next state, reward, whether the episode terminated, whether it was truncated and additional info dict at the end of the current transition """ # For imaginary transitions, discussion: @@ -2043,7 +2043,11 @@ def step(self, action, imaginary_rollout=False): + str(self.reward) ) - return self.curr_obs, self.reward, self.done, self.get_augmented_state() + # The following returns False for the truncated variable as early termination of episodes is handled + # using max_episode_steps in the environment wrapper gymnasium.wrappers.TimeLimit when using + # the env RLToyFinitieHorizon. In the experiments from the paper, early termination was handled by + # Ray Rllib's horizon parameter. + return self.curr_obs, self.reward, self.done, False, self.get_augmented_state() def get_augmented_state(self): """Intended to return the full augmented state which would be Markovian. (However, it's not Markovian wrt the noise in P and R because we're not returning the underlying RNG.) Currently, returns the augmented state which is the sequence of length "delay + sequence_length + 1" of past states for both discrete and continuous environments. Additonally, the current state derivatives are also returned for continuous environments. @@ -2229,7 +2233,7 @@ def reset(self): + str(self.sequence_length) ) - return self.curr_obs + return self.curr_obs, {} def seed(self, seed=None): """Initialises the Numpy RNG for the environment by calling a utility for this in Gym. @@ -2246,7 +2250,11 @@ def seed(self, seed=None): int The seed returned by Gym """ - # If seed is None, you get a randomly generated seed from gym.utils... + # If seed is None, you get a randomly generated seed from gymnasium.utils... 
+ # As of 2024.06.18: + # seed_seq = np.random.SeedSequence(seed) + # np_seed = seed_seq.entropy + # rng = RandomNumberGenerator(np.random.PCG64(seed_seq)) self.np_random, self.seed_ = gym.utils.seeding.np_random(seed) # #random print( "Env SEED set to: " diff --git a/mdp_playground/spaces/box_extended.py b/mdp_playground/spaces/box_extended.py index 140a0f9..11661f8 100644 --- a/mdp_playground/spaces/box_extended.py +++ b/mdp_playground/spaces/box_extended.py @@ -1,6 +1,6 @@ import numpy as np -import gym -from gym.spaces import Box +import gymnasium as gym +from gymnasium.spaces import Box class BoxExtended(Box): diff --git a/mdp_playground/spaces/discrete_extended.py b/mdp_playground/spaces/discrete_extended.py index 4d6c5ad..f827493 100644 --- a/mdp_playground/spaces/discrete_extended.py +++ b/mdp_playground/spaces/discrete_extended.py @@ -1,6 +1,6 @@ import numpy as np -import gym -from gym.spaces import Discrete +import gymnasium as gym +from gymnasium.spaces import Discrete class DiscreteExtended(Discrete): diff --git a/mdp_playground/spaces/grid_action_space.py b/mdp_playground/spaces/grid_action_space.py index e5b6697..92661ef 100644 --- a/mdp_playground/spaces/grid_action_space.py +++ b/mdp_playground/spaces/grid_action_space.py @@ -1,5 +1,5 @@ import numpy as np -import gym +import gymnasium as gym from mdp_playground.spaces import BoxExtended @@ -15,8 +15,8 @@ def sample(self): # Select which dimension will have action (only 1 dimension can have # motion in traditional grid worlds). This also is more consistent with # Manhattan dist reward defined for grid worlds in rl_toy_env.py - ind = self.np_random.randint(self.high.size) - val = self.np_random.randint(3) + ind = self.np_random.integers(self.high.size).item() + val = self.np_random.integers(3).item() samp[ind] = val - 1 # Shift into grid action range of [-1, 0, 1] return samp.astype(int) diff --git a/mdp_playground/spaces/image_continuous.py b/mdp_playground/spaces/image_continuous.py index 1f4577d..946ca4f 100644 --- a/mdp_playground/spaces/image_continuous.py +++ b/mdp_playground/spaces/image_continuous.py @@ -1,7 +1,7 @@ import warnings import numpy as np -import gym -from gym.spaces import Box, Space +import gymnasium as gym +from gymnasium.spaces import Box, Space import PIL.ImageDraw as ImageDraw import PIL.Image as Image from PIL.Image import FLIP_LEFT_RIGHT, FLIP_TOP_BOTTOM diff --git a/mdp_playground/spaces/image_multi_discrete.py b/mdp_playground/spaces/image_multi_discrete.py index 8a363b7..61af6c5 100644 --- a/mdp_playground/spaces/image_multi_discrete.py +++ b/mdp_playground/spaces/image_multi_discrete.py @@ -1,7 +1,7 @@ import warnings import numpy as np -import gym -from gym.spaces import Box, Discrete, MultiDiscrete, Space +import gymnasium as gym +from gymnasium.spaces import Box, Discrete, MultiDiscrete, Space import PIL.ImageDraw as ImageDraw import PIL.Image as Image from PIL.Image import FLIP_LEFT_RIGHT, FLIP_TOP_BOTTOM @@ -160,8 +160,8 @@ def generate_image(self, discrete_state): # , state_space_size, polygon_sides if "shift" in self.transforms: max_shift_w = self.width / 2 - R max_shift_h = self.height / 2 - R - add_shift_w = self.np_random.randint(-max_shift_w + 1, max_shift_w) - add_shift_h = self.np_random.randint(-max_shift_h + 1, max_shift_h) + add_shift_w = self.np_random.integers(-max_shift_w + 1, max_shift_w).item() + add_shift_h = self.np_random.integers(-max_shift_h + 1, max_shift_h).item() add_shift_w = (add_shift_w // sh_quant) * sh_quant add_shift_h = (add_shift_h // sh_quant) * sh_quant 
# print("shift_w, shift_h", add_shift_w, add_shift_h) @@ -236,15 +236,15 @@ def generate_image(self, discrete_state): # , state_space_size, polygon_sides "rotate" in self.transforms ): # TODO rotation can lead to image going out of bounds. # rotation_ = (360 / polygon_sides) * (discrete_state / state_space_size) # Need to divide by polygon_sides because - rotation = self.np_random.randint(360) + rotation = self.np_random.integers(360).item() rotation = (rotation // ro_quant) * ro_quant # print("rotation", rotation) image_ = image_.rotate(rotation) # image_.rotate( if "flip" in self.transforms: - if self.np_random.randint(2) == 0: # Only flip half the times - if self.np_random.randint(2) == 0: + if self.np_random.integers(2).item() == 0: # Only flip half the times + if self.np_random.integers(2).item() == 0: image_ = image_.transpose(FLIP_LEFT_RIGHT) else: image_ = image_.transpose(FLIP_TOP_BOTTOM) diff --git a/mdp_playground/spaces/multi_discrete_extended.py b/mdp_playground/spaces/multi_discrete_extended.py index 5e669d9..0544b62 100644 --- a/mdp_playground/spaces/multi_discrete_extended.py +++ b/mdp_playground/spaces/multi_discrete_extended.py @@ -1,6 +1,6 @@ import numpy as np -import gym -from gym.spaces import MultiDiscrete +import gymnasium as gym +from gymnasium.spaces import MultiDiscrete class MultiDiscreteExtended(MultiDiscrete): diff --git a/mdp_playground/spaces/test_image_continuous.py b/mdp_playground/spaces/test_image_continuous.py index 1d5e5cb..49b123f 100644 --- a/mdp_playground/spaces/test_image_continuous.py +++ b/mdp_playground/spaces/test_image_continuous.py @@ -1,7 +1,7 @@ import unittest import numpy as np from mdp_playground.spaces.image_continuous import ImageContinuous -from gym.spaces import Box +from gymnasium.spaces import Box # import PIL.ImageDraw as ImageDraw import PIL.Image as Image diff --git a/mdp_playground/spaces/test_image_multi_discrete.py b/mdp_playground/spaces/test_image_multi_discrete.py index 5116cd7..4db4f76 100644 --- a/mdp_playground/spaces/test_image_multi_discrete.py +++ b/mdp_playground/spaces/test_image_multi_discrete.py @@ -1,10 +1,10 @@ import unittest import numpy as np from mdp_playground.spaces.image_multi_discrete import ImageMultiDiscrete -from gym.spaces import Discrete, MultiDiscrete +from gymnasium.spaces import Discrete, MultiDiscrete -# import gym -# from gym.spaces import MultiDiscrete +# import gymnasium as gym +# from gymnasium.spaces import MultiDiscrete # # from .space import Space # import PIL.ImageDraw as ImageDraw # import PIL.Image as Image diff --git a/mdp_playground/spaces/tuple_extended.py b/mdp_playground/spaces/tuple_extended.py index 9dc9dce..c8aaf65 100644 --- a/mdp_playground/spaces/tuple_extended.py +++ b/mdp_playground/spaces/tuple_extended.py @@ -1,6 +1,6 @@ import numpy as np -import gym -from gym.spaces import Tuple +import gymnasium as gym +from gymnasium.spaces import Tuple class TupleExtended(Tuple): diff --git a/setup.py b/setup.py index 69442e8..d9fbe2d 100644 --- a/setup.py +++ b/setup.py @@ -82,7 +82,7 @@ setup( name="mdp-playground", - version="0.0.2", + version="1.0.0", author=AUTHORS, author_email=AUTHOR_EMAIL, description="A python package to design and debug RL agents", @@ -109,7 +109,7 @@ py_modules=[], python_requires=">=3.6", setup_requires=["numpy"], - install_requires=["dill", "numpy"], + install_requires=["dill", "numpy", "scipy", "pillow", "gymnasium"], extras_require={ "extras": extras_require, "extras_disc": extras_require_disc, diff --git a/tests/test_gym_env_wrapper.py 
b/tests/test_gym_env_wrapper.py index 9912748..1159f11 100644 --- a/tests/test_gym_env_wrapper.py +++ b/tests/test_gym_env_wrapper.py @@ -4,6 +4,7 @@ from mdp_playground.envs.gym_env_wrapper import GymEnvWrapper import unittest import pytest +import gymnasium as gym import sys @@ -27,11 +28,6 @@ def test_r_delay(self): """ """ print("\033[32;1;4mTEST_REWARD_DELAY\033[0m") config = { - "AtariEnv": { - "game": "beam_rider", # "breakout", - "obs_type": "image", - "frameskip": 1, - }, "delay": 1, # "GymEnvWrapper": { "atari_preprocessing": True, @@ -46,9 +42,7 @@ def test_r_delay(self): # config["log_filename"] = log_filename - from gym.envs.atari import AtariEnv - - ae = AtariEnv(**{"game": "beam_rider", "obs_type": "image", "frameskip": 1}) + ae = gym.make("BeamRiderNoFrameskip-v4") aew = GymEnvWrapper(ae, **config) ob = aew.reset() print("observation_space.shape:", ob.shape) @@ -56,7 +50,7 @@ def test_r_delay(self): total_reward = 0.0 for i in range(200): act = aew.action_space.sample() - next_state, reward, done, info = aew.step(act) + next_state, reward, done, trunc, info = aew.step(act) print("step, reward, done, act:", i, reward, done, act) if i == 154 or i == 159: assert reward == 44.0, ( @@ -72,11 +66,6 @@ def test_r_shift(self): """ """ print("\033[32;1;4mTEST_REWARD_SHIFT\033[0m") config = { - "AtariEnv": { - "game": "beam_rider", # "breakout", - "obs_type": "image", - "frameskip": 1, - }, "reward_shift": 1, # "GymEnvWrapper": { "atari_preprocessing": True, @@ -91,9 +80,8 @@ def test_r_shift(self): # config["log_filename"] = log_filename - from gym.envs.atari import AtariEnv - ae = AtariEnv(**{"game": "beam_rider", "obs_type": "image", "frameskip": 1}) + ae = gym.make("BeamRiderNoFrameskip-v4") aew = GymEnvWrapper(ae, **config) ob = aew.reset() print("observation_space.shape:", ob.shape) @@ -101,7 +89,7 @@ def test_r_shift(self): total_reward = 0.0 for i in range(200): act = aew.action_space.sample() - next_state, reward, done, info = aew.step(act) + next_state, reward, done, trunc, info = aew.step(act) print("step, reward, done, act:", i, reward, done, act) if i == 153 or i == 158: assert reward == 45.0, ( @@ -119,11 +107,6 @@ def test_r_scale(self): """ """ print("\033[32;1;4mTEST_REWARD_SCALE\033[0m") config = { - "AtariEnv": { - "game": "beam_rider", # "breakout", - "obs_type": "image", - "frameskip": 1, - }, "reward_scale": 2, # "GymEnvWrapper": { "atari_preprocessing": True, @@ -138,9 +121,7 @@ def test_r_scale(self): # config["log_filename"] = log_filename - from gym.envs.atari import AtariEnv - - ae = AtariEnv(**{"game": "beam_rider", "obs_type": "image", "frameskip": 1}) + ae = gym.make("BeamRiderNoFrameskip-v4") aew = GymEnvWrapper(ae, **config) ob = aew.reset() print("observation_space.shape:", ob.shape) @@ -148,7 +129,7 @@ def test_r_scale(self): total_reward = 0.0 for i in range(200): act = aew.action_space.sample() - next_state, reward, done, info = aew.step(act) + next_state, reward, done, trunc, info = aew.step(act) print("step, reward, done, act:", i, reward, done, act) if i == 153 or i == 158: assert reward == 88.0, ( @@ -167,11 +148,6 @@ def test_r_scale(self): # """ """ # print("\033[32;1;4mTEST_TERM_STATE_REWARD\033[0m") # config = { - # "AtariEnv": { - # "game": "beam_rider", # "breakout", - # "obs_type": "image", - # "frameskip": 1, - # }, # "term_state_reward": 200, # # "GymEnvWrapper": { # "atari_preprocessing": True, @@ -186,9 +162,7 @@ def test_r_scale(self): # # config["log_filename"] = log_filename - # from gym.envs.atari import AtariEnv - - # ae = 
AtariEnv(**{"game": "beam_rider", "obs_type": "image", "frameskip": 1}) + # ae = gym.make("BeamRiderNoFrameskip-v4") # aew = GymEnvWrapper(ae, **config) # ob = aew.reset() # print("observation_space.shape:", ob.shape) @@ -196,7 +170,7 @@ def test_r_scale(self): # total_reward = 0.0 # for i in range(200): # act = aew.action_space.sample() - # next_state, reward, done, info = aew.step(act) + # next_state, reward, done, trunc, info = aew.step(act) # print("step, reward, done, act:", i, reward, done, act) # if i == 153 or i == 158: # assert reward == 88.0, ( @@ -210,57 +184,50 @@ def test_r_scale(self): # print("total_reward:", total_reward) # aew.reset() - def test_r_delay_ray_frame_stack(self): - """ - Uses wrap_deepmind_ray to frame stack Atari - """ - print("\033[32;1;4mTEST_REWARD_DELAY_RAY_FRAME_STACK\033[0m") - config = { - "AtariEnv": { - "game": "beam_rider", # "breakout", - "obs_type": "image", - "frameskip": 1, - }, - "delay": 1, - # "GymEnvWrapper": { - "wrap_deepmind_ray": True, - "frame_skip": 1, - "atari_preprocessing": True, - "frame_skip": 4, - "grayscale_obs": False, - "state_space_type": "discrete", - "action_space_type": "discrete", - "seed": 0, - # }, - # 'seed': 0, #seed - } - - # config["log_filename"] = log_filename + # Disabled tests for Ray Rllib for now, too much maintenance overhead. + # def test_r_delay_ray_frame_stack(self): + # """ + # Uses wrap_deepmind_ray to frame stack Atari + # """ + # print("\033[32;1;4mTEST_REWARD_DELAY_RAY_FRAME_STACK\033[0m") + # config = { + # "delay": 1, + # # "GymEnvWrapper": { + # "wrap_deepmind_ray": True, + # "frame_skip": 1, + # "atari_preprocessing": True, + # "frame_skip": 4, + # "grayscale_obs": False, + # "state_space_type": "discrete", + # "action_space_type": "discrete", + # "seed": 0, + # # }, + # # 'seed': 0, #seed + # } - from gym.envs.atari import AtariEnv - import gym + # # config["log_filename"] = log_filename - game = "beam_rider" - game = "".join([g.capitalize() for g in game.split("_")]) - ae = gym.make("{}NoFrameskip-v4".format(game)) - aew = GymEnvWrapper(ae, **config) - ob = aew.reset() - print("observation_space.shape:", ob.shape) - # print(ob) - total_reward = 0.0 - for i in range(200): - act = aew.action_space.sample() - next_state, reward, done, info = aew.step(act) - print("step, reward, done, act:", i, reward, done, act) - if i == 142 or i == 159: - assert reward == 44.0, ( - "1-step delayed reward in step: " - + str(i) - + " should have been 44.0." - ) - total_reward += reward - print("total_reward:", total_reward) - aew.reset() + # game = "beam_rider" + # game = "".join([g.capitalize() for g in game.split("_")]) + # ae = gym.make("{}NoFrameskip-v4".format(game)) + # aew = GymEnvWrapper(ae, **config) + # ob = aew.reset() + # print("observation_space.shape:", ob.shape) + # # print(ob) + # total_reward = 0.0 + # for i in range(200): + # act = aew.action_space.sample() + # next_state, reward, done, trunc, info = aew.step(act) + # print("step, reward, done, act:", i, reward, done, act) + # if i == 142 or i == 159: + # assert reward == 44.0, ( + # "1-step delayed reward in step: " + # + str(i) + # + " should have been 44.0." 
+ # ) + # total_reward += reward + # print("total_reward:", total_reward) + # aew.reset() def test_r_delay_p_noise_r_noise(self): """ @@ -268,11 +235,6 @@ def test_r_delay_p_noise_r_noise(self): """ print("\033[32;1;4mTEST_MULTIPLE\033[0m") config = { - "AtariEnv": { - "game": "beam_rider", # "breakout", - "obs_type": "image", - "frameskip": 1, - }, "delay": 1, "reward_noise": lambda a: a.normal(0, 0.1), "transition_noise": 0.1, @@ -289,9 +251,7 @@ def test_r_delay_p_noise_r_noise(self): # config["log_filename"] = log_filename - from gym.envs.atari import AtariEnv - - ae = AtariEnv(**{"game": "beam_rider", "obs_type": "image", "frameskip": 1}) + ae = gym.make("BeamRiderNoFrameskip-v4") aew = GymEnvWrapper(ae, **config) ob = aew.reset() print("observation_space.shape:", ob.shape) @@ -299,7 +259,7 @@ def test_r_delay_p_noise_r_noise(self): total_reward = 0.0 for i in range(200): act = aew.action_space.sample() - next_state, reward, done, info = aew.step(act) + next_state, reward, done, trunc, info = aew.step(act) print("step, reward, done, act:", i, reward, done, act) # Testing hardcoded values at these timesteps implicitly tests that there # were 21 noisy transitions in total and noise inserted in rewards. @@ -329,11 +289,6 @@ def test_discrete_irr_features(self): """ """ print("\033[32;1;4mTEST_DISC_IRR_FEATURES\033[0m") config = { - "AtariEnv": { - "game": "beam_rider", # "breakout", - "obs_type": "image", - "frameskip": 1, - }, "delay": 1, # "GymEnvWrapper": { "atari_preprocessing": True, @@ -359,9 +314,7 @@ def test_discrete_irr_features(self): # config["log_filename"] = log_filename - from gym.envs.atari import AtariEnv - - ae = AtariEnv(**{"game": "beam_rider", "obs_type": "image", "frameskip": 1}) + ae = gym.make("BeamRiderNoFrameskip-v4") aew = GymEnvWrapper(ae, **config) ob = aew.reset() print("type(observation_space):", type(ob)) @@ -369,7 +322,7 @@ def test_discrete_irr_features(self): total_reward = 0.0 for i in range(200): act = aew.action_space.sample() - next_state, reward, done, info = aew.step(act) + next_state, reward, done, trunc, info = aew.step(act) print( "step, reward, done, act, next_state[1]:", i, @@ -392,11 +345,6 @@ def test_image_transforms(self): """ """ print("\033[32;1;4mTEST_IMAGE_TRANSFORMS\033[0m") config = { - "AtariEnv": { - "game": "beam_rider", # "breakout", - "obs_type": "image", - "frameskip": 1, - }, "image_transforms": "shift,scale,rotate", # "image_sh_quant": 2, "image_width": 40, @@ -414,9 +362,7 @@ def test_image_transforms(self): # config["log_filename"] = log_filename - from gym.envs.atari import AtariEnv - - ae = AtariEnv(**{"game": "beam_rider", "obs_type": "image", "frameskip": 1}) + ae = gym.make("BeamRiderNoFrameskip-v4") aew = GymEnvWrapper(ae, **config) ob = aew.reset() print("observation_space.shape:", ob.shape) @@ -425,7 +371,7 @@ def test_image_transforms(self): total_reward = 0.0 for i in range(200): act = aew.action_space.sample() - next_state, reward, done, info = aew.step(act) + next_state, reward, done, trunc, info = aew.step(act) print("step, reward, done, act:", i, reward, done, act) if i == 153 or i == 158: assert reward == 44.0, ( @@ -440,11 +386,6 @@ def test_cont_irr_features(self): """ """ print("\033[32;1;4mTEST_CONT_IRR_FEATURES\033[0m") config = { - # "AtariEnv": { - # "game": 'beam_rider', #"breakout", - # 'obs_type': 'image', - # 'frameskip': 1, - # }, # 'delay': 1, # "GymEnvWrapper": { "state_space_type": "continuous", @@ -471,7 +412,7 @@ def test_cont_irr_features(self): # config["log_filename"] = log_filename from 
mdp_playground.envs.mujoco_env_wrapper import get_mujoco_wrapper # hack - from gym.envs.mujoco.half_cheetah_v3 import HalfCheetahEnv + from gymnasium.envs.mujoco.half_cheetah_v3 import HalfCheetahEnv HalfCheetahWrapperV3 = get_mujoco_wrapper(HalfCheetahEnv) base_env_config = {} @@ -512,7 +453,7 @@ def test_cont_irr_features(self): for i in range(200): act = hc3w.action_space.sample() - next_state, reward, done, info = hc3w.step(act) + next_state, reward, done, trunc, info = hc3w.step(act) print( "step, reward, done, act, next_state:", i, reward, done, act, next_state ) diff --git a/tests/test_mdp_playground.py b/tests/test_mdp_playground.py index 308c246..c687667 100644 --- a/tests/test_mdp_playground.py +++ b/tests/test_mdp_playground.py @@ -59,14 +59,14 @@ def test_continuous_dynamics_move_along_a_line(self): # Test 1: general dynamics and reward env = RLToyEnv(**config) - state = env.get_augmented_state()["curr_state"].copy() # env.reset() + state = env.get_augmented_state()["curr_state"].copy() # env.reset()[0] self.assertEqual( type(state), np.ndarray, "Type of continuous state should be numpy.ndarray." ) for i in range(20): # action = env.action_space.sample() action = np.array([1, 1, 1, 1]) # just to test if acting "in a line" works - next_state, reward, done, info = env.step(action) + next_state, reward, done, trunc, info = env.step(action) print("sars', done =", state, action, reward, next_state, done) np.testing.assert_allclose( 0.0, reward, atol=1e-5, err_msg="Step: " + str(i) @@ -77,7 +77,7 @@ def test_continuous_dynamics_move_along_a_line(self): ) # test_ = np.allclose(a, b, rtol=1e-05, atol=1e-08, equal_nan=False) # self.assertAlmostEqual(state, np.array([21.59339006, 20.68189965, 21.49608203, 20.19183292]), places=3) # Error - env.reset() + env.reset()[0] env.close() # Test 2: sequence lengths # TODO done in next test. @@ -86,14 +86,14 @@ def test_continuous_dynamics_move_along_a_line(self): # of optimal actions leads to good reward. Also implicitly tests sequence # lengths. 
env = RLToyEnv(**config) - state = env.get_augmented_state()["curr_state"].copy() # env.reset() + state = env.get_augmented_state()["curr_state"].copy() # env.reset()[0] prev_reward = None for i in range(40): if i < 20: action = env.action_space.sample() else: action = np.array([1, 1, 1, 1]) - next_state, reward, done, info = env.step(action) + next_state, reward, done, trunc, info = env.step(action) print("sars', done =", state, action, reward, next_state, done) if i >= 29: np.testing.assert_allclose( @@ -119,21 +119,21 @@ def test_continuous_dynamics_move_along_a_line(self): ) state = next_state.copy() prev_reward = reward - env.reset() + env.reset()[0] env.close() # Test 4: same as 3 above except with delay print("\033[32;1;4mTEST_CONTINUOUS_DYNAMICS_DELAY\033[0m") config["delay"] = 1 env = RLToyEnv(**config) - state = env.get_augmented_state()["curr_state"].copy() # env.reset() + state = env.get_augmented_state()["curr_state"].copy() # env.reset()[0] prev_reward = None for i in range(40): if i < 20: action = env.action_space.sample() else: action = np.array([1, 1, 1, 1]) - next_state, reward, done, info = env.step(action) + next_state, reward, done, trunc, info = env.step(action) print("sars', done =", state, action, reward, next_state, done) if i >= 30: np.testing.assert_allclose( @@ -157,7 +157,7 @@ def test_continuous_dynamics_move_along_a_line(self): ) state = next_state.copy() prev_reward = reward - env.reset() + env.reset()[0] env.close() # Test 5: R noise - same as 1 above except with reward noise @@ -165,12 +165,12 @@ def test_continuous_dynamics_move_along_a_line(self): config["reward_noise"] = lambda a: a.normal(0, 0.5) config["delay"] = 0 env = RLToyEnv(**config) - state = env.get_augmented_state()["curr_state"].copy() # env.reset() + state = env.get_augmented_state()["curr_state"].copy() # env.reset()[0] expected_rewards = [-0.70707351, 0.44681, 0.150735, -0.346204, 0.80687] for i in range(5): # action = env.action_space.sample() action = np.array([1, 1, 1, 1]) # just to test if acting "in a line" works - next_state, reward, done, info = env.step(action) + next_state, reward, done, trunc, info = env.step(action) print("sars', done =", state, action, reward, next_state, done) np.testing.assert_allclose( expected_rewards[i], reward, atol=1e-6, err_msg="Step: " + str(i) @@ -179,7 +179,7 @@ def test_continuous_dynamics_move_along_a_line(self): np.testing.assert_allclose( state, np.array([6.59339006, 5.68189965, 6.49608203, 5.19183292]), atol=1e-5 ) - env.reset() + env.reset()[0] env.close() # Test 6: for dynamics and reward in presence of irrelevant dimensions @@ -189,13 +189,13 @@ def test_continuous_dynamics_move_along_a_line(self): config["relevant_indices"] = [0, 1, 2, 6] config["action_space_relevant_indices"] = [0, 1, 2, 6] env = RLToyEnv(**config) - state = env.get_augmented_state()["curr_state"].copy() # env.reset() + state = env.get_augmented_state()["curr_state"].copy() # env.reset()[0] for i in range(20): action = env.action_space.sample() action[ config["action_space_relevant_indices"] ] = 1.0 # test to see if acting "in a line" works for relevant dimensions - next_state, reward, done, info = env.step(action) + next_state, reward, done, trunc, info = env.step(action) print("sars', done =", state, action, reward, next_state, done) np.testing.assert_allclose( 0.0, reward, atol=1e-5, err_msg="Step: " + str(i) @@ -205,19 +205,19 @@ def test_continuous_dynamics_move_along_a_line(self): state[config["relevant_indices"]], np.array([21.59339006, 20.68189965, 21.49608203, 
19.835966]), ) - env.reset() + env.reset()[0] env.close() # Test that random actions in relevant action space along with linear # actions in irrelevant action space leads to bad reward for # move_along_a_line reward function env = RLToyEnv(**config) - state = env.get_augmented_state()["curr_state"].copy() # env.reset() + state = env.get_augmented_state()["curr_state"].copy() # env.reset()[0] for i in range(20): action = env.action_space.sample() # test to see if acting "in a line" for irrelevant dimensions and not for relevant dimensions produces bad reward action[[3, 4, 5]] = 1.0 - next_state, reward, done, info = env.step(action) + next_state, reward, done, trunc, info = env.step(action) print("sars', done =", state, action, reward, next_state, done) if i > 10: assert reward < -0.8, ( @@ -227,24 +227,24 @@ def test_continuous_dynamics_move_along_a_line(self): + str(reward) ) state = next_state.copy() - env.reset() + env.reset()[0] env.close() # Test using config values: state_space_max and action_space_max config["state_space_max"] = 5 # Will be a Box in the range [-max, max] config["action_space_max"] = 1 # Will be a Box in the range [-max, max] env = RLToyEnv(**config) - state = env.get_augmented_state()["curr_state"].copy() # env.reset() + state = env.get_augmented_state()["curr_state"].copy() # env.reset()[0] for _ in range(20): # action = env.action_space.sample() action = np.array([-1] * 7) # just to test if acting "in a line" works - next_state, reward, done, info = env.step(action) + next_state, reward, done, trunc, info = env.step(action) print("sars', done =", state, action, reward, next_state, done) state = next_state.copy() np.testing.assert_allclose(state, np.array([-5] * 7)) # test_ = np.allclose(a, b, rtol=1e-05, atol=1e-08, equal_nan=False) # self.assertAlmostEqual(state, np.array([21.59339006, 20.68189965, 21.49608203, 20.19183292]), places=3) # Error - env.reset() + env.reset()[0] env.close() # Test for terminal states in presence of irrelevant dimensions @@ -259,14 +259,14 @@ def test_continuous_dynamics_move_along_a_line(self): ] config["term_state_edge"] = 1.0 env = RLToyEnv(**config) - state = env.get_augmented_state()["curr_state"].copy() # env.reset() + state = env.get_augmented_state()["curr_state"].copy() # env.reset()[0] state_derivatives = copy.deepcopy(env.state_derivatives) # augmented_state = copy.deepcopy(env.augmented_state) for _ in range(20): # action = env.action_space.sample() action = np.array([1] * 7) # just to test if acting "in a line" works - next_state, reward, done, info = env.step(action) + next_state, reward, done, trunc, info = env.step(action) print("sars', done =", state, action, reward, next_state, done) if _ == 1: assert done, "Terminal state should have been reached at step " + str(_) @@ -281,13 +281,13 @@ def test_continuous_dynamics_move_along_a_line(self): ) # 5 because of state_space_max # test_ = np.allclose(a, b, rtol=1e-05, atol=1e-08, equal_nan=False) # self.assertAlmostEqual(state, np.array([21.59339006, 20.68189965, 21.49608203, 20.19183292]), places=3) # Error - env.reset() + env.reset()[0] env.close() # Test P noise config["transition_noise"] = lambda a: a.normal([0] * 7, [0.5] * 7) env = RLToyEnv(**config) - state = env.get_augmented_state()["curr_state"].copy() # env.reset() + state = env.get_augmented_state()["curr_state"].copy() # env.reset()[0] state_derivatives = copy.deepcopy(env.state_derivatives) # augmented_state = copy.deepcopy(env.augmented_state) @@ -366,7 +366,7 @@ def 
test_continuous_dynamics_move_along_a_line(self): for i in range(3): # action = env.action_space.sample() action = np.array([1] * 7) # just to test if acting "in a line" works - next_state, reward, done, info = env.step(action) + next_state, reward, done, trunc, info = env.step(action) print("sars', done =", state, action, reward, next_state, done) np.testing.assert_allclose( state_derivatives[0], env.augmented_state[-2] @@ -382,7 +382,7 @@ def test_continuous_dynamics_move_along_a_line(self): # augmented_state = copy.deepcopy(env.augmented_state) # test_ = np.allclose(a, b, rtol=1e-05, atol=1e-08, equal_nan=False) # self.assertAlmostEqual(state, np.array([21.59339006, 20.68189965, 21.49608203, 20.19183292]), places=3) # Error - env.reset() + env.reset()[0] env.close() # TODO Write test for continuous for checking reward with/without irrelevant dimensions, delay, r noise, seq_len? @@ -418,7 +418,7 @@ def test_continuous_dynamics_order(self): state_derivatives = copy.deepcopy(env.state_derivatives) action = np.array([2.0, 1.0]) - next_state, reward, done, info = env.step(action) + next_state, reward, done, trunc, info = env.step(action) print("sars', done =", state, action, reward, next_state, done) np.testing.assert_allclose( next_state - state, (1 / 6) * np.array([1, 0.5]) * 1e-6, atol=1e-7 @@ -437,7 +437,7 @@ def test_continuous_dynamics_order(self): state_derivatives = copy.deepcopy(env.state_derivatives) action = np.array([2.0, 1.0]) - next_state, reward, done, info = env.step(action) + next_state, reward, done, trunc, info = env.step(action) print("sars', done =", state, action, reward, next_state, done) np.testing.assert_allclose( next_state - state, (7 / 6) * np.array([1, 0.5]) * 1e-6, atol=1e-7 @@ -456,7 +456,7 @@ def test_continuous_dynamics_order(self): # TODO Test for more timesteps? (>seq_len so that reward function kicks in) or higher order derivatives (.DONE) - env.reset() + env.reset()[0] env.close() def test_continuous_dynamics_target_point_dense(self): @@ -489,11 +489,11 @@ def test_continuous_dynamics_target_point_dense(self): # Test : dense reward env = RLToyEnv(**config) - state = env.get_augmented_state()["curr_state"].copy() # env.reset() + state = env.get_augmented_state()["curr_state"].copy() # env.reset()[0] for i in range(20): # action = env.action_space.sample() action = np.array([0.5] * 2) # just to test if acting "in a line" works - next_state, reward, done, info = env.step(action) + next_state, reward, done, trunc, info = env.step(action) print("sars', done =", state, action, reward, next_state, done) # At each step, the distance reduces by ~0.035355 to the final point of # this trajectory which is also the target point by design for this test. 
@@ -503,7 +503,7 @@ def test_continuous_dynamics_target_point_dense(self): ) state = next_state.copy() np.testing.assert_allclose(state, np.array([-0.29792, 1.71012]), atol=1e-6) - env.reset() + env.reset()[0] env.close() # Test irrelevant dimensions @@ -513,11 +513,11 @@ def test_continuous_dynamics_target_point_dense(self): config["action_space_relevant_indices"] = [1, 2] config["target_point"] = [1.71012, 0.941906] env = RLToyEnv(**config) - state = env.get_augmented_state()["curr_state"].copy() # env.reset() + state = env.get_augmented_state()["curr_state"].copy() # env.reset()[0] for i in range(20): # action = env.action_space.sample() action = np.array([0.5] * 5) # just to test if acting "in a line" works - next_state, reward, done, info = env.step(action) + next_state, reward, done, trunc, info = env.step(action) print("sars', done =", state, action, reward, next_state, done) # At each step, the distance reduces by ~0.035355 to the final point of # this trajectory which is also the target point by design for this test. @@ -532,24 +532,24 @@ def test_continuous_dynamics_target_point_dense(self): atol=1e-6, ) # check 1 extra step away from target point gives -ve reward - next_state, reward, done, info = env.step(action) + next_state, reward, done, trunc, info = env.step(action) print("sars', done =", state, action, reward, next_state, done) # At each step, the distance reduces by ~0.035355 to the final point of # this trajectory which is also the target point by design for this np.testing.assert_allclose( -0.035355, reward, atol=1e-5, err_msg="Step: " + str(i) ) - env.reset() + env.reset()[0] env.close() # Test delay config["delay"] = 10 env = RLToyEnv(**config) - state = env.get_augmented_state()["curr_state"].copy() # env.reset() + state = env.get_augmented_state()["curr_state"].copy() # env.reset()[0] for i in range(20): # action = env.action_space.sample() action = np.array([0.5] * 5) # just to test if acting "in a line" works - next_state, reward, done, info = env.step(action) + next_state, reward, done, trunc, info = env.step(action) print("sars', done =", state, action, reward, next_state, done) if i < 10: np.testing.assert_allclose( @@ -568,7 +568,7 @@ def test_continuous_dynamics_target_point_dense(self): np.array([-0.29792, 1.71012, 0.941906, -0.034626, 0.493934]), atol=1e-6, ) - env.reset() + env.reset()[0] env.close() def test_continuous_dynamics_target_point_sparse(self): @@ -603,11 +603,11 @@ def test_continuous_dynamics_target_point_sparse(self): # Test : sparse reward env = RLToyEnv(**config) - state = env.get_augmented_state()["curr_state"].copy() # env.reset() + state = env.get_augmented_state()["curr_state"].copy() # env.reset()[0] for i in range(20): # action = env.action_space.sample() action = np.array([0.5] * 2) # just to test if acting "in a line" works - next_state, reward, done, info = env.step(action) + next_state, reward, done, trunc, info = env.step(action) print("sars', done =", state, action, reward, next_state, done) if i < 17: np.testing.assert_allclose( @@ -621,17 +621,17 @@ def test_continuous_dynamics_target_point_sparse(self): np.testing.assert_allclose(state, np.array([-0.29792, 1.71012]), atol=1e-6) # test_ = np.allclose(a, b, rtol=1e-05, atol=1e-08, equal_nan=False) # self.assertAlmostEqual(state, np.array([21.59339006, 20.68189965, 21.49608203, 20.19183292]), places=3) # Error - env.reset() + env.reset()[0] env.close() # Test delay config["delay"] = 10 env = RLToyEnv(**config) - state = env.get_augmented_state()["curr_state"].copy() # 
env.reset() + state = env.get_augmented_state()["curr_state"].copy() # env.reset()[0] for i in range(35): # action = env.action_space.sample() action = np.array([0.5] * 2) # just to test if acting "in a line" works - next_state, reward, done, info = env.step(action) + next_state, reward, done, trunc, info = env.step(action) print("sars', done =", state, action, reward, next_state, done) if i < 27 or i > 31: np.testing.assert_allclose( @@ -645,7 +645,7 @@ def test_continuous_dynamics_target_point_sparse(self): np.testing.assert_allclose(state, np.array([0.07708, 2.08512]), atol=1e-6) # test_ = np.allclose(a, b, rtol=1e-05, atol=1e-08, equal_nan=False) # self.assertAlmostEqual(state, np.array([21.59339006, 20.68189965, 21.49608203, 20.19183292]), places=3) # Error - env.reset() + env.reset()[0] env.close() # Test irrelevant dimensions @@ -655,11 +655,11 @@ def test_continuous_dynamics_target_point_sparse(self): config["action_space_relevant_indices"] = [1, 2] config["target_point"] = [1.71012, 0.941906] env = RLToyEnv(**config) - state = env.get_augmented_state()["curr_state"].copy() # env.reset() + state = env.get_augmented_state()["curr_state"].copy() # env.reset()[0] for i in range(35): # action = env.action_space.sample() action = np.array([0.5] * 5) # just to test if acting "in a line" works - next_state, reward, done, info = env.step(action) + next_state, reward, done, trunc, info = env.step(action) print("sars', done =", state, action, reward, next_state, done) if i < 27 or i > 31: # At each step, the distance reduces by ~0.035355 to the final point of @@ -676,7 +676,7 @@ def test_continuous_dynamics_target_point_sparse(self): np.testing.assert_allclose( state, np.array([0.07708, 2.08512, 1.316906, 0.340374, 0.868934]), atol=1e-6 ) - env.reset() + env.reset()[0] env.close() def test_continuous_image_representations(self): @@ -724,7 +724,7 @@ def test_continuous_image_representations(self): for i in range(5): # action = env.action_space.sample() action = np.array([-0.45, 0.8]) # just to test if acting "in a line" works - next_obs, reward, done, info = env.step(action) + next_obs, reward, done, trunc, info = env.step(action) next_state = env.get_augmented_state()["augmented_state"][-1] print("sars', done =", state, action, reward, next_state, done) state = next_state.copy() @@ -747,7 +747,7 @@ def test_continuous_image_representations(self): # test_ = np.allclose(a, b, rtol=1e-05, atol=1e-08, equal_nan=False) # self.assertAlmostEqual(state, np.array([21.59339006, 20.68189965, 21.49608203, 20.19183292]), places=3) # Error - env.reset() + env.reset()[0] env.close() def test_grid_image_representations(self): @@ -793,7 +793,7 @@ def test_grid_image_representations(self): for i in range(len(actions)): # action = env.action_space.sample() action = actions[i] - next_obs, reward, done, info = env.step(action) + next_obs, reward, done, trunc, info = env.step(action) next_state = env.get_augmented_state()["augmented_state"][-1] print("sars', done =", state, action, reward, next_state, done) state = next_state.copy() @@ -816,7 +816,7 @@ def test_grid_image_representations(self): for i in range(4): # action = env.action_space.sample() action = [0, 1] - next_obs, reward, done, info = env.step(action) + next_obs, reward, done, trunc, info = env.step(action) next_state = env.get_augmented_state()["augmented_state"][-1] print("sars', done =", state, action, reward, next_state, done) state = next_state.copy() @@ -826,7 +826,7 @@ def test_grid_image_representations(self): assert state == [5, 7], 
str(state) # test_ = np.allclose(a, b, rtol=1e-05, atol=1e-08, equal_nan=False) # self.assertAlmostEqual(state, np.array([21.59339006, 20.68189965, 21.49608203, 20.19183292]), places=3) # Error - env.reset() + env.reset()[0] env.close() # Test 2: Almost the same as above, but with make_denser @@ -848,7 +848,7 @@ def test_grid_image_representations(self): tot_rew = 0 for i in range(len(actions)): action = actions[i] - next_obs, reward, done, info = env.step(action) + next_obs, reward, done, trunc, info = env.step(action) next_state = env.get_augmented_state()["augmented_state"][-1] print("sars', done =", state, action, reward, next_state, done) state = next_state.copy() @@ -856,7 +856,7 @@ def test_grid_image_representations(self): assert tot_rew == 6.0, str(tot_rew) - env.reset() + env.reset()[0] env.close() # Test 3: Almost the same as 2, but with terminal states @@ -885,7 +885,7 @@ def test_grid_image_representations(self): tot_rew = 0 for i in range(len(actions)): action = actions[i] - next_obs, reward, done, info = env.step(action) + next_obs, reward, done, trunc, info = env.step(action) next_state = env.get_augmented_state()["augmented_state"][-1] print("sars', done =", state, action, reward, next_state, done) state = next_state.copy() @@ -893,7 +893,7 @@ def test_grid_image_representations(self): assert tot_rew == 5.5, str(tot_rew) - env.reset() + env.reset()[0] env.close() # Test 4: Almost the same as 3, but with irrelevant features @@ -922,7 +922,7 @@ def test_grid_image_representations(self): tot_rew = 0 for i in range(len(actions)): action = actions[i] + [0, 0] - next_obs, reward, done, info = env.step(action) + next_obs, reward, done, trunc, info = env.step(action) next_state = env.curr_state print("sars', done =", state, action, reward, next_state, done) state = next_state.copy() @@ -943,7 +943,7 @@ def test_grid_image_representations(self): for i in range(len(actions)): action = [0, 0] + actions[i] - next_obs, reward, done, info = env.step(action) + next_obs, reward, done, trunc, info = env.step(action) next_state = env.curr_state print("sars', done =", state, action, reward, next_state, done) state = next_state.copy() @@ -951,7 +951,7 @@ def test_grid_image_representations(self): assert tot_rew == 0.5, str(tot_rew) - env.reset() + env.reset()[0] env.close() # Test 5: With transition noise @@ -981,7 +981,7 @@ def test_grid_image_representations(self): tot_rew = 0 for i in range(len(actions)): action = actions[i] + [0, 0] - next_obs, reward, done, info = env.step(action) + next_obs, reward, done, trunc, info = env.step(action) next_state = env.curr_state print("sars', done =", state, action, reward, next_state, done) state = next_state.copy() @@ -994,7 +994,7 @@ def test_grid_image_representations(self): assert tot_rew == 2.75, str(tot_rew) - env.reset() + env.reset()[0] env.close() def test_grid_env(self): @@ -1038,7 +1038,7 @@ def test_grid_env(self): tot_rew = 0 for i in range(len(actions)): action = actions[i] - next_obs, reward, done, info = env.step(action) + next_obs, reward, done, trunc, info = env.step(action) next_state = env.get_augmented_state()["augmented_state"][-1] print("sars', done =", state, action, reward, next_state, done) self.assertEqual( @@ -1054,7 +1054,7 @@ def test_grid_env(self): assert tot_rew == 8.25, str(tot_rew) - env.reset() + env.reset()[0] env.close() # Test 2: Almost the same as 1, but with irrelevant features and no terminal reward @@ -1081,7 +1081,7 @@ def test_grid_env(self): tot_rew = 0 for i in range(len(actions)): action = actions[i] + 
[0, 0] - next_obs, reward, done, info = env.step(action) + next_obs, reward, done, trunc, info = env.step(action) next_state = env.get_augmented_state()["augmented_state"][-1] print("sars', done =", state, action, reward, next_state, done) self.assertEqual( @@ -1098,7 +1098,7 @@ def test_grid_env(self): # Perform actions only in irrelevant space and noop in relevant space for i in range(len(actions)): action = [0, 0] + actions[i] - next_obs, reward, done, info = env.step(action) + next_obs, reward, done, trunc, info = env.step(action) next_state = env.get_augmented_state()["augmented_state"][-1] print("sars', done =", state, action, reward, next_state, done) state = next_state.copy() @@ -1106,7 +1106,7 @@ def test_grid_env(self): assert tot_rew == 9, str(tot_rew) - env.reset() + env.reset()[0] env.close() # Test 3: Almost the same as 1, but with delay @@ -1136,7 +1136,7 @@ def test_grid_env(self): tot_rew = 0 for i in range(len(actions)): action = actions[i] - next_obs, reward, done, info = env.step(action) + next_obs, reward, done, trunc, info = env.step(action) next_state = env.get_augmented_state()["augmented_state"][-1] print("sars', done =", state, action, reward, next_state, done) self.assertEqual( @@ -1152,7 +1152,7 @@ def test_grid_env(self): assert tot_rew == 6.75, str(tot_rew) - env.reset() + env.reset()[0] env.close() def test_discrete_dynamics(self): @@ -1185,7 +1185,7 @@ def test_discrete_dynamics(self): ) # TODO Move this and the test_continuous_dynamics type checks to separate unit tests action = 2 - next_state, reward, done, info = env.step(action) + next_state, reward, done, trunc, info = env.step(action) print("sars', done =", state, action, reward, next_state, done) self.assertEqual( next_state, @@ -1195,7 +1195,7 @@ def test_discrete_dynamics(self): state = next_state action = 4 - next_state, reward, done, info = env.step(action) + next_state, reward, done, trunc, info = env.step(action) print("sars', done =", state, action, reward, next_state, done) self.assertEqual( next_state, @@ -1205,7 +1205,7 @@ def test_discrete_dynamics(self): state = next_state action = 1 - next_state, reward, done, info = env.step(action) + next_state, reward, done, trunc, info = env.step(action) print("sars', done =", state, action, reward, next_state, done) self.assertEqual( next_state, @@ -1221,7 +1221,7 @@ def test_discrete_dynamics(self): # Try a random action to see that terminal state leads back to same terminal state action = env.action_space.sample() - next_state, reward, done, info = env.step(action) + next_state, reward, done, trunc, info = env.step(action) print("sars', done =", state, action, reward, next_state, done) self.assertEqual( next_state, @@ -1230,7 +1230,7 @@ def test_discrete_dynamics(self): ) state = next_state - env.reset() + env.reset()[0] env.close() def test_discrete_reward_delay(self): @@ -1269,13 +1269,13 @@ def test_discrete_reward_delay(self): 5, 2, 3, - np.random.randint(config["action_space_size"]), + np.random.default_rng().integers(config["action_space_size"]), 4, ] # 2nd last action is random just to check that last delayed reward works with any action expected_rewards = [0, 0, 0, 1, 1, 0, 1, 0, 0] expected_states = [0, 2, 2, 5, 2, 5, 5, 0, 6] for i in range(len(expected_rewards)): - next_state, reward, done, info = env.step(actions[i]) + next_state, reward, done, trunc, info = env.step(actions[i]) print("sars', done =", state, actions[i], reward, next_state, done) self.assertEqual( reward, @@ -1289,7 +1289,7 @@ def test_discrete_reward_delay(self): # for 
2nd last time step due to random action. state = next_state - env.reset() + env.reset()[0] env.close() def test_discrete_rewardable_sequences(self): @@ -1327,12 +1327,12 @@ def test_discrete_rewardable_sequences(self): 3, 4, 2, - np.random.randint(config["action_space_size"]), + np.random.default_rng().integers(config["action_space_size"]), 5, ] # expected_rewards = [0, 0, 1, 0, 1, 0, 0, 0] for i in range(len(expected_rewards)): - next_state, reward, done, info = env.step(actions[i]) + next_state, reward, done, trunc, info = env.step(actions[i]) print("sars', done =", state, actions[i], reward, next_state, done) self.assertEqual( reward, @@ -1343,7 +1343,7 @@ def test_discrete_rewardable_sequences(self): ) state = next_state - env.reset() + env.reset()[0] env.close() def test_discrete_p_noise(self): @@ -1374,7 +1374,7 @@ def test_discrete_p_noise(self): env = RLToyEnv(**config) state = env.get_augmented_state()["curr_state"] - actions = [6, 6, 2, np.random.randint(config["action_space_size"])] # + actions = [6, 6, 2, np.random.default_rng().integers(config["action_space_size"])] # expected_states = [ 2, 6, @@ -1382,7 +1382,7 @@ def test_discrete_p_noise(self): 3, ] # Last state 3 is fixed for this test because of fixed seed for Env which selects the next noisy state. for i in range(len(actions)): - next_state, reward, done, info = env.step(actions[i]) + next_state, reward, done, trunc, info = env.step(actions[i]) print("sars', done =", state, actions[i], reward, next_state, done) self.assertEqual( next_state, @@ -1393,7 +1393,7 @@ def test_discrete_p_noise(self): ) state = next_state - env.reset() + env.reset()[0] env.close() def test_discrete_r_noise(self): @@ -1432,7 +1432,7 @@ def test_discrete_r_noise(self): 0.086749, ] # 2nd state produces 'true' reward for i in range(len(actions)): - next_state, reward, done, info = env.step(actions[i]) + next_state, reward, done, trunc, info = env.step(actions[i]) print("sars', done =", state, actions[i], reward, next_state, done) np.testing.assert_allclose( reward, @@ -1445,7 +1445,7 @@ def test_discrete_r_noise(self): state = next_state - env.reset() + env.reset()[0] env.close() # TODO Test for make_denser; also one for creating multiple instances of an Env with the same config dict (can lead to issues because the dict is shared as I found with Ray's A3C imple.) 
@@ -1493,7 +1493,7 @@ def test_discrete_multiple_meta_features(self): 3, 4, 2, - np.random.randint(config["action_space_size"]), + np.random.default_rng().integers(config["action_space_size"]), 5, ] # expected_rewards = [0, 0, 0, 1, 0, 1, 0, 0] @@ -1512,7 +1512,7 @@ def test_discrete_multiple_meta_features(self): expected_rewards[i] + expected_reward_noises[i] ) * config["reward_scale"] + config["reward_shift"] for i in range(len(expected_rewards)): - next_state, reward, done, info = env.step(actions[i]) + next_state, reward, done, trunc, info = env.step(actions[i]) print("sars', done =", state, actions[i], reward, next_state, done) np.testing.assert_allclose( reward, @@ -1524,7 +1524,7 @@ def test_discrete_multiple_meta_features(self): ) state = next_state - env.reset() + env.reset()[0] env.close() # Commented out the following 2 tests after changing implementation of @@ -1566,12 +1566,12 @@ def test_discrete_multiple_meta_features(self): # actions = [[1, 1, 0], [0, 1, 0], [1, 0 ,1], [1, 0 ,0], [1, 0, 1], [0, 1, 0], [0, 1, 1], [0, 0, 1], [1, 0, 0]] # expected_rewards = [0, 0, 0, 1, 1, 0, 1, 0, 0] # for i in range(len(expected_rewards)): - # next_state, reward, done, info = env.step(actions[i]) + # next_state, reward, done, trunc, info = env.step(actions[i]) # print("sars', done =", state, actions[i], reward, next_state, done) # self.assertEqual(reward, expected_rewards[i], "Expected reward mismatch in time step: " + str(i + 1) + " when reward delay = 3.") # state = next_state # - # env.reset() + # env.reset()[0] # env.close() # def test_discrete_multi_discrete_irrelevant_dimensions(self): @@ -1615,12 +1615,12 @@ def test_discrete_multiple_meta_features(self): # actions = [[1, 1, 0], [0, 1, 0], [1, 0 ,1], [1, 0 ,0], [1, 0, 1], [0, 1, 0], [0, 1, 1], [0, 0, 1], [1, 0, 0]] # expected_rewards = [0, 0, 0, 0, 1, 1, 0, 1, 0] # for i in range(len(expected_rewards)): - # next_state, reward, done, info = env.step(actions[i]) + # next_state, reward, done, trunc, info = env.step(actions[i]) # print("sars', done =", state, actions[i], reward, next_state, done) # self.assertEqual(reward, expected_rewards[i], "Expected reward mismatch in time step: " + str(i + 1) + " when reward delay = 3.") # state = next_state # - # env.reset() + # env.reset()[0] # env.close() # # except AssertionError as e: @@ -1636,13 +1636,13 @@ def test_discrete_multiple_meta_features(self): # expected_rewards = [0, 0, 0, 1, 1, 0, 1, 0, 0] # expected_states = [[0, 0, 0, 3], [0, 1, 0, 1], [0, 1, 0, 1], [1, 0, 1, 3], [0, 1, 0, 2], [1, 0, 1, 0], [1, 0, 1, 1], [0, 0, 0, 4], [1, 0, 0, 2]] # for i in range(len(expected_rewards)): - # next_state, reward, done, info = env.step(actions[i]) + # next_state, reward, done, trunc, info = env.step(actions[i]) # print("sars', done =", state, actions[i], reward, next_state, done) # self.assertEqual(reward, expected_rewards[i], "Expected reward mismatch in time step: " + str(i + 1) + " when reward delay = 3.") # self.assertEqual(state, expected_states[i], "Expected state mismatch in time step: " + str(i + 1) + " when reward delay = 3.") # state = next_state # - # env.reset() + # env.reset()[0] # env.close() # Test: This test lets even irrelevant dimensions be multi-dimensional @@ -1657,13 +1657,13 @@ def test_discrete_multiple_meta_features(self): # expected_rewards = [0, 0, 0, 1, 1, 0, 1, 0, 0] # expected_states = [[0, 0, 0, 0, 3], [0, 1, 0, 0, 1], [0, 1, 0, 0, 1], [1, 0, 1, 0, 3], [0, 1, 0, 0, 2], [1, 0, 1, 0, 0], [1, 0, 1, 0, 1], [0, 0, 0, 0, 4], [1, 0, 0, 0, 2]] # for i in 
range(len(expected_rewards)): - # next_state, reward, done, info = env.step(actions[i]) + # next_state, reward, done, trunc, info = env.step(actions[i]) # print("sars', done =", state, actions[i], reward, next_state, done) # self.assertEqual(reward, expected_rewards[i], "Expected reward mismatch in time step: " + str(i + 1) + " when reward delay = 3.") # self.assertEqual(state, expected_states[i], "Expected state mismatch in time step: " + str(i + 1) + " when reward delay = 3.") # state = next_state # - # env.reset() + # env.reset()[0] # env.close() def test_discrete_irr_features(self): @@ -1694,11 +1694,11 @@ def test_discrete_irr_features(self): state = env.get_augmented_state()["curr_state"] actions = [[7, 0], [5, 0], [5, 0], [1, 2]] + [ - [5, np.random.randint(config["action_space_size"][1])] + [5, np.random.default_rng().integers(config["action_space_size"][1])] ] * 5 expected_rewards = [0, 1, 0, 1, 0, 0, 0, 0, 0] for i in range(len(expected_rewards)): - next_state, reward, done, info = env.step(actions[i]) + next_state, reward, done, trunc, info = env.step(actions[i]) print("sars', done =", state, actions[i], reward, next_state, done) self.assertEqual( reward, @@ -1710,7 +1710,7 @@ def test_discrete_irr_features(self): ) state = next_state - env.reset() + env.reset()[0] env.close() def test_discrete_image_representations(self): @@ -1758,7 +1758,7 @@ def test_discrete_image_representations(self): 3, 4, 2, - np.random.randint(config["action_space_size"]), + np.random.default_rng().integers(config["action_space_size"]), 5, ] # expected_rewards = [0, 0, 0, 1, 0, 1, 0, 0] @@ -1782,7 +1782,7 @@ def test_discrete_image_representations(self): expected_rewards[i] + expected_reward_noises[i] ) * config["reward_scale"] + config["reward_shift"] for i in range(len(expected_rewards)): - next_state, reward, done, info = env.step(actions[i]) + next_state, reward, done, trunc, info = env.step(actions[i]) assert next_state.shape == ( 100, 100, @@ -1813,7 +1813,7 @@ def test_discrete_image_representations(self): ) state = next_state - env.reset() + env.reset()[0] env.close() def test_discrete_reward_every_n_steps(self): @@ -1854,12 +1854,12 @@ def test_discrete_reward_every_n_steps(self): 6, 1, 0, - np.random.randint(config["action_space_size"]), + np.random.default_rng().integers(config["action_space_size"]), 5, ] # expected_rewards = [0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0] for i in range(len(expected_rewards)): - next_state, reward, done, info = env.step(actions[i]) + next_state, reward, done, trunc, info = env.step(actions[i]) print("sars', done =", state, actions[i], reward, next_state, done) self.assertEqual( reward, @@ -1870,7 +1870,7 @@ def test_discrete_reward_every_n_steps(self): ) state = next_state - env.reset() + env.reset()[0] env.close() # With delay @@ -1891,12 +1891,12 @@ def test_discrete_reward_every_n_steps(self): 6, 1, 0, - np.random.randint(config["action_space_size"]), + np.random.default_rng().integers(config["action_space_size"]), 5, ] # expected_rewards = [0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0] for i in range(len(expected_rewards)): - next_state, reward, done, info = env.step(actions[i]) + next_state, reward, done, trunc, info = env.step(actions[i]) print("sars', done =", state, actions[i], reward, next_state, done) self.assertEqual( reward, @@ -1907,7 +1907,7 @@ def test_discrete_reward_every_n_steps(self): ) state = next_state - env.reset() + env.reset()[0] env.close() # With delay >= sequence length @@ -1928,12 +1928,12 @@ def test_discrete_reward_every_n_steps(self): 6, 1, 0, - 
np.random.randint(config["action_space_size"]), + np.random.default_rng().integers(config["action_space_size"]), 5, ] # expected_rewards = [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0] for i in range(len(expected_rewards)): - next_state, reward, done, info = env.step(actions[i]) + next_state, reward, done, trunc, info = env.step(actions[i]) print("sars', done =", state, actions[i], reward, next_state, done) self.assertEqual( reward, @@ -1944,7 +1944,7 @@ def test_discrete_reward_every_n_steps(self): ) state = next_state - env.reset() + env.reset()[0] env.close() def test_discrete_custom_P_R(self): @@ -1965,9 +1965,9 @@ def test_discrete_custom_P_R(self): config["reward_scale"] = 2.0 config["use_custom_mdp"] = True - np.random.seed(0) # seed - config["transition_function"] = np.random.randint(8, size=(8, 5)) - config["reward_function"] = np.random.randint(4, size=(8, 5)) + # np.random.seed(0) # seed + config["transition_function"] = np.random.default_rng().integers(8, size=(8, 5)) + config["reward_function"] = np.random.default_rng().integers(4, size=(8, 5)) config["init_state_dist"] = np.array([1 / 8 for i in range(8)]) env = RLToyEnv(**config) @@ -1983,12 +1983,12 @@ def test_discrete_custom_P_R(self): 4, 1, 0, - np.random.randint(config["action_space_size"]), + np.random.default_rng(0).integers(config["action_space_size"]), 4, ] # expected_rewards = [0, 0, 6, 4, 4, 0, 4, 6, 6, 2, 0] for i in range(len(expected_rewards)): - next_state, reward, done, info = env.step(actions[i]) + next_state, reward, done, trunc, info = env.step(actions[i]) print("sars', done =", state, actions[i], reward, next_state, done) self.assertEqual( reward, @@ -1997,13 +1997,13 @@ def test_discrete_custom_P_R(self): ) state = next_state - env.reset() + env.reset()[0] env.close() # np.random.seed(0) #seed config["delay"] = 2 - P = np.random.randint(8, size=(8, 5)) - R = np.random.randint(4, size=(8, 5)) + P = np.random.default_rng().integers(8, size=(8, 5)) + R = np.random.default_rng().integers(4, size=(8, 5)) config["transition_function"] = lambda s, a: P[s, a] config["reward_function"] = lambda s, a: R[s[-2], a] config["init_state_dist"] = np.array([1 / 8 for i in range(8)]) @@ -2021,12 +2021,12 @@ def test_discrete_custom_P_R(self): 4, 1, 0, - np.random.randint(config["action_space_size"]), + np.random.default_rng().integers(config["action_space_size"]), 4, ] # expected_rewards = [0, 0, 0, 2, 2, 0, 0, 6, 0, 4, 6] for i in range(len(expected_rewards)): - next_state, reward, done, info = env.step(actions[i]) + next_state, reward, done, trunc, info = env.step(actions[i]) print("sars', done =", state, actions[i], reward, next_state, done) self.assertEqual( reward, @@ -2035,7 +2035,7 @@ def test_discrete_custom_P_R(self): ) state = next_state - env.reset() + env.reset()[0] env.close() def test_continuous_custom_P_R(self): @@ -2054,7 +2054,7 @@ def test_continuous_custom_P_R(self): config["delay"] = 1 config["use_custom_mdp"] = True - np.random.seed(0) # seed + # np.random.seed(0) # seed config["transition_function"] = lambda s, a: s + a config["reward_function"] = lambda s, a: s[-2][0] # config["init_state_dist"] = np.array([1 / 8 for i in range(8)]) @@ -2074,7 +2074,7 @@ def test_continuous_custom_P_R(self): -1.564964, ] # , -0.564964] for i in range(len(expected_rewards)): - next_state, reward, done, info = env.step(actions[i]) + next_state, reward, done, trunc, info = env.step(actions[i]) print("sars', done =", state, actions[i], reward, next_state, done) np.testing.assert_allclose( reward, @@ -2084,7 +2084,7 @@ def 
test_continuous_custom_P_R(self): ) state = next_state - env.reset() + env.reset()[0] env.close() # def test_discrete_imaginary_rollouts(self): @@ -2119,13 +2119,13 @@ def test_continuous_custom_P_R(self): # expected_rewards = [0, 0, 0, 0, 0, 0]#, 1, 0, 0] # expected_states = [9, 2, 4, 5, 8, 9] # [2, 4, 5, 8, 9] is a rewardable sequence. init state is 9 and action 0 leads to state 2. # for i in range(len(expected_rewards)): - # next_state, reward, done, info = env.step(actions[i]) + # next_state, reward, done, trunc, info = env.step(actions[i]) # print("sars', done =", state, actions[i], reward, next_state, done) # self.assertEqual(reward, expected_rewards[i], "Expected reward mismatch in time step: " + str(i + 1) + " when reward delay = " + str(config["delay"])) # self.assertEqual(state, expected_states[i], "Expected state mismatch in time step: " + str(i + 1) + " when reward delay = " + str(config["delay"])) # state = next_state # - # env.reset() + # env.reset()[0] # env.close() def test_discrete_r_dist(self): @@ -2163,7 +2163,7 @@ def test_discrete_r_dist(self): 1.424395, ] # 1st, 3rd and 4th states produce 'true' rewards, every reward has been shifted by 1 for i in range(len(actions)): - next_state, reward, done, info = env.step(actions[i]) + next_state, reward, done, trunc, info = env.step(actions[i]) print("sars', done =", state, actions[i], reward, next_state, done) np.testing.assert_allclose( reward, @@ -2176,7 +2176,7 @@ def test_discrete_r_dist(self): state = next_state - env.reset() + env.reset()[0] env.close() def test_discrete_diameter(self): @@ -2248,7 +2248,7 @@ def test_discrete_diameter(self): 1, ] # 1st, 3rd and 4th states produce 'true' rewards, every reward has been shifted by 1 for i in range(len(actions)): - next_state, reward, done, info = env.step(actions[i]) + next_state, reward, done, trunc, info = env.step(actions[i]) print("sars', done =", state, actions[i], reward, next_state, done) np.testing.assert_allclose( reward, @@ -2262,7 +2262,7 @@ def test_discrete_diameter(self): state = next_state - env.reset() + env.reset()[0] env.close() # Sub-test 2 Have sequence length greater than the diameter and check selected rewardable sequences @@ -2333,7 +2333,7 @@ def test_discrete_diameter(self): 0, ] # 1st, 3rd and 4th states produce 'true' rewards, every reward has been shifted by 1 for i in range(len(actions)): - next_state, reward, done, info = env.step(actions[i]) + next_state, reward, done, trunc, info = env.step(actions[i]) print("sars', done =", state, actions[i], reward, next_state, done) np.testing.assert_allclose( reward, @@ -2347,7 +2347,7 @@ def test_discrete_diameter(self): state = next_state - env.reset() + env.reset()[0] env.close() # Unit tests diff --git a/tests/test_run_experiments.py b/tests/test_run_experiments.py index a427c06..226993e 100644 --- a/tests/test_run_experiments.py +++ b/tests/test_run_experiments.py @@ -71,7 +71,7 @@ def test_dqn_test_expt(self): # from glob import glob # expt_list = glob("experiments/*.py") - # # sel_expt_list = np.random.randint(0, len(expt_list), 10) + # # sel_expt_list = np.random.integers(0, len(expt_list), 10) # expt_list = np.random.permutation(expt_list) # for i in range(2): # conf_file = expt_list[i] diff --git a/tests/test_version.py b/tests/test_version.py index 03d5fd6..7100a93 100644 --- a/tests/test_version.py +++ b/tests/test_version.py @@ -2,4 +2,4 @@ def test_version(): - assert __version__ == "0.0.2" + assert __version__ == "1.0.0"
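Note on the call convention used throughout the updated tests above: in Gymnasium, reset() returns an (observation, info) pair and step() returns a 5-tuple in which the old single done flag is split into terminated and truncated. That is why the tests now unpack "done, trunc, info" from step() and take env.reset()[0] where only the observation is needed. Below is a minimal sketch of the same migration on a stock Gymnasium environment; CartPole-v1 and the variable names are used purely for illustration and are not part of this patch.

    import gymnasium as gym

    # Old Gym API:
    #   obs = env.reset()
    #   next_obs, reward, done, info = env.step(action)

    # Gymnasium API, as applied in the updated tests:
    env = gym.make("CartPole-v1")
    obs, info = env.reset(seed=0)      # reset() now returns (obs, info)
    terminated = truncated = False
    while not (terminated or truncated):
        action = env.action_space.sample()
        # step() now returns a 5-tuple; the old done flag is terminated or truncated
        obs, reward, terminated, truncated, info = env.step(action)
    env.close()

Code that still needs a single episode-end flag, as in the assertions and print statements above, can reconstruct it as done = terminated or truncated.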
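The tests also move from the legacy np.random.seed / np.random.randint calls to NumPy's Generator API (np.random.default_rng and Generator.integers). A small sketch of that pattern follows; the seed 0 and the names rng, P and R mirror the custom P/R test above but are only illustrative here.

    import numpy as np

    # Legacy API:
    #   np.random.seed(0)
    #   P = np.random.randint(8, size=(8, 5))

    # Generator API:
    rng = np.random.default_rng(0)      # seed once and reuse the generator for reproducible draws
    P = rng.integers(8, size=(8, 5))    # like randint: integers in [0, 8)
    R = rng.integers(4, size=(8, 5))
    noise = rng.normal(0, 0.5, size=7)  # analogous to the reward/transition noise lambdas in the tests

Note that default_rng uses a different bit generator than the legacy seeded np.random functions, so even with the same seed the draws will not match the old stream; hard-coded expected values that were computed under the legacy seed have to be regenerated rather than carried over.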