diff --git a/addons/python/cfg/pendulum_sarsa.yaml b/addons/python/cfg/pendulum_sarsa.yaml new file mode 100644 index 00000000..8550bb24 --- /dev/null +++ b/addons/python/cfg/pendulum_sarsa.yaml @@ -0,0 +1,67 @@ +experiment: + type: experiment/online_learning + runs: 1 + trials: 1000 + steps: 0 + rate: 0 + test_interval: 10 + environment: + type: environment/modeled + model: + type: model/dynamical + control_step: 0.03 + integration_steps: 5 + dynamics: + type: dynamics/pendulum + task: + type: task/pendulum/swingup + timeout: 2.99 + randomization: 0 + agent: + type: agent/td + policy: + type: mapping/policy/discrete/value/q + discretizer: + type: discretizer/uniform + min: experiment/environment/task/action_min + max: experiment/environment/task/action_max + steps: [3] + projector: + type: projector/tile_coding + tilings: 16 + memory: 8388608 + safe: 0 + resolution: [0.31415, 3.1415, 3] + wrapping: [6.283, 0, 0] + representation: + type: representation/parameterized/linear + init_min: [0] + init_max: [1] + memory: experiment/agent/policy/projector/memory + outputs: 1 + output_min: [] + output_max: [] + sampler: + type: sampler/epsilon_greedy + rand_max: 0 + epsilon: 0.05 + predictor: + type: predictor/critic/sarsa + alpha: 0.2 + gamma: 0.97 + lambda: 0.65 + projector: experiment/agent/policy/projector + representation: experiment/agent/policy/representation + trace: + type: trace/enumerated/replacing + test_agent: + type: agent/fixed + policy: + type: mapping/policy/discrete/value/q + discretizer: experiment/agent/policy/discretizer + projector: experiment/agent/policy/projector + representation: experiment/agent/policy/representation + sampler: + type: sampler/greedy + rand_max: 0 + save_every: never diff --git a/addons/python/share/experiment_example.py b/addons/python/share/experiment_example.py new file mode 100755 index 00000000..0cf52030 --- /dev/null +++ b/addons/python/share/experiment_example.py @@ -0,0 +1,14 @@ +#!/usr/bin/python3 +# +# GRL experiment 
example + +import gym +import grlpy + +# Create GRL agent +conf = grlpy.Configurator("../cfg/pendulum_sarsa.yaml") +inst = conf.instantiate() +exp = grlpy.Experiment(inst["experiment"]) +exp.run() +exp.reset() +exp.run() diff --git a/addons/python/src/grlpy.cpp b/addons/python/src/grlpy.cpp index 49114da2..8a02edec 100644 --- a/addons/python/src/grlpy.cpp +++ b/addons/python/src/grlpy.cpp @@ -94,6 +94,9 @@ PYBIND11_MODULE(grlpy, m) { mycfg.set(item.first.attr("__str__")().cast<std::string>(), item.second.attr("__str__")().cast<std::string>()); conf.reconfigure(mycfg); + }) + .def("reset", [](Configurable &conf) { + conf.reset(); }); // Experiment diff --git a/externals/pybind11/include/pybind11/detail/class.h b/externals/pybind11/include/pybind11/detail/class.h index 7a5dd013..060c0a80 100644 --- a/externals/pybind11/include/pybind11/detail/class.h +++ b/externals/pybind11/include/pybind11/detail/class.h @@ -222,7 +222,9 @@ inline bool deregister_instance_impl(void *ptr, instance *self) { auto &registered_instances = get_internals().registered_instances; auto range = registered_instances.equal_range(ptr); for (auto it = range.first; it != range.second; ++it) { - if (Py_TYPE(self) == Py_TYPE(it->second)) { + // #2252 / 2b6b98e + //if (Py_TYPE(self) == Py_TYPE(it->second)) { + if (self == it->second) { registered_instances.erase(it); return true; }