-
Notifications
You must be signed in to change notification settings - Fork 311
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
This adds the following primitives to torch: - DiscreteQFDerivedPolicy - DiscreteMLPQFunction
- Loading branch information
Showing
13 changed files
with
317 additions
and
28 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
"""A Discrete QFunction-derived policy. | ||
This policy chooses the action that yields the largest Q-value. | ||
""" | ||
import numpy as np | ||
import torch | ||
|
||
from garage.torch.policies.policy import Policy | ||
|
||
|
||
class DiscreteQFArgmaxPolicy(Policy):
    """Policy that derives its actions from a learned Q function.

    The action returned is the one that yields the highest Q value for
    a given state, as determined by the supplied Q function.

    Args:
        qf (object): Q network. It is called with a batch of observations
            and its output is reduced with an argmax over dimension 1.
        env_spec (EnvSpec): Environment specification.
        name (str): Name of this policy.

    """

    def __init__(self, qf, env_spec, name='DiscreteQFArgmaxPolicy'):
        super().__init__(env_spec, name)
        self._qf = qf

    # pylint: disable=arguments-differ
    def forward(self, observations):
        """Get actions corresponding to a batch of observations.

        Args:
            observations(torch.Tensor): Batch of observations of shape
                :math:`(N, O)`. Observations should be flattened even
                if they are images as the underlying Q network handles
                unflattening.

        Returns:
            torch.Tensor: Index of the highest Q-value for each
                observation. Assuming the Q network returns a tensor of
                shape :math:`(N, A)`, the result has shape :math:`(N, )`
                because the argmax removes dimension 1.

        """
        qs = self._qf(observations)
        return torch.argmax(qs, dim=1)

    def get_action(self, observation):
        """Get a single action given an observation.

        Args:
            observation (np.ndarray): Observation with shape :math:`(O, )`.

        Returns:
            np.integer: Predicted action, i.e. the first element of the
                array produced by :meth:`get_actions` for a batch of one.
            dict: Empty since this policy does not produce a distribution.

        """
        # Add a leading batch axis so the batched code path can be reused.
        act, dist = self.get_actions(np.expand_dims(observation, axis=0))
        return act[0], dist

    def get_actions(self, observations):
        """Get actions given observations.

        Args:
            observations (np.ndarray): Batch of observations, should
                have shape :math:`(N, O)`.

        Returns:
            np.ndarray: Predicted actions (argmax indices), one per
                observation.
            dict: Empty since this policy does not produce a distribution.

        """
        # Acting does not need gradients, so skip building the graph.
        with torch.no_grad():
            actions = self(torch.Tensor(observations))
            # Tensor.numpy() only works on CPU tensors; .cpu() is a no-op
            # when the result is already there, so this is always safe.
            return actions.cpu().numpy(), dict()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
"""This modules creates a continuous Q-function network.""" | ||
|
||
from torch import nn | ||
from torch.nn import functional as F | ||
|
||
from garage.torch.modules import MLPModule | ||
|
||
|
||
# pytorch v1.6 issue, see https://github.com/pytorch/pytorch/issues/42305 | ||
# pylint: disable=abstract-method | ||
class DiscreteMLPQFunction(MLPModule):
    """MLP Q-value network for discrete action spaces.

    Given an input state, the network predicts the Q-value of every
    possible action. The input width is the flattened observation
    dimension and the output width is the flattened action dimension,
    both read from ``env_spec``.

    Args:
        env_spec (EnvSpec): Environment specification.
        hidden_sizes (list[int]): Output dimension of dense layer(s).
            For example, (32, 32) means this MLP consists of two
            hidden layers, each with 32 hidden units.
        hidden_nonlinearity (callable or torch.nn.Module): Activation
            function for intermediate dense layer(s). It should return a
            torch.Tensor. Set it to None to maintain a linear activation.
        hidden_w_init (callable): Initializer function for the weight
            of intermediate dense layer(s). The function should return a
            torch.Tensor.
        hidden_b_init (callable): Initializer function for the bias
            of intermediate dense layer(s). The function should return a
            torch.Tensor.
        output_nonlinearity (callable or torch.nn.Module): Activation
            function for output dense layer. It should return a
            torch.Tensor. Set it to None to maintain a linear activation.
        output_w_init (callable): Initializer function for the weight
            of output dense layer(s). The function should return a
            torch.Tensor.
        output_b_init (callable): Initializer function for the bias
            of output dense layer(s). The function should return a
            torch.Tensor.
        layer_normalization (bool): Bool for using layer normalization or
            not.

    """

    def __init__(self,
                 env_spec,
                 hidden_sizes,
                 hidden_nonlinearity=F.relu,
                 hidden_w_init=nn.init.xavier_normal_,
                 hidden_b_init=nn.init.zeros_,
                 output_nonlinearity=None,
                 output_w_init=nn.init.xavier_normal_,
                 output_b_init=nn.init.zeros_,
                 layer_normalization=False):
        # Arguments are forwarded positionally, in the parent's order:
        # input width, output width, then the MLP configuration.
        super().__init__(
            env_spec.observation_space.flat_dim,
            env_spec.action_space.flat_dim,
            hidden_sizes,
            hidden_nonlinearity,
            hidden_w_init,
            hidden_b_init,
            output_nonlinearity,
            output_w_init,
            output_b_init,
            layer_normalization,
        )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.