fixed bug in behavPolicy testing. Added metrics and tests
joshuaspear committed Jan 25, 2024
1 parent 6bb313b commit 0238094
Showing 10 changed files with 144 additions and 9 deletions.
18 changes: 16 additions & 2 deletions README.md
@@ -1,7 +1,8 @@
# offline_rl_ope (BETA RELEASE)

**WARNING: Weighted importance sampling was incorrectly implemented in versions 1.X.X and 2.1.X, 2.2.X**
**WARNING: Unit testing currently only running in Python 3.11. 3.10 will be supported in the future**
**WARNING**
- Weighted importance sampling was incorrectly implemented in versions 1.X.X and 2.1.X, 2.2.X
- Unit testing currently only running in Python 3.11. 3.10 will be supported in the future

**IMPORTANT: THIS IS A BETA RELEASE. FUNCTIONALITY IS STILL BEING TESTED** Feedback/contributions are welcome :)

@@ -14,6 +15,10 @@
- [x] utils.py
- [x] DirectMethod.py*
- [x] DoublyRobust.py
- [x] Metrics
- [x] EffectiveSampleSize.py
- [x] ValidWeightsProp.py
- [ ] PropensityModels
- [ ] LowerBounds
- [ ] api/d3rlpy

@@ -82,6 +87,15 @@ If importance sampling based methods are evaluating to 0, consider visualising t
The different kinds of importance samples can also be visualised by querying the ```traj_is_weights``` attribute of a given ```ImportanceSampler``` object. If, for example, vanilla importance sampling is being used and the samples are not ```NaN``` or ```Inf```, then visualising the ```traj_is_weights``` may provide insight. In particular, IS weights will tend to infinity when the evaluation policy places large density on an action in comparison to the behaviour policy.
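As a minimal sketch (assuming an existing ```ImportanceSampler``` instance named ```is_sampler```; the variable name is illustrative), the weights could be inspected with a log-scale histogram:

```python
import matplotlib.pyplot as plt

# traj_is_weights holds the per-trajectory, per-timestep importance weights
weights = is_sampler.traj_is_weights.detach().cpu().flatten()

# A log-scale histogram makes weights that collapse to 0 or explode easy to spot
plt.hist(weights.numpy(), bins=50, log=True)
plt.xlabel("importance weight")
plt.ylabel("count (log scale)")
plt.show()
```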

### Release log
#### 4.0.0
* Predefined propensity models including:
* Generic feedforward MLP for continuous and discrete action spaces built in PyTorch
* XGBoost for continuous and discrete action spaces built in sklearn
* Both the PyTorch and sklearn models can handle sparse discrete action spaces, i.e., a propensity model can be exposed to 'new' actions provided the full action space definition is supplied when the propensity model is trained
* Metrics pattern with:
* Effective sample size calculation
* Proportion of valid weights, i.e., the mean proportion of weights per trajectory that fall between a minimum and maximum value
* Refactored the BehavPolicy class to accept a 'policy_func', aligning it with the other policy classes
#### 3.0.3
* 3.10 support
#### 3.0.2
21 changes: 21 additions & 0 deletions src/offline_rl_ope/Metrics/EffectiveSampleSize.py
@@ -0,0 +1,21 @@
import torch

from ..components.ImportanceSampler import ImportanceSampler

__all__ = [
    "EffectiveSampleSize"
]

class EffectiveSampleSize:

    def __init__(self, is_obj:ImportanceSampler) -> None:
        self.__is_obj = is_obj

    def __ess(self) -> float:
        numer = torch.sum(torch.pow(self.__is_obj.traj_is_weights,2))
        denom = torch.pow(torch.sum(self.__is_obj.traj_is_weights),2)
        return (numer/denom).item()

    def __call__(self) -> float:
        return self.__ess()
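A minimal usage sketch for the metric above, with a duck-typed stand-in for a full ```ImportanceSampler``` (the ```DummySampler``` class and the weight values are illustrative, mirroring the stub used in the unit tests):

```python
import torch

from offline_rl_ope.Metrics import EffectiveSampleSize


class DummySampler:
    """Stand-in for an ImportanceSampler: only traj_is_weights is accessed here."""

    def __init__(self, weights: torch.Tensor) -> None:
        self.traj_is_weights = weights


weights = torch.tensor([[0.5, 1.2, 0.8], [2.0, 0.1, 0.7]])
metric = EffectiveSampleSize(is_obj=DummySampler(weights))
print(metric())  # sum(w^2) / (sum(w))^2 over all weights, as computed above
```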
31 changes: 31 additions & 0 deletions src/offline_rl_ope/Metrics/ValidWeightsProp.py
@@ -0,0 +1,31 @@
import torch

from ..components.ImportanceSampler import ImportanceSampler

__all__ = [
    "ValidWeightsProp"
]

class ValidWeightsProp:

    def __init__(
        self,
        is_obj:ImportanceSampler,
        min_w:float,
        max_w:float
    ) -> None:
        self.__is_obj = is_obj
        self.__min_w = min_w
        self.__max_w = max_w

    def __valid_weights(self) -> float:
        vw_mask = (
            (self.__is_obj.traj_is_weights > self.__min_w) &
            (self.__is_obj.traj_is_weights < self.__max_w)
        )
        vw_num = torch.sum(vw_mask, axis=1)
        vw_denom = torch.sum(self.__is_obj.is_weight_calc.weight_msk, axis=1)
        return torch.mean(vw_num/vw_denom).item()

    def __call__(self) -> float:
        return self.__valid_weights()
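A minimal usage sketch for the metric above, again with illustrative stand-in objects (```DummyCalc``` and ```DummySampler``` are hypothetical; a real ```ImportanceSampler``` exposes ```traj_is_weights``` and ```is_weight_calc.weight_msk```, which is all the class accesses):

```python
import torch

from offline_rl_ope.Metrics import ValidWeightsProp


class DummyCalc:
    def __init__(self, msk: torch.Tensor) -> None:
        self.weight_msk = msk


class DummySampler:
    def __init__(self, weights: torch.Tensor, msk: torch.Tensor) -> None:
        self.traj_is_weights = weights
        self.is_weight_calc = DummyCalc(msk)


weights = torch.tensor([[0.5, 1.2, 0.0], [2.0, 0.1, 0.7]])
msk = torch.ones_like(weights)  # every timestep contributes a weight

metric = ValidWeightsProp(
    is_obj=DummySampler(weights, msk),
    min_w=1e-6,
    max_w=1e4,
)
print(metric())  # mean per-trajectory share of weights inside (min_w, max_w)
```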
2 changes: 2 additions & 0 deletions src/offline_rl_ope/Metrics/__init__.py
@@ -0,0 +1,2 @@
from .EffectiveSampleSize import *
from .ValidWeightsProp import *
2 changes: 1 addition & 1 deletion src/offline_rl_ope/_version.py
@@ -1 +1 @@
__version__ = "3.0.3"
__version__ = "4.0.0"
2 changes: 1 addition & 1 deletion src/offline_rl_ope/components/Policy.py
@@ -49,7 +49,7 @@ def __call__(self, state:torch.Tensor, action:torch.Tensor)->torch.Tensor:

class BehavPolicy(Policy):

    def __init__(self, policy_func, collect_res:bool=False,
    def __init__(self, policy_func:Callable, collect_res:bool=False,
                 collect_act:bool=False) -> None:
        super().__init__(policy_func, collect_res=collect_res,
                         collect_act=collect_act)
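A minimal sketch of the refactored constructor (the behaviour-policy function below is hypothetical, its argument names simply follow the unit test further down, and the import path is inferred from the file layout):

```python
import numpy as np

from offline_rl_ope.components.Policy import BehavPolicy


def behav_probs(y, x):
    # Hypothetical behaviour-policy density: return the probability the
    # behaviour policy assigns to each (state, action) pair.
    # y: actions, x: states (check the Policy docstrings for the exact contract)
    return np.full((len(x), 1), 0.5)


behav_policy = BehavPolicy(policy_func=behav_probs)
```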
Empty file added tests/Metrics/__init__.py
Empty file.
26 changes: 26 additions & 0 deletions tests/Metrics/test_EffectiveSampleSize.py
@@ -0,0 +1,26 @@
import unittest
import torch
import logging
import numpy as np
import copy
from offline_rl_ope.Metrics import EffectiveSampleSize
from ..base import weight_test_res

logger = logging.getLogger("offline_rl_ope")

class TestImportanceSampler:

    def __init__(self) -> None:
        self.is_weight_calc = None
        self.traj_is_weights = weight_test_res


class EffectiveSampleSizeTest(unittest.TestCase):

    def test_call(self):
        num = torch.sum(torch.pow(weight_test_res,2))
        denum = torch.pow(torch.sum(weight_test_res),2)
        act_res = (num/denum).item()
        metric = EffectiveSampleSize(is_obj=TestImportanceSampler())
        pred_res = metric()
        self.assertEqual(act_res,pred_res)
39 changes: 39 additions & 0 deletions tests/Metrics/test_ValidWeightsProp.py
@@ -0,0 +1,39 @@
import unittest
import torch
import logging
import numpy as np
import copy
from offline_rl_ope.Metrics import ValidWeightsProp
from ..base import weight_test_res, msk_test_res

logger = logging.getLogger("offline_rl_ope")


class TestImportanceCalc:

    def __init__(self) -> None:
        self.weight_msk = msk_test_res


class TestImportanceSampler:

    def __init__(self) -> None:
        self.is_weight_calc = None
        self.traj_is_weights = weight_test_res
        self.is_weight_calc = TestImportanceCalc()


class TestValidWeightsProp(unittest.TestCase):

    def test_call(self):
        max_val=10000
        min_val=0.000001
        num = (weight_test_res > min_val) & (weight_test_res < max_val)
        num = torch.sum(num, axis=1)
        denum = torch.sum(msk_test_res, axis=1)
        act_res = torch.mean(num/denum).item()
        metric = ValidWeightsProp(
            is_obj=TestImportanceSampler(),
            max_w=max_val,
            min_w=min_val
        )
        pred_res = metric()
        self.assertEqual(act_res,pred_res)
12 changes: 7 additions & 5 deletions tests/components/test_Policy.py
@@ -73,18 +73,20 @@ def __init__(self) -> None:
class BehavPolicyTest(unittest.TestCase):

    def setUp(self) -> None:
        def __mock_return(dep_vals, indep_vals):
        def __mock_return(y, x):
            lkp = {
                "_".join([str(np.array(state).astype(float)),
                          str(np.array(act).astype(float))]): np.array(probs)
                for state,act,probs in zip(
                    test_state_vals, test_action_vals,
                    test_action_probs)
            }
            return lkp["_".join([str(indep_vals),str(dep_vals)])]
        policy_class = MockPolicyClass()
        policy_class.eval_pdf = MagicMock(side_effect=__mock_return)
        self.policy = BehavPolicy(policy_class)
            return lkp["_".join([str(x),str(y)])]
        #policy_class = MockPolicyClass()
        #policy_class.__call__ = MagicMock(side_effect=__mock_return)
        #self.policy = BehavPolicy(policy_class)
        self.policy = BehavPolicy(
            policy_func=MagicMock(side_effect=__mock_return))


def test___call__(self):
