fixed bug in behavPolicy testing. Added metrics and tests
joshuaspear committed Jan 25, 2024
1 parent 6bb313b commit 0238094
Showing 10 changed files with 144 additions and 9 deletions.
18 changes: 16 additions & 2 deletions README.md
@@ -1,7 +1,8 @@
# offline_rl_ope (BETA RELEASE)

**WARNING: Weighted importance sampling was incorrectly implemented in versions 1.X.X and 2.1.X, 2.2.X**
**WARNING: Unit testing currently only running in Python 3.11. 3.10 will be supported in the future**
**WARNING**
- Weighted importance sampling was incorrectly implemented in versions 1.X.X and 2.1.X, 2.2.X
- Unit testing currently only running in Python 3.11. 3.10 will be supported in the future

**IMPORTANT: THIS IS A BETA RELEASE. FUNCTIONALITY IS STILL BEING TESTED** Feedback/contributions are welcome :)

@@ -14,6 +15,10 @@
- [x] utils.py
- [x] DirectMethod.py*
- [x] DoublyRobust.py
- [x] Metrics
- [x] EffectiveSampleSize.py
- [x] ValidWeightsProp.py
- [ ] PropensityModels
- [ ] LowerBounds
- [ ] api/d3rlpy

@@ -82,6 +87,15 @@ If importance sampling based methods are evaluating to 0, consider visualising t
The different kinds of importance samples can also be visualised by querying the ```traj_is_weights``` attribute of a given ```ImportanceSampler``` object. If, for example, vanilla importance sampling is being used and the samples are not ```NaN``` or ```Inf```, then visualising the ```traj_is_weights``` may provide insight. In particular, IS weights will tend to infinity when the evaluation policy places large density on an action in comparison to the behaviour policy.
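As a minimal sketch (assuming an existing ```ImportanceSampler``` instance named ```is_sampler```; the variable name is illustrative), the weights could be inspected with a log-scale histogram:

```python
import matplotlib.pyplot as plt

# traj_is_weights holds the per-trajectory, per-timestep importance weights
weights = is_sampler.traj_is_weights.detach().cpu().flatten()

# A log-scale histogram makes weights that collapse to 0 or explode easy to spot
plt.hist(weights.numpy(), bins=50, log=True)
plt.xlabel("importance weight")
plt.ylabel("count (log scale)")
plt.show()
```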

### Release log
#### 4.0.0
* Predefined propensity models including:
* Generic feedforward MLP for continuous and discrete action spaces built in PyTorch
* XGBoost for continuous and discrete action spaces built in sklearn
* Both the PyTorch and sklearn models can handle sparse discrete action spaces, i.e., a propensity model can be exposed to 'new' actions provided the full action space definition is supplied when the propensity model is trained
* Metrics pattern with:
* Effective sample size calculation
* Proportion of valid weights, i.e., the mean proportion of weights per trajectory that fall between a minimum and maximum value
* Refactored the BehavPolicy class to accept a 'policy_func', aligning it with the other policy classes
#### 3.0.3
* 3.10 support
#### 3.0.2
21 changes: 21 additions & 0 deletions src/offline_rl_ope/Metrics/EffectiveSampleSize.py
@@ -0,0 +1,21 @@
import torch

from ..components.ImportanceSampler import ImportanceSampler

__all__ = [
    "EffectiveSampleSize"
]

class EffectiveSampleSize:

    def __init__(self, is_obj:ImportanceSampler) -> None:
        self.__is_obj = is_obj

    def __ess(self) -> float:
        numer = torch.sum(torch.pow(self.__is_obj.traj_is_weights,2))
        denom = torch.pow(torch.sum(self.__is_obj.traj_is_weights),2)
        return (numer/denom).item()

    def __call__(self) -> float:
        return self.__ess()
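A minimal usage sketch for the metric above, with a duck-typed stand-in for a full ```ImportanceSampler``` (the ```DummySampler``` class and the weight values are illustrative, mirroring the stub used in the unit tests):

```python
import torch

from offline_rl_ope.Metrics import EffectiveSampleSize


class DummySampler:
    """Stand-in for an ImportanceSampler: only traj_is_weights is accessed here."""

    def __init__(self, weights: torch.Tensor) -> None:
        self.traj_is_weights = weights


weights = torch.tensor([[0.5, 1.2, 0.8], [2.0, 0.1, 0.7]])
metric = EffectiveSampleSize(is_obj=DummySampler(weights))
print(metric())  # sum(w^2) / (sum(w))^2 over all weights, as computed above
```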
31 changes: 31 additions & 0 deletions src/offline_rl_ope/Metrics/ValidWeightsProp.py
@@ -0,0 +1,31 @@
import torch

from ..components.ImportanceSampler import ImportanceSampler

__all__ = [
    "ValidWeightsProp"
]

class ValidWeightsProp:

    def __init__(
        self,
        is_obj:ImportanceSampler,
        min_w:float,
        max_w:float
    ) -> None:
        self.__is_obj = is_obj
        self.__min_w = min_w
        self.__max_w = max_w

    def __valid_weights(self) -> float:
        vw_mask = (
            (self.__is_obj.traj_is_weights > self.__min_w) &
            (self.__is_obj.traj_is_weights < self.__max_w)
        )
        vw_num = torch.sum(vw_mask, axis=1)
        vw_denom = torch.sum(self.__is_obj.is_weight_calc.weight_msk, axis=1)
        return torch.mean(vw_num/vw_denom).item()

    def __call__(self) -> float:
        return self.__valid_weights()
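A minimal usage sketch for the metric above, again with illustrative stand-in objects (```DummyCalc``` and ```DummySampler``` are hypothetical; a real ```ImportanceSampler``` exposes ```traj_is_weights``` and ```is_weight_calc.weight_msk```, which is all the class accesses):

```python
import torch

from offline_rl_ope.Metrics import ValidWeightsProp


class DummyCalc:
    def __init__(self, msk: torch.Tensor) -> None:
        self.weight_msk = msk


class DummySampler:
    def __init__(self, weights: torch.Tensor, msk: torch.Tensor) -> None:
        self.traj_is_weights = weights
        self.is_weight_calc = DummyCalc(msk)


weights = torch.tensor([[0.5, 1.2, 0.0], [2.0, 0.1, 0.7]])
msk = torch.ones_like(weights)  # every timestep contributes a weight

metric = ValidWeightsProp(
    is_obj=DummySampler(weights, msk),
    min_w=1e-6,
    max_w=1e4,
)
print(metric())  # mean per-trajectory share of weights inside (min_w, max_w)
```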
2 changes: 2 additions & 0 deletions src/offline_rl_ope/Metrics/__init__.py
@@ -0,0 +1,2 @@
from .EffectiveSampleSize import *
from .ValidWeightsProp import *
2 changes: 1 addition & 1 deletion src/offline_rl_ope/_version.py
@@ -1 +1 @@
__version__ = "3.0.3"
__version__ = "4.0.0"
2 changes: 1 addition & 1 deletion src/offline_rl_ope/components/Policy.py
@@ -49,7 +49,7 @@ def __call__(self, state:torch.Tensor, action:torch.Tensor)->torch.Tensor:

class BehavPolicy(Policy):

    def __init__(self, policy_func, collect_res:bool=False,
    def __init__(self, policy_func:Callable, collect_res:bool=False,
                 collect_act:bool=False) -> None:
        super().__init__(policy_func, collect_res=collect_res,
                         collect_act=collect_act)
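A minimal sketch of the refactored constructor (the behaviour-policy function below is hypothetical, its argument names simply follow the unit test further down, and the import path is inferred from the file layout):

```python
import numpy as np

from offline_rl_ope.components.Policy import BehavPolicy


def behav_probs(y, x):
    # Hypothetical behaviour-policy density: return the probability the
    # behaviour policy assigns to each (state, action) pair.
    # y: actions, x: states (check the Policy docstrings for the exact contract)
    return np.full((len(x), 1), 0.5)


behav_policy = BehavPolicy(policy_func=behav_probs)
```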
Empty file added tests/Metrics/__init__.py
Empty file.
26 changes: 26 additions & 0 deletions tests/Metrics/test_EffectiveSampleSize.py
@@ -0,0 +1,26 @@
import unittest
import torch
import logging
import numpy as np
import copy
from offline_rl_ope.Metrics import EffectiveSampleSize
from ..base import weight_test_res

logger = logging.getLogger("offline_rl_ope")

class TestImportanceSampler:

    def __init__(self) -> None:
        self.is_weight_calc = None
        self.traj_is_weights = weight_test_res


class EffectiveSampleSizeTest(unittest.TestCase):

    def test_call(self):
        num = torch.sum(torch.pow(weight_test_res,2))
        denum = torch.pow(torch.sum(weight_test_res),2)
        act_res = (num/denum).item()
        metric = EffectiveSampleSize(is_obj=TestImportanceSampler())
        pred_res = metric()
        self.assertEqual(act_res,pred_res)
39 changes: 39 additions & 0 deletions tests/Metrics/test_ValidWeightsProp.py
@@ -0,0 +1,39 @@
import unittest
import torch
import logging
import numpy as np
import copy
from offline_rl_ope.Metrics import ValidWeightsProp
from ..base import weight_test_res, msk_test_res

logger = logging.getLogger("offline_rl_ope")


class TestImportanceCalc:

    def __init__(self) -> None:
        self.weight_msk = msk_test_res


class TestImportanceSampler:

    def __init__(self) -> None:
        self.is_weight_calc = None
        self.traj_is_weights = weight_test_res
        self.is_weight_calc = TestImportanceCalc()


class TestValidWeightsProp(unittest.TestCase):

    def test_call(self):
        max_val=10000
        min_val=0.000001
        num = (weight_test_res > min_val) & (weight_test_res < max_val)
        num = torch.sum(num, axis=1)
        denum = torch.sum(msk_test_res, axis=1)
        act_res = torch.mean(num/denum).item()
        metric = ValidWeightsProp(
            is_obj=TestImportanceSampler(),
            max_w=max_val,
            min_w=min_val
        )
        pred_res = metric()
        self.assertEqual(act_res,pred_res)
12 changes: 7 additions & 5 deletions tests/components/test_Policy.py
@@ -73,18 +73,20 @@ def __init__(self) -> None:
class BehavPolicyTest(unittest.TestCase):

    def setUp(self) -> None:
        def __mock_return(dep_vals, indep_vals):
        def __mock_return(y, x):
            lkp = {
                "_".join([str(np.array(state).astype(float)),
                          str(np.array(act).astype(float))]): np.array(probs)
                for state,act,probs in zip(
                    test_state_vals, test_action_vals,
                    test_action_probs)
            }
            return lkp["_".join([str(indep_vals),str(dep_vals)])]
        policy_class = MockPolicyClass()
        policy_class.eval_pdf = MagicMock(side_effect=__mock_return)
        self.policy = BehavPolicy(policy_class)
            return lkp["_".join([str(x),str(y)])]
        #policy_class = MockPolicyClass()
        #policy_class.__call__ = MagicMock(side_effect=__mock_return)
        #self.policy = BehavPolicy(policy_class)
        self.policy = BehavPolicy(
            policy_func=MagicMock(side_effect=__mock_return))


def test___call__(self):
