From bf8d506db90e15694538274c70d5fd826411c599 Mon Sep 17 00:00:00 2001
From: Joshua Spear
Date: Fri, 1 Mar 2024 10:31:09 +0000
Subject: [PATCH] Fixed per-decision weighted IS. Updated testing. Altered
 effective sample size to return nan if all weights are 0

---
 README.md                                 |  11 ++
 .../Metrics/EffectiveSampleSize.py        |   9 +-
 src/offline_rl_ope/OPEEstimators/IS.py    |   6 +-
 src/offline_rl_ope/OPEEstimators/utils.py |  81 ++++++--
 tests/Metrics/test_EffectiveSampleSize.py |   9 +-
 tests/OPEEstimators/test_DoublyRobust.py  |   8 +-
 tests/OPEEstimators/test_IS.py            |   5 +-
 tests/OPEEstimators/test_utils.py         | 183 ++++++++++++++++--
 8 files changed, 264 insertions(+), 48 deletions(-)

diff --git a/README.md b/README.md
index d0ceb6a..b05615c 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,7 @@
 # offline_rl_ope (BETA RELEASE)
 
 **WARNING**
+- Per-decision weighted importance sampling was incorrectly implemented in versions < 5.X
 - Weighted importance sampling was incorrectly implemented in versions 1.X.X and 2.1.X, 2.2.X
 - Unit testing currently only running in Python 3.11. 3.10 will be supported in the future
 - Only 1 dimensional discrete action spaces are currently supported!
@@ -88,6 +89,16 @@ If importance sampling based methods are evaluating to 0, consider visualising
 The different kinds of importance samples can also be visualised by querying the ```traj_is_weights``` attribute of a given ```ImportanceSampler``` object. If, for example, vanilla importance sampling is being used and the samples are not ```NaN``` or ```Inf```, then visualising the ```traj_is_weights``` may provide insight. In particular, IS weights will tend to infinity when the evaluation policy places a large density on an action in comparison to the behaviour policy.
 
 ### Release log
+#### 5.0.0
+* Correctly implemented per-decision weighted importance sampling
+* Expanded the different types of weights that can be implemented (see the usage sketch after this diff), based on:
+  * http://proceedings.mlr.press/v48/jiang16.pdf: Per-decision weights are defined as the average weight at a given timepoint. This results in a different denominator for different timepoints. This is implemented with ```WISWeightNorm(avg_denom=True)```
+  * https://scholarworks.umass.edu/cgi/viewcontent.cgi?article=1079&context=cs_faculty_pubs: Per-decision weights are defined as the sum of discounted weights across all timesteps. This is implemented with ```WISWeightNorm(discount=discount_value)```
+  * Combinations of different weights can easily be implemented, for example 'average discounted weights' via ```WISWeightNorm(discount=discount_value, avg_denom=True)```; however, these do not necessarily have backing from the literature
+* EffectiveSampleSize metric optionally returns nan if all weights are 0
+* Bug fixes:
+  * Fixed a bug when running on CUDA where tensors were not being pushed to the CPU
+  * Improved static typing
 #### 4.0.0
 * Predefined propensity models including:
   * Generic feedforward MLP for continuous and discrete action spaces built in PyTorch
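The normalisation options above compose freely; a minimal usage sketch (the tensor values below are hypothetical, only ```WISWeightNorm``` and its ```smooth_eps```/```avg_denom```/```discount``` arguments come from this release):

```python
import torch
from offline_rl_ope.OPEEstimators.utils import WISWeightNorm

# Hypothetical per-decision IS ratios for 2 trajectories over 3 timesteps;
# the second trajectory terminates after 2 steps, hence the mask.
traj_is_weights = torch.tensor([[1.2, 0.8, 0.5],
                                [0.4, 1.6, 0.0]])
is_msk = torch.tensor([[1.0, 1.0, 1.0],
                       [1.0, 1.0, 0.0]])

# Plain per-decision WIS: each timestep is divided by its summed weight
wis = WISWeightNorm()
# Average denominator, per the first reference above (Jiang & Li, 2016)
wis_avg = WISWeightNorm(avg_denom=True)
# Discounted denominator, per the second reference above
wis_disc = WISWeightNorm(discount=0.99)

for calc in (wis, wis_avg, wis_disc):
    print(calc(traj_is_weights=traj_is_weights, is_msk=is_msk))
```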
diff --git a/src/offline_rl_ope/Metrics/EffectiveSampleSize.py b/src/offline_rl_ope/Metrics/EffectiveSampleSize.py
index 79ed63b..34004e7 100644
--- a/src/offline_rl_ope/Metrics/EffectiveSampleSize.py
+++ b/src/offline_rl_ope/Metrics/EffectiveSampleSize.py
@@ -12,12 +12,13 @@ def __init__(self, nan_if_all_0:bool=True) -> None:
 
     def __ess(self, weights:torch.Tensor) -> float:
         # https://victorelvira.github.io/papers/kong92.pdf
-        weights = weights.sum(dim=1)
-        numer = len(weights)
-        w_var = torch.var(weights).item()
-        if (w_var == 0) and (self.__nan_if_all_0):
+        all_0 = (weights == 0).all().item()
+        if (all_0) and (self.__nan_if_all_0):
             res = np.nan
         else:
+            weights = weights.sum(dim=1)
+            numer = len(weights)
+            w_var = torch.var(weights).item()
             res = (numer/(1+w_var))
         return res
 
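For reference, the refactored ```__ess``` applies the Kong (1992) approximation ESS ≈ n/(1 + Var(w)) to per-trajectory weight sums, short-circuiting to ```nan``` when every weight is exactly 0. A standalone restatement of that logic with hypothetical inputs:

```python
import torch

def ess(weights: torch.Tensor, nan_if_all_0: bool = True) -> float:
    """Kong (1992): ESS ~= n / (1 + Var(w)), computed on per-trajectory
    weight sums; nan when all weights are exactly 0 (degenerate case)."""
    if (weights == 0).all().item() and nan_if_all_0:
        return float("nan")
    per_traj = weights.sum(dim=1)  # one summed weight per trajectory
    return len(per_traj) / (1 + torch.var(per_traj).item())

# Hypothetical weights (2 trajectories x 2 timesteps):
print(ess(torch.tensor([[0.5, 0.25], [1.5, 0.75]])))  # ~0.94
print(ess(torch.zeros(2, 2)))                         # nan
```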
diff --git a/src/offline_rl_ope/OPEEstimators/IS.py b/src/offline_rl_ope/OPEEstimators/IS.py
index 8b207c4..672ac9f 100644
--- a/src/offline_rl_ope/OPEEstimators/IS.py
+++ b/src/offline_rl_ope/OPEEstimators/IS.py
@@ -3,7 +3,7 @@
 from typing import Any, Dict, List
 
 from .utils import (
-    WISNormWeights, NormWeightsPass, WeightNorm,
+    WISWeightNorm, VanillaNormWeights, WeightNorm,
     clip_weights_pass as cwp, clip_weights as cw
     )
 
@@ -23,9 +23,9 @@ def __init__(
     ) -> None:
         super().__init__(cache_traj_rewards)
         if norm_weights:
-            _norm_weights = WISNormWeights(**norm_kwargs)
+            _norm_weights = WISWeightNorm(**norm_kwargs)
         else:
-            _norm_weights = NormWeightsPass(**norm_kwargs)
+            _norm_weights = VanillaNormWeights(**norm_kwargs)
         self.norm_weights:WeightNorm = _norm_weights
         self.clip = clip
         if clip_weights:
diff --git a/src/offline_rl_ope/OPEEstimators/utils.py b/src/offline_rl_ope/OPEEstimators/utils.py
index 2ec5deb..0719f8c 100644
--- a/src/offline_rl_ope/OPEEstimators/utils.py
+++ b/src/offline_rl_ope/OPEEstimators/utils.py
@@ -9,25 +9,52 @@ class WeightNorm(metaclass=ABCMeta):
     def __call__(self, traj_is_weights:torch.Tensor, is_msk:torch.Tensor
                  ) -> torch.Tensor:
         pass
+
+# When avg_denom=True, is_msk.sum(axis=0, keepdim=True) is used in the
+# denominator since it is required to take the average over valid time t
+# importance ratios. This may differ for different episodes.
+# ref: http://proceedings.mlr.press/v48/jiang16.pdf
+
+
+class WISWeightNorm(WeightNorm):
 
-class WISNormWeights(WeightNorm):
-
-    def __init__(self, smooth_eps:float=0.0, *args, **kwargs) -> None:
+    def __init__(
+        self,
+        smooth_eps:float=0.0,
+        avg_denom:bool=False,
+        discount:float=1,
+        *args,
+        **kwargs
+        ) -> None:
         self.smooth_eps = smooth_eps
+        self.avg_denom = avg_denom
+        self.discount = discount
 
-    def calc_norm(self, traj_is_weights:torch.Tensor, is_msk:torch.Tensor
-                  ) -> torch.Tensor:
-        """Calculates the denominator for weighted importance sampling i.e.
-        w_{t} = 1/n sum_{i=1}^{n} p_{1:t}. Note, if traj_is_weights represent
-        vanilla IS samples then this will be w_{t} = 1/n sum_{i=1}^{n} p_{1:H}
-        for all samples. is_msk.sum(axis=0, keepdim=True) is taken as the
-        denominator since it is required to take the average over valid time t
-        importance ratios. This may differ for different episodes.
-        ref: http://proceedings.mlr.press/v48/jiang16.pdf
+    def calc_norm(
+        self,
+        traj_is_weights:torch.Tensor,
+        is_msk:torch.Tensor
+        ) -> torch.Tensor:
+        """Calculates the denominator for weighted importance sampling.
         smooth_eps prevents nan values occurring in instances where there
         exist valid time t importance ratios but these are all 0. This
         should be set as small as possible.
-
+        avg_denom: defines the denominator as the average weight for time t
+        as per http://proceedings.mlr.press/v48/jiang16.pdf
+
+        Note:
+        - If traj_is_weights represents vanilla IS samples then:
+            - The denominator will be w_{t} = sum_{i=1}^{n} p_{1:H} for all
+              samples.
+            - If avg_denom is set to true, the denominator will be
+              w_{t} = 1/n_{t} sum_{i=1}^{n} p_{1:H} where n_{t} is the number
+              of trajectories of at least length t.
+        - If traj_is_weights represents PD IS samples then:
+            - The denominator will be w_{t} = sum_{i=1}^{n} p_{1:t}.
+            - If avg_denom is set to true, the denominator will be
+              w_{t} = 1/n_{t} sum_{i=1}^{n} p_{1:t} where n_{t} is the number
+              of trajectories of at least length t. This definition aligns
+              with http://proceedings.mlr.press/v48/jiang16.pdf
 
         Args:
            traj_is_weights (torch.Tensor): (# trajectories, max(traj_length))
               Tensor. traj_is_weights[i,j] defines the jth timestep propensity
@@ -40,11 +67,19 @@ def calc_norm(self, traj_is_weights:torch.Tensor, is_msk:torch.Tensor
             torch.Tensor: Tensor of dimension (1, max(traj_length)) defining
                 the normalisation value for each timestep
         """
-        denom:torch.Tensor = traj_is_weights.sum(dim=0, keepdim=True)
-        denom = (denom+self.smooth_eps)/(
-            is_msk.sum(dim=0, keepdim=True)+self.smooth_eps)
+        discnt_tens = torch.full(traj_is_weights.shape, self.discount)
+        discnt_pows = torch.arange(0, traj_is_weights.shape[1])[None,:].repeat(
+            traj_is_weights.shape[0],1)
+        discnt_tens = torch.pow(discnt_tens,discnt_pows)
+        traj_is_weights = torch.mul(traj_is_weights,discnt_tens)
+        denom = (
+            traj_is_weights.sum(dim=0, keepdim=True) + self.smooth_eps
+            )
+        if self.avg_denom:
+            denom = denom/(
+                is_msk.sum(dim=0, keepdim=True)+self.smooth_eps)
         return denom
-
+
     def __call__(self, traj_is_weights:torch.Tensor, is_msk:torch.Tensor
                  ) -> torch.Tensor:
         """Normalised propensity weights according to
@@ -63,10 +98,12 @@ def __call__(self, traj_is_weights:torch.Tensor, is_msk:torch.Tensor
                 with normalised weights
         """
         denom = self.calc_norm(traj_is_weights=traj_is_weights, is_msk=is_msk)
-        res = traj_is_weights/(denom+self.smooth_eps)
+        res = traj_is_weights/denom
         return res
+
+
 
-class NormWeightsPass(WeightNorm):
+class VanillaNormWeights(WeightNorm):
 
     def __init__(self, *args, **kwargs) -> None:
         pass
@@ -84,9 +121,11 @@ def __call__(self, traj_is_weights:torch.Tensor, is_msk:torch.Tensor
                 ith trajectory was observed
 
         Returns:
-            torch.Tensor: Identical tensor to traj_is_weights
+            torch.Tensor: traj_is_weights divided element-wise by the number
+            of trajectories
         """
-        return traj_is_weights
+        # The first dimension defines the number of trajectories and we require
+        # the average over trajectories
+        return traj_is_weights/traj_is_weights.shape[0]
 
 def clip_weights(
     traj_is_weights:torch.Tensor,
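The defining property of the new denominator: with the defaults (```discount=1```, ```avg_denom=False```) the normalised per-decision weights at every valid timestep sum to 1. A quick check with hypothetical values:

```python
import torch
from offline_rl_ope.OPEEstimators.utils import WISWeightNorm

# Hypothetical ratios; column t holds p_{1:t} for each of 2 trajectories.
w = torch.tensor([[0.9, 1.8], [1.1, 0.2]])
msk = torch.ones(2, 2)

norm = WISWeightNorm()(traj_is_weights=w, is_msk=msk)
# Each column is divided by its own sum, so columns sum to 1.
print(norm.sum(dim=0))  # tensor([1., 1.])
```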
diff --git a/tests/Metrics/test_EffectiveSampleSize.py b/tests/Metrics/test_EffectiveSampleSize.py
index 6d6fe10..10a5878 100644
--- a/tests/Metrics/test_EffectiveSampleSize.py
+++ b/tests/Metrics/test_EffectiveSampleSize.py
@@ -23,6 +23,9 @@ def test_call(self):
         assert len(weights) == 2
         denum = 1 + torch.var(weights)
         act_res = (num/denum).item()
-        metric = EffectiveSampleSize(is_obj=TestImportanceSampler())
-        pred_res = metric()
-        self.assertEqual(act_res,pred_res)
\ No newline at end of file
+        metric = EffectiveSampleSize(nan_if_all_0=True)
+        pred_res = metric(
+            weights=weight_test_res
+        )
+        tol = act_res/1000
+        np.testing.assert_allclose(pred_res, act_res, atol=tol)
\ No newline at end of file
diff --git a/tests/OPEEstimators/test_DoublyRobust.py b/tests/OPEEstimators/test_DoublyRobust.py
index a1c0f72..5cfda2a 100644
--- a/tests/OPEEstimators/test_DoublyRobust.py
+++ b/tests/OPEEstimators/test_DoublyRobust.py
@@ -120,8 +120,11 @@ def v_side_effect(state:torch.Tensor):
             weights=weight_test_res, discount=gamma,
             is_msk=msk_test_res)
+        #weight_test_res = weight_test_res/weight_test_res.shape[0]
+        denom = weight_test_res.shape[0]
         for idx, (r,s,a,w,msk) in enumerate(zip(rewards, states, actions,
                                             weight_test_res, msk_test_res)):
+            w = w/denom
             p = torch.masked_select(w, msk>0)
             __test_res = is_est.get_traj_discnt_reward(
                 reward_array=r, discount=gamma, state_array=s, action_array=a,
@@ -129,9 +132,8 @@ def v_side_effect(state:torch.Tensor):
             test_res.append(__test_res.numpy())
         #test_res = np.concatenate(test_res).mean()
         test_res = np.concatenate(test_res)
-        tol = (test_res.mean()/1000).item()
+        tol = (np.abs(test_res.mean()/100)).item()
         self.assertEqual(pred_res.shape, torch.Size((len(rewards),)))
-        np.testing.assert_allclose(pred_res.numpy(),test_res, atol=tol)
-
+        np.testing.assert_allclose(pred_res.numpy(),test_res, atol=tol)
\ No newline at end of file
diff --git a/tests/OPEEstimators/test_IS.py b/tests/OPEEstimators/test_IS.py
index 0af439a..f88c597 100644
--- a/tests/OPEEstimators/test_IS.py
+++ b/tests/OPEEstimators/test_IS.py
@@ -58,7 +58,10 @@ def __mock_return(rewards, discount, h):
         pred_res = self.is_estimator.predict_traj_rewards(
             rewards=rewards, actions=[], states=[],
             weights=weight_test_res, discount=gamma, is_msk=msk_test_res)
-        test_res = np.multiply(reward_test_res.numpy(), weight_test_res.numpy())
+        test_res = np.multiply(
+            reward_test_res.numpy(),
+            weight_test_res.numpy()/weight_test_res.shape[0]
+        )
         test_res=test_res.sum(axis=1)
         #test_res = test_res.sum(axis=1).mean()
         tol = test_res.mean()/1000
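These test changes follow from ```VanillaNormWeights``` now folding the 1/n Monte Carlo average into the weights. A sketch (hypothetical tensors, discounting omitted for brevity) of the identity the tests rely on:

```python
import torch

# Hypothetical rewards and per-decision IS ratios (2 trajectories, 2 steps).
rewards = torch.tensor([[1.0, 0.5], [0.0, 2.0]])
weights = torch.tensor([[0.9, 1.8], [1.1, 0.2]])

n = weights.shape[0]
# Dividing the weights by n up front means that summing the resulting
# per-trajectory returns reproduces the Monte Carlo mean:
per_traj = (rewards * (weights / n)).sum(dim=1)
assert torch.isclose(per_traj.sum(), (rewards * weights).sum(dim=1).mean())
```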
diff --git a/tests/OPEEstimators/test_utils.py b/tests/OPEEstimators/test_utils.py
index 9dca121..2d00b9e 100644
--- a/tests/OPEEstimators/test_utils.py
+++ b/tests/OPEEstimators/test_utils.py
@@ -3,7 +3,7 @@
 import torch
 import unittest
 from offline_rl_ope.OPEEstimators.utils import (
-    clip_weights, clip_weights_pass, NormWeightsPass, WISNormWeights)
+    clip_weights, clip_weights_pass, VanillaNormWeights, WISWeightNorm)
 from ..base import (weight_test_res, msk_test_res)
 
 weight_test_res_alter = copy.deepcopy(weight_test_res)
@@ -37,10 +37,11 @@ def test_clip_weights_pass(self):
     #     np.testing.assert_allclose(pred_res.numpy(), test_res.numpy(),
     #                                atol=toll.numpy())
 
-    def test_norm_weights_pass(self):
-        test_res = copy.deepcopy(weight_test_res)
+    def test_norm_weights_vanilla(self):
+        denom = weight_test_res.shape[0]
+        test_res = weight_test_res/denom
         toll = test_res.mean()/1000
-        calculator = NormWeightsPass()
+        calculator = VanillaNormWeights()
         pred_res = calculator(traj_is_weights=weight_test_res,
                               is_msk=msk_test_res)
         self.assertEqual(pred_res.shape,weight_test_res.shape)
@@ -48,10 +49,10 @@ def test_norm_weights_pass(self):
                                    atol=toll.numpy())
 
     def test_norm_weights_wis(self):
-        denom = weight_test_res.sum(dim=0)/msk_test_res.sum(dim=0)
+        denom = weight_test_res.sum(dim=0)
         test_res = weight_test_res/denom
         toll = test_res.mean()/1000
-        calculator = WISNormWeights()
+        calculator = WISWeightNorm()
         pred_res = calculator(traj_is_weights=weight_test_res,
                               is_msk=msk_test_res)
         self.assertEqual(pred_res.shape,weight_test_res.shape)
@@ -60,11 +61,10 @@ def test_norm_weights_wis(self):
 
     def test_norm_weights_wis_smooth(self):
         smooth_eps = 0.00000001
-        denom = (weight_test_res_alter.sum(dim=0)+smooth_eps)/(
-            msk_test_res.sum(dim=0)+smooth_eps)
-        test_res: torch.Tensor = weight_test_res_alter/(denom)
+        denom = weight_test_res_alter.sum(dim=0)+smooth_eps
+        test_res = weight_test_res_alter/denom
         toll = test_res.nanmean()/1000
-        calculator = WISNormWeights(smooth_eps=smooth_eps)
+        calculator = WISWeightNorm(smooth_eps=smooth_eps)
         pred_res = calculator(traj_is_weights=weight_test_res_alter,
                               is_msk=msk_test_res)
         self.assertEqual(pred_res.shape,weight_test_res_alter.shape)
@@ -72,13 +72,170 @@ def test_norm_weights_wis_smooth(self):
                                    atol=toll.numpy())
 
     def test_norm_weights_wis_no_smooth(self):
-        denom = weight_test_res_alter.sum(dim=0)/msk_test_res.sum(dim=0)
-        test_res: torch.Tensor = weight_test_res_alter/denom
+        denom = weight_test_res_alter.sum(dim=0)
+        test_res = weight_test_res_alter/denom
         toll = test_res.nanmean()/1000
-        calculator = WISNormWeights()
+        calculator = WISWeightNorm()
         pred_res = calculator(traj_is_weights=weight_test_res_alter,
                               is_msk=msk_test_res)
         self.assertEqual(pred_res.shape,weight_test_res_alter.shape)
         np.testing.assert_allclose(pred_res.numpy(), test_res.numpy(),
                                    atol=toll.numpy(), equal_nan=True)
+    def test_norm_weights_wis_smooth_discount(self):
+        smooth_eps = 0.00000001
+        discount=0.99
+        discnt_tens = torch.full(
+            weight_test_res_alter.shape,
+            discount
+        )
+        discnt_pows = torch.arange(
+            0, weight_test_res_alter.shape[1])[None,:].repeat(
+                weight_test_res_alter.shape[0],1
+        )
+        discnt_tens = torch.pow(discnt_tens,discnt_pows)
+        denom = torch.mul(
+            weight_test_res_alter,
+            discnt_tens
+        )
+        denom = denom.sum(dim=0)+smooth_eps
+        test_res = weight_test_res_alter/denom
+        toll = test_res.nanmean()/1000
+        calculator = WISWeightNorm(
+            smooth_eps=smooth_eps,
+            discount=discount
+        )
+        pred_res = calculator(traj_is_weights=weight_test_res_alter,
+                              is_msk=msk_test_res)
+        self.assertEqual(pred_res.shape,weight_test_res_alter.shape)
+        np.testing.assert_allclose(pred_res.numpy(), test_res.numpy(),
+                                   atol=toll.numpy())
+
+    def test_norm_weights_wis_no_smooth_discount(self):
+        discount=0.99
+        discnt_tens = torch.full(
+            weight_test_res_alter.shape,
+            discount
+        )
+        discnt_pows = torch.arange(
+            0, weight_test_res_alter.shape[1])[None,:].repeat(
+                weight_test_res_alter.shape[0],1
+        )
+        discnt_tens = torch.pow(discnt_tens,discnt_pows)
+        denom = torch.mul(
+            weight_test_res_alter,
+            discnt_tens
+        )
+        denom = denom.sum(dim=0)
+        test_res = weight_test_res_alter/denom
+        toll = test_res.nanmean()/1000
+        calculator = WISWeightNorm(
+            discount=discount
+        )
+        pred_res = calculator(traj_is_weights=weight_test_res_alter,
+                              is_msk=msk_test_res)
+        self.assertEqual(pred_res.shape,weight_test_res_alter.shape)
+        np.testing.assert_allclose(pred_res.numpy(), test_res.numpy(),
+                                   atol=toll.numpy())
+
+    def test_norm_weights_wis_smooth_avg(self):
+        smooth_eps = 0.00000001
+        time_t_freq = msk_test_res.sum(dim=0, keepdim=True).repeat(
+            msk_test_res.shape[0],1
+        )
+        denom = weight_test_res_alter/time_t_freq
+        denom = denom.sum(dim=0)+smooth_eps
+        test_res = weight_test_res_alter/denom
+        toll = test_res.nanmean()/1000
+        calculator = WISWeightNorm(
+            smooth_eps=smooth_eps,
+            avg_denom=True
+        )
+        pred_res = calculator(traj_is_weights=weight_test_res_alter,
+                              is_msk=msk_test_res)
+        self.assertEqual(pred_res.shape,weight_test_res_alter.shape)
+        np.testing.assert_allclose(pred_res.numpy(), test_res.numpy(),
+                                   atol=toll.numpy())
+
+    def test_norm_weights_wis_no_smooth_avg(self):
+        time_t_freq = msk_test_res.sum(dim=0, keepdim=True).repeat(
+            msk_test_res.shape[0],1
+        )
+        denom = weight_test_res_alter/time_t_freq
+        denom = denom.sum(dim=0)
+        test_res = weight_test_res_alter/denom
+        toll = test_res.nanmean()/1000
+        calculator = WISWeightNorm(
+            avg_denom=True
+        )
+        pred_res = calculator(traj_is_weights=weight_test_res_alter,
+                              is_msk=msk_test_res)
+        self.assertEqual(pred_res.shape,weight_test_res_alter.shape)
+        np.testing.assert_allclose(pred_res.numpy(), test_res.numpy(),
+                                   atol=toll.numpy())
+
+    def test_norm_weights_wis_smooth_discount_avg(self):
+        smooth_eps = 0.00000001
+        discount=0.99
+        discnt_tens = torch.full(
+            weight_test_res_alter.shape,
+            discount
+        )
+        discnt_pows = torch.arange(
+            0, weight_test_res_alter.shape[1])[None,:].repeat(
+                weight_test_res_alter.shape[0],1
+        )
+        discnt_tens = torch.pow(discnt_tens,discnt_pows)
+        denom = torch.mul(
+            weight_test_res_alter,
+            discnt_tens
+        )
+        time_t_freq = msk_test_res.sum(dim=0, keepdim=True).repeat(
+            msk_test_res.shape[0],1
+        )
+        denom = denom/time_t_freq
+        denom = denom.sum(dim=0)+smooth_eps
+        test_res = weight_test_res_alter/denom
+        toll = test_res.nanmean()/1000
+        calculator = WISWeightNorm(
+            smooth_eps=smooth_eps,
+            discount=discount,
+            avg_denom=True
+        )
+        pred_res = calculator(traj_is_weights=weight_test_res_alter,
+                              is_msk=msk_test_res)
+        self.assertEqual(pred_res.shape,weight_test_res_alter.shape)
+        np.testing.assert_allclose(pred_res.numpy(), test_res.numpy(),
+                                   atol=toll.numpy())
+
+    def test_norm_weights_wis_no_smooth_discount_avg(self):
+        discount=0.99
+        discnt_tens = torch.full(
+            weight_test_res_alter.shape,
+            discount
+        )
+        discnt_pows = torch.arange(
+            0, weight_test_res_alter.shape[1])[None,:].repeat(
+                weight_test_res_alter.shape[0],1
+        )
+        discnt_tens = torch.pow(discnt_tens,discnt_pows)
+        denom = torch.mul(
+            weight_test_res_alter,
+            discnt_tens
+        )
+        time_t_freq = msk_test_res.sum(dim=0, keepdim=True).repeat(
+            msk_test_res.shape[0],1
+        )
+        denom = denom/time_t_freq
+        denom = denom.sum(dim=0)
+        test_res = weight_test_res_alter/denom
+        toll = test_res.nanmean()/1000
+        calculator = WISWeightNorm(
+            discount=discount,
+            avg_denom=True
+        )
+        pred_res = calculator(traj_is_weights=weight_test_res_alter,
+                              is_msk=msk_test_res)
+        self.assertEqual(pred_res.shape,weight_test_res_alter.shape)
+        np.testing.assert_allclose(pred_res.numpy(), test_res.numpy(),
+                                   atol=toll.numpy())
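A final numeric check (hypothetical values) of the combined ```discount``` plus ```avg_denom``` denominator that the last two tests exercise:

```python
import torch
from offline_rl_ope.OPEEstimators.utils import WISWeightNorm

# Hypothetical ratios; the second trajectory is one step shorter (see mask).
w = torch.tensor([[1.0, 2.0], [3.0, 0.0]])
msk = torch.tensor([[1.0, 1.0], [1.0, 0.0]])
discount = 0.99

# 'Average discounted' denominator: scale column t by discount**t, sum over
# trajectories, then divide by n_t, the number of valid weights at time t.
expected = (w * discount ** torch.arange(2)).sum(dim=0) / msk.sum(dim=0)
got = WISWeightNorm(discount=discount, avg_denom=True)(
    traj_is_weights=w, is_msk=msk)
assert torch.allclose(got, w / expected)
```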