Skip to content

Commit

Permalink
prevented gradient in IS calculations and returned numpy array from IS d3rlpy scorer
Browse files Browse the repository at this point in the history
  • Loading branch information
joshuaspear committed Feb 25, 2024
1 parent 9996b30 commit fe3bc26
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 5 deletions.
2 changes: 1 addition & 1 deletion src/offline_rl_ope/api/d3rlpy/Scorers/IS.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def __call__(
weights=self.cache[self.is_type].traj_is_weights,
is_msk=self.cache.weight_msk, discount=self.discount
)
return res
return res.numpy()



Expand Down
9 changes: 5 additions & 4 deletions src/offline_rl_ope/components/ImportanceSampler.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,11 @@ def get_traj_w(self, states:torch.Tensor, actions:torch.Tensor,
logger.debug("states.shape: {}".format(states.shape))
logger.debug("actions.shape: {}".format(actions.shape))
raise Exception("State and actions should have 2 dimensions")
behav_probs = self.__behav_policy(action=actions,
state=states)
#logger.debug("behav_probs: {}".format(behav_probs))
eval_probs = eval_policy(action=actions, state=states)
with torch.no_grad():
behav_probs = self.__behav_policy(action=actions,
state=states)
#logger.debug("behav_probs: {}".format(behav_probs))
eval_probs = eval_policy(action=actions, state=states)
#logger.debug("eval_probs: {}".format(eval_probs))
weight_array = eval_probs/behav_probs
weight_array = weight_array.view(-1)
Expand Down

0 comments on commit fe3bc26

Please sign in to comment.