diff --git a/examples/static_torch_deterministic_continuous.py b/examples/static_torch_deterministic_continuous.py
index 935bdd9..2629aba 100644
--- a/examples/static_torch_deterministic_continuous.py
+++ b/examples/static_torch_deterministic_continuous.py
@@ -17,7 +17,7 @@
 from offline_rl_ope.components.Policy import Policy, GreedyDeterministic
 from offline_rl_ope.components.ImportanceSampler import ISWeightOrchestrator
 from offline_rl_ope.OPEEstimators import (
-    ISEstimator, DREstimator, D3rlpyQlearnDM)
+    ISEstimator, WDR, D3rlpyQlearnDM)
 from offline_rl_ope.PropensityModels.torch import FullGuassian, TorchRegTrainer
 from offline_rl_ope.LowerBounds.HCOPE import get_lower_bound
 
@@ -176,9 +176,9 @@ def __call__(
 wis_estimator_smooth = ISEstimator(norm_weights=True, norm_kwargs={
     "smooth_eps":0.0000001
 })
-w_dr_estimator = DREstimator(
-    dm_model=fqe_dm_model, norm_weights=True,
-    ignore_nan=True)
+w_dr_estimator = WDR(
+    dm_model=fqe_dm_model,
+)
 
 
 res = is_estimator.predict(
diff --git a/examples/static_torch_stochastic_continuous.py b/examples/static_torch_stochastic_continuous.py
index 4d74dc8..85431fb 100644
--- a/examples/static_torch_stochastic_continuous.py
+++ b/examples/static_torch_stochastic_continuous.py
@@ -17,7 +17,7 @@
 from offline_rl_ope.components.Policy import Policy
 from offline_rl_ope.components.ImportanceSampler import ISWeightOrchestrator
 from offline_rl_ope.OPEEstimators import (
-    ISEstimator, DREstimator, D3rlpyQlearnDM)
+    ISEstimator, WDR, D3rlpyQlearnDM)
 from offline_rl_ope.PropensityModels.torch import FullGuassian, TorchRegTrainer
 from offline_rl_ope.LowerBounds.HCOPE import get_lower_bound
 
@@ -158,9 +158,9 @@
 wis_estimator_smooth = ISEstimator(norm_weights=True, norm_kwargs={
     "smooth_eps":0.0000001
 })
-w_dr_estimator = DREstimator(
-    dm_model=fqe_dm_model, norm_weights=True,
-    ignore_nan=True)
+w_dr_estimator = WDR(
+    dm_model=fqe_dm_model
+)
 
 
 res = is_estimator.predict(
diff --git a/examples/static_xgboost_discrete.py b/examples/static_xgboost_discrete.py
index 5795d95..73b9793 100644
--- a/examples/static_xgboost_discrete.py
+++ b/examples/static_xgboost_discrete.py
@@ -16,7 +16,7 @@
     GreedyDeterministic, Policy, NumpyPolicyFuncWrapper)
 from offline_rl_ope.components.ImportanceSampler import ISWeightOrchestrator
 from offline_rl_ope.OPEEstimators import (
-    ISEstimator, DREstimator, D3rlpyQlearnDM)
+    ISEstimator, WDR, D3rlpyQlearnDM)
 from offline_rl_ope.PropensityModels.sklearn import (
     SklearnDiscrete)
 from offline_rl_ope.LowerBounds.HCOPE import get_lower_bound
@@ -123,9 +123,9 @@
 wis_estimator_smooth = ISEstimator(norm_weights=True, norm_kwargs={
     "smooth_eps":0.0000001
 })
-w_dr_estimator = DREstimator(
-    dm_model=fqe_dm_model, norm_weights=True,
-    ignore_nan=True)
+w_dr_estimator = WDR(
+    dm_model=fqe_dm_model
+)
 
 
 res = is_estimator.predict(
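
The change is identical in all three examples: the weighted doubly robust estimator is now requested via the dedicated WDR class instead of DREstimator configured with norm_weights=True and ignore_nan=True. A minimal before/after sketch for porting other scripts, assuming only the import path and constructor call shown in the diff above (fqe_dm_model stands in for the D3rlpyQlearnDM direct-method model built earlier in each example; its setup is untouched by this patch):

    from offline_rl_ope.OPEEstimators import WDR

    # fqe_dm_model is the D3rlpyQlearnDM direct-method model constructed
    # earlier in each example script (elided here).
    fqe_dm_model = ...

    # Before:
    #   w_dr_estimator = DREstimator(
    #       dm_model=fqe_dm_model, norm_weights=True,
    #       ignore_nan=True)
    #
    # After: WDR is the weighted (self-normalised) doubly robust estimator,
    # so the explicit norm_weights/ignore_nan flags are no longer passed.
    w_dr_estimator = WDR(dm_model=fqe_dm_model)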