Skip to content

Commit

Permalink
fixed scripts and added dataset_size code
Browse files Browse the repository at this point in the history
  • Loading branch information
davidzhu27 committed Apr 27, 2024
1 parent 2c05f94 commit 2d3436c
Show file tree
Hide file tree
Showing 14 changed files with 249 additions and 90 deletions.
2 changes: 2 additions & 0 deletions algorithms/offline/cql.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ class TrainConfig:
num_berno: int = 1
out_name: str = ""
quick_stop: int = 0
dataset_size_multiplier: float = 1.0

# Experiment
device: str = "cuda"
Expand Down Expand Up @@ -921,6 +922,7 @@ def train(config: TrainConfig):
pbrl_dataset = generate_pbrl_dataset_no_overlap(dataset, pbrl_dataset_file_path=f'CORL/saved/pbrl_datasets/pbrl_dataset_{config.env}_{num_t}_{len_t}_numTrials={num_trials}_noOVLP.npz', num_t=num_t, len_t=len_t)
dataset = label_by_trajectory_reward(dataset, pbrl_dataset, num_t=num_t, len_t=len_t, num_trials=num_trials)

dataset = small_d4rl_dataset(dataset, dataset_size_multiplier=config.dataset_size_multiplier)
if config.quick_stop:
return

Expand Down
3 changes: 3 additions & 0 deletions algorithms/offline/iql.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ class TrainConfig:
num_berno: int = 1
out_name: str = ""
quick_stop: int = 0
dataset_size_multiplier: float = 1.0

# Experiment
device: str = "cuda"
Expand Down Expand Up @@ -595,6 +596,8 @@ def train(config: TrainConfig):
pbrl_dataset = generate_pbrl_dataset_no_overlap(dataset, pbrl_dataset_file_path=f'CORL/saved/pbrl_datasets/pbrl_dataset_{config.env}_{num_t}_{len_t}_numTrials={num_trials}_noOVLP.npz', num_t=num_t, len_t=len_t)
dataset = label_by_trajectory_reward(dataset, pbrl_dataset, num_t=num_t, len_t=len_t, num_trials=num_trials)

dataset = small_d4rl_dataset(dataset, dataset_size_multiplier=config.dataset_size_multiplier)

if config.quick_stop:
return

Expand Down
3 changes: 2 additions & 1 deletion algorithms/offline/pbrl.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,8 +257,9 @@ def pick_and_calc_reward(dataset, starting_indices, len_t):
r = np.sum(dataset['rewards'][n0:n0+len_t])
return ns, r

def small_d4rl_dataset(dataset, n_states):
def small_d4rl_dataset(dataset, dataset_size_multiplier=1.0):
smaller = dataset.copy()
n_states = int(dataset_size_multiplier * dataset['observations'].shape[0])
smaller['observations'] = smaller['observations'][:n_states]
smaller['actions'] = smaller['actions'][:n_states]
smaller['next_observations'] = smaller['next_observations'][:n_states]
Expand Down
25 changes: 25 additions & 0 deletions commands_dataset_sizes_1.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#!/bin/bash

# Dataset-size sweep for CQL on halfcheetah-medium-expert-v2.
# Each reward setting is run for seeds 1-3, and every seed is run with
# --dataset_size_multiplier 0.5, 0.2 and 0.1 (in that order).

# sweep LABEL LATENT_REWARD BIN_LABEL
#   LABEL          suffix used in --out_name (followed by "-<multiplier>")
#   LATENT_REWARD  value passed to --latent_reward
#   BIN_LABEL      value passed to --bin_label
sweep() {
    local label=$1
    local latent_reward=$2
    local bin_label=$3
    local seed multiplier

    for seed in 1 2 3
    do
        for multiplier in 0.5 0.2 0.1
        do
            python CORL/algorithms/offline/cql.py \
                --project "Experiments_CORL" \
                --env "halfcheetah-medium-expert-v2" \
                --out_name "cql-halfcheetah-medium-expert-${label}-${multiplier}" \
                --num_t 49920 \
                --len_t 20 \
                --latent_reward "$latent_reward" \
                --bin_label "$bin_label" \
                --num_berno 1 \
                --bin_label_trajectory_batch 0 \
                --bin_label_allow_overlap 1 \
                --seed "$seed" \
                --quick_stop 0 \
                --dataset_size_multiplier "$multiplier"
        done
    done
}

# originals
sweep "original" 0 0

# latent rewards
sweep "latent_reward" 1 0

# binary labels
sweep "binary_labels" 0 1
25 changes: 25 additions & 0 deletions commands_dataset_sizes_2.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#!/bin/bash

# Dataset-size sweep for CQL on hopper-medium-replay-v2.
# Each reward setting is run for seeds 1-3, and every seed is run with
# --dataset_size_multiplier 0.5, 0.2 and 0.1 (in that order).

# sweep LABEL LATENT_REWARD BIN_LABEL
#   LABEL          suffix used in --out_name (followed by "-<multiplier>")
#   LATENT_REWARD  value passed to --latent_reward
#   BIN_LABEL      value passed to --bin_label
sweep() {
    local label=$1
    local latent_reward=$2
    local bin_label=$3
    local seed multiplier

    for seed in 1 2 3
    do
        for multiplier in 0.5 0.2 0.1
        do
            python CORL/algorithms/offline/cql.py \
                --project "Experiments_CORL" \
                --env "hopper-medium-replay-v2" \
                --out_name "cql-hopper-medium-replay-${label}-${multiplier}" \
                --num_t 49920 \
                --len_t 20 \
                --latent_reward "$latent_reward" \
                --bin_label "$bin_label" \
                --num_berno 1 \
                --bin_label_trajectory_batch 0 \
                --bin_label_allow_overlap 1 \
                --seed "$seed" \
                --quick_stop 0 \
                --dataset_size_multiplier "$multiplier"
        done
    done
}

# originals
sweep "original" 0 0

# latent rewards
sweep "latent_reward" 1 0

# binary labels
sweep "binary_labels" 0 1
29 changes: 14 additions & 15 deletions commands_halfcheetah_main_cql.sh
Original file line number Diff line number Diff line change
@@ -1,19 +1,20 @@
#!/bin/bash

# originals
for seed in 1 2 3
do
python CORL/algorithms/offline/cql.py --project "Experiments_CORL" --env "halfcheetah-medium-expert-v2" --out_name "cql-halfcheetah-medium-expert-original" --num_t 49920 --len_t 20 --latent_reward 0 --bin_label 0 --num_berno 1 --bin_label_trajectory_batch 0 --bin_label_allow_overlap 1 --seed $seed --quick_stop 0
python CORL/algorithms/offline/cql.py --project "Experiments_CORL" --env "halfcheetah-medium-replay-v2" --out_name "cql-halfcheetah-medium-replay-original" --num_t 5000 --len_t 20 --latent_reward 0 --bin_label 0 --num_berno 1 --bin_label_trajectory_batch 0 --bin_label_allow_overlap 1 --seed $seed --quick_stop 0
python CORL/algorithms/offline/cql.py --project "Experiments_CORL" --env "halfcheetah-medium-v2" --out_name "cql-halfcheetah-medium-original" --num_t 24960 --len_t 20 --latent_reward 0 --bin_label 0 --num_berno 1 --bin_label_trajectory_batch 0 --bin_label_allow_overlap 1 --seed $seed --quick_stop 0
# for seed in 1 2 3
# do
# python CORL/algorithms/offline/cql.py --project "Experiments_CORL" --env "halfcheetah-medium-expert-v2" --out_name "cql-halfcheetah-medium-expert-original" --num_t 49920 --len_t 20 --latent_reward 0 --bin_label 0 --num_berno 1 --bin_label_trajectory_batch 0 --bin_label_allow_overlap 1 --seed $seed --quick_stop 0
# python CORL/algorithms/offline/cql.py --project "Experiments_CORL" --env "halfcheetah-medium-replay-v2" --out_name "cql-halfcheetah-medium-replay-original" --num_t 5000 --len_t 20 --latent_reward 0 --bin_label 0 --num_berno 1 --bin_label_trajectory_batch 0 --bin_label_allow_overlap 1 --seed $seed --quick_stop 0
# python CORL/algorithms/offline/cql.py --project "Experiments_CORL" --env "halfcheetah-medium-v2" --out_name "cql-halfcheetah-medium-original" --num_t 24960 --len_t 20 --latent_reward 0 --bin_label 0 --num_berno 1 --bin_label_trajectory_batch 0 --bin_label_allow_overlap 1 --seed $seed --quick_stop 0
# done

# latent rewards
for seed in 1 2 3 4 5
do
python CORL/algorithms/offline/cql.py --project "Experiments_CORL" --env "halfcheetah-medium-expert-v2" --out_name "cql-halfcheetah-medium-expert-latent_reward" --num_t 49920 --len_t 20 --latent_reward 1 --bin_label 0 --num_berno 1 --bin_label_trajectory_batch 0 --bin_label_allow_overlap 1 --seed $seed --quick_stop 0
python CORL/algorithms/offline/cql.py --project "Experiments_CORL" --env "halfcheetah-medium-replay-v2" --out_name "cql-halfcheetah-medium-replay-latent_reward" --num_t 5000 --len_t 20 --latent_reward 1 --bin_label 0 --num_berno 1 --bin_label_trajectory_batch 0 --bin_label_allow_overlap 1 --seed $seed --quick_stop 0
python CORL/algorithms/offline/cql.py --project "Experiments_CORL" --env "halfcheetah-medium-v2" --out_name "cql-halfcheetah-medium-latent_reward" --num_t 24960 --len_t 20 --latent_reward 1 --bin_label 0 --num_berno 1 --bin_label_trajectory_batch 0 --bin_label_allow_overlap 1 --seed $seed --quick_stop 0
done
# # latent rewards
# for seed in 1 2 3
# do
# python CORL/algorithms/offline/cql.py --project "Experiments_CORL" --env "halfcheetah-medium-expert-v2" --out_name "cql-halfcheetah-medium-expert-latent_reward" --num_t 49920 --len_t 20 --latent_reward 1 --bin_label 0 --num_berno 1 --bin_label_trajectory_batch 0 --bin_label_allow_overlap 1 --seed $seed --quick_stop 0
# python CORL/algorithms/offline/cql.py --project "Experiments_CORL" --env "halfcheetah-medium-replay-v2" --out_name "cql-halfcheetah-medium-replay-latent_reward" --num_t 5000 --len_t 20 --latent_reward 1 --bin_label 0 --num_berno 1 --bin_label_trajectory_batch 0 --bin_label_allow_overlap 1 --seed $seed --quick_stop 0
# python CORL/algorithms/offline/cql.py --project "Experiments_CORL" --env "halfcheetah-medium-v2" --out_name "cql-halfcheetah-medium-latent_reward" --num_t 24960 --len_t 20 --latent_reward 1 --bin_label 0 --num_berno 1 --bin_label_trajectory_batch 0 --bin_label_allow_overlap 1 --seed $seed --quick_stop 0
# done

# latent rewards multiple bernoulli
for seed in 1 2 3
Expand All @@ -24,7 +25,7 @@ do
done

# binary labels
for seed in 1 2 3 4 5
for seed in 1 2 3
do
python CORL/algorithms/offline/cql.py --project "Experiments_CORL" --env "halfcheetah-medium-expert-v2" --out_name "cql-halfcheetah-medium-expert-binary_labels" --num_t 49920 --len_t 20 --latent_reward 0 --bin_label 1 --num_berno 1 --bin_label_trajectory_batch 0 --bin_label_allow_overlap 1 --seed $seed --quick_stop 0
python CORL/algorithms/offline/cql.py --project "Experiments_CORL" --env "halfcheetah-medium-replay-v2" --out_name "cql-halfcheetah-medium-replay-binary_labels" --num_t 5000 --len_t 20 --latent_reward 0 --bin_label 1 --num_berno 1 --bin_label_trajectory_batch 0 --bin_label_allow_overlap 1 --seed $seed --quick_stop 0
Expand All @@ -37,6 +38,4 @@ do
python CORL/algorithms/offline/cql.py --project "Experiments_CORL" --env "halfcheetah-medium-expert-v2" --out_name "cql-halfcheetah-medium-expert-binary_labels_multi_berno" --num_t 49920 --len_t 20 --latent_reward 0 --bin_label 1 --num_berno 10 --bin_label_trajectory_batch 0 --bin_label_allow_overlap 1 --seed $seed --quick_stop 0
python CORL/algorithms/offline/cql.py --project "Experiments_CORL" --env "halfcheetah-medium-replay-v2" --out_name "cql-halfcheetah-medium-replay-binary_labels_multi_berno" --num_t 5000 --len_t 20 --latent_reward 0 --bin_label 1 --num_berno 10 --bin_label_trajectory_batch 0 --bin_label_allow_overlap 1 --seed $seed --quick_stop 0
python CORL/algorithms/offline/cql.py --project "Experiments_CORL" --env "halfcheetah-medium-v2" --out_name "cql-halfcheetah-medium-binary_labels_multi_berno" --num_t 24960 --len_t 20 --latent_reward 0 --bin_label 1 --num_berno 10 --bin_label_trajectory_batch 0 --bin_label_allow_overlap 1 --seed $seed --quick_stop 0
done

done
Loading

0 comments on commit 2d3436c

Please sign in to comment.