Skip to content

Commit

Permalink
switch to yaml config
Browse files Browse the repository at this point in the history
add rigid_fall to install package

change log folder name format
  • Loading branch information
jongyaoY committed Aug 30, 2024
1 parent f9eb5c1 commit 207e1fa
Show file tree
Hide file tree
Showing 11 changed files with 141 additions and 133 deletions.
32 changes: 32 additions & 0 deletions config/test_train.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Training params
use_cuda: true
batch_size: 16
num_workers: 0
lr_init: 1.0e-3
lr_decay_rate: 0.1
lr_decay_steps: 500
training_steps: 500
warmup_steps: 1
# Evaluation
loss_report_step: 100
save_model_step: 100
eval_step: 100
rollout_steps: 10
run_validate: true
num_eval_rollout: 2
save_video: false
# Dataset
data_path: "test_datasets/mujoco_moviA_500"
test_data_path: "test_datasets/mujoco_moviA_500.npz"
data_config:
noise_std: 3.0e-5
connectivity_radius: 0.01
input_seq_length: 3
# Logging
logging_folder: "log_test"
log_level: "info"
# continue_log_from: "2024-08-29-17:05"
# Simulator params
latent_dim: 128
message_passing_steps: 10
mlp_layers: 2
26 changes: 0 additions & 26 deletions config/train.json

This file was deleted.

35 changes: 35 additions & 0 deletions config/train.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Training params
use_cuda: true
batch_size: 64
num_workers: 16
lr_init: 1.0e-3
lr_decay_rate: 0.1
lr_decay_steps: 1.0e+6
training_steps: 1.0e+6
warmup_steps: 5
# Evaluation
loss_report_step: 2000
save_model_step: 2000
eval_step: 2000
rollout_steps: 50
run_validate: true
num_eval_rollout: 10
save_video: true
# Dataset
data_path: "datasets/mujoco_moviA_1000000.npz"
test_data_path: "datasets/mujoco_moviA_10000.npz"
data_config:
noise_std: 3.0e-5
connectivity_radius: 0.01
input_seq_length: 3
# Logging
logging_folder: "log"
log_level: "info"
# continue_log_from: "2024-08-29-17:05"
# Resume training
# model_file: "log/sim-pc/models/weights_itr_482000.ckpt"
# train_state_file: "log/sim-pc/models/train_state_itr_482000.ckpt"
# Simulator params
latent_dim: 128
message_passing_steps: 10
mlp_layers: 2
2 changes: 1 addition & 1 deletion fignet/data_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,7 @@ def _get_sample(self, idx):
return graph
else:
if os.path.exists(self._file_list[idx]):
return self._load_graph(self._file_list[idx])
return self._transform(self._load_graph(self._file_list[idx]))
else:
raise FileNotFoundError

Expand Down
2 changes: 1 addition & 1 deletion fignet/logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ class Logger:
def __init__(self, config):
self.config = config
time_str = datetime.datetime.fromtimestamp(time.time()).strftime(
"%Y%m%d%H%M"
"%Y-%m-%d-%H:%M"
)
self.tb_prefix = ""
if config.get("continue_log_from") is None:
Expand Down
43 changes: 39 additions & 4 deletions fignet/trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import torch.utils
import torch.utils.data
import tqdm
import yaml
from torchvision import transforms as T

from fignet.data_loader import MujocoDataset, ToTensor, collate_fn
Expand Down Expand Up @@ -99,10 +100,10 @@ def __init__(
# Optimization params
self._lr_init = config["lr_init"]
self._lr_decay_rate = config["lr_decay_rate"]
self._lr_decay_steps = config["lr_decay_steps"]
self._loss_report_step = config["loss_report_step"]
self._save_model_step = config["save_model_step"]
self._eval_step = config["eval_step"]
self._lr_decay_steps = int(config["lr_decay_steps"])
self._loss_report_step = int(config["loss_report_step"])
self._save_model_step = int(config["save_model_step"])
self._eval_step = int(config["eval_step"])
self._optimizer = torch.optim.Adam(
self._sim.parameters(), lr=self._lr_init
)
Expand Down Expand Up @@ -415,3 +416,37 @@ def validate(self, step: int):
self._logger.tb.add_scalar(
"val/onestep_rotation_error", np.mean(onestep_r_errors), step
)


def create_trainer(config_file: str):
    """Build a ``Trainer`` from a YAML (or legacy JSON) configuration file.

    The configuration is loaded, a ``Logger`` is created from it, the compute
    device is selected, and a ``LearnedSimulator`` is constructed and handed
    to the ``Trainer`` together with the logger and the config.

    Args:
        config_file: Path to the configuration file. It is joined onto the
            current working directory, so relative paths resolve against the
            directory the process was started from. Files ending in
            ``.yaml``/``.yml`` (case-insensitive) are parsed with
            ``yaml.safe_load``; ``.json`` files are still accepted but print
            a deprecation warning.

    Returns:
        The ``Trainer`` built from the loaded configuration.

    Raises:
        TypeError: If the file extension is neither YAML nor JSON.
        ValueError: If the file does not contain a top-level mapping (for
            example, when it is empty).
    """
    config_path = os.path.join(os.getcwd(), config_file)
    # Compare the real extension, lower-cased, so ".yml" and ".YAML" work too.
    suffix = os.path.splitext(config_file)[1].lower()

    # The file is opened before the extension is checked so that a missing
    # file is still reported as such, exactly as before.
    with open(config_path, encoding="utf-8") as f:
        if suffix in (".yaml", ".yml"):
            config = yaml.safe_load(f)
        elif suffix == ".json":
            print("Warning: json config file will not be supported soon")
            # Imported lazily: only the deprecated JSON path needs it.
            import json

            config = json.load(f)
        else:
            raise TypeError("Unsupported config file type")

    # An empty YAML file parses to None; fail here with a clear message
    # instead of an opaque error on the first config lookup further down.
    if not isinstance(config, dict):
        raise ValueError(
            f"Config file '{config_file}' must contain a top-level mapping, "
            f"got {type(config).__name__}"
        )

    logger = Logger(config)
    # CUDA is used only when it is available AND not disabled in the config.
    if torch.cuda.is_available() and config.get("use_cuda", True):
        device = torch.device("cuda")
        logger.print("Using GPU")
    else:
        device = torch.device("cpu")
        logger.print("Using CPU")

    # The same value is used for the latent size and the MLP hidden size.
    latent_dim = config.get("latent_dim", 128)
    sim = LearnedSimulator(
        mesh_dimensions=3,
        latent_dim=latent_dim,
        nmessage_passing_steps=config.get("message_passing_steps", 10),
        nmlp_layers=config.get("mlp_layers", 2),
        input_seq_length=config["data_config"]["input_seq_length"],
        mlp_hidden_dim=latent_dim,
        device=device,
    )
    trainer = Trainer(sim=sim, logger=logger, config=config, device=device)

    return trainer
25 changes: 22 additions & 3 deletions readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,10 @@ through the wheels we
built if you are using python3.8

```bash
# Install pre-compiled binary through pip if you are using python3.8
# Install the pre-compiled binaries through pip if you are using python3.8; try upgrading pip first

# pip install --upgrade pip

pip install https://cloud.dfki.de/owncloud/index.php/s/F9EwmwWkSW8pzfL/download/eigenpy-3.8.0-0-cp38-cp38-manylinux_2_31_x86_64.whl

pip install https://cloud.dfki.de/owncloud/index.php/s/Tb4baydBiRP6iN2/download/hpp_fcl-2.4.5-3-cp38-cp38-manylinux_2_31_x86_64.whl
Expand All @@ -145,6 +148,9 @@ git clone https://github.com/jongyaoY/fignet
cd fignet
pip install -r requirements.txt
pip install .

# Setup robosuite
python -m robosuite.scripts.setup_macros
```

## How to train
Expand All @@ -160,7 +166,7 @@ You can pre-compute the graphs from the raw dataset beforehand so that the train
(only the training dataset).

```bash
python scripts/preprocess_data.py --data_path=[path_to_dataset/train_dataset_name.npz] --num_workers=[default to 1]
python scripts/preprocess_data.py --data_path=[path_to_dataset/train_dataset_name.npz] --num_workers=[default to 1] --config_file=config/train.yaml
```

This process takes around 8 hours with `num_workers=8`, and will create a
Expand All @@ -180,7 +186,7 @@ dataset (npz file) or the folder containing pre-computed graphs, while the test
dataset should be an npz file. Also adapt `batch_size` and `num_workers` accordingly.

```bash
python scripts/train.py --config_file=config/train.json
python scripts/train.py --config_file=config/train.yaml
```

### 3. Generate animation
Expand Down Expand Up @@ -227,3 +233,16 @@ interaction graph networks." arXiv preprint arXiv:2212.03574 (2022).
## License

[MIT License](LICENSE)

## Known issues

### Preprocessing script with `num_workers > 0` raises the following error

> RuntimeError: received 0 items of ancdata

Adding the following line to [preprocess_data.py](scripts/preprocess_data.py)
should solve the problem (see [here](https://discuss.pytorch.org/t/runtimeerror-received-0-items-of-ancdata/4999/4)).

```python
torch.multiprocessing.set_sharing_strategy('file_system')
```
4 changes: 2 additions & 2 deletions scripts/preprocess_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
# SOFTWARE.

import argparse
import json
import os
import pickle
import sys
Expand All @@ -30,6 +29,7 @@
import torch
import torchvision.transforms as T
import tqdm
import yaml

from fignet.data_loader import MujocoDataset, ToTensor

Expand Down Expand Up @@ -70,7 +70,7 @@ def save_graph(graph, graph_i, save_path):
if __name__ == "__main__":
try:
with open(os.path.join(os.getcwd(), args.config_file)) as f:
config = json.load(f)
config = yaml.safe_load(f)
except FileNotFoundError as e:
print(e)
sys.exit()
Expand Down
35 changes: 3 additions & 32 deletions scripts/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,42 +21,13 @@
# SOFTWARE.

import argparse
import json
import os

import torch

from fignet.logger import Logger
from fignet.simulator import LearnedSimulator
from fignet.trainer import Trainer
from fignet.trainer import create_trainer

parser = argparse.ArgumentParser()
parser.add_argument(
"--config_file", required=False, default="config/train.json"
)
parser.add_argument("--config_file", required=True)
args = parser.parse_args()

if __name__ == "__main__":

latent_dim = 128

with open(os.path.join(os.getcwd(), args.config_file)) as f:
config = json.load(f)
logger = Logger(config)
if torch.cuda.is_available() and config.get("use_cuda", True):
device = torch.device("cuda")
logger.print("Using GPU")
else:
device = torch.device("cpu")
logger.print("Using CPU")
sim = LearnedSimulator(
mesh_dimensions=3,
latent_dim=latent_dim,
nmessage_passing_steps=10,
nmlp_layers=2,
input_seq_length=config["data_config"]["input_seq_length"],
mlp_hidden_dim=latent_dim,
device=device,
)
trainer = Trainer(sim=sim, logger=logger, config=config, device=device)
trainer = create_trainer(config_file=args.config_file)
trainer.train()
4 changes: 3 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@
setup(
name="fignet",
packages=[
package for package in find_packages() if package.startswith("fignet")
package
for package in find_packages()
if package.startswith("fignet") or package.startswith("rigid_fall")
],
install_requires=[],
eager_resources=["*"],
Expand Down
66 changes: 3 additions & 63 deletions tests/test_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,69 +20,9 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import pytest
import torch
from fignet.trainer import create_trainer

from fignet.logger import Logger
from fignet.simulator import LearnedSimulator
from fignet.trainer import Trainer


@pytest.fixture
def init_trainer():
latent_dim = 128

config = {
"test_data_path": "test_datasets/mujoco_moviA_500.npz",
"data_path": "test_datasets/mujoco_moviA_500.npz",
"data_config": {
"noise_std": 3e-5,
"connectivity_radius": 0.01,
"input_seq_length": 3,
},
"batch_size": 64,
"num_workers": 0,
# "model_file": "log_test/202407261408/models/weights_itr_49.ckpt",
# "train_state_file": "log_test/202407261408/models/train_state_itr_49.ckpt",
# "continue_log_from": "202407261408",
"use_cuda": False,
"logging_folder": "log_test",
"log_level": "info",
"lr_init": 1e-3,
"lr_decay_rate": 0.1,
"lr_decay_steps": 1e6,
"loss_report_step": 100,
"save_model_step": 100,
"eval_step": 100,
"training_steps": 500,
# "clip_norm": 1e-2,
"rollout_steps": 10,
"run_validate": True,
"num_eval_rollout": 1,
"save_video": False,
"warmup_steps": 10,
}
logger = Logger(config)
if torch.cuda.is_available() and config["use_cuda"]:
device = torch.device("cuda")
logger.print("Using GPU")
else:
device = torch.device("cpu")
logger.print("Using CPU")
sim = LearnedSimulator(
mesh_dimensions=3,
latent_dim=latent_dim,
nmessage_passing_steps=10,
nmlp_layers=2,
input_seq_length=config["data_config"]["input_seq_length"],
mlp_hidden_dim=latent_dim,
device=device,
)
trainer = Trainer(sim=sim, logger=logger, config=config, device=device)

return trainer


def test_simulator(init_trainer):
trainer = init_trainer
def test_simulator():
    """Smoke test: build a trainer from the test config and run training."""
    create_trainer("config/test_train.yaml").train()

0 comments on commit 207e1fa

Please sign in to comment.