From 88ab14871c14888d1495007573f0f270cb04f333 Mon Sep 17 00:00:00 2001
From: wildsnowman
Date: Wed, 7 Sep 2022 11:02:52 +0900
Subject: [PATCH 1/2] use multiple split settings when creating the data

---
 benchmarks/set_matching_pytorch/train_sm.py | 25 ++++----------
 benchmarks/set_matching_pytorch/train_we.py | 19 +++--------
 shift15m/datasets/outfitfeature.py          | 38 ++++++---------------
 3 files changed, 21 insertions(+), 61 deletions(-)

diff --git a/benchmarks/set_matching_pytorch/train_sm.py b/benchmarks/set_matching_pytorch/train_sm.py
index 8257c20..7575bd3 100644
--- a/benchmarks/set_matching_pytorch/train_sm.py
+++ b/benchmarks/set_matching_pytorch/train_sm.py
@@ -29,7 +29,7 @@ def get_train_val_loader(
 ) -> Tuple[Any, Any]:
     label_dir_name = f"{train_year}-{valid_year}-split{split}"
 
-    iqon_outfits = IQONOutfits(root=root, split=split)
+    iqon_outfits = IQONOutfits(root=root)
 
     train, valid = iqon_outfits.get_trainval_data(label_dir_name)
     feature_dir = iqon_outfits.feature_dir
@@ -118,18 +118,14 @@ def eval_process(engine, batch):
 
     # early stopping
     handler = EarlyStopping(
-        patience=5,
-        score_function=exfn.stopping_score_function,
-        trainer=trainer,
+        patience=5, score_function=exfn.stopping_score_function, trainer=trainer,
     )
     valid_evaluator.add_event_handler(Events.COMPLETED, handler)
 
     # lr scheduler
     lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=4, gamma=0.7)
     trainer.add_event_handler(
-        Events.EPOCH_COMPLETED,
-        exfn.lr_step,
-        lr_scheduler,
+        Events.EPOCH_COMPLETED, exfn.lr_step, lr_scheduler,
     )
 
     # logging
@@ -170,16 +166,11 @@ def eval_process(engine, batch):
         save_handler=DiskSaver(args.log_dir, require_empty=False),
     )
     trainer.add_event_handler(
-        Events.EPOCH_COMPLETED(every=args.checkpoint_interval),
-        trainer_checkpointer,
+        Events.EPOCH_COMPLETED(every=args.checkpoint_interval), trainer_checkpointer,
     )
 
     model_checkpointer = ModelCheckpoint(
-        args.log_dir,
-        "modelckpt",
-        n_saved=1,
-        create_dir=True,
-        require_empty=False,
+        args.log_dir, "modelckpt", n_saved=1, create_dir=True, require_empty=False,
     )
     trainer.add_event_handler(
         Events.EPOCH_COMPLETED(every=args.checkpoint_interval),
@@ -206,11 +197,7 @@ def eval_process(engine, batch):
     parser.add_argument(
         "--model",
         "-m",
-        choices=[
-            "set_matching_sim",
-            "cov_mean",
-            "cov_max",
-        ],
+        choices=["set_matching_sim", "cov_mean", "cov_max",],
         default="cov_max",
     )
     parser.add_argument("--batchsize", "-b", type=int, default=32)
diff --git a/benchmarks/set_matching_pytorch/train_we.py b/benchmarks/set_matching_pytorch/train_we.py
index 1af9ac8..177b770 100644
--- a/benchmarks/set_matching_pytorch/train_we.py
+++ b/benchmarks/set_matching_pytorch/train_we.py
@@ -33,7 +33,7 @@ def get_train_val_loader(
 ) -> Tuple[Any, Any]:
     label_dir_name = f"{train_year}-{valid_year}-split{split}"
 
-    iqon_outfits = IQONOutfits(root=root, split=split)
+    iqon_outfits = IQONOutfits(root=root)
 
     train, valid = iqon_outfits.get_trainval_data(label_dir_name)
     feature_dir = iqon_outfits.feature_dir
@@ -125,18 +125,14 @@ def eval_process(engine, batch):
 
     # early stopping
     handler = EarlyStopping(
-        patience=5,
-        score_function=exfn.stopping_score_function,
-        trainer=trainer,
+        patience=5, score_function=exfn.stopping_score_function, trainer=trainer,
     )
     valid_evaluator.add_event_handler(Events.COMPLETED, handler)
 
     # lr scheduler
     lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=4, gamma=0.7)
     trainer.add_event_handler(
-        Events.EPOCH_COMPLETED,
-        exfn.lr_step,
-        lr_scheduler,
+        Events.EPOCH_COMPLETED, exfn.lr_step, lr_scheduler,
     )
 
     # logging
@@ -177,16 +173,11 @@ def eval_process(engine, batch):
         save_handler=DiskSaver(args.log_dir, require_empty=False),
     )
     trainer.add_event_handler(
-        Events.EPOCH_COMPLETED(every=args.checkpoint_interval),
-        trainer_checkpointer,
+        Events.EPOCH_COMPLETED(every=args.checkpoint_interval), trainer_checkpointer,
    )
 
     model_checkpointer = ModelCheckpoint(
-        args.log_dir,
-        "modelckpt",
-        n_saved=1,
-        create_dir=True,
-        require_empty=False,
+        args.log_dir, "modelckpt", n_saved=1, create_dir=True, require_empty=False,
    )
     trainer.add_event_handler(
         Events.EPOCH_COMPLETED(every=args.checkpoint_interval),
diff --git a/shift15m/datasets/outfitfeature.py b/shift15m/datasets/outfitfeature.py
index 7bccc21..4a82063 100644
--- a/shift15m/datasets/outfitfeature.py
+++ b/shift15m/datasets/outfitfeature.py
@@ -114,14 +114,10 @@ def __init__(
         self.root = root
         self.n_cand_sets = n_cand_sets
         self.transform_q = FeatureListTransform(
-            max_set_size=max_set_size_query,
-            apply_shuffle=False,
-            apply_padding=True,
+            max_set_size=max_set_size_query, apply_shuffle=False, apply_padding=True,
         )
         self.transform_a = FeatureListTransform(
-            max_set_size=max_set_size_answer,
-            apply_shuffle=False,
-            apply_padding=True,
+            max_set_size=max_set_size_answer, apply_shuffle=False, apply_padding=True,
         )
 
     def __len__(self):
@@ -178,11 +174,7 @@ def _read_feature(self, path):
 
 
 class IQONOutfits:
-    def __init__(
-        self,
-        root: str = C.ROOT,
-        split: int = 0,
-    ) -> None:
+    def __init__(self, root: str = C.ROOT, split: int = 0,) -> None:  # not used
         self.root = pathlib.Path(root)
         self.root.mkdir(parents=True, exist_ok=True)
         if not (self.root / "iqon_outfits.json").exists():
@@ -191,8 +183,11 @@ def __init__(
 
         self._label_dir = self.root / "set_matching/labels"
         if not self._label_dir.exists():
+            print("Making train/val dataset.")
             self._label_dir.mkdir(parents=True, exist_ok=True)
-            self._make_trainval_dataset(seed=split)
+            splits = [0, 1, 2]
+            for _s in splits:
+                self._make_trainval_dataset(seed=_s)
 
         self._feature_dir = self.root / "features"
         if not self._feature_dir.exists():
@@ -226,13 +221,8 @@ def _download_outfit_label(self):
         res.check_returncode()
 
     def _make_trainval_dataset(
-        self,
-        min_num_categories: int = 4,
-        min_like_num: int = 50,
-        seed: int = 0,
+        self, min_num_categories: int = 4, min_like_num: int = 50, seed: int = 0,
     ):
-        print("Make train/val dataset.")
-
         np.random.seed(seed)
         num_train, num_val, num_test = 30816, 3851, 3851  # max size
 
@@ -285,11 +275,7 @@ def get_test_data(self, label_dir_name: str) -> List[Dict]:
         return test
 
     def get_fitb_data(
-        self,
-        label_dir_name: str,
-        n_comb: int = 1,
-        n_cands: int = 8,
-        seed: int = 0,
+        self, label_dir_name: str, n_comb: int = 1, n_cands: int = 8, seed: int = 0,
     ) -> List:
         dir_name = self._label_dir / label_dir_name
         path = dir_name / f"test_examples_ncomb_{n_comb}_ncands_{n_cands}.json"
@@ -299,11 +285,7 @@ def get_fitb_data(
         return test_examples
 
     def _make_test_examples(
-        self,
-        path: pathlib.Path,
-        n_comb: int = 1,
-        n_cands: int = 8,
-        seed: int = 0,
+        self, path: pathlib.Path, n_comb: int = 1, n_cands: int = 8, seed: int = 0,
     ):
         print("Make test dataset.")
         np.random.seed(seed)

From d632dfd6c31cde124e8a8eec8b332a9b9ead220b Mon Sep 17 00:00:00 2001
From: wildsnowman
Date: Wed, 7 Sep 2022 11:10:18 +0900
Subject: [PATCH 2/2] black

---
 benchmarks/set_matching_pytorch/train_sm.py | 23 +++++++++++----
 benchmarks/set_matching_pytorch/train_we.py | 17 ++++++---
 shift15m/datasets/outfitfeature.py          | 31 +++++++++++++++++----
 3 files changed, 56 insertions(+), 15 deletions(-)
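Note: this second patch is formatting only; it re-runs black over the files touched by PATCH 1/2 and changes no behaviour. The churn is black's "magic trailing comma" at work: PATCH 1/2 collapsed the argument lists onto single lines but kept their trailing commas, and black's default mode expands any call that ends in a trailing comma back to one argument per line. The sketch below illustrates that mechanism; it is not code from this repository and assumes only that the black package is installed.

import black

# A call collapsed onto one line but still ending in a trailing comma,
# as produced by PATCH 1/2 (the names here are illustrative).
collapsed = (
    "handler = EarlyStopping(\n"
    "    patience=5, score_function=score_fn, trainer=trainer,\n"
    ")\n"
)

# black.Mode() enables the magic trailing comma by default, so format_str
# expands the call to one argument per line, reproducing PATCH 2/2.
print(black.format_str(collapsed, mode=black.Mode()))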
diff --git a/benchmarks/set_matching_pytorch/train_sm.py b/benchmarks/set_matching_pytorch/train_sm.py
index 7575bd3..8613534 100644
--- a/benchmarks/set_matching_pytorch/train_sm.py
+++ b/benchmarks/set_matching_pytorch/train_sm.py
@@ -118,14 +118,18 @@ def eval_process(engine, batch):
 
     # early stopping
     handler = EarlyStopping(
-        patience=5, score_function=exfn.stopping_score_function, trainer=trainer,
+        patience=5,
+        score_function=exfn.stopping_score_function,
+        trainer=trainer,
     )
     valid_evaluator.add_event_handler(Events.COMPLETED, handler)
 
     # lr scheduler
     lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=4, gamma=0.7)
     trainer.add_event_handler(
-        Events.EPOCH_COMPLETED, exfn.lr_step, lr_scheduler,
+        Events.EPOCH_COMPLETED,
+        exfn.lr_step,
+        lr_scheduler,
     )
 
     # logging
@@ -166,11 +170,16 @@ def eval_process(engine, batch):
         save_handler=DiskSaver(args.log_dir, require_empty=False),
     )
     trainer.add_event_handler(
-        Events.EPOCH_COMPLETED(every=args.checkpoint_interval), trainer_checkpointer,
+        Events.EPOCH_COMPLETED(every=args.checkpoint_interval),
+        trainer_checkpointer,
     )
 
     model_checkpointer = ModelCheckpoint(
-        args.log_dir, "modelckpt", n_saved=1, create_dir=True, require_empty=False,
+        args.log_dir,
+        "modelckpt",
+        n_saved=1,
+        create_dir=True,
+        require_empty=False,
     )
     trainer.add_event_handler(
         Events.EPOCH_COMPLETED(every=args.checkpoint_interval),
@@ -197,7 +206,11 @@ def eval_process(engine, batch):
     parser.add_argument(
         "--model",
         "-m",
-        choices=["set_matching_sim", "cov_mean", "cov_max",],
+        choices=[
+            "set_matching_sim",
+            "cov_mean",
+            "cov_max",
+        ],
         default="cov_max",
     )
     parser.add_argument("--batchsize", "-b", type=int, default=32)
diff --git a/benchmarks/set_matching_pytorch/train_we.py b/benchmarks/set_matching_pytorch/train_we.py
index 177b770..7cbf06f 100644
--- a/benchmarks/set_matching_pytorch/train_we.py
+++ b/benchmarks/set_matching_pytorch/train_we.py
@@ -125,14 +125,18 @@ def eval_process(engine, batch):
 
     # early stopping
     handler = EarlyStopping(
-        patience=5, score_function=exfn.stopping_score_function, trainer=trainer,
+        patience=5,
+        score_function=exfn.stopping_score_function,
+        trainer=trainer,
     )
     valid_evaluator.add_event_handler(Events.COMPLETED, handler)
 
     # lr scheduler
     lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=4, gamma=0.7)
     trainer.add_event_handler(
-        Events.EPOCH_COMPLETED, exfn.lr_step, lr_scheduler,
+        Events.EPOCH_COMPLETED,
+        exfn.lr_step,
+        lr_scheduler,
     )
 
     # logging
@@ -173,11 +177,16 @@ def eval_process(engine, batch):
         save_handler=DiskSaver(args.log_dir, require_empty=False),
     )
     trainer.add_event_handler(
-        Events.EPOCH_COMPLETED(every=args.checkpoint_interval), trainer_checkpointer,
+        Events.EPOCH_COMPLETED(every=args.checkpoint_interval),
+        trainer_checkpointer,
     )
 
     model_checkpointer = ModelCheckpoint(
-        args.log_dir, "modelckpt", n_saved=1, create_dir=True, require_empty=False,
+        args.log_dir,
+        "modelckpt",
+        n_saved=1,
+        create_dir=True,
+        require_empty=False,
     )
     trainer.add_event_handler(
         Events.EPOCH_COMPLETED(every=args.checkpoint_interval),
diff --git a/shift15m/datasets/outfitfeature.py b/shift15m/datasets/outfitfeature.py
index 4a82063..af3ab5d 100644
--- a/shift15m/datasets/outfitfeature.py
+++ b/shift15m/datasets/outfitfeature.py
@@ -114,10 +114,14 @@ def __init__(
         self.root = root
         self.n_cand_sets = n_cand_sets
         self.transform_q = FeatureListTransform(
-            max_set_size=max_set_size_query, apply_shuffle=False, apply_padding=True,
+            max_set_size=max_set_size_query,
+            apply_shuffle=False,
+            apply_padding=True,
         )
         self.transform_a = FeatureListTransform(
-            max_set_size=max_set_size_answer, apply_shuffle=False, apply_padding=True,
+            max_set_size=max_set_size_answer,
+            apply_shuffle=False,
+            apply_padding=True,
         )
 
     def __len__(self):
@@ -174,7 +178,11 @@ def _read_feature(self, path):
 
 
 class IQONOutfits:
-    def __init__(self, root: str = C.ROOT, split: int = 0,) -> None:  # not used
+    def __init__(
+        self,
+        root: str = C.ROOT,
+        split: int = 0,
+    ) -> None:  # not used
         self.root = pathlib.Path(root)
         self.root.mkdir(parents=True, exist_ok=True)
         if not (self.root / "iqon_outfits.json").exists():
@@ -221,7 +229,10 @@ def _download_outfit_label(self):
         res.check_returncode()
 
     def _make_trainval_dataset(
-        self, min_num_categories: int = 4, min_like_num: int = 50, seed: int = 0,
+        self,
+        min_num_categories: int = 4,
+        min_like_num: int = 50,
+        seed: int = 0,
     ):
         np.random.seed(seed)
         num_train, num_val, num_test = 30816, 3851, 3851  # max size
@@ -275,7 +286,11 @@ def get_test_data(self, label_dir_name: str) -> List[Dict]:
         return test
 
     def get_fitb_data(
-        self, label_dir_name: str, n_comb: int = 1, n_cands: int = 8, seed: int = 0,
+        self,
+        label_dir_name: str,
+        n_comb: int = 1,
+        n_cands: int = 8,
+        seed: int = 0,
     ) -> List:
         dir_name = self._label_dir / label_dir_name
         path = dir_name / f"test_examples_ncomb_{n_comb}_ncands_{n_cands}.json"
@@ -285,7 +300,11 @@ def get_fitb_data(
         return test_examples
 
     def _make_test_examples(
-        self, path: pathlib.Path, n_comb: int = 1, n_cands: int = 8, seed: int = 0,
+        self,
+        path: pathlib.Path,
+        n_comb: int = 1,
+        n_cands: int = 8,
+        seed: int = 0,
     ):
         print("Make test dataset.")
         np.random.seed(seed)
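Closing note: the combined effect of the two patches on dataset creation can be exercised as below. This is an illustrative sketch, not code from the repository; it assumes the shift15m package is installed and its data downloads succeed, and the year values are placeholders for the {train_year}-{valid_year} pair a training script would pass.

from shift15m.datasets.outfitfeature import IQONOutfits

# The first construction now builds label directories for every seed in
# [0, 1, 2] via _make_trainval_dataset, so one pass prepares all splits;
# the `split` constructor argument remains but is no longer used.
iqon_outfits = IQONOutfits()

# As in train_sm.py and train_we.py, a split is then selected purely
# through the label directory name.
for split in (0, 1, 2):
    label_dir_name = f"2013-2014-split{split}"  # placeholder years
    train, valid = iqon_outfits.get_trainval_data(label_dir_name)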