diff --git a/dpgen2/flow/block.py b/dpgen2/flow/block.py index 9a5c61ed..043c633a 100644 --- a/dpgen2/flow/block.py +++ b/dpgen2/flow/block.py @@ -42,6 +42,7 @@ def block_cl( "type_map" : InputParameter(), "numb_models": InputParameter(type=int), "template_script" : InputParameter(), + "train_config" : InputParameter(), "lmp_task_grp" : InputParameter(), "conf_selector" : InputParameter(), "fp_inputs" : InputParameter(), @@ -68,6 +69,7 @@ def block_cl( name + '-prep-run-dp-train', template = prep_run_dp_train_op, parameters={ + "train_config" : block_steps.inputs.parameters['train_config'], "numb_models": block_steps.inputs.parameters['numb_models'], "template_script": block_steps.inputs.parameters['template_script'], }, diff --git a/dpgen2/flow/loop.py b/dpgen2/flow/loop.py index c7023526..eded09f8 100644 --- a/dpgen2/flow/loop.py +++ b/dpgen2/flow/loop.py @@ -113,6 +113,7 @@ def loop ( "type_map" : InputParameter(), "numb_models": InputParameter(type=int), "template_script" : InputParameter(), + "train_config" : InputParameter(), "lmp_task_grp" : InputParameter(), "conf_selector" : InputParameter(), "fp_inputs" : InputParameter(), @@ -146,6 +147,7 @@ def loop ( "type_map" : steps.inputs.parameters["type_map"], "numb_models" : steps.inputs.parameters["numb_models"], "template_script" : steps.inputs.parameters["template_script"], + "train_config" : steps.inputs.parameters["train_config"], "lmp_task_grp" : steps.inputs.parameters["lmp_task_grp"], "conf_selector" : steps.inputs.parameters["conf_selector"], "fp_inputs" : steps.inputs.parameters["fp_inputs"], @@ -198,6 +200,7 @@ def loop ( "type_map" : steps.inputs.parameters["type_map"], "numb_models" : steps.inputs.parameters["numb_models"], "template_script" : steps.inputs.parameters["template_script"], + "train_config" : steps.inputs.parameters["train_config"], "lmp_task_grp" : scheduler_step.outputs.parameters["lmp_task_grp"], "conf_selector" : scheduler_step.outputs.parameters["conf_selector"], "fp_inputs" : steps.inputs.parameters["fp_inputs"], @@ -245,6 +248,7 @@ def dpgen( "type_map" : InputParameter(), "numb_models": InputParameter(type=int), "template_script" : InputParameter(), + "train_config" : InputParameter(), "fp_inputs" : InputParameter(), "exploration_scheduler" : InputParameter(), }, @@ -305,6 +309,7 @@ def dpgen( "type_map" : steps.inputs.parameters['type_map'], "numb_models" : steps.inputs.parameters['numb_models'], "template_script" : steps.inputs.parameters['template_script'], + "train_config" : steps.inputs.parameters['train_config'], "lmp_task_grp" : scheduler_step.outputs.parameters['lmp_task_grp'], "conf_selector" : scheduler_step.outputs.parameters['conf_selector'], "fp_inputs" : steps.inputs.parameters['fp_inputs'], diff --git a/dpgen2/flow/prep_run_dp_train.py b/dpgen2/flow/prep_run_dp_train.py index 00181d29..bc268b0c 100644 --- a/dpgen2/flow/prep_run_dp_train.py +++ b/dpgen2/flow/prep_run_dp_train.py @@ -29,6 +29,8 @@ def prep_run_dp_train( name : str, prep_train_op : OP, run_train_op : OP, + prep_train_image : str = "dflow:v1.0", + run_train_image : str = "dflow:v1.0", ): train_steps = Steps( name=name, @@ -36,6 +38,7 @@ def prep_run_dp_train( parameters={ "numb_models": InputParameter(type=int), "template_script" : InputParameter(), + "train_config" : InputParameter(), }, artifacts={ "init_models" : InputArtifact(), @@ -56,7 +59,7 @@ def prep_run_dp_train( 'prep-train', template=PythonOPTemplate( prep_train_op, - image="dflow:v1.0", + image=prep_train_image, output_artifact_archive={ "task_paths": None }, @@ 
-75,7 +78,7 @@ def prep_run_dp_train( 'run-train', template=PythonOPTemplate( run_train_op, - image="dflow:v1.0", + image=run_train_image, slices = Slices( "{{item}}", input_parameter = ["task_name"], @@ -85,6 +88,7 @@ python_packages = "..//dpgen2", ), parameters={ + "config" : train_steps.inputs.parameters["train_config"], "task_name" : prep_train.outputs.parameters["task_names"], }, artifacts={ diff --git a/dpgen2/op/run_dp_train.py b/dpgen2/op/run_dp_train.py index 018178b1..3582c532 100644 --- a/dpgen2/op/run_dp_train.py +++ b/dpgen2/op/run_dp_train.py @@ -2,11 +2,25 @@ OP, OPIO, OPIOSign, - Artifact + Artifact, + TransientError, + FatalError, +) +import os, json, dpdata +from typing import ( + Tuple, + List, ) -import os, json -from typing import Tuple, List, Set from pathlib import Path +from dpgen2.constants import ( + train_task_pattern, + train_script_name, +) +from dpgen2.utils.run_command import run_command +from dargs import ( + Argument, +) + class RunDPTrain(OP): r"""Execute a DP training task. Train and freeze a DP model. @@ -21,10 +35,11 @@ @classmethod def get_input_sign(cls): return OPIOSign({ + "config" : dict, "task_name" : str, "task_path" : Artifact(Path), "init_model" : Artifact(Path), - "init_data" : Artifact(Set[Path]), + "init_data" : Artifact(List[Path]), "iter_data" : Artifact(List[Path]), }) @@ -49,10 +64,11 @@ def execute( ip : dict Input dict with components: + - `config`: (`dict`) The config of the training task. Check `RunDPTrain._training_args` for definitions. - `task_name`: (`str`) The name of training task. - `task_path`: (`Artifact(Path)`) The path that contains all input files prepared by `PrepDPTrain`. - `init_model`: (`Artifact(Path)`) A frozen model to initialize the training. - - `init_data`: (`Artifact(set[Path])`) Initial training data. + - `init_data`: (`Artifact(List[Path])`) Initial training data. - `iter_data`: (`Artifact(List[Path])`) Training data generated in the DPGEN iterations. Returns @@ -69,5 +85,210 @@ def execute( FatalError On the failure of training or freezing. Human intervention needed. 
""" - raise NotImplementedError + config = ip['config'] if ip['config'] is not None else {} + config = RunDPTrain.normalize_config(config) + task_name = ip['task_name'] + task_path = ip['task_path'] + init_model = ip['init_model'] + init_data = ip['init_data'] + iter_data = ip['iter_data'] + + # update the input script + input_script = Path(task_path)/train_script_name + with open(input_script) as fp: + train_dict = json.load(fp) + if "systems" in train_dict['training']: + major_version = "1" + else: + major_version = "2" + + # auto prob style + do_init_model = RunDPTrain.decide_init_model(config, init_model, init_data, iter_data) + auto_prob_str = "prob_sys_size" + if do_init_model: + old_ratio = config['init_model_old_ratio'] + numb_old = len(init_data) + len(iter_data[:-1]) + auto_prob_str = f"prob_sys_size; 0:{numb_old}:{old_ratio}; {numb_old}:{numb_old+1}:{1.-old_ratio:g}" + + # update the input dict + train_dict = RunDPTrain.write_data_to_input_script( + train_dict, init_data, iter_data, auto_prob_str, major_version) + train_dict = RunDPTrain.write_other_to_input_script( + train_dict, config, do_init_model, major_version) + + # mkdir output dir + work_dir = Path(task_name) + cwd = os.getcwd() + work_dir.mkdir(exist_ok=True, parents=True) + os.chdir(work_dir) + # open log + fplog = open('train.log', 'w') + def clean_before_quit(): + fplog.close() + os.chdir(cwd) + + # dump train script + with open(train_script_name, 'w') as fp: + json.dump(train_dict, fp, indent=4) + + # train model + if do_init_model: + command = ['dp', 'train', '--init-frz-model', str(init_model), train_script_name] + else: + command = ['dp', 'train', train_script_name] + ret, out, err = run_command(command) + if ret != 0: + clean_before_quit() + raise FatalError('dp train failed') + fplog.write(out) + + # freeze model + ret, out, err = run_command(['dp', 'freeze', '-o', 'frozen_model.pb']) + if ret != 0: + clean_before_quit() + raise FatalError('dp freeze failed') + fplog.write(out) + + clean_before_quit() + + return OPIO({ + "script" : work_dir / train_script_name, + "model" : work_dir / "frozen_model.pb", + "lcurve" : work_dir / "lcurve.out", + "log" : work_dir / "train.log", + }) + + + @staticmethod + def write_data_to_input_script( + idict : dict, + init_data : List[Path], + iter_data : List[Path], + auto_prob_str : str = "prob_sys_size", + major_version : str = "1", + ): + odict = idict.copy() + data_list = [str(ii) for ii in init_data] + [str(ii) for ii in iter_data] + if major_version == "1": + # v1 behavior + odict['training']['systems'] = data_list + odict['training']['batch_size'] = "auto" + odict['training']['auto_prob_style'] = auto_prob_str + elif major_version == "2": + # v2 behavior + odict['training']['training_data']['systems'] = data_list + odict['training']['training_data']['batch_size'] = "auto" + odict['training']['training_data']['auto_prob'] = auto_prob_str + odict['training'].pop('validation_data', None) + else: + raise RuntimeError('unsupported DeePMD-kit major version', major_version) + return odict + + @staticmethod + def write_other_to_input_script( + idict, + config, + do_init_model, + major_version : str = "1", + ): + odict = idict.copy() + odict['training']['disp_file'] = "lcurve.out" + if do_init_model: + odict['learning_rate']['start_lr'] = config['init_model_start_lr'] + odict['loss']['start_pref_e'] = config['init_model_start_pref_e'] + odict['loss']['start_pref_f'] = config['init_model_start_pref_f'] + odict['loss']['start_pref_v'] = config['init_model_start_pref_v'] + if 
major_version == "1": + odict['training']['stop_batch'] = config['init_model_numb_steps'] + elif major_version == "2": + odict['training']['numb_steps'] = config['init_model_numb_steps'] + else: + raise RuntimeError('unsupported DeePMD-kit major version', major_version) + return odict + + @staticmethod + def decide_init_model( + config, + init_model, + init_data, + iter_data, + ): + do_init_model = False + # decide if we do init-model + ## cases we do definitely not + if init_model is None or \ + iter_data is None or \ + len(iter_data) == 0 : + do_init_model = False + ## cases controlled by the policy + else: + if config['init_model_policy'] == 'no': + do_init_model = False + elif config['init_model_policy'] == 'yes': + do_init_model = True + elif 'old_data_larger_than' in config['init_model_policy']: + old_data_size_level = int(config['init_model_policy'].split(':')[-1]) + init_data_size = _get_data_size_of_all_systems(init_data) + iter_data_old_size = _get_data_size_of_all_mult_sys(iter_data[:-1]) + old_data_size = init_data_size + iter_data_old_size + if old_data_size > old_data_size_level: + do_init_model = True + return do_init_model + + + @staticmethod + def _training_args(): + doc_init_model_policy = "The policy of init-model training. It can be\n\n\ + - 'no': No init-model training. Training from scratch.\n\n\ + - 'yes': Do init-model training.\n\n\ + - 'old_data_larger_than:XXX': Do init-model if the training data size of the previous model is larger than XXX. XXX is an integer." + doc_init_model_old_ratio = "The frequency ratio of old data over new data" + doc_init_model_numb_steps = "The number of training steps when init-model" + doc_init_model_start_lr = "The start learning rate when init-model" + doc_init_model_start_pref_e = "The start energy prefactor in loss when init-model" + doc_init_model_start_pref_f = "The start force prefactor in loss when init-model" + doc_init_model_start_pref_v = "The start virial prefactor in loss when init-model" + + return [ + Argument("init_model_policy", str, optional=True, default='no', doc=doc_init_model_policy), + Argument("init_model_old_ratio", float, optional=True, default=0.9, doc=doc_init_model_old_ratio), + Argument("init_model_numb_steps", int, optional=True, default=400000, doc=doc_init_model_numb_steps, alias = ['init_model_stop_batch']), + Argument("init_model_start_lr", float, optional=True, default=1e-4, doc=doc_init_model_start_lr), + Argument("init_model_start_pref_e", float, optional=True, default=0.1, doc=doc_init_model_start_pref_e), + Argument("init_model_start_pref_f", float, optional=True, default=100, doc=doc_init_model_start_pref_f), + Argument("init_model_start_pref_v", float, optional=True, default=0.0, doc=doc_init_model_start_pref_v), + ] + + + @staticmethod + def normalize_config(data = {}): + ta = RunDPTrain._training_args() + + base = Argument("base", dict, ta) + data = base.normalize_value(data, trim_pattern="_*") + base.check_value(data, strict=True) + + return data + + +def _get_data_size_of_system(data_dir): + ss = dpdata.System(data_dir, fmt='deepmd/npy') + return ss.get_nframes() + +def _get_data_size_of_all_systems(data_dirs): + count = 0 + for ii in data_dirs: + count += _get_data_size_of_system(ii) + return count + +def _get_data_size_of_mult_sys(data_dir): + ms = dpdata.MultiSystems() + ms.from_deepmd_npy(data_dir) + return ms.get_nframes() + +def _get_data_size_of_all_mult_sys(data_dirs): + count = 0 + for ii in data_dirs: + count += _get_data_size_of_mult_sys(ii) + return count diff --git 
a/tests/fake_data_set.py b/tests/fake_data_set.py new file mode 100644 index 00000000..bc5f78bc --- /dev/null +++ b/tests/fake_data_set.py @@ -0,0 +1,31 @@ +import dpdata +import numpy as np + +def fake_system( + nframes, + natoms, + atom_name = 'foo', +): + ss = dpdata.LabeledSystem() + ss.data['atom_names'] = [atom_name] + ss.data['atom_numbs'] = [natoms] + ss.data['atom_types'] = [0 for ii in range(natoms)] + ss.data['cells'] = np.zeros([nframes, 3, 3]) + ss.data['coords'] = np.zeros([nframes, natoms, 3]) + ss.data['forces'] = np.zeros([nframes, natoms, 3]) + ss.data['energies'] = np.zeros([nframes]) + return ss + +def fake_multi_sys( + nframes, + natoms, + atom_name = 'foo', +): + nsys = len(nframes) + ms = dpdata.MultiSystems() + for ii in range(nsys): + ss = fake_system(nframes[ii], natoms[ii], atom_name) + ms.append(ss) + return ms + + diff --git a/tests/test_block_cl.py b/tests/test_block_cl.py index 610fd0b9..e022dbdf 100644 --- a/tests/test_block_cl.py +++ b/tests/test_block_cl.py @@ -158,6 +158,7 @@ def test(self): "type_map" : self.type_map, "numb_models" : self.numb_models, "template_script" : self.template_script, + "train_config" : {}, "lmp_task_grp" : self.task_group_list, "conf_selector" : self.conf_selector, 'fp_inputs' : self.vasp_inputs, diff --git a/tests/test_loop.py b/tests/test_loop.py index 745f65cb..83a1ea65 100644 --- a/tests/test_loop.py +++ b/tests/test_loop.py @@ -145,7 +145,7 @@ def _setUp_data(self): (ii/'a').write_text('data a') (ii/'b').write_text('data b') self.init_data = upload_artifact(tmp_init_data) - self.path_init_data = set(tmp_init_data) + self.path_init_data = tmp_init_data self.iter_data = upload_artifact([]) self.path_iter_data = None @@ -204,6 +204,7 @@ def test(self): "type_map" : self.type_map, "numb_models" : self.numb_models, "template_script" : self.template_script, + "train_config" : {}, 'fp_inputs' : self.vasp_inputs, "exploration_scheduler" : self.scheduler, }, @@ -227,8 +228,6 @@ def test(self): scheduler = jsonpickle.decode(step.outputs.parameters['exploration_scheduler'].value) download_artifact(step.outputs.artifacts["iter_data"], path = 'iter_data') download_artifact(step.outputs.artifacts["models"], path = Path('models')/self.name) - print(scheduler) - print(scheduler.get_stage(), scheduler.get_iteration()) self.assertEqual(scheduler.get_stage(), 2) self.assertEqual(scheduler.get_iteration(), 1) diff --git a/tests/test_prep_run_dp_train.py b/tests/test_prep_run_dp_train.py index c6c2120d..95a99ecc 100644 --- a/tests/test_prep_run_dp_train.py +++ b/tests/test_prep_run_dp_train.py @@ -161,7 +161,7 @@ def setUp(self): self.init_models = make_mocked_init_models(self.numb_models) tmp_init_data = make_mocked_init_data() - self.init_data = set(tmp_init_data) + self.init_data = tmp_init_data tmp_iter_data = [Path('iter_data/foo'), Path('iter_data/bar')] for ii in tmp_iter_data: @@ -193,6 +193,7 @@ def test(self): for ii in range(3): run = MockedRunDPTrain() ip = OPIO({ + "config" : {}, "task_name": self.task_names[ii], "task_path": self.task_paths[ii], "init_model" : self.init_models[ii], @@ -233,7 +234,7 @@ def setUp (self) : (ii/'a').write_text('data a') (ii/'b').write_text('data b') self.iter_data = upload_artifact(tmp_iter_data) - self.path_iter_data = set(tmp_iter_data) + self.path_iter_data = tmp_iter_data self.template_script = mocked_template_script.copy() @@ -267,6 +268,7 @@ def test_train(self): parameters = { "numb_models" : self.numb_models, "template_script" : self.template_script, + "train_config" : {}, }, artifacts = { 
"init_models" : self.init_models, diff --git a/tests/test_run_dp_train.py b/tests/test_run_dp_train.py new file mode 100644 index 00000000..d812c004 --- /dev/null +++ b/tests/test_run_dp_train.py @@ -0,0 +1,480 @@ +import context +import numpy as np +import unittest, json, shutil, os +from pathlib import Path +from dpgen2.op.run_dp_train import RunDPTrain +from fake_data_set import fake_system, fake_multi_sys +from dpgen2.constants import ( + train_task_pattern, + train_script_name, +) +from mock import patch, call +from dflow.python import ( + OP, + OPIO, + OPIOSign, + Artifact, + TransientError, + FatalError, +) + +idict_v2 = { + 'training' : { + 'training_data':{ + "systems" : [], + }, + 'validation_data':{ + "systems" : [], + }, + }, + "learning_rate" : { + "start_lr" : 1., + }, + "loss" : { + "start_pref_e" : 1., + "start_pref_f" : 1., + "start_pref_v" : 1., + }, +} +expected_odict_v2 = { + 'training' : { + 'training_data':{ + "systems" : [ + 'init/data-0', 'init/data-1', 'data-0', 'data-1' + ], + "batch_size" : "auto", + "auto_prob" : "prob_sys_size", + }, + "disp_file" : "lcurve.out", + }, + "learning_rate" : { + "start_lr" : 1., + }, + "loss" : { + "start_pref_e" : 1., + "start_pref_f" : 1., + "start_pref_v" : 1., + }, +} +expected_init_model_odict_v2 = { + "training" : { + "training_data" : { + "systems" : [ + 'init/data-0', 'init/data-1', 'data-0', 'data-1' + ], + "batch_size" : "auto", + "auto_prob" : "prob_sys_size; 0:3:0.9; 3:4:0.1", + }, + "disp_file" : "lcurve.out", + "numb_steps" : 400000, + }, + "learning_rate" : { + "start_lr" : 1e-4, + }, + "loss" : { + "start_pref_e" : 0.1, + "start_pref_f" : 100, + "start_pref_v" : 0.0, + }, +} + +idict_v1 = { + 'training' : { + "systems" : [], + }, + "learning_rate" : { + "start_lr" : 1., + }, + "loss" : { + "start_pref_e" : 1., + "start_pref_f" : 1., + "start_pref_v" : 1., + }, +} +expected_odict_v1 = { + 'training' : { + "systems" : [ + 'init/data-0', 'init/data-1', 'data-0', 'data-1' + ], + "batch_size" : "auto", + "auto_prob_style" : "prob_sys_size", + "disp_file" : "lcurve.out", + }, + "learning_rate" : { + "start_lr" : 1., + }, + "loss" : { + "start_pref_e" : 1., + "start_pref_f" : 1., + "start_pref_v" : 1., + }, +} +expected_init_model_odict_v1 = { + "training" : { + "systems" : [ + 'init/data-0', 'init/data-1', 'data-0', 'data-1' + ], + "batch_size" : "auto", + "auto_prob_style" : "prob_sys_size; 0:3:0.9; 3:4:0.1", + "disp_file" : "lcurve.out", + "stop_batch" : 400000, + }, + "learning_rate" : { + "start_lr" : 1e-4, + }, + "loss" : { + "start_pref_e" : 0.1, + "start_pref_f" : 100, + "start_pref_v" : 0.0, + }, +} + + +class TestRunDPTrain(unittest.TestCase): + def setUp(self): + self.atom_name = 'foo' + self.nframes_0 = [2, 5, 3] + self.natoms_0 = [4, 3, 4] + self.nframes_1 = [3, 4, 2] + self.natoms_1 = [5, 3, 2] + ms_0 = fake_multi_sys( self.nframes_0, self.natoms_0, self.atom_name) + ms_1 = fake_multi_sys( self.nframes_1, self.natoms_1, self.atom_name) + ms_0.to_deepmd_npy('data-0') + ms_1.to_deepmd_npy('data-1') + self.iter_data = [Path('data-0'), Path('data-1')] + + self.init_nframs_0 = 3 + self.init_natoms_0 = 5 + self.init_nframs_1 = 4 + self.init_natoms_1 = 2 + ss_0 = fake_system(self.init_nframs_0, self.init_natoms_0, self.atom_name) + ss_1 = fake_system(self.init_nframs_1, self.init_natoms_1, self.atom_name) + ss_0.to_deepmd_npy('init/data-0') + ss_1.to_deepmd_npy('init/data-1') + self.init_data = [Path('init/data-0'), Path('init/data-1')] + self.init_data = sorted(list(self.init_data)) + + self.init_model = 
Path('bar.pb') + + self.config = { + "init_model_policy" : "no", + "init_model_old_ratio" : 0.9, + "init_model_numb_steps" : 400000, + "init_model_start_lr" : 1e-4, + "init_model_start_pref_e" : 0.1, + "init_model_start_pref_f" : 100, + "init_model_start_pref_v" : 0.0, + } + self.config = RunDPTrain.normalize_config(self.config) + + self.old_data_size = self.init_nframs_0 + self.init_nframs_1 + sum(self.nframes_0) + self.task_name = 'task-000' + + def tearDown(self): + for ii in ['init', 'data-0', 'data-1', 'input-000', self.task_name ]: + if Path(ii).exists(): + shutil.rmtree(str(ii)) + + def test_normalize_config(self): + config = self.config + self.assertEqual(config['init_model_policy'], 'no') + self.assertAlmostEqual(config['init_model_old_ratio'], 0.9) + self.assertEqual(config['init_model_numb_steps'], 400000) + self.assertAlmostEqual(config['init_model_start_lr'], 1e-4) + self.assertAlmostEqual(config['init_model_start_pref_e'], 0.1) + self.assertAlmostEqual(config['init_model_start_pref_f'], 100) + self.assertAlmostEqual(config['init_model_start_pref_v'], 0.0) + + + def test_decide_init_model_no_model(self): + do_init_model = RunDPTrain.decide_init_model( + self.config, None, self.init_data, self.iter_data) + self.assertFalse(do_init_model) + + def test_decide_init_model_none_iter_data(self): + do_init_model = RunDPTrain.decide_init_model( + self.config, self.init_model, self.init_data, None) + self.assertFalse(do_init_model) + + def test_decide_init_model_no_iter_data(self): + do_init_model = RunDPTrain.decide_init_model( + self.config, self.init_model, self.init_data, []) + self.assertFalse(do_init_model) + + def test_decide_init_model_config_no(self): + config = self.config.copy() + config['init_model_policy'] = 'no' + do_init_model = RunDPTrain.decide_init_model( + config, self.init_model, self.init_data, self.iter_data) + self.assertFalse(do_init_model) + + def test_decide_init_model_config_yes(self): + config = self.config.copy() + config['init_model_policy'] = 'yes' + do_init_model = RunDPTrain.decide_init_model( + config, self.init_model, self.init_data, self.iter_data) + self.assertTrue(do_init_model) + + def test_decide_init_model_config_larger_than_no(self): + config = self.config.copy() + config['init_model_policy'] = f'old_data_larger_than:{self.old_data_size}' + do_init_model = RunDPTrain.decide_init_model( + config, self.init_model, self.init_data, self.iter_data) + self.assertFalse(do_init_model) + + def test_decide_init_model_config_larger_than_yes(self): + config = self.config.copy() + config['init_model_policy'] = f'old_data_larger_than:{self.old_data_size-1}' + do_init_model = RunDPTrain.decide_init_model( + config, self.init_model, self.init_data, self.iter_data) + self.assertTrue(do_init_model) + + + def test_update_input_dict_v1_init_model(self): + odict = RunDPTrain.write_data_to_input_script( + idict_v1, self.init_data, self.iter_data, auto_prob_str = "prob_sys_size; 0:3:0.9; 3:4:0.1", major_version = "1") + config = self.config.copy() + config['init_model_policy'] = 'yes' + odict = RunDPTrain.write_other_to_input_script( + odict, config, True, major_version = "1") + self.assertDictEqual(odict, expected_init_model_odict_v1) + + def test_update_input_dict_v1(self): + odict = RunDPTrain.write_data_to_input_script( + idict_v1, self.init_data, self.iter_data, auto_prob_str = "prob_sys_size", major_version = "1") + config = self.config.copy() + config['init_model_policy'] = 'no' + odict = RunDPTrain.write_other_to_input_script( + odict, config, False, major_version = 
"1") + self.assertDictEqual(odict, expected_odict_v1) + + + def test_update_input_dict_v2_init_model(self): + idict = idict_v2 + odict = RunDPTrain.write_data_to_input_script( + idict, self.init_data, self.iter_data, auto_prob_str = "prob_sys_size; 0:3:0.9; 3:4:0.1", major_version = "2") + config = self.config.copy() + config['init_model_policy'] = 'yes' + odict = RunDPTrain.write_other_to_input_script( + odict, config, True, major_version = "2") + self.assertDictEqual(odict, expected_init_model_odict_v2) + + + def test_update_input_dict_v2(self): + idict = idict_v2 + odict = RunDPTrain.write_data_to_input_script( + idict, self.init_data, self.iter_data, auto_prob_str = "prob_sys_size", major_version = "2") + config = self.config.copy() + config['init_model_policy'] = 'no' + odict = RunDPTrain.write_other_to_input_script( + odict, config, False, major_version = "2") + self.assertDictEqual(odict, expected_odict_v2) + + + @patch('dpgen2.op.run_dp_train.run_command') + def test_exec_v1(self, mocked_run): + mocked_run.side_effect = [ (0, 'foo\n', ''), (0, 'bar\n', '') ] + + config = self.config.copy() + config['init_model_policy'] = 'no' + + task_path = "input-000" + Path(task_path).mkdir(exist_ok=True) + with open(Path(task_path)/train_script_name, 'w') as fp: + json.dump(idict_v1, fp, indent=4) + task_name = self.task_name + work_dir = Path(task_name) + + ptrain = RunDPTrain() + out = ptrain.execute( + OPIO({ + "config" : config, + "task_name" : task_name, + "task_path" : Path(task_path), + "init_model" : Path(self.init_model), + "init_data" : [Path(ii) for ii in self.init_data], + "iter_data" : [Path(ii) for ii in self.iter_data], + }) + ) + self.assertEqual(out['script'], work_dir/train_script_name) + self.assertEqual(out['model'], work_dir/'frozen_model.pb') + self.assertEqual(out['lcurve'], work_dir/'lcurve.out') + self.assertEqual(out['log'], work_dir/'train.log') + + calls = [ + call(['dp', 'train', train_script_name]), + call(['dp', 'freeze', '-o', 'frozen_model.pb']), + ] + mocked_run.assert_has_calls(calls) + + self.assertTrue(work_dir.is_dir()) + self.assertTrue(out['log'].is_file()) + self.assertEqual(out['log'].read_text(), 'foo\nbar\n') + with open(out['script']) as fp: + jdata = json.load(fp) + self.assertDictEqual(jdata, expected_odict_v1) + + + @patch('dpgen2.op.run_dp_train.run_command') + def test_exec_v2(self, mocked_run): + mocked_run.side_effect = [ (0, 'foo\n', ''), (0, 'bar\n', '') ] + + config = self.config.copy() + config['init_model_policy'] = 'no' + + task_path = "input-000" + Path(task_path).mkdir(exist_ok=True) + with open(Path(task_path)/train_script_name, 'w') as fp: + json.dump(idict_v2, fp, indent=4) + task_name = self.task_name + work_dir = Path(task_name) + + ptrain = RunDPTrain() + out = ptrain.execute( + OPIO({ + "config" : config, + "task_name" : task_name, + "task_path" : Path(task_path), + "init_model" : Path(self.init_model), + "init_data" : [Path(ii) for ii in self.init_data], + "iter_data" : [Path(ii) for ii in self.iter_data], + }) + ) + self.assertEqual(out['script'], work_dir/train_script_name) + self.assertEqual(out['model'], work_dir/'frozen_model.pb') + self.assertEqual(out['lcurve'], work_dir/'lcurve.out') + self.assertEqual(out['log'], work_dir/'train.log') + + calls = [ + call(['dp', 'train', train_script_name]), + call(['dp', 'freeze', '-o', 'frozen_model.pb']), + ] + mocked_run.assert_has_calls(calls) + + self.assertTrue(work_dir.is_dir()) + self.assertTrue(out['log'].is_file()) + self.assertEqual(out['log'].read_text(), 'foo\nbar\n') + 
with open(out['script']) as fp: + jdata = json.load(fp) + self.assertDictEqual(jdata, expected_odict_v2) + + + @patch('dpgen2.op.run_dp_train.run_command') + def test_exec_v2_init_model(self, mocked_run): + mocked_run.side_effect = [ (0, 'foo\n', ''), (0, 'bar\n', '') ] + + config = self.config.copy() + config['init_model_policy'] = 'yes' + + task_path = "input-000" + Path(task_path).mkdir(exist_ok=True) + with open(Path(task_path)/train_script_name, 'w') as fp: + json.dump(idict_v2, fp, indent=4) + task_name = self.task_name + work_dir = Path(task_name) + + ptrain = RunDPTrain() + out = ptrain.execute( + OPIO({ + "config" : config, + "task_name" : task_name, + "task_path" : Path(task_path), + "init_model" : Path(self.init_model), + "init_data" : [Path(ii) for ii in self.init_data], + "iter_data" : [Path(ii) for ii in self.iter_data], + }) + ) + self.assertEqual(out['script'], work_dir/train_script_name) + self.assertEqual(out['model'], work_dir/'frozen_model.pb') + self.assertEqual(out['lcurve'], work_dir/'lcurve.out') + self.assertEqual(out['log'], work_dir/'train.log') + + calls = [ + call(['dp', 'train', '--init-frz-model', str(self.init_model), train_script_name]), + call(['dp', 'freeze', '-o', 'frozen_model.pb']), + ] + mocked_run.assert_has_calls(calls) + + self.assertTrue(work_dir.is_dir()) + self.assertTrue(out['log'].is_file()) + self.assertEqual(out['log'].read_text(), 'foo\nbar\n') + with open(out['script']) as fp: + jdata = json.load(fp) + self.assertDictEqual(jdata, expected_init_model_odict_v2) + + + @patch('dpgen2.op.run_dp_train.run_command') + def test_exec_v2_train_error(self, mocked_run): + mocked_run.side_effect = [ (1, '', 'foo\n'), (0, 'bar\n', '') ] + + config = self.config.copy() + config['init_model_policy'] = 'no' + + task_path = "input-000" + Path(task_path).mkdir(exist_ok=True) + with open(Path(task_path)/train_script_name, 'w') as fp: + json.dump(idict_v2, fp, indent=4) + task_name = self.task_name + work_dir = Path(task_name) + + ptrain = RunDPTrain() + with self.assertRaises(FatalError) as ee: + out = ptrain.execute( + OPIO({ + "config" : config, + "task_name" : task_name, + "task_path" : Path(task_path), + "init_model" : Path(self.init_model), + "init_data" : [Path(ii) for ii in self.init_data], + "iter_data" : [Path(ii) for ii in self.iter_data], + }) + ) + + calls = [ + call(['dp', 'train', train_script_name]), + ] + mocked_run.assert_has_calls(calls) + + self.assertTrue(work_dir.is_dir()) + with open(work_dir/train_script_name) as fp: + jdata = json.load(fp) + self.assertDictEqual(jdata, expected_odict_v2) + + + @patch('dpgen2.op.run_dp_train.run_command') + def test_exec_v2_freeze_error(self, mocked_run): + mocked_run.side_effect = [ (0, 'foo\n', ''), (1, '', 'bar\n') ] + + config = self.config.copy() + config['init_model_policy'] = 'no' + + task_path = "input-000" + Path(task_path).mkdir(exist_ok=True) + with open(Path(task_path)/train_script_name, 'w') as fp: + json.dump(idict_v2, fp, indent=4) + task_name = self.task_name + work_dir = Path(task_name) + + ptrain = RunDPTrain() + with self.assertRaises(FatalError) as ee: + out = ptrain.execute( + OPIO({ + "config" : config, + "task_name" : task_name, + "task_path" : Path(task_path), + "init_model" : Path(self.init_model), + "init_data" : [Path(ii) for ii in self.init_data], + "iter_data" : [Path(ii) for ii in self.iter_data], + }) + ) + + calls = [ + call(['dp', 'train', train_script_name]), + call(['dp', 'freeze', '-o', 'frozen_model.pb']), + ] + mocked_run.assert_has_calls(calls) + + 
self.assertTrue(work_dir.is_dir()) + with open(work_dir/train_script_name) as fp: + jdata = json.load(fp) + self.assertDictEqual(jdata, expected_odict_v2)
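
Reviewer note: a minimal sketch of how the new `train_config` parameter is consumed, assuming this patched `dpgen2` (and `dargs`) is importable. The defaults and the `init_model_stop_batch` alias come from `RunDPTrain._training_args()` above; that dargs folds an alias into its canonical key during normalization is my reading of the dargs behavior, not something this diff itself asserts.

```python
from dpgen2.op.run_dp_train import RunDPTrain

# An empty config is valid: normalize_config fills in every default
# declared by RunDPTrain._training_args().
config = RunDPTrain.normalize_config({})
assert config["init_model_policy"] == "no"
assert config["init_model_old_ratio"] == 0.9
assert config["init_model_numb_steps"] == 400000

# 'init_model_stop_batch' is declared as an alias of
# 'init_model_numb_steps' and should normalize to the canonical key.
config = RunDPTrain.normalize_config({"init_model_stop_batch": 200000})
assert config["init_model_numb_steps"] == 200000
```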
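The auto-probability string built in `RunDPTrain.execute` for init-model training weights the "old" systems (init data plus all but the last iteration directory) against the newest iteration. A standalone sketch of that arithmetic, using the same system counts as the tests above:

```python
# Mirrors the auto_prob_str construction in RunDPTrain.execute().
init_data = ["init/data-0", "init/data-1"]   # two "old" init systems
iter_data = ["data-0", "data-1"]             # last entry is the new data
old_ratio = 0.9                              # config['init_model_old_ratio']

# Systems 0..numb_old-1 are old; the single newest system follows them.
numb_old = len(init_data) + len(iter_data[:-1])  # = 3
auto_prob_str = (
    f"prob_sys_size; 0:{numb_old}:{old_ratio}; "
    f"{numb_old}:{numb_old + 1}:{1. - old_ratio:g}"
)
print(auto_prob_str)  # prob_sys_size; 0:3:0.9; 3:4:0.1
```

This is exactly the `auto_prob` / `auto_prob_style` value asserted in `expected_init_model_odict_v2` and `expected_init_model_odict_v1`.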