Skip to content

Commit

Permalink
breaking: deprecate set_prefix (#3753)
Browse files Browse the repository at this point in the history
Fix #3477.

Deprecate `set_prefix` in the training parameter and `--set-prefix` in
CLI. It has been broken since several years ago, and dpdata only
supports `set`.

---------

Signed-off-by: Jinzhe Zeng <[email protected]>
  • Loading branch information
njzjz authored May 7, 2024
1 parent 3d87e57 commit dda6bcc
Show file tree
Hide file tree
Showing 88 changed files with 101 additions and 120 deletions.
5 changes: 1 addition & 4 deletions deepmd/entrypoints/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,6 @@ def test(
model: str,
system: str,
datafile: str,
set_prefix: str,
numb_test: int,
rand_seed: Optional[int],
shuffle_test: bool,
Expand All @@ -86,8 +85,6 @@ def test(
system directory
datafile : str
the path to the list of systems to test
set_prefix : str
string prefix of set
numb_test : int
munber of tests to do. 0 means all data.
rand_seed : Optional[int]
Expand Down Expand Up @@ -137,7 +134,7 @@ def test(
tmap = dp.get_type_map() if isinstance(dp, DeepPot) else None
data = DeepmdData(
system,
set_prefix,
set_prefix="set",
shuffle_test=shuffle_test,
type_map=tmap,
sort_atoms=False,
Expand Down
5 changes: 1 addition & 4 deletions deepmd/infer/model_devi.py
Original file line number Diff line number Diff line change
Expand Up @@ -350,7 +350,6 @@ def make_model_devi(
*,
models: list,
system: str,
set_prefix: str,
output: str,
frequency: int,
real_error: bool = False,
Expand All @@ -367,8 +366,6 @@ def make_model_devi(
A list of paths of models to use for making model deviation
system : str
The path of system to make model deviation calculation
set_prefix : str
The set prefix of the system
output : str
The output file for model deviation results
frequency : int
Expand Down Expand Up @@ -410,7 +407,7 @@ def make_model_devi(
for system in all_sys:
# create data-system
dp_data = DeepmdData(
system, set_prefix, shuffle_test=False, type_map=tmap, sort_atoms=False
system, "set", shuffle_test=False, type_map=tmap, sort_atoms=False
)
if first_dp.get_dim_fparam() > 0:
dp_data.add(
Expand Down
26 changes: 22 additions & 4 deletions deepmd/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import logging
import os
import textwrap
import warnings
from collections import (
defaultdict,
)
Expand Down Expand Up @@ -69,6 +70,24 @@ def __call__(self, parser, namespace, values, option_string=None):
setattr(namespace, self.dest, BACKEND_TABLE[values])


class DeprecateAction(argparse.Action):
# See https://stackoverflow.com/a/69052677/9567349 by Ibolit under CC BY-SA 4.0
def __init__(self, *args, **kwargs):
self.call_count = 0
if "help" in kwargs:
kwargs["help"] = f'[DEPRECATED] {kwargs["help"]}'
super().__init__(*args, **kwargs)

def __call__(self, parser, namespace, values, option_string=None):
if self.call_count == 0:
warnings.warn(
f"The option `{option_string}` is deprecated. It will be ignored.",
FutureWarning,
)
delattr(namespace, self.dest)
self.call_count += 1


def main_parser() -> argparse.ArgumentParser:
"""DeePMD-Kit commandline options argument parser.
Expand Down Expand Up @@ -355,9 +374,8 @@ def main_parser() -> argparse.ArgumentParser:
parser_tst.add_argument(
"-S",
"--set-prefix",
default="set",
type=str,
help="(Supported backend: TensorFlow) The set prefix",
action=DeprecateAction,
help="Deprecated argument.",
)
parser_tst.add_argument(
"-n",
Expand Down Expand Up @@ -528,7 +546,7 @@ def main_parser() -> argparse.ArgumentParser:
help="The system directory. Recursively detect systems in this directory.",
)
parser_model_devi.add_argument(
"-S", "--set-prefix", default="set", type=str, help="The set prefix"
"-S", "--set-prefix", action=DeprecateAction, help="Deprecated argument."
)
parser_model_devi.add_argument(
"-o",
Expand Down
4 changes: 2 additions & 2 deletions deepmd/tf/nvnmd/data/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -319,7 +319,7 @@
"disp_training": True,
"time_training": True,
"profiling": False,
"training_data": {"systems": "dataset", "set_prefix": "set", "batch_size": 1},
"training_data": {"systems": "dataset", "batch_size": 1},
},
}

Expand Down Expand Up @@ -385,7 +385,7 @@
"disp_training": True,
"time_training": True,
"profiling": False,
"training_data": {"systems": "dataset", "set_prefix": "set", "batch_size": 1},
"training_data": {"systems": "dataset", "batch_size": 1},
},
}

Expand Down
25 changes: 21 additions & 4 deletions deepmd/utils/argcheck.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# SPDX-License-Identifier: LGPL-3.0-or-later
import json
import logging
import warnings
from typing import (
Callable,
List,
Expand Down Expand Up @@ -54,6 +55,24 @@ def make_link(content, ref_key):
)


def deprecate_argument_extra_check(key: str) -> Callable[[dict], bool]:
"""Generate an extra check to deprecate an argument in sub fields.
Parameters
----------
key : str
The name of the deprecated argument.
"""

def deprecate_something(data: Optional[dict]):
if data is not None and key in data:
warnings.warn(f"{key} has been removed and takes no effect.", FutureWarning)
data.pop(key)
return True

return deprecate_something


def type_embedding_args():
doc_neuron = "Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built."
doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection'
Expand Down Expand Up @@ -1951,7 +1970,6 @@ def training_data_args(): # ! added by Ziyao: new specification style for data
"This key can be provided with a list that specifies the systems, or be provided with a string "
"by which the prefix of all systems are given and the list of the systems is automatically generated."
)
doc_set_prefix = f"The prefix of the sets in the {link_sys}."
doc_batch_size = f'This key can be \n\n\
- list: the length of which is the same as the {link_sys}. The batch size of each system is given by the elements of the list.\n\n\
- int: all {link_sys} use the same batch size.\n\n\
Expand All @@ -1973,7 +1991,6 @@ def training_data_args(): # ! added by Ziyao: new specification style for data
Argument(
"systems", [List[str], str], optional=False, default=".", doc=doc_systems
),
Argument("set_prefix", str, optional=True, default="set", doc=doc_set_prefix),
Argument(
"batch_size",
[List[int], int, str],
Expand Down Expand Up @@ -2009,6 +2026,7 @@ def training_data_args(): # ! added by Ziyao: new specification style for data
sub_fields=args,
sub_variants=[],
doc=doc_training_data,
extra_check=deprecate_argument_extra_check("set_prefix"),
)


Expand All @@ -2019,7 +2037,6 @@ def validation_data_args(): # ! added by Ziyao: new specification style for dat
"This key can be provided with a list that specifies the systems, or be provided with a string "
"by which the prefix of all systems are given and the list of the systems is automatically generated."
)
doc_set_prefix = f"The prefix of the sets in the {link_sys}."
doc_batch_size = f'This key can be \n\n\
- list: the length of which is the same as the {link_sys}. The batch size of each system is given by the elements of the list.\n\n\
- int: all {link_sys} use the same batch size.\n\n\
Expand All @@ -2040,7 +2057,6 @@ def validation_data_args(): # ! added by Ziyao: new specification style for dat
Argument(
"systems", [List[str], str], optional=False, default=".", doc=doc_systems
),
Argument("set_prefix", str, optional=True, default="set", doc=doc_set_prefix),
Argument(
"batch_size",
[List[int], int, str],
Expand Down Expand Up @@ -2090,6 +2106,7 @@ def validation_data_args(): # ! added by Ziyao: new specification style for dat
sub_fields=args,
sub_variants=[],
doc=doc_validation_data,
extra_check=deprecate_argument_extra_check("set_prefix"),
)


Expand Down
3 changes: 2 additions & 1 deletion deepmd/utils/compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,8 @@ def _jcopy(src: Dict[str, Any], dst: Dict[str, Any], keys: Sequence[str]):
list of keys to copy
"""
for k in keys:
dst[k] = src[k]
if k in src:
dst[k] = src[k]


def remove_decay_rate(jdata: Dict[str, Any]):
Expand Down
2 changes: 0 additions & 2 deletions doc/nvnmd/nvnmd.md
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,6 @@ The "training" section is defined as
"save_freq": 10000,
"training_data": {
"systems": ["system1_path", "system2_path", "..."],
"set_prefix": "set",
"batch_size": ["batch_size_of_system1", "batch_size_of_system2", "..."]
}
}
Expand All @@ -171,7 +170,6 @@ where items are defined as:
| save_ckpt | path prefix of check point files | a string |
| save_freq | save frequency | a positive integer |
| systems | a list of data directory which contains the dataset | string list |
| set_prefix | the prefix of dataset | a string |
| batch_size | a list of batch size of corresponding dataset | a integer list |

## Training
Expand Down
1 change: 0 additions & 1 deletion doc/train/tensorboard.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ directory by modifying the input script, setting {ref}`tensorboard <training/ten
```json
"training" : {
"systems": ["../data/"],
"set_prefix": "set",
"stop_batch": 1000000,
"batch_size": 1,

Expand Down
1 change: 0 additions & 1 deletion examples/dos/train/input.json
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,6 @@
"systems": [
"../data/heat-221/"
],
"set_prefix": "set",
"batch_size": 1
}
},
Expand Down
1 change: 0 additions & 1 deletion examples/dos/train/input_torch.json
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,6 @@
"../data/heat-221-reformat/atomic_system/",
"../data/heat-221-reformat/global_system/"
],
"set_prefix": "set",
"batch_size": 1
}
},
Expand Down
1 change: 0 additions & 1 deletion examples/fparam/train/input.json
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,6 @@
"../data/e3000_i2000/",
"../data/e8000_i2000/"
],
"set_prefix": "set",
"batch_size": 1
},
"stop_batch": 1000000,
Expand Down
1 change: 0 additions & 1 deletion examples/fparam/train/input_aparam.json
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,6 @@
"../data/e3000_i2000/",
"../data/e8000_i2000/"
],
"set_prefix": "set",
"batch_size": 1
},
"stop_batch": 1000000,
Expand Down
1 change: 0 additions & 1 deletion examples/nvnmd/train/train_cnn.json
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@
"systems": [
"../data"
],
"set_prefix": "set",
"batch_size": [
1
]
Expand Down
1 change: 0 additions & 1 deletion examples/nvnmd/train/train_qnn.json
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@
"systems": [
"../data"
],
"set_prefix": "set",
"batch_size": [
1
]
Expand Down
1 change: 0 additions & 1 deletion examples/water/dplr/train/dw.json
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,6 @@
"data"
],
"batch_size": "auto",
"set_prefix": "set",
"_comment8": "that's all"
},

Expand Down
2 changes: 1 addition & 1 deletion source/lmp/tests/lrmodel.pbtxt
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ node {
dtype: DT_STRING
tensor_shape {
}
string_val: "{\"model\":{\"type_map\":[\"O\",\"H\"],\"descriptor\":{\"type\":\"hybrid\",\"list\":[{\"type\":\"se_e2_a\",\"sel\":[20,40],\"rcut_smth\":0.5,\"rcut\":4.0,\"neuron\":[2,4,8],\"resnet_dt\":false,\"axis_neuron\":8,\"seed\":1,\"activation_function\":\"tanh\",\"type_one_side\":false,\"precision\":\"default\",\"trainable\":true,\"exclude_types\":[],\"set_davg_zero\":false}]},\"fitting_net\":{\"neuron\":[2,2,2],\"resnet_dt\":true,\"seed\":1,\"type\":\"ener\",\"numb_fparam\":0,\"numb_aparam\":0,\"activation_function\":\"tanh\",\"precision\":\"default\",\"trainable\":true,\"rcond\":0.001,\"atom_ener\":[],\"use_aparam_as_mask\":false},\"modifier\":{\"type\":\"dipole_charge\",\"model_name\":\"lrdipole.pb\",\"model_charge_map\":[-8],\"sys_charge_map\":[6,1],\"ewald_h\":1.0,\"ewald_beta\":0.4},\"data_stat_nbatch\":10,\"data_stat_protect\":0.01,\"data_bias_nsample\":10},\"learning_rate\":{\"type\":\"exp\",\"decay_steps\":5000,\"start_lr\":0.001,\"stop_lr\":3.51e-08,\"scale_by_worker\":\"linear\"},\"loss\":{\"type\":\"ener\",\"start_pref_e\":0.02,\"limit_pref_e\":1,\"start_pref_f\":1000,\"limit_pref_f\":1,\"start_pref_v\":0,\"limit_pref_v\":0,\"start_pref_ae\":0.0,\"limit_pref_ae\":0.0,\"start_pref_pf\":0.0,\"limit_pref_pf\":0.0,\"enable_atom_ener_coeff\":false},\"training\":{\"training_data\":{\"systems\":[\"../../4-deep-wannier/data/dpgen.lowd.merged/iter.000016/02.fp/data.012\",\"../../4-deep-wannier/data/dpgen.lowd.merged/iter.000016/02.fp/data.016\",\"../../4-deep-wannier/data/dpgen.lowd.merged/iter.000016/02.fp/data.020\",\"../../4-deep-wannier/data/dpgen.merged/init/00.liq/state.0300.1.00e00.00/vasp/data\",\"../../4-deep-wannier/data/dpgen.merged/init/00.liq/state.0400.1.00e04.00/vasp/data\",\"../../4-deep-wannier/data/dpgen.merged/init/01.liq/state.0300.1.00e00.00/vasp/data\",\"../../4-deep-wannier/data/dpgen.merged/init/01.liq/state.0400.1.00e04.00/vasp/data\",\"../../4-deep-wannier/data/dpgen.merged/iter.000000/02.fp/data.000\",\"../../4-deep-wannier/data/dpgen.merged/iter.000001/02.fp/data.001\",\"../../4-deep-wannier/data/dpgen.merged/iter.000002/02.fp/data.002\",\"../../4-deep-wannier/data/dpgen.merged/iter.000005/02.fp/data.001\",\"../../4-deep-wannier/data/dpgen.merged/iter.000008/02.fp/data.004\",\"../../4-deep-wannier/data/dpgen.merged/iter.000009/02.fp/data.005\",\"../../4-deep-wannier/data/dpgen.merged/iter.000010/02.fp/data.006\"],\"set_prefix\":\"set\",\"batch_size\":\"auto\",\"auto_prob\":\"prob_sys_size\",\"sys_probs\":null},\"numb_steps\":1,\"seed\":1,\"disp_file\":\"lcurve.out\",\"disp_freq\":1,\"save_freq\":1,\"validation_data\":null,\"save_ckpt\":\"model.ckpt\",\"disp_training\":true,\"time_training\":true,\"profiling\":false,\"profiling_file\":\"timeline.json\",\"enable_profiler\":false,\"tensorboard\":false,\"tensorboard_log_dir\":\"log\",\"tensorboard_freq\":1}}"
string_val: "{\"model\":{\"type_map\":[\"O\",\"H\"],\"descriptor\":{\"type\":\"hybrid\",\"list\":[{\"type\":\"se_e2_a\",\"sel\":[20,40],\"rcut_smth\":0.5,\"rcut\":4.0,\"neuron\":[2,4,8],\"resnet_dt\":false,\"axis_neuron\":8,\"seed\":1,\"activation_function\":\"tanh\",\"type_one_side\":false,\"precision\":\"default\",\"trainable\":true,\"exclude_types\":[],\"set_davg_zero\":false}]},\"fitting_net\":{\"neuron\":[2,2,2],\"resnet_dt\":true,\"seed\":1,\"type\":\"ener\",\"numb_fparam\":0,\"numb_aparam\":0,\"activation_function\":\"tanh\",\"precision\":\"default\",\"trainable\":true,\"rcond\":0.001,\"atom_ener\":[],\"use_aparam_as_mask\":false},\"modifier\":{\"type\":\"dipole_charge\",\"model_name\":\"lrdipole.pb\",\"model_charge_map\":[-8],\"sys_charge_map\":[6,1],\"ewald_h\":1.0,\"ewald_beta\":0.4},\"data_stat_nbatch\":10,\"data_stat_protect\":0.01,\"data_bias_nsample\":10},\"learning_rate\":{\"type\":\"exp\",\"decay_steps\":5000,\"start_lr\":0.001,\"stop_lr\":3.51e-08,\"scale_by_worker\":\"linear\"},\"loss\":{\"type\":\"ener\",\"start_pref_e\":0.02,\"limit_pref_e\":1,\"start_pref_f\":1000,\"limit_pref_f\":1,\"start_pref_v\":0,\"limit_pref_v\":0,\"start_pref_ae\":0.0,\"limit_pref_ae\":0.0,\"start_pref_pf\":0.0,\"limit_pref_pf\":0.0,\"enable_atom_ener_coeff\":false},\"training\":{\"training_data\":{\"systems\":[\"../../4-deep-wannier/data/dpgen.lowd.merged/iter.000016/02.fp/data.012\",\"../../4-deep-wannier/data/dpgen.lowd.merged/iter.000016/02.fp/data.016\",\"../../4-deep-wannier/data/dpgen.lowd.merged/iter.000016/02.fp/data.020\",\"../../4-deep-wannier/data/dpgen.merged/init/00.liq/state.0300.1.00e00.00/vasp/data\",\"../../4-deep-wannier/data/dpgen.merged/init/00.liq/state.0400.1.00e04.00/vasp/data\",\"../../4-deep-wannier/data/dpgen.merged/init/01.liq/state.0300.1.00e00.00/vasp/data\",\"../../4-deep-wannier/data/dpgen.merged/init/01.liq/state.0400.1.00e04.00/vasp/data\",\"../../4-deep-wannier/data/dpgen.merged/iter.000000/02.fp/data.000\",\"../../4-deep-wannier/data/dpgen.merged/iter.000001/02.fp/data.001\",\"../../4-deep-wannier/data/dpgen.merged/iter.000002/02.fp/data.002\",\"../../4-deep-wannier/data/dpgen.merged/iter.000005/02.fp/data.001\",\"../../4-deep-wannier/data/dpgen.merged/iter.000008/02.fp/data.004\",\"../../4-deep-wannier/data/dpgen.merged/iter.000009/02.fp/data.005\",\"../../4-deep-wannier/data/dpgen.merged/iter.000010/02.fp/data.006\"],\"batch_size\":\"auto\",\"auto_prob\":\"prob_sys_size\",\"sys_probs\":null},\"numb_steps\":1,\"seed\":1,\"disp_file\":\"lcurve.out\",\"disp_freq\":1,\"save_freq\":1,\"validation_data\":null,\"save_ckpt\":\"model.ckpt\",\"disp_training\":true,\"time_training\":true,\"profiling\":false,\"profiling_file\":\"timeline.json\",\"enable_profiler\":false,\"tensorboard\":false,\"tensorboard_log_dir\":\"log\",\"tensorboard_freq\":1}}"
}
}
}
Expand Down
2 changes: 0 additions & 2 deletions source/tests/common/test_argument_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -317,7 +317,6 @@ def test_parser_test(self):
ARGS = {
"--model": {"type": str, "value": "MODEL.PB"},
"--system": {"type": str, "value": "SYSTEM_DIR"},
"--set-prefix": {"type": str, "value": "SET_PREFIX"},
"--numb-test": {"type": int, "value": 1},
"--rand-seed": {"type": (int, type(None)), "value": 12321},
"--detail-file": {"type": (str, type(None)), "value": "TARGET.FILE"},
Expand Down Expand Up @@ -355,7 +354,6 @@ def test_parser_model_devi(self):
"expected": ["GRAPH.000.pb", "GRAPH.001.pb"],
},
"--system": {"type": str, "value": "SYSTEM_DIR"},
"--set-prefix": {"type": str, "value": "SET_PREFIX"},
"--output": {"type": str, "value": "OUTFILE"},
"--frequency": {"type": int, "value": 1},
}
Expand Down
Loading

0 comments on commit dda6bcc

Please sign in to comment.