Skip to content

Commit

Permalink
fix floating point exception when nloc or nall is zero (#2923)
Browse files Browse the repository at this point in the history
Fix #2668. Fix #2921. Fix #2873. Fix #2521.

---------

Signed-off-by: Jinzhe Zeng <[email protected]>
Co-authored-by: Han Wang <[email protected]>
  • Loading branch information
njzjz and wanghan-iapcm authored Oct 16, 2023
1 parent d610a82 commit 8dcfbf5
Show file tree
Hide file tree
Showing 28 changed files with 147 additions and 69 deletions.
7 changes: 5 additions & 2 deletions deepmd/infer/deep_pot.py
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,10 @@ def _get_natoms_and_nframes(
natoms = len(atom_types[0])
else:
natoms = len(atom_types)
coords = np.reshape(np.array(coords), [-1, natoms * 3])
if natoms == 0:
assert coords.size == 0
else:
coords = np.reshape(np.array(coords), [-1, natoms * 3])
nframes = coords.shape[0]
return natoms, nframes

Expand Down Expand Up @@ -415,7 +418,7 @@ def _prepare_feed_dict(
atom_types = np.array(atom_types, dtype=int).reshape([-1, natoms])
else:
atom_types = np.array(atom_types, dtype=int).reshape([-1])
coords = np.reshape(np.array(coords), [-1, natoms * 3])
coords = np.reshape(np.array(coords), [nframes, natoms * 3])
if cells is None:
pbc = False
# make cells to work around the requirement of pbc
Expand Down
8 changes: 5 additions & 3 deletions deepmd/utils/batch_size.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,10 +100,12 @@ def execute(
OutOfMemoryError
OOM when batch size is 1
"""
if natoms > 0:
batch_nframes = self.current_batch_size // natoms
else:
batch_nframes = self.current_batch_size
try:
n_batch, result = callable(
max(self.current_batch_size // natoms, 1), start_index
)
n_batch, result = callable(max(batch_nframes, 1), start_index)
except OutOfMemoryError as e:
# TODO: it's very slow to catch OOM error; I don't know what TF is doing here
# but luckily we only need to catch once
Expand Down
12 changes: 8 additions & 4 deletions source/api_c/include/deepmd.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -618,6 +618,7 @@ class DeepPot {
* nframes x natoms x dim_aparam.
* natoms x dim_aparam. Then all frames are assumed to be provided with the
*same aparam.
* @warning Natoms should not be zero when computing multiple frames.
**/
template <typename VALUETYPE, typename ENERGYVTYPE>
void compute(
Expand All @@ -630,7 +631,7 @@ class DeepPot {
const std::vector<VALUETYPE> &fparam = std::vector<VALUETYPE>(),
const std::vector<VALUETYPE> &aparam = std::vector<VALUETYPE>()) {
unsigned int natoms = atype.size();
unsigned int nframes = coord.size() / natoms / 3;
unsigned int nframes = natoms > 0 ? coord.size() / natoms / 3 : 1;
assert(nframes * natoms * 3 == coord.size());
if (!box.empty()) {
assert(box.size() == nframes * 9);
Expand Down Expand Up @@ -676,6 +677,7 @@ class DeepPot {
* nframes x natoms x dim_aparam.
* natoms x dim_aparam. Then all frames are assumed to be provided with the
*same aparam.
* @warning Natoms should not be zero when computing multiple frames.
**/
template <typename VALUETYPE, typename ENERGYVTYPE>
void compute(
Expand All @@ -690,7 +692,7 @@ class DeepPot {
const std::vector<VALUETYPE> &fparam = std::vector<VALUETYPE>(),
const std::vector<VALUETYPE> &aparam = std::vector<VALUETYPE>()) {
unsigned int natoms = atype.size();
unsigned int nframes = coord.size() / natoms / 3;
unsigned int nframes = natoms > 0 ? coord.size() / natoms / 3 : 1;
assert(nframes * natoms * 3 == coord.size());
if (!box.empty()) {
assert(box.size() == nframes * 9);
Expand Down Expand Up @@ -743,6 +745,7 @@ class DeepPot {
* nframes x natoms x dim_aparam.
* natoms x dim_aparam. Then all frames are assumed to be provided with the
*same aparam.
* @warning Natoms should not be zero when computing multiple frames.
**/
template <typename VALUETYPE, typename ENERGYVTYPE>
void compute(
Expand All @@ -758,7 +761,7 @@ class DeepPot {
const std::vector<VALUETYPE> &fparam = std::vector<VALUETYPE>(),
const std::vector<VALUETYPE> &aparam = std::vector<VALUETYPE>()) {
unsigned int natoms = atype.size();
unsigned int nframes = coord.size() / natoms / 3;
unsigned int nframes = natoms > 0 ? coord.size() / natoms / 3 : 1;
assert(nframes * natoms * 3 == coord.size());
if (!box.empty()) {
assert(box.size() == nframes * 9);
Expand Down Expand Up @@ -810,6 +813,7 @@ class DeepPot {
* nframes x natoms x dim_aparam.
* natoms x dim_aparam. Then all frames are assumed to be provided with the
*same aparam.
* @warning Natoms should not be zero when computing multiple frames.
**/
template <typename VALUETYPE, typename ENERGYVTYPE>
void compute(
Expand All @@ -827,7 +831,7 @@ class DeepPot {
const std::vector<VALUETYPE> &fparam = std::vector<VALUETYPE>(),
const std::vector<VALUETYPE> &aparam = std::vector<VALUETYPE>()) {
unsigned int natoms = atype.size();
unsigned int nframes = coord.size() / natoms / 3;
unsigned int nframes = natoms > 0 ? coord.size() / natoms / 3 : 1;
assert(nframes * natoms * 3 == coord.size());
if (!box.empty()) {
assert(box.size() == nframes * 9);
Expand Down
17 changes: 17 additions & 0 deletions source/api_c/tests/test_deeppot_a_hpp.cc
Original file line number Diff line number Diff line change
Expand Up @@ -478,6 +478,23 @@ TYPED_TEST(TestInferDeepPotAHPP, cpu_lmp_nlist_type_sel) {
}
}

// Regression test for empty input: computing a system with zero atoms must
// not crash (the commit this test belongs to fixes a floating point
// exception triggered by dividing by natoms when it is zero) and must
// produce empty per-atom outputs and a (near-)zero total energy.
TYPED_TEST(TestInferDeepPotAHPP, cpu_build_nlist_empty_input) {
using VALUETYPE = TypeParam;
std::vector<VALUETYPE> coord;  // deliberately empty: 0 atoms x 3 coords
std::vector<int> atype;        // deliberately empty: 0 atom types
std::vector<VALUETYPE>& box = this->box;
unsigned int natoms = 0;
deepmd::hpp::DeepPot& dp = this->dp;
double ener;
std::vector<VALUETYPE> force, virial;

dp.compute(ener, force, virial, coord, atype, box);
// completing the call above without throwing or crashing is the primary
// check; the assertions below verify the shape and value of the outputs
EXPECT_EQ(force.size(), natoms * 3);  // no atoms -> no force components
EXPECT_EQ(virial.size(), 9);          // virial is always a 3x3 tensor
EXPECT_LT(fabs(ener), EPSILON);       // energy of an empty system is ~0
}

TYPED_TEST(TestInferDeepPotAHPP, print_summary) {
deepmd::hpp::DeepPot& dp = this->dp;
dp.print_summary("");
Expand Down
15 changes: 10 additions & 5 deletions source/api_cc/src/DeepPot.cc
Original file line number Diff line number Diff line change
Expand Up @@ -574,7 +574,8 @@ void DeepPot::compute(ENERGYVTYPE& dener,
const std::vector<VALUETYPE>& fparam_,
const std::vector<VALUETYPE>& aparam_) {
int nall = datype_.size();
int nframes = dcoord_.size() / nall / 3;
// when nall == 0, nframes cannot be inferred from coord; 1 is a safe default
int nframes = nall > 0 ? (dcoord_.size() / nall / 3) : 1;
int nloc = nall;
atommap = deepmd::AtomMap(datype_.begin(), datype_.begin() + nloc);
assert(nloc == atommap.get_type().size());
Expand Down Expand Up @@ -658,7 +659,8 @@ void DeepPot::compute(ENERGYVTYPE& dener,
const std::vector<VALUETYPE>& fparam_,
const std::vector<VALUETYPE>& aparam__) {
int nall = datype_.size();
int nframes = dcoord_.size() / nall / 3;
// when nall == 0, nframes cannot be inferred from coord; 1 is a safe default
int nframes = nall > 0 ? (dcoord_.size() / nall / 3) : 1;
std::vector<VALUETYPE> fparam;
std::vector<VALUETYPE> aparam_;
validate_fparam_aparam(nframes, (aparam_nall ? nall : (nall - nghost)),
Expand Down Expand Up @@ -753,7 +755,8 @@ void DeepPot::compute_inner(ENERGYVTYPE& dener,
const std::vector<VALUETYPE>& fparam,
const std::vector<VALUETYPE>& aparam) {
int nall = datype_.size();
int nframes = dcoord_.size() / nall / 3;
// when nall == 0, nframes cannot be inferred from coord; 1 is a safe default
int nframes = nall > 0 ? (dcoord_.size() / nall / 3) : 1;
int nloc = nall - nghost;

std::vector<std::pair<std::string, Tensor>> input_tensors;
Expand Down Expand Up @@ -841,7 +844,8 @@ void DeepPot::compute(ENERGYVTYPE& dener,
const std::vector<VALUETYPE>& dbox,
const std::vector<VALUETYPE>& fparam_,
const std::vector<VALUETYPE>& aparam_) {
int nframes = dcoord_.size() / 3 / datype_.size();
// when datype_.size() == 0, nframes cannot be inferred; 1 is a safe default
int nframes = datype_.size() > 0 ? (dcoord_.size() / 3 / datype_.size()) : 1;
atommap = deepmd::AtomMap(datype_.begin(), datype_.end());
int nloc = datype_.size();
std::vector<VALUETYPE> fparam;
Expand Down Expand Up @@ -930,7 +934,8 @@ void DeepPot::compute(ENERGYVTYPE& dener,
const std::vector<VALUETYPE>& fparam_,
const std::vector<VALUETYPE>& aparam__) {
int nall = datype_.size();
int nframes = dcoord_.size() / 3 / nall;
// when nall == 0, nframes cannot be inferred from coord; 1 is a safe default
int nframes = nall > 0 ? (dcoord_.size() / nall / 3) : 1;
int nloc = nall - nghost;
std::vector<VALUETYPE> fparam;
std::vector<VALUETYPE> aparam_;
Expand Down
39 changes: 25 additions & 14 deletions source/api_cc/src/common.cc
Original file line number Diff line number Diff line change
Expand Up @@ -380,7 +380,8 @@ int deepmd::session_input_tensors(
const deepmd::AtomMap& atommap,
const std::string scope,
const bool aparam_nall) {
int nframes = dcoord_.size() / 3 / datype_.size();
// when datype_.size() == 0, nframes cannot be inferred; 1 is a safe default
int nframes = datype_.size() > 0 ? (dcoord_.size() / 3 / datype_.size()) : 1;
int nall = datype_.size();
int nloc = nall;
assert(nall * 3 * nframes == dcoord_.size());
Expand Down Expand Up @@ -445,10 +446,13 @@ int deepmd::session_input_tensors(
std::vector<VALUETYPE> dcoord(dcoord_);
atommap.forward<VALUETYPE>(dcoord.begin(), dcoord_.begin(), 3, nframes, nall);
std::vector<VALUETYPE> aparam_(aparam__);
atommap.forward<VALUETYPE>(
aparam_.begin(), aparam__.begin(),
aparam__.size() / nframes / (aparam_nall ? nall : nloc), nframes,
(aparam_nall ? nall : nloc));
if ((aparam_nall ? nall : nloc) > 0) {
atommap.forward<VALUETYPE>(
aparam_.begin(), aparam__.begin(),
aparam__.size() / nframes / (aparam_nall ? nall : nloc), nframes,
(aparam_nall ? nall : nloc));
}
// when the atom count is 0, aparam__.size() must also be 0, so skipping the forward mapping is safe

for (int ii = 0; ii < nframes; ++ii) {
for (int jj = 0; jj < nall * 3; ++jj) {
Expand Down Expand Up @@ -520,7 +524,8 @@ int deepmd::session_input_tensors(
const int ago,
const std::string scope,
const bool aparam_nall) {
int nframes = dcoord_.size() / 3 / datype_.size();
// when datype_.size() == 0, nframes cannot be inferred; 1 is a safe default
int nframes = datype_.size() > 0 ? (dcoord_.size() / 3 / datype_.size()) : 1;
int nall = datype_.size();
int nloc = nall - nghost;
assert(nall * 3 * nframes == dcoord_.size());
Expand Down Expand Up @@ -581,10 +586,13 @@ int deepmd::session_input_tensors(
std::vector<VALUETYPE> dcoord(dcoord_);
atommap.forward<VALUETYPE>(dcoord.begin(), dcoord_.begin(), 3, nframes, nall);
std::vector<VALUETYPE> aparam_(aparam__);
atommap.forward<VALUETYPE>(
aparam_.begin(), aparam__.begin(),
aparam__.size() / nframes / (aparam_nall ? nall : nloc), nframes,
(aparam_nall ? nall : nloc));
if ((aparam_nall ? nall : nloc) > 0) {
atommap.forward<VALUETYPE>(
aparam_.begin(), aparam__.begin(),
aparam__.size() / nframes / (aparam_nall ? nall : nloc), nframes,
(aparam_nall ? nall : nloc));
}
// when the atom count is 0, aparam__.size() must also be 0, so skipping the forward mapping is safe

for (int ii = 0; ii < nframes; ++ii) {
for (int jj = 0; jj < nall * 3; ++jj) {
Expand Down Expand Up @@ -717,10 +725,13 @@ int deepmd::session_input_tensors_mixed_type(
std::vector<VALUETYPE> dcoord(dcoord_);
atommap.forward<VALUETYPE>(dcoord.begin(), dcoord_.begin(), 3, nframes, nall);
std::vector<VALUETYPE> aparam_(aparam__);
atommap.forward<VALUETYPE>(
aparam_.begin(), aparam__.begin(),
aparam__.size() / nframes / (aparam_nall ? nall : nloc), nframes,
(aparam_nall ? nall : nloc));
if ((aparam_nall ? nall : nloc) > 0) {
atommap.forward<VALUETYPE>(
aparam_.begin(), aparam__.begin(),
aparam__.size() / nframes / (aparam_nall ? nall : nloc), nframes,
(aparam_nall ? nall : nloc));
}
// when the atom count is 0, aparam__.size() must also be 0, so skipping the forward mapping is safe

for (int ii = 0; ii < nframes; ++ii) {
for (int jj = 0; jj < nall * 3; ++jj) {
Expand Down
4 changes: 2 additions & 2 deletions source/op/map_aparam.cc
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,8 @@ class MapAparamOp : public OpKernel {
int nframes = aparam_tensor.shape().dim_size(0);
int nloc = natoms(0);
int nall = natoms(1);
int nnei = nlist_tensor.shape().dim_size(1) / nloc;
int numb_aparam = aparam_tensor.shape().dim_size(1) / nall;
int nnei = nloc > 0 ? nlist_tensor.shape().dim_size(1) / nloc : 0;
int numb_aparam = nall > 0 ? aparam_tensor.shape().dim_size(1) / nall : 0;

// check the sizes
OP_REQUIRES(context, (nframes == nlist_tensor.shape().dim_size(0)),
Expand Down
2 changes: 1 addition & 1 deletion source/op/pairwise.cc
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ class PairwiseIdxOp : public OpKernel {
nloc_qmmm.push_back(nloc_qmmm_ii);
nghost_qm.push_back(nghost_qm_ii);
nghost_qmmm.push_back(nghost_qmmm_ii);
nframes_qmmm.push_back(backward_qmmm_map.size() / nall);
nframes_qmmm.push_back(nall > 0 ? backward_qmmm_map.size() / nall : 0);
}
int max_nloc_qm = 1, max_nloc_qmmm = 1, max_nghost_qm = 0,
max_nghost_qmmm = 0;
Expand Down
4 changes: 2 additions & 2 deletions source/op/prod_force.cc
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,8 @@ class ProdForceOp : public OpKernel {
int nframes = net_deriv_tensor.shape().dim_size(0);
int nloc = natoms(0);
int nall = natoms(1);
int ndescrpt = net_deriv_tensor.shape().dim_size(1) / nloc;
int nnei = nlist_tensor.shape().dim_size(1) / nloc;
int ndescrpt = nloc > 0 ? net_deriv_tensor.shape().dim_size(1) / nloc : 0;
int nnei = nloc > 0 ? nlist_tensor.shape().dim_size(1) / nloc : 0;

// check the sizes
OP_REQUIRES(context, (nframes == in_deriv_tensor.shape().dim_size(0)),
Expand Down
4 changes: 2 additions & 2 deletions source/op/prod_force_grad.cc
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,8 @@ class ProdForceGradOp : public OpKernel {

int nframes = net_deriv_tensor.shape().dim_size(0);
int nloc = natoms(0);
int ndescrpt = net_deriv_tensor.shape().dim_size(1) / nloc;
int nnei = nlist_tensor.shape().dim_size(1) / nloc;
int ndescrpt = nloc > 0 ? net_deriv_tensor.shape().dim_size(1) / nloc : 0;
int nnei = nloc > 0 ? nlist_tensor.shape().dim_size(1) / nloc : 0;

// check the sizes
OP_REQUIRES(context, (nframes == grad_shape.dim_size(0)),
Expand Down
8 changes: 4 additions & 4 deletions source/op/prod_force_grad_multi_device.cc
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,8 @@ class ProdForceSeAGradOp : public OpKernel {

int nframes = net_deriv_tensor.shape().dim_size(0);
int nloc = natoms(0);
int ndescrpt = net_deriv_tensor.shape().dim_size(1) / nloc;
int nnei = nlist_tensor.shape().dim_size(1) / nloc;
int ndescrpt = nloc > 0 ? net_deriv_tensor.shape().dim_size(1) / nloc : 0;
int nnei = nloc > 0 ? nlist_tensor.shape().dim_size(1) / nloc : 0;

// check the sizes
OP_REQUIRES(context, (nframes == grad_shape.dim_size(0)),
Expand Down Expand Up @@ -180,8 +180,8 @@ class ProdForceSeRGradOp : public OpKernel {

int nframes = net_deriv_tensor.shape().dim_size(0);
int nloc = natoms(0);
int ndescrpt = net_deriv_tensor.shape().dim_size(1) / nloc;
int nnei = nlist_tensor.shape().dim_size(1) / nloc;
int ndescrpt = nloc > 0 ? net_deriv_tensor.shape().dim_size(1) / nloc : 0;
int nnei = nloc > 0 ? nlist_tensor.shape().dim_size(1) / nloc : 0;

// check the sizes
OP_REQUIRES(context, (nframes == grad_shape.dim_size(0)),
Expand Down
8 changes: 4 additions & 4 deletions source/op/prod_force_multi_device.cc
Original file line number Diff line number Diff line change
Expand Up @@ -89,8 +89,8 @@ class ProdForceSeAOp : public OpKernel {
int nloc = natoms[0];
int nall = natoms[1];
int nframes = net_deriv_tensor.shape().dim_size(0);
int ndescrpt = net_deriv_tensor.shape().dim_size(1) / nloc;
int nnei = nlist_tensor.shape().dim_size(1) / nloc;
int ndescrpt = nloc > 0 ? net_deriv_tensor.shape().dim_size(1) / nloc : 0;
int nnei = nloc > 0 ? nlist_tensor.shape().dim_size(1) / nloc : 0;
// check the sizes
OP_REQUIRES(context, (nframes == in_deriv_tensor.shape().dim_size(0)),
errors::InvalidArgument("number of samples should match"));
Expand Down Expand Up @@ -187,8 +187,8 @@ class ProdForceSeROp : public OpKernel {
int nloc = natoms[0];
int nall = natoms[1];
int nframes = net_deriv_tensor.shape().dim_size(0);
int ndescrpt = net_deriv_tensor.shape().dim_size(1) / nloc;
int nnei = nlist_tensor.shape().dim_size(1) / nloc;
int ndescrpt = nloc > 0 ? net_deriv_tensor.shape().dim_size(1) / nloc : 0;
int nnei = nloc > 0 ? nlist_tensor.shape().dim_size(1) / nloc : 0;
// check the sizes
OP_REQUIRES(context, (nframes == in_deriv_tensor.shape().dim_size(0)),
errors::InvalidArgument("number of samples should match"));
Expand Down
4 changes: 2 additions & 2 deletions source/op/prod_force_se_a_grad.cc
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,8 @@ class ProdForceSeAGradOp : public OpKernel {

int nframes = net_deriv_tensor.shape().dim_size(0);
int nloc = natoms(0);
int ndescrpt = net_deriv_tensor.shape().dim_size(1) / nloc;
int nnei = nlist_tensor.shape().dim_size(1) / nloc;
int ndescrpt = nloc > 0 ? net_deriv_tensor.shape().dim_size(1) / nloc : 0;
int nnei = nloc > 0 ? nlist_tensor.shape().dim_size(1) / nloc : 0;

// check the sizes
OP_REQUIRES(context, (nframes == grad_shape.dim_size(0)),
Expand Down
2 changes: 1 addition & 1 deletion source/op/prod_force_se_a_mask.cc
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ class ProdForceSeAMaskOp : public OpKernel {
int nloc = total_atom_num;
int nall = total_atom_num;
int ndescrpt = nall * 4;
int nnei = nlist_tensor.shape().dim_size(1) / nloc;
int nnei = nloc > 0 ? nlist_tensor.shape().dim_size(1) / nloc : 0;

// check the sizes
OP_REQUIRES(context, (nframes == in_deriv_tensor.shape().dim_size(0)),
Expand Down
2 changes: 1 addition & 1 deletion source/op/prod_force_se_a_mask_grad.cc
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ class ProdForceSeAMaskGradOp : public OpKernel {

int nframes = net_deriv_tensor.shape().dim_size(0);
int nloc = total_atom_num;
int ndescrpt = net_deriv_tensor.shape().dim_size(1) / nloc;
int ndescrpt = nloc > 0 ? net_deriv_tensor.shape().dim_size(1) / nloc : 0;
int nnei = total_atom_num;

// check the sizes
Expand Down
4 changes: 2 additions & 2 deletions source/op/prod_force_se_r_grad.cc
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,8 @@ class ProdForceSeRGradOp : public OpKernel {

int nframes = net_deriv_tensor.shape().dim_size(0);
int nloc = natoms(0);
int ndescrpt = net_deriv_tensor.shape().dim_size(1) / nloc;
int nnei = nlist_tensor.shape().dim_size(1) / nloc;
int ndescrpt = nloc > 0 ? net_deriv_tensor.shape().dim_size(1) / nloc : 0;
int nnei = nloc > 0 ? nlist_tensor.shape().dim_size(1) / nloc : 0;

// check the sizes
OP_REQUIRES(context, (nframes == grad_shape.dim_size(0)),
Expand Down
4 changes: 2 additions & 2 deletions source/op/prod_virial.cc
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,8 @@ class ProdVirialOp : public OpKernel {
int nframes = net_deriv_tensor.shape().dim_size(0);
int nloc = natoms(0);
int nall = natoms(1);
int ndescrpt = net_deriv_tensor.shape().dim_size(1) / nloc;
int nnei = nlist_tensor.shape().dim_size(1) / nloc;
int ndescrpt = nloc > 0 ? net_deriv_tensor.shape().dim_size(1) / nloc : 0;
int nnei = nloc > 0 ? nlist_tensor.shape().dim_size(1) / nloc : 0;

// check the sizes
OP_REQUIRES(context, (nframes == in_deriv_tensor.shape().dim_size(0)),
Expand Down
Loading

0 comments on commit 8dcfbf5

Please sign in to comment.