This issue is part of a Codex global repository code scan.
The DeePMD raw and npy writers skip empty optional frame arrays, but the HDF5 writer reshapes and writes them. On reload, the HDF5 loader tries to reshape the resulting zero-width dataset back to the property's full per-frame shape and fails.
Affected code:

dpdata/formats/deepmd/hdf5.py, lines 205 to 224 (writer):

```python
for dt, prop in data_types.items():
    if dt in data:
        if prop["dump"]:
            ddata = np.reshape(data[dt], prop["shape"])
            if np.issubdtype(ddata.dtype, np.floating):
                ddata = ddata.astype(comp_prec)
            reshaped_data[dt] = ddata

# dump frame properties: cell, coord, energy, force and virial
nsets = nframes // set_size
if set_size * nsets < nframes:
    nsets += 1
for ii in range(nsets):
    set_stt = ii * set_size
    set_end = (ii + 1) * set_size
    set_folder = g.create_group("set.%03d" % ii)  # noqa: UP031
    for dt, prop in data_types.items():
        if dt in reshaped_data:
            set_folder.create_dataset(
                "{}.npy".format(prop["fn"]), data=reshaped_data[dt][set_stt:set_end]
```

dpdata/formats/deepmd/hdf5.py, lines 106 to 120 (loader):

```python
for dt, prop in data_types.items():
    all_data = []

    for ii in sets:
        set = g[ii]
        fn = "{}.npy".format(prop["fn"])
        if fn in set.keys():
            dd = set[fn][:]
            nframes = dd.shape[0]
            all_data.append(np.reshape(dd, (nframes, *prop["shape"])))
        elif prop["required"]:
            raise RuntimeError(f"{folder}/{ii}/{fn} not found")

    if len(all_data) > 0:
        data[dt] = np.concatenate(all_data, axis=0)
```

For comparison, raw and npy already skip this case:

dpdata/formats/deepmd/raw.py, lines 139 to 145:

```python
if nframes > 0 and np.asarray(data[dtype.name]).size == 0:
    # an optional frame property (e.g. forces/virials when
    # cal_force/cal_stress is disabled) may be empty while the
    # system still has frames. Skip it instead of writing a
    # meaningless (nframes, 0) array that cannot be reshaped on load.
    continue
ddata = np.reshape(data[dtype.name], [nframes, -1])
```

dpdata/formats/deepmd/comp.py, lines 155 to 161:

```python
if nframes > 0 and np.asarray(data[dtype.name]).size == 0:
    # an optional frame property (e.g. forces/virials when
    # cal_force/cal_stress is disabled) may be empty while the
    # system still has frames. Skip it instead of writing a
    # meaningless (nframes, 0) array that cannot be reshaped on load.
    continue
ddata = np.reshape(data[dtype.name], [nframes, -1])
```

Minimal reproducer:

```python
import os
import tempfile

import numpy as np

import dpdata
from dpdata.plugins.deepmd import DeePMDHDF5Format

base = dpdata.System("tests/poscars/POSCAR.oh.d", fmt="vasp/poscar")
data = base.data.copy()
data["energies"] = np.array([0.0])
data["forces"] = np.array([])

fd, path = tempfile.mkstemp(suffix=".h5")
os.close(fd)
try:
    DeePMDHDF5Format().to_system(data, path)
    dpdata.LabeledSystem(path, fmt="deepmd/hdf5")
finally:
    os.remove(path)
```

Current reload error:
ValueError: cannot reshape array of size 0 into shape (1,2,3)
The HDF5 writer should follow raw/npy behavior and skip empty optional frame arrays when nframes > 0.
This issue is part of a Codex global repository code scan.
The DeePMD raw and npy writers skip empty optional frame arrays, but the HDF5 writer reshapes and writes them. On reload, the HDF5 loader tries to reshape the resulting zero-width dataset back to the property's full per-frame shape and fails.
Affected code:
dpdata/dpdata/formats/deepmd/hdf5.py
Lines 205 to 224 in a7a50bf
dpdata/dpdata/formats/deepmd/hdf5.py
Lines 106 to 120 in a7a50bf
For comparison, raw and npy already skip this case:
dpdata/dpdata/formats/deepmd/raw.py
Lines 139 to 145 in a7a50bf
dpdata/dpdata/formats/deepmd/comp.py
Lines 155 to 161 in a7a50bf
Minimal reproducer:
Current reload error:
The HDF5 writer should follow raw/npy behavior and skip empty optional frame arrays when `nframes > 0`.