From 3a7171c811835a2cbba8c16e04c08455d19ddac9 Mon Sep 17 00:00:00 2001 From: SchrodingersCattt Date: Wed, 3 Jul 2024 18:25:35 +0800 Subject: [PATCH 1/7] quipGapXYZ: add unit convert and synonym matching --- dpdata/xyz/quip_gap_xyz.py | 189 ++++++++++++++++++++++++++----------- 1 file changed, 136 insertions(+), 53 deletions(-) diff --git a/dpdata/xyz/quip_gap_xyz.py b/dpdata/xyz/quip_gap_xyz.py index b23b27e07..e18928377 100644 --- a/dpdata/xyz/quip_gap_xyz.py +++ b/dpdata/xyz/quip_gap_xyz.py @@ -6,6 +6,14 @@ from collections import OrderedDict import numpy as np +from ..unit import EnergyConversion, ForceConversion, LengthConversion + +e_conv_kcalpermol2eV = EnergyConversion("kcal_mol", "eV").value() +e_conv_au2eV = EnergyConversion("hartree", "eV").value() +f_conv_kcalpermolperang2eVperang = ForceConversion("kcal_mol/angstrom", "eV/angstrom").value() +f_conv_auperang2eVperang = ForceConversion("hartree/angstrom", "eV/angstrom").value() +f_conv_kcalpermolperbohr2eVperang = ForceConversion("kcal_mol/bohr", "eV/angstrom").value() +f_conv_au2eVperang = ForceConversion("hartree/bohr", "eV/angstrom").value() class QuipGapxyzSystems: @@ -38,10 +46,13 @@ def get_block_generator(self): lines.append(self.file_object.readline()) if not lines[-1]: raise RuntimeError( - f"this xyz file may lack of lines, should be {atom_num + 2};lines:{lines}" + "this xyz file may lack of lines, should be {};lines:{}".format( + atom_num + 2, lines + ) ) yield lines + @staticmethod def handle_single_xyz_frame(lines): atom_num = int(lines[0].strip("\n").strip()) @@ -82,56 +93,68 @@ def handle_single_xyz_frame(lines): force_array = None virials = None for kv_dict in prop_list: - if kv_dict["key"] == "species": - if kv_dict["datatype"] != "S": - raise RuntimeError( - "datatype for species must be 'S' instead of {}".format( - kv_dict["datatype"] + try: + if kv_dict["key"] == "species": + if kv_dict["datatype"] != "S": + raise RuntimeError( + "datatype for species must be 'S' instead of {}".format( + kv_dict["datatype"] + ) ) - ) - field_length = int(kv_dict["value"]) - type_array = data_array[ - :, used_colomn : used_colomn + field_length - ].flatten() - used_colomn += field_length - continue - elif kv_dict["key"] == "pos": - if kv_dict["datatype"] != "R": - raise RuntimeError( - "datatype for pos must be 'R' instead of {}".format( - kv_dict["datatype"] + field_length = int(kv_dict["value"]) + type_array = data_array[ + :, used_colomn : used_colomn + field_length + ].flatten() + used_colomn += field_length + continue + elif kv_dict["key"] == "pos": + if kv_dict["datatype"] != "R": + raise RuntimeError( + "datatype for pos must be 'R' instead of {}".format( + kv_dict["datatype"] + ) ) - ) - field_length = int(kv_dict["value"]) - coords_array = data_array[:, used_colomn : used_colomn + field_length] - used_colomn += field_length - continue - elif kv_dict["key"] == "Z": - if kv_dict["datatype"] != "I": - raise RuntimeError( - "datatype for pos must be 'R' instead of {}".format( - kv_dict["datatype"] + field_length = int(kv_dict["value"]) + coords_array = data_array[:, used_colomn : used_colomn + field_length] + used_colomn += field_length + continue + elif kv_dict["key"] == "Z": + if kv_dict["datatype"] != "I": + raise RuntimeError( + "datatype for pos must be 'R' instead of {}".format( + kv_dict["datatype"] + ) ) - ) - field_length = int(kv_dict["value"]) - Z_array = data_array[ - :, used_colomn : used_colomn + field_length - ].flatten() - used_colomn += field_length - continue - elif kv_dict["key"] == "force": - if kv_dict["datatype"] != "R": - raise RuntimeError( - "datatype for pos must be 'R' instead of {}".format( - kv_dict["datatype"] + field_length = int(kv_dict["value"]) + Z_array = data_array[ + :, used_colomn : used_colomn + field_length + ].flatten() + used_colomn += field_length + continue + elif kv_dict["key"] == "tags": + if kv_dict["datatype"] != "I": + raise RuntimeError( + "datatype for tags must be 'I' instead of {}".format( + kv_dict["datatype"] + ) ) - ) - field_length = int(kv_dict["value"]) - force_array = data_array[:, used_colomn : used_colomn + field_length] - used_colomn += field_length + field_length = int(kv_dict["value"]) + used_colomn += field_length + continue + elif kv_dict["key"] == "force" or kv_dict["key"] == "forces" : + if kv_dict["datatype"] != "R": + raise RuntimeError( + "datatype for pos must be 'R' instead of {}".format( + kv_dict["datatype"] + ) + ) + field_length = int(kv_dict["value"]) + force_array = data_array[:, used_colomn : used_colomn + field_length] + used_colomn += field_length + continue + except Exception as e: + print("unknown field {}".format(kv_dict["key"]), e) continue - else: - raise RuntimeError("unknown field {}".format(kv_dict["key"])) type_num_dict = OrderedDict() atom_type_list = [] @@ -164,22 +187,82 @@ def handle_single_xyz_frame(lines): ).astype("float32") else: virials = None + + try: + e_units = np.array([field_dict["energy-unit"].lower()]) + f_units = np.array([field_dict["force-unit"].lower()]) + except: + pass + #print('No units information contained.') + info_dict = {} + info_dict["nopbc"] = False info_dict["atom_names"] = list(type_num_array[:, 0]) info_dict["atom_numbs"] = list(type_num_array[:, 1].astype(int)) info_dict["atom_types"] = np.array(atom_type_list).astype(int) - info_dict["cells"] = np.array( - [ - np.array(list(filter(bool, field_dict["Lattice"].split(" ")))).reshape( - 3, 3 - ) - ] - ).astype("float32") + info_dict["coords"] = np.array([coords_array]).astype("float32") + ''' info_dict["energies"] = np.array([field_dict["energy"]]).astype("float32") info_dict["forces"] = np.array([force_array]).astype("float32") + ''' + + try: + if e_units == "kcal/mol": + info_dict["energies"] = np.array([field_dict["energy"]]).astype("float32") * e_conv_kcalpermol2eV + elif e_units in ["hartree", "au", "a.u."]: + info_dict["energies"] = np.array([field_dict["energy"]]).astype("float32") * e_conv_au2eV + elif e_units == "ev": + info_dict["energies"] = np.array([field_dict["energy"]]).astype("float32") + else: info_dict["energies"] = np.array([field_dict["energy"]]).astype("float32") + except Exception: + try: + possible_fields = [ + "energy", + "energies", + "Energies", + "potential-energy.energy", + "Energy" + ] + for key in possible_fields: + if key in field_dict: + info_dict["energies"] = np.array([field_dict[key]], dtype="float32") + break + else: + raise ValueError("No valid energy field found in field_dict.") + except KeyError: + raise ValueError("Error while accessing energy fields in field_dict.") + + try: + if f_units == "kcal/mol/angstrom": + info_dict["forces"] = np.array([force_array]).astype("float32") * f_conv_kcalpermolperang2eVperang + elif f_units == "hartree/angstrom" or f_units == "hartree/ang" or f_units == "hartree/ang.": + info_dict["forces"] = np.array([force_array]).astype("float32") * f_conv_auperang2eVperang + elif f_units == "kcal/mol/bohr": + info_dict["forces"] = np.array([force_array]).astype("float32") * f_conv_kcalpermolperbohr2eVperang + elif f_units == "kcal/mol/bohr": + info_dict["forces"] = np.array([force_array]).astype("float32") * f_conv_au2eVperang + elif f_units == "ev/angstrom" or f_units == "ev/ang" or f_units == "ev/ang.": + info_dict["forces"] = np.array([force_array]).astype("float32") + else: info_dict["forces"] = np.array([force_array]).astype("float32") + except: + info_dict["forces"] = np.array([force_array]).astype("float32") + if virials is not None: info_dict["virials"] = virials info_dict["orig"] = np.zeros(3) + if "Lattice" in field_dict and field_dict["Lattice"].strip(): + lattice_values = list(filter(bool, field_dict["Lattice"].split(" "))) + info_dict["cells"] = np.array( + [np.array(lattice_values).reshape(3, 3)] + ).astype("float32") + else: + lattice_values = np.array([[100.0, 0.0, 0.0], [0.0, 100.0, 0.0], [0.0, 0.0, 100.0]]) + + info_dict["nopbc"] = True + info_dict["cells"] = np.array( + [np.array(lattice_values).reshape(3, 3)] + ).astype("float32") + return info_dict From 974c1b25c4c236614a648c2806b79fd5c614c33b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 3 Jul 2024 10:29:38 +0000 Subject: [PATCH 2/7] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- dpdata/xyz/quip_gap_xyz.py | 106 +++++++++++++++++++++++++------------ 1 file changed, 71 insertions(+), 35 deletions(-) diff --git a/dpdata/xyz/quip_gap_xyz.py b/dpdata/xyz/quip_gap_xyz.py index e18928377..304f42391 100644 --- a/dpdata/xyz/quip_gap_xyz.py +++ b/dpdata/xyz/quip_gap_xyz.py @@ -6,13 +6,18 @@ from collections import OrderedDict import numpy as np -from ..unit import EnergyConversion, ForceConversion, LengthConversion + +from ..unit import EnergyConversion, ForceConversion e_conv_kcalpermol2eV = EnergyConversion("kcal_mol", "eV").value() e_conv_au2eV = EnergyConversion("hartree", "eV").value() -f_conv_kcalpermolperang2eVperang = ForceConversion("kcal_mol/angstrom", "eV/angstrom").value() +f_conv_kcalpermolperang2eVperang = ForceConversion( + "kcal_mol/angstrom", "eV/angstrom" +).value() f_conv_auperang2eVperang = ForceConversion("hartree/angstrom", "eV/angstrom").value() -f_conv_kcalpermolperbohr2eVperang = ForceConversion("kcal_mol/bohr", "eV/angstrom").value() +f_conv_kcalpermolperbohr2eVperang = ForceConversion( + "kcal_mol/bohr", "eV/angstrom" +).value() f_conv_au2eVperang = ForceConversion("hartree/bohr", "eV/angstrom").value() @@ -46,13 +51,10 @@ def get_block_generator(self): lines.append(self.file_object.readline()) if not lines[-1]: raise RuntimeError( - "this xyz file may lack of lines, should be {};lines:{}".format( - atom_num + 2, lines - ) + f"this xyz file may lack of lines, should be {atom_num + 2};lines:{lines}" ) yield lines - @staticmethod def handle_single_xyz_frame(lines): atom_num = int(lines[0].strip("\n").strip()) @@ -115,7 +117,9 @@ def handle_single_xyz_frame(lines): ) ) field_length = int(kv_dict["value"]) - coords_array = data_array[:, used_colomn : used_colomn + field_length] + coords_array = data_array[ + :, used_colomn : used_colomn + field_length + ] used_colomn += field_length continue elif kv_dict["key"] == "Z": @@ -140,8 +144,8 @@ def handle_single_xyz_frame(lines): ) field_length = int(kv_dict["value"]) used_colomn += field_length - continue - elif kv_dict["key"] == "force" or kv_dict["key"] == "forces" : + continue + elif kv_dict["key"] == "force" or kv_dict["key"] == "forces": if kv_dict["datatype"] != "R": raise RuntimeError( "datatype for pos must be 'R' instead of {}".format( @@ -149,7 +153,9 @@ def handle_single_xyz_frame(lines): ) ) field_length = int(kv_dict["value"]) - force_array = data_array[:, used_colomn : used_colomn + field_length] + force_array = data_array[ + :, used_colomn : used_colomn + field_length + ] used_colomn += field_length continue except Exception as e: @@ -187,14 +193,13 @@ def handle_single_xyz_frame(lines): ).astype("float32") else: virials = None - + try: e_units = np.array([field_dict["energy-unit"].lower()]) f_units = np.array([field_dict["force-unit"].lower()]) except: pass - #print('No units information contained.') - + # print('No units information contained.') info_dict = {} info_dict["nopbc"] = False @@ -203,19 +208,29 @@ def handle_single_xyz_frame(lines): info_dict["atom_types"] = np.array(atom_type_list).astype(int) info_dict["coords"] = np.array([coords_array]).astype("float32") - ''' + """ info_dict["energies"] = np.array([field_dict["energy"]]).astype("float32") info_dict["forces"] = np.array([force_array]).astype("float32") - ''' + """ try: if e_units == "kcal/mol": - info_dict["energies"] = np.array([field_dict["energy"]]).astype("float32") * e_conv_kcalpermol2eV + info_dict["energies"] = ( + np.array([field_dict["energy"]]).astype("float32") + * e_conv_kcalpermol2eV + ) elif e_units in ["hartree", "au", "a.u."]: - info_dict["energies"] = np.array([field_dict["energy"]]).astype("float32") * e_conv_au2eV + info_dict["energies"] = ( + np.array([field_dict["energy"]]).astype("float32") * e_conv_au2eV + ) elif e_units == "ev": - info_dict["energies"] = np.array([field_dict["energy"]]).astype("float32") - else: info_dict["energies"] = np.array([field_dict["energy"]]).astype("float32") + info_dict["energies"] = np.array([field_dict["energy"]]).astype( + "float32" + ) + else: + info_dict["energies"] = np.array([field_dict["energy"]]).astype( + "float32" + ) except Exception: try: possible_fields = [ @@ -223,31 +238,50 @@ def handle_single_xyz_frame(lines): "energies", "Energies", "potential-energy.energy", - "Energy" + "Energy", ] for key in possible_fields: if key in field_dict: - info_dict["energies"] = np.array([field_dict[key]], dtype="float32") + info_dict["energies"] = np.array( + [field_dict[key]], dtype="float32" + ) break else: raise ValueError("No valid energy field found in field_dict.") except KeyError: raise ValueError("Error while accessing energy fields in field_dict.") - + try: if f_units == "kcal/mol/angstrom": - info_dict["forces"] = np.array([force_array]).astype("float32") * f_conv_kcalpermolperang2eVperang - elif f_units == "hartree/angstrom" or f_units == "hartree/ang" or f_units == "hartree/ang.": - info_dict["forces"] = np.array([force_array]).astype("float32") * f_conv_auperang2eVperang - elif f_units == "kcal/mol/bohr": - info_dict["forces"] = np.array([force_array]).astype("float32") * f_conv_kcalpermolperbohr2eVperang - elif f_units == "kcal/mol/bohr": - info_dict["forces"] = np.array([force_array]).astype("float32") * f_conv_au2eVperang - elif f_units == "ev/angstrom" or f_units == "ev/ang" or f_units == "ev/ang.": - info_dict["forces"] = np.array([force_array]).astype("float32") - else: info_dict["forces"] = np.array([force_array]).astype("float32") + info_dict["forces"] = ( + np.array([force_array]).astype("float32") + * f_conv_kcalpermolperang2eVperang + ) + elif ( + f_units == "hartree/angstrom" + or f_units == "hartree/ang" + or f_units == "hartree/ang." + ): + info_dict["forces"] = ( + np.array([force_array]).astype("float32") * f_conv_auperang2eVperang + ) + elif f_units == "kcal/mol/bohr": + info_dict["forces"] = ( + np.array([force_array]).astype("float32") + * f_conv_kcalpermolperbohr2eVperang + ) + elif f_units == "kcal/mol/bohr": + info_dict["forces"] = ( + np.array([force_array]).astype("float32") * f_conv_au2eVperang + ) + elif ( + f_units == "ev/angstrom" or f_units == "ev/ang" or f_units == "ev/ang." + ): + info_dict["forces"] = np.array([force_array]).astype("float32") + else: + info_dict["forces"] = np.array([force_array]).astype("float32") except: - info_dict["forces"] = np.array([force_array]).astype("float32") + info_dict["forces"] = np.array([force_array]).astype("float32") if virials is not None: info_dict["virials"] = virials @@ -258,7 +292,9 @@ def handle_single_xyz_frame(lines): [np.array(lattice_values).reshape(3, 3)] ).astype("float32") else: - lattice_values = np.array([[100.0, 0.0, 0.0], [0.0, 100.0, 0.0], [0.0, 0.0, 100.0]]) + lattice_values = np.array( + [[100.0, 0.0, 0.0], [0.0, 100.0, 0.0], [0.0, 0.0, 100.0]] + ) info_dict["nopbc"] = True info_dict["cells"] = np.array( From da807b88812090a5db28ce5fbeb064b638b44ffd Mon Sep 17 00:00:00 2001 From: SchrodingersCattt Date: Wed, 3 Jul 2024 18:36:12 +0800 Subject: [PATCH 3/7] style: add Exception after except --- dpdata/xyz/quip_gap_xyz.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dpdata/xyz/quip_gap_xyz.py b/dpdata/xyz/quip_gap_xyz.py index e18928377..6f40f75f3 100644 --- a/dpdata/xyz/quip_gap_xyz.py +++ b/dpdata/xyz/quip_gap_xyz.py @@ -191,7 +191,7 @@ def handle_single_xyz_frame(lines): try: e_units = np.array([field_dict["energy-unit"].lower()]) f_units = np.array([field_dict["force-unit"].lower()]) - except: + except Exception: pass #print('No units information contained.') @@ -246,7 +246,7 @@ def handle_single_xyz_frame(lines): elif f_units == "ev/angstrom" or f_units == "ev/ang" or f_units == "ev/ang.": info_dict["forces"] = np.array([force_array]).astype("float32") else: info_dict["forces"] = np.array([force_array]).astype("float32") - except: + except Exception: info_dict["forces"] = np.array([force_array]).astype("float32") if virials is not None: From 37b96b8cce07a27022cd6b18e253505591de2f66 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 3 Jul 2024 10:46:01 +0000 Subject: [PATCH 4/7] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- dpdata/xyz/quip_gap_xyz.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dpdata/xyz/quip_gap_xyz.py b/dpdata/xyz/quip_gap_xyz.py index 28846ec8f..d08791dde 100644 --- a/dpdata/xyz/quip_gap_xyz.py +++ b/dpdata/xyz/quip_gap_xyz.py @@ -257,15 +257,15 @@ def handle_single_xyz_frame(lines): info_dict["forces"] = np.array([force_array]).astype("float32") * f_conv_kcalpermolperang2eVperang elif f_units == "hartree/angstrom" or f_units == "hartree/ang" or f_units == "hartree/ang.": info_dict["forces"] = np.array([force_array]).astype("float32") * f_conv_auperang2eVperang - elif f_units == "kcal/mol/bohr": + elif f_units == "kcal/mol/bohr": info_dict["forces"] = np.array([force_array]).astype("float32") * f_conv_kcalpermolperbohr2eVperang - elif f_units == "kcal/mol/bohr": + elif f_units == "kcal/mol/bohr": info_dict["forces"] = np.array([force_array]).astype("float32") * f_conv_au2eVperang elif f_units == "ev/angstrom" or f_units == "ev/ang" or f_units == "ev/ang.": - info_dict["forces"] = np.array([force_array]).astype("float32") + info_dict["forces"] = np.array([force_array]).astype("float32") else: info_dict["forces"] = np.array([force_array]).astype("float32") except Exception: - info_dict["forces"] = np.array([force_array]).astype("float32") + info_dict["forces"] = np.array([force_array]).astype("float32") ======= info_dict["forces"] = ( np.array([force_array]).astype("float32") From aebec7c0ac5ce842e2da4a7ed7ff2caacafb8170 Mon Sep 17 00:00:00 2001 From: SchrodingersCattt Date: Wed, 3 Jul 2024 18:53:52 +0800 Subject: [PATCH 5/7] fix: removed redundant --- dpdata/xyz/quip_gap_xyz.py | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/dpdata/xyz/quip_gap_xyz.py b/dpdata/xyz/quip_gap_xyz.py index d08791dde..11b91a23e 100644 --- a/dpdata/xyz/quip_gap_xyz.py +++ b/dpdata/xyz/quip_gap_xyz.py @@ -253,20 +253,6 @@ def handle_single_xyz_frame(lines): try: if f_units == "kcal/mol/angstrom": -<<<<<<< HEAD - info_dict["forces"] = np.array([force_array]).astype("float32") * f_conv_kcalpermolperang2eVperang - elif f_units == "hartree/angstrom" or f_units == "hartree/ang" or f_units == "hartree/ang.": - info_dict["forces"] = np.array([force_array]).astype("float32") * f_conv_auperang2eVperang - elif f_units == "kcal/mol/bohr": - info_dict["forces"] = np.array([force_array]).astype("float32") * f_conv_kcalpermolperbohr2eVperang - elif f_units == "kcal/mol/bohr": - info_dict["forces"] = np.array([force_array]).astype("float32") * f_conv_au2eVperang - elif f_units == "ev/angstrom" or f_units == "ev/ang" or f_units == "ev/ang.": - info_dict["forces"] = np.array([force_array]).astype("float32") - else: info_dict["forces"] = np.array([force_array]).astype("float32") - except Exception: - info_dict["forces"] = np.array([force_array]).astype("float32") -======= info_dict["forces"] = ( np.array([force_array]).astype("float32") * f_conv_kcalpermolperang2eVperang @@ -296,7 +282,6 @@ def handle_single_xyz_frame(lines): info_dict["forces"] = np.array([force_array]).astype("float32") except: info_dict["forces"] = np.array([force_array]).astype("float32") ->>>>>>> origin/devel if virials is not None: info_dict["virials"] = virials From 81a27d84b7cc74553e87078863f115063860369e Mon Sep 17 00:00:00 2001 From: SchrodingersCattt Date: Wed, 3 Jul 2024 19:00:50 +0800 Subject: [PATCH 6/7] style: add Exception after except --- dpdata/xyz/quip_gap_xyz.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dpdata/xyz/quip_gap_xyz.py b/dpdata/xyz/quip_gap_xyz.py index 11b91a23e..6c5db29f0 100644 --- a/dpdata/xyz/quip_gap_xyz.py +++ b/dpdata/xyz/quip_gap_xyz.py @@ -280,7 +280,7 @@ def handle_single_xyz_frame(lines): info_dict["forces"] = np.array([force_array]).astype("float32") else: info_dict["forces"] = np.array([force_array]).astype("float32") - except: + except Exception: info_dict["forces"] = np.array([force_array]).astype("float32") if virials is not None: From 13941d2173c6205c47b9eee88e6dae2a7db71473 Mon Sep 17 00:00:00 2001 From: SchrodingersCattt Date: Fri, 3 Jul 2026 01:26:31 +0000 Subject: [PATCH 7/7] feat(extxyz): add table-driven unit conversion for energy, force, and stress - New module dpdata/formats/xyz/_unit_convert.py with unit alias maps and _get_unit_factor() helper that delegates to dpdata.unit converters - Support energy-unit, force-unit, stress-unit headers in extxyz files - Convert to dpdata internal units (eV, eV/angstrom, eV/angstrom^3) - Raise ValueError for unsupported units instead of silent pass - Use float64 throughout instead of float32 - Add _parse_force_unit() for composite unit strings (e.g. kcal/mol/angstrom) - Add stress_factor parameter to _parse_stress_to_virials() - Add comprehensive tests for unit conversion module and end-to-end Resolves reviewer feedback from PR #678: - No more bare except / silent error swallowing - No more dead commented-out code - No more float32 precision truncation - Full test coverage for unit conversion paths --- dpdata/formats/xyz/_unit_convert.py | 150 ++++++++++++++++++++++ dpdata/formats/xyz/quip_gap_xyz.py | 28 ++++- tests/test_quip_gap_xyz.py | 187 ++++++++++++++++++++++++++++ tests/xyz/energy_hartree.xyz | 4 + tests/xyz/energy_kcal_mol.xyz | 4 + tests/xyz/stress_gpa.xyz | 4 + tests/xyz/stress_kbar.xyz | 4 + tests/xyz/unsupported_unit.xyz | 4 + 8 files changed, 380 insertions(+), 5 deletions(-) create mode 100644 dpdata/formats/xyz/_unit_convert.py create mode 100644 tests/xyz/energy_hartree.xyz create mode 100644 tests/xyz/energy_kcal_mol.xyz create mode 100644 tests/xyz/stress_gpa.xyz create mode 100644 tests/xyz/stress_kbar.xyz create mode 100644 tests/xyz/unsupported_unit.xyz diff --git a/dpdata/formats/xyz/_unit_convert.py b/dpdata/formats/xyz/_unit_convert.py new file mode 100644 index 000000000..6a3a0700a --- /dev/null +++ b/dpdata/formats/xyz/_unit_convert.py @@ -0,0 +1,150 @@ +"""Unit conversion helpers for extended XYZ (extxyz) format. + +This module provides a table-driven approach to convert energy, force, and +stress/pressure values from various unit systems commonly found in extxyz +files into dpdata's internal units: + +- Energy: eV +- Force: eV/angstrom +- Stress/Pressure: eV/angstrom^3 (before volume multiplication to get virial) +""" + +from __future__ import annotations + +from dpdata.unit import EnergyConversion, ForceConversion, PressureConversion + +# --------------------------------------------------------------------------- +# Unit alias mapping tables +# Keys are LOWERCASE; values are canonical names recognized by dpdata.unit +# --------------------------------------------------------------------------- + +_ENERGY_UNIT_MAP: dict[str, str] = { + "ev": "eV", + "hartree": "hartree", + "ha": "hartree", + "ry": "rydberg", + "rydberg": "rydberg", + "kcal/mol": "kcal_mol", + "kcal_mol": "kcal_mol", + "kj/mol": "kJ_mol", + "kj_mol": "kJ_mol", +} + +_LENGTH_UNIT_MAP: dict[str, str] = { + "angstrom": "angstrom", + "ang": "angstrom", + "ang.": "angstrom", + "a": "angstrom", + "bohr": "bohr", + "nm": "nm", +} + +_PRESSURE_UNIT_MAP: dict[str, str] = { + "gpa": "GPa", + "kbar": "kbar", + "bar": "bar", + "ev/angstrom^3": "eV/angstrom^3", + "ev/ang^3": "eV/angstrom^3", + "ev/a^3": "eV/angstrom^3", + "ha/bohr^3": "hartree/bohr^3", + "hartree/bohr^3": "hartree/bohr^3", +} + +# dpdata internal unit strings +_INTERNAL_ENERGY = "eV" +_INTERNAL_FORCE = "eV/angstrom" +_INTERNAL_PRESSURE = "eV/angstrom^3" + + +def _parse_force_unit(raw: str) -> tuple[str, str]: + """Split a composite force unit string into (energy_part, length_part). + + Examples + -------- + >>> _parse_force_unit("kcal/mol/angstrom") + ('kcal/mol', 'angstrom') + >>> _parse_force_unit("hartree/bohr") + ('hartree', 'bohr') + >>> _parse_force_unit("ev/ang") + ('ev', 'ang') + """ + # Try matching known energy prefixes (longest first) to handle + # composite names like "kcal/mol" that themselves contain "/". + for e_key in sorted(_ENERGY_UNIT_MAP.keys(), key=len, reverse=True): + prefix = e_key + "/" + if raw.startswith(prefix): + l_part = raw[len(prefix) :] + if l_part: + return e_key, l_part + # Fallback: split on last "/" + parts = raw.rsplit("/", 1) + if len(parts) == 2 and parts[0] and parts[1]: + return parts[0], parts[1] + raise ValueError(f"Cannot parse force unit string: '{raw}'") + + +def _get_unit_factor(unit_str: str | None, quantity: str) -> float: + """Return the multiplicative factor to convert from the given unit to dpdata internals. + + Parameters + ---------- + unit_str : str or None + The unit string read from the extxyz header (e.g. "hartree", "kcal/mol/angstrom"). + If None, returns 1.0 (assumes data is already in internal units). + quantity : str + One of "energy", "force", or "stress". + + Returns + ------- + float + Conversion factor such that ``value_internal = value_file * factor``. + + Raises + ------ + ValueError + If the unit string is not recognized or the quantity type is invalid. + """ + if unit_str is None: + return 1.0 + + key = unit_str.lower().strip() + + if quantity == "energy": + canonical = _ENERGY_UNIT_MAP.get(key) + if canonical is None: + raise ValueError( + f"Unsupported energy unit: '{unit_str}'. " + f"Supported: {list(_ENERGY_UNIT_MAP.keys())}" + ) + return EnergyConversion(canonical, _INTERNAL_ENERGY).value() + + elif quantity == "force": + e_part, l_part = _parse_force_unit(key) + e_canonical = _ENERGY_UNIT_MAP.get(e_part) + l_canonical = _LENGTH_UNIT_MAP.get(l_part) + if e_canonical is None: + raise ValueError( + f"Unsupported energy part in force unit: '{e_part}' " + f"(from '{unit_str}'). Supported: {list(_ENERGY_UNIT_MAP.keys())}" + ) + if l_canonical is None: + raise ValueError( + f"Unsupported length part in force unit: '{l_part}' " + f"(from '{unit_str}'). Supported: {list(_LENGTH_UNIT_MAP.keys())}" + ) + src_unit = f"{e_canonical}/{l_canonical}" + return ForceConversion(src_unit, _INTERNAL_FORCE).value() + + elif quantity == "stress": + canonical = _PRESSURE_UNIT_MAP.get(key) + if canonical is None: + raise ValueError( + f"Unsupported stress/pressure unit: '{unit_str}'. " + f"Supported: {list(_PRESSURE_UNIT_MAP.keys())}" + ) + return PressureConversion(canonical, _INTERNAL_PRESSURE).value() + + else: + raise ValueError( + f"Unknown quantity type: '{quantity}'. Must be 'energy', 'force', or 'stress'." + ) diff --git a/dpdata/formats/xyz/quip_gap_xyz.py b/dpdata/formats/xyz/quip_gap_xyz.py index fe4158728..c9a9b1992 100644 --- a/dpdata/formats/xyz/quip_gap_xyz.py +++ b/dpdata/formats/xyz/quip_gap_xyz.py @@ -8,6 +8,7 @@ import numpy as np +from dpdata.formats.xyz._unit_convert import _get_unit_factor from dpdata.periodic_table import Element # Possible keys for the energy field in the extxyz comment line, @@ -24,7 +25,7 @@ _STRESS_KEYS = ("stress", "stresses") -def _parse_stress_to_virials(stress_str, cell, stress_sign=-1): +def _parse_stress_to_virials(stress_str, cell, stress_sign=-1, stress_factor=1.0): """Convert a stress field string to virial tensor. Parameters @@ -38,6 +39,10 @@ def _parse_stress_to_virials(stress_str, cell, stress_sign=-1): Sign convention for ``virial = stress_sign * volume * stress``. Default ``-1`` follows the ASE convention where ``virial = -V * stress`` (stress in eV/angstrom^3). + stress_factor : float + Multiplicative factor to convert stress from file units to + eV/angstrom^3 before computing the virial. Default 1.0 + (assumes stress is already in eV/angstrom^3). Returns ------- @@ -57,7 +62,7 @@ def _parse_stress_to_virials(stress_str, cell, stress_sign=-1): f"stress field must have 6 (Voigt) or 9 (3x3) values, got {len(vals)}" ) volume = abs(np.linalg.det(cell)) - virials = stress_sign * volume * stress + virials = stress_sign * volume * stress * stress_factor return np.array([virials]) @@ -249,13 +254,26 @@ def handle_single_xyz_frame(lines, stress_sign=-1, **kwargs): stress_raw = field_dict[skey] break + # --- unit conversion factors --- + # Read optional unit metadata from extxyz header. + # When absent, factor is 1.0 (assumes dpdata internal units). + e_unit_str = field_dict.get("energy-unit") or field_dict.get("energy_unit") + f_unit_str = field_dict.get("force-unit") or field_dict.get("force_unit") + s_unit_str = field_dict.get("stress-unit") or field_dict.get("stress_unit") + + e_factor = _get_unit_factor(e_unit_str, "energy") + f_factor = _get_unit_factor(f_unit_str, "force") + s_factor = _get_unit_factor(s_unit_str, "stress") + if virial_raw is not None: virials = np.array( [np.array(list(filter(bool, virial_raw.split(" ")))).reshape(3, 3)] ).astype(np.float64) + # Note: virial values are assumed in eV (no unit header for virial itself; + # if stress-unit is given it only applies to stress fields). elif stress_raw is not None: virials = _parse_stress_to_virials( - stress_raw, cells, stress_sign=stress_sign + stress_raw, cells, stress_sign=stress_sign, stress_factor=s_factor ) # --- energy (try several common keys) --- @@ -275,8 +293,8 @@ def handle_single_xyz_frame(lines, stress_sign=-1, **kwargs): info_dict["atom_numbs"] = list(type_num_array[:, 1].astype(int)) info_dict["atom_types"] = np.array(atom_type_list).astype(int) info_dict["coords"] = np.array([coords_array]).astype(np.float64) - info_dict["energies"] = np.array([energy_value]).astype(np.float64) - info_dict["forces"] = np.array([force_array]).astype(np.float64) + info_dict["energies"] = np.array([energy_value], dtype=np.float64) * e_factor + info_dict["forces"] = np.array([force_array], dtype=np.float64) * f_factor if virials is not None: info_dict["virials"] = virials info_dict["orig"] = np.zeros(3) diff --git a/tests/test_quip_gap_xyz.py b/tests/test_quip_gap_xyz.py index 67cb1a715..4fda3c631 100644 --- a/tests/test_quip_gap_xyz.py +++ b/tests/test_quip_gap_xyz.py @@ -384,5 +384,192 @@ def test_direct_read(self): self.assertIn("virials", system.data) +# ============================================================================ +# Unit conversion tests (Phase 5 of PR #678 redesign) +# ============================================================================ + + +class TestUnitConvertModule(unittest.TestCase): + """Direct tests for dpdata.formats.xyz._unit_convert helpers.""" + + def test_get_unit_factor_none_returns_1(self): + from dpdata.formats.xyz._unit_convert import _get_unit_factor + + self.assertEqual(_get_unit_factor(None, "energy"), 1.0) + self.assertEqual(_get_unit_factor(None, "force"), 1.0) + self.assertEqual(_get_unit_factor(None, "stress"), 1.0) + + def test_get_unit_factor_energy_ev(self): + from dpdata.formats.xyz._unit_convert import _get_unit_factor + + self.assertAlmostEqual(_get_unit_factor("eV", "energy"), 1.0) + + def test_get_unit_factor_energy_hartree(self): + from dpdata.formats.xyz._unit_convert import _get_unit_factor + from dpdata.unit import EnergyConversion + + expected = EnergyConversion("hartree", "eV").value() + self.assertAlmostEqual(_get_unit_factor("hartree", "energy"), expected) + self.assertAlmostEqual(_get_unit_factor("Ha", "energy"), expected) + + def test_get_unit_factor_energy_kcal_mol(self): + from dpdata.formats.xyz._unit_convert import _get_unit_factor + from dpdata.unit import EnergyConversion + + expected = EnergyConversion("kcal_mol", "eV").value() + self.assertAlmostEqual(_get_unit_factor("kcal/mol", "energy"), expected) + + def test_get_unit_factor_force_hartree_bohr(self): + from dpdata.formats.xyz._unit_convert import _get_unit_factor + from dpdata.unit import ForceConversion + + expected = ForceConversion("hartree/bohr", "eV/angstrom").value() + self.assertAlmostEqual(_get_unit_factor("hartree/bohr", "force"), expected) + + def test_get_unit_factor_force_kcal_mol_ang(self): + from dpdata.formats.xyz._unit_convert import _get_unit_factor + from dpdata.unit import ForceConversion + + expected = ForceConversion("kcal_mol/angstrom", "eV/angstrom").value() + self.assertAlmostEqual(_get_unit_factor("kcal/mol/angstrom", "force"), expected) + + def test_get_unit_factor_stress_gpa(self): + from dpdata.formats.xyz._unit_convert import _get_unit_factor + from dpdata.unit import PressureConversion + + expected = PressureConversion("GPa", "eV/angstrom^3").value() + self.assertAlmostEqual(_get_unit_factor("GPa", "stress"), expected) + + def test_get_unit_factor_stress_kbar(self): + from dpdata.formats.xyz._unit_convert import _get_unit_factor + from dpdata.unit import PressureConversion + + expected = PressureConversion("kbar", "eV/angstrom^3").value() + self.assertAlmostEqual(_get_unit_factor("kbar", "stress"), expected) + + def test_unsupported_unit_raises(self): + from dpdata.formats.xyz._unit_convert import _get_unit_factor + + with self.assertRaises(ValueError): + _get_unit_factor("nonsense", "energy") + with self.assertRaises(ValueError): + _get_unit_factor("bad/unit", "force") + with self.assertRaises(ValueError): + _get_unit_factor("megapascal", "stress") + + def test_parse_force_unit(self): + from dpdata.formats.xyz._unit_convert import _parse_force_unit + + self.assertEqual( + _parse_force_unit("kcal/mol/angstrom"), ("kcal/mol", "angstrom") + ) + self.assertEqual(_parse_force_unit("hartree/bohr"), ("hartree", "bohr")) + self.assertEqual(_parse_force_unit("ev/ang"), ("ev", "ang")) + + def test_parse_force_unit_invalid(self): + from dpdata.formats.xyz._unit_convert import _parse_force_unit + + with self.assertRaises(ValueError): + _parse_force_unit("noslash") + + +class TestEnergyUnitHartree(unittest.TestCase): + """Test reading extxyz with energy-unit=hartree and force-unit=hartree/bohr.""" + + def test_conversion(self): + from dpdata.unit import EnergyConversion, ForceConversion + + system = dpdata.LabeledSystem("xyz/energy_hartree.xyz", fmt="extxyz") + e_factor = EnergyConversion("hartree", "eV").value() + f_factor = ForceConversion("hartree/bohr", "eV/angstrom").value() + + # energy: -0.5 hartree → eV + np.testing.assert_allclose( + system.data["energies"], [-0.5 * e_factor], rtol=1e-10 + ) + # forces: first atom [0.01, 0.02, 0.03] hartree/bohr → eV/angstrom + np.testing.assert_allclose( + system.data["forces"][0, 0], + np.array([0.01, 0.02, 0.03]) * f_factor, + rtol=1e-10, + ) + + +class TestEnergyUnitKcalMol(unittest.TestCase): + """Test reading extxyz with energy-unit=kcal/mol and force-unit=kcal/mol/angstrom.""" + + def test_conversion(self): + from dpdata.unit import EnergyConversion, ForceConversion + + system = dpdata.LabeledSystem("xyz/energy_kcal_mol.xyz", fmt="extxyz") + e_factor = EnergyConversion("kcal_mol", "eV").value() + f_factor = ForceConversion("kcal_mol/angstrom", "eV/angstrom").value() + + np.testing.assert_allclose( + system.data["energies"], [-10.0 * e_factor], rtol=1e-10 + ) + np.testing.assert_allclose( + system.data["forces"][0, 0], + np.array([1.0, 2.0, 3.0]) * f_factor, + rtol=1e-10, + ) + + +class TestStressUnitGPa(unittest.TestCase): + """Test reading extxyz with stress-unit=GPa.""" + + def test_conversion(self): + from dpdata.unit import PressureConversion + + system = dpdata.LabeledSystem("xyz/stress_gpa.xyz", fmt="extxyz") + s_factor = PressureConversion("GPa", "eV/angstrom^3").value() + volume = 5.0**3 # cubic cell 5x5x5 + + # stress is identity * 1.0 GPa → virial = -V * stress_internal + expected_virial_diag = -1.0 * volume * 1.0 * s_factor + np.testing.assert_allclose( + np.diag(system.data["virials"][0]), + [expected_virial_diag] * 3, + rtol=1e-10, + ) + + +class TestStressUnitKbar(unittest.TestCase): + """Test reading extxyz with stress-unit=kbar.""" + + def test_conversion(self): + from dpdata.unit import PressureConversion + + system = dpdata.LabeledSystem("xyz/stress_kbar.xyz", fmt="extxyz") + s_factor = PressureConversion("kbar", "eV/angstrom^3").value() + volume = 5.0**3 + + # stress is identity * 10.0 kbar → virial = -V * stress_internal + expected_virial_diag = -1.0 * volume * 10.0 * s_factor + np.testing.assert_allclose( + np.diag(system.data["virials"][0]), + [expected_virial_diag] * 3, + rtol=1e-10, + ) + + +class TestNoUnitHeaderDefaults(unittest.TestCase): + """Without unit headers, values should be unchanged (factor=1.0).""" + + def test_default_no_conversion(self): + # stress_only.xyz has no unit headers + system = dpdata.LabeledSystem("xyz/stress_only.xyz", fmt="extxyz") + # Energy should be exactly as in file + np.testing.assert_allclose(system.data["energies"], [-1.5]) + + +class TestUnsupportedUnitRaises(unittest.TestCase): + """Unsupported unit string should raise ValueError, not silently pass.""" + + def test_raises_on_bad_energy_unit(self): + with self.assertRaises(ValueError): + dpdata.LabeledSystem("xyz/unsupported_unit.xyz", fmt="extxyz") + + if __name__ == "__main__": unittest.main() diff --git a/tests/xyz/energy_hartree.xyz b/tests/xyz/energy_hartree.xyz new file mode 100644 index 000000000..8fb3956b6 --- /dev/null +++ b/tests/xyz/energy_hartree.xyz @@ -0,0 +1,4 @@ +2 +Lattice="5.0 0.0 0.0 0.0 5.0 0.0 0.0 0.0 5.0" Properties=species:S:1:pos:R:3:force:R:3 energy=-0.5 energy-unit=hartree force-unit=hartree/bohr pbc="T T T" +Si 0.0 0.0 0.0 0.01 0.02 0.03 +Si 1.0 1.0 1.0 -0.01 -0.02 -0.03 diff --git a/tests/xyz/energy_kcal_mol.xyz b/tests/xyz/energy_kcal_mol.xyz new file mode 100644 index 000000000..e62ec81f7 --- /dev/null +++ b/tests/xyz/energy_kcal_mol.xyz @@ -0,0 +1,4 @@ +2 +Lattice="5.0 0.0 0.0 0.0 5.0 0.0 0.0 0.0 5.0" Properties=species:S:1:pos:R:3:force:R:3 energy=-10.0 energy-unit=kcal/mol force-unit=kcal/mol/angstrom pbc="T T T" +Si 0.0 0.0 0.0 1.0 2.0 3.0 +Si 1.0 1.0 1.0 -1.0 -2.0 -3.0 diff --git a/tests/xyz/stress_gpa.xyz b/tests/xyz/stress_gpa.xyz new file mode 100644 index 000000000..bc45de0bf --- /dev/null +++ b/tests/xyz/stress_gpa.xyz @@ -0,0 +1,4 @@ +2 +Lattice="5.0 0.0 0.0 0.0 5.0 0.0 0.0 0.0 5.0" Properties=species:S:1:pos:R:3:force:R:3 energy=-3.0 stress="1.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0" stress-unit=GPa pbc="T T T" +Si 0.0 0.0 0.0 0.1 0.2 0.3 +Si 1.0 1.0 1.0 -0.1 -0.2 -0.3 diff --git a/tests/xyz/stress_kbar.xyz b/tests/xyz/stress_kbar.xyz new file mode 100644 index 000000000..e04350bce --- /dev/null +++ b/tests/xyz/stress_kbar.xyz @@ -0,0 +1,4 @@ +2 +Lattice="5.0 0.0 0.0 0.0 5.0 0.0 0.0 0.0 5.0" Properties=species:S:1:pos:R:3:force:R:3 energy=-3.0 stress="10.0 0.0 0.0 0.0 10.0 0.0 0.0 0.0 10.0" stress-unit=kbar pbc="T T T" +Si 0.0 0.0 0.0 0.1 0.2 0.3 +Si 1.0 1.0 1.0 -0.1 -0.2 -0.3 diff --git a/tests/xyz/unsupported_unit.xyz b/tests/xyz/unsupported_unit.xyz new file mode 100644 index 000000000..a02702cf8 --- /dev/null +++ b/tests/xyz/unsupported_unit.xyz @@ -0,0 +1,4 @@ +2 +Lattice="5.0 0.0 0.0 0.0 5.0 0.0 0.0 0.0 5.0" Properties=species:S:1:pos:R:3:force:R:3 energy=-3.0 energy-unit=nonsense pbc="T T T" +Si 0.0 0.0 0.0 0.1 0.2 0.3 +Si 1.0 1.0 1.0 -0.1 -0.2 -0.3