diff --git a/dpdata/formats/xyz/_unit_convert.py b/dpdata/formats/xyz/_unit_convert.py new file mode 100644 index 00000000..6a3a0700 --- /dev/null +++ b/dpdata/formats/xyz/_unit_convert.py @@ -0,0 +1,150 @@ +"""Unit conversion helpers for extended XYZ (extxyz) format. + +This module provides a table-driven approach to convert energy, force, and +stress/pressure values from various unit systems commonly found in extxyz +files into dpdata's internal units: + +- Energy: eV +- Force: eV/angstrom +- Stress/Pressure: eV/angstrom^3 (before volume multiplication to get virial) +""" + +from __future__ import annotations + +from dpdata.unit import EnergyConversion, ForceConversion, PressureConversion + +# --------------------------------------------------------------------------- +# Unit alias mapping tables +# Keys are LOWERCASE; values are canonical names recognized by dpdata.unit +# --------------------------------------------------------------------------- + +_ENERGY_UNIT_MAP: dict[str, str] = { + "ev": "eV", + "hartree": "hartree", + "ha": "hartree", + "ry": "rydberg", + "rydberg": "rydberg", + "kcal/mol": "kcal_mol", + "kcal_mol": "kcal_mol", + "kj/mol": "kJ_mol", + "kj_mol": "kJ_mol", +} + +_LENGTH_UNIT_MAP: dict[str, str] = { + "angstrom": "angstrom", + "ang": "angstrom", + "ang.": "angstrom", + "a": "angstrom", + "bohr": "bohr", + "nm": "nm", +} + +_PRESSURE_UNIT_MAP: dict[str, str] = { + "gpa": "GPa", + "kbar": "kbar", + "bar": "bar", + "ev/angstrom^3": "eV/angstrom^3", + "ev/ang^3": "eV/angstrom^3", + "ev/a^3": "eV/angstrom^3", + "ha/bohr^3": "hartree/bohr^3", + "hartree/bohr^3": "hartree/bohr^3", +} + +# dpdata internal unit strings +_INTERNAL_ENERGY = "eV" +_INTERNAL_FORCE = "eV/angstrom" +_INTERNAL_PRESSURE = "eV/angstrom^3" + + +def _parse_force_unit(raw: str) -> tuple[str, str]: + """Split a composite force unit string into (energy_part, length_part). + + Examples + -------- + >>> _parse_force_unit("kcal/mol/angstrom") + ('kcal/mol', 'angstrom') + >>> _parse_force_unit("hartree/bohr") + ('hartree', 'bohr') + >>> _parse_force_unit("ev/ang") + ('ev', 'ang') + """ + # Try matching known energy prefixes (longest first) to handle + # composite names like "kcal/mol" that themselves contain "/". + for e_key in sorted(_ENERGY_UNIT_MAP.keys(), key=len, reverse=True): + prefix = e_key + "/" + if raw.startswith(prefix): + l_part = raw[len(prefix) :] + if l_part: + return e_key, l_part + # Fallback: split on last "/" + parts = raw.rsplit("/", 1) + if len(parts) == 2 and parts[0] and parts[1]: + return parts[0], parts[1] + raise ValueError(f"Cannot parse force unit string: '{raw}'") + + +def _get_unit_factor(unit_str: str | None, quantity: str) -> float: + """Return the multiplicative factor to convert from the given unit to dpdata internals. + + Parameters + ---------- + unit_str : str or None + The unit string read from the extxyz header (e.g. "hartree", "kcal/mol/angstrom"). + If None, returns 1.0 (assumes data is already in internal units). + quantity : str + One of "energy", "force", or "stress". + + Returns + ------- + float + Conversion factor such that ``value_internal = value_file * factor``. + + Raises + ------ + ValueError + If the unit string is not recognized or the quantity type is invalid. + """ + if unit_str is None: + return 1.0 + + key = unit_str.lower().strip() + + if quantity == "energy": + canonical = _ENERGY_UNIT_MAP.get(key) + if canonical is None: + raise ValueError( + f"Unsupported energy unit: '{unit_str}'. " + f"Supported: {list(_ENERGY_UNIT_MAP.keys())}" + ) + return EnergyConversion(canonical, _INTERNAL_ENERGY).value() + + elif quantity == "force": + e_part, l_part = _parse_force_unit(key) + e_canonical = _ENERGY_UNIT_MAP.get(e_part) + l_canonical = _LENGTH_UNIT_MAP.get(l_part) + if e_canonical is None: + raise ValueError( + f"Unsupported energy part in force unit: '{e_part}' " + f"(from '{unit_str}'). Supported: {list(_ENERGY_UNIT_MAP.keys())}" + ) + if l_canonical is None: + raise ValueError( + f"Unsupported length part in force unit: '{l_part}' " + f"(from '{unit_str}'). Supported: {list(_LENGTH_UNIT_MAP.keys())}" + ) + src_unit = f"{e_canonical}/{l_canonical}" + return ForceConversion(src_unit, _INTERNAL_FORCE).value() + + elif quantity == "stress": + canonical = _PRESSURE_UNIT_MAP.get(key) + if canonical is None: + raise ValueError( + f"Unsupported stress/pressure unit: '{unit_str}'. " + f"Supported: {list(_PRESSURE_UNIT_MAP.keys())}" + ) + return PressureConversion(canonical, _INTERNAL_PRESSURE).value() + + else: + raise ValueError( + f"Unknown quantity type: '{quantity}'. Must be 'energy', 'force', or 'stress'." + ) diff --git a/dpdata/formats/xyz/quip_gap_xyz.py b/dpdata/formats/xyz/quip_gap_xyz.py index fe415872..c9a9b199 100644 --- a/dpdata/formats/xyz/quip_gap_xyz.py +++ b/dpdata/formats/xyz/quip_gap_xyz.py @@ -8,6 +8,7 @@ import numpy as np +from dpdata.formats.xyz._unit_convert import _get_unit_factor from dpdata.periodic_table import Element # Possible keys for the energy field in the extxyz comment line, @@ -24,7 +25,7 @@ _STRESS_KEYS = ("stress", "stresses") -def _parse_stress_to_virials(stress_str, cell, stress_sign=-1): +def _parse_stress_to_virials(stress_str, cell, stress_sign=-1, stress_factor=1.0): """Convert a stress field string to virial tensor. Parameters @@ -38,6 +39,10 @@ def _parse_stress_to_virials(stress_str, cell, stress_sign=-1): Sign convention for ``virial = stress_sign * volume * stress``. Default ``-1`` follows the ASE convention where ``virial = -V * stress`` (stress in eV/angstrom^3). + stress_factor : float + Multiplicative factor to convert stress from file units to + eV/angstrom^3 before computing the virial. Default 1.0 + (assumes stress is already in eV/angstrom^3). Returns ------- @@ -57,7 +62,7 @@ def _parse_stress_to_virials(stress_str, cell, stress_sign=-1): f"stress field must have 6 (Voigt) or 9 (3x3) values, got {len(vals)}" ) volume = abs(np.linalg.det(cell)) - virials = stress_sign * volume * stress + virials = stress_sign * volume * stress * stress_factor return np.array([virials]) @@ -249,13 +254,26 @@ def handle_single_xyz_frame(lines, stress_sign=-1, **kwargs): stress_raw = field_dict[skey] break + # --- unit conversion factors --- + # Read optional unit metadata from extxyz header. + # When absent, factor is 1.0 (assumes dpdata internal units). + e_unit_str = field_dict.get("energy-unit") or field_dict.get("energy_unit") + f_unit_str = field_dict.get("force-unit") or field_dict.get("force_unit") + s_unit_str = field_dict.get("stress-unit") or field_dict.get("stress_unit") + + e_factor = _get_unit_factor(e_unit_str, "energy") + f_factor = _get_unit_factor(f_unit_str, "force") + s_factor = _get_unit_factor(s_unit_str, "stress") + if virial_raw is not None: virials = np.array( [np.array(list(filter(bool, virial_raw.split(" ")))).reshape(3, 3)] ).astype(np.float64) + # Note: virial values are assumed in eV (no unit header for virial itself; + # if stress-unit is given it only applies to stress fields). elif stress_raw is not None: virials = _parse_stress_to_virials( - stress_raw, cells, stress_sign=stress_sign + stress_raw, cells, stress_sign=stress_sign, stress_factor=s_factor ) # --- energy (try several common keys) --- @@ -275,8 +293,8 @@ def handle_single_xyz_frame(lines, stress_sign=-1, **kwargs): info_dict["atom_numbs"] = list(type_num_array[:, 1].astype(int)) info_dict["atom_types"] = np.array(atom_type_list).astype(int) info_dict["coords"] = np.array([coords_array]).astype(np.float64) - info_dict["energies"] = np.array([energy_value]).astype(np.float64) - info_dict["forces"] = np.array([force_array]).astype(np.float64) + info_dict["energies"] = np.array([energy_value], dtype=np.float64) * e_factor + info_dict["forces"] = np.array([force_array], dtype=np.float64) * f_factor if virials is not None: info_dict["virials"] = virials info_dict["orig"] = np.zeros(3) diff --git a/tests/test_quip_gap_xyz.py b/tests/test_quip_gap_xyz.py index 67cb1a71..4fda3c63 100644 --- a/tests/test_quip_gap_xyz.py +++ b/tests/test_quip_gap_xyz.py @@ -384,5 +384,192 @@ def test_direct_read(self): self.assertIn("virials", system.data) +# ============================================================================ +# Unit conversion tests (Phase 5 of PR #678 redesign) +# ============================================================================ + + +class TestUnitConvertModule(unittest.TestCase): + """Direct tests for dpdata.formats.xyz._unit_convert helpers.""" + + def test_get_unit_factor_none_returns_1(self): + from dpdata.formats.xyz._unit_convert import _get_unit_factor + + self.assertEqual(_get_unit_factor(None, "energy"), 1.0) + self.assertEqual(_get_unit_factor(None, "force"), 1.0) + self.assertEqual(_get_unit_factor(None, "stress"), 1.0) + + def test_get_unit_factor_energy_ev(self): + from dpdata.formats.xyz._unit_convert import _get_unit_factor + + self.assertAlmostEqual(_get_unit_factor("eV", "energy"), 1.0) + + def test_get_unit_factor_energy_hartree(self): + from dpdata.formats.xyz._unit_convert import _get_unit_factor + from dpdata.unit import EnergyConversion + + expected = EnergyConversion("hartree", "eV").value() + self.assertAlmostEqual(_get_unit_factor("hartree", "energy"), expected) + self.assertAlmostEqual(_get_unit_factor("Ha", "energy"), expected) + + def test_get_unit_factor_energy_kcal_mol(self): + from dpdata.formats.xyz._unit_convert import _get_unit_factor + from dpdata.unit import EnergyConversion + + expected = EnergyConversion("kcal_mol", "eV").value() + self.assertAlmostEqual(_get_unit_factor("kcal/mol", "energy"), expected) + + def test_get_unit_factor_force_hartree_bohr(self): + from dpdata.formats.xyz._unit_convert import _get_unit_factor + from dpdata.unit import ForceConversion + + expected = ForceConversion("hartree/bohr", "eV/angstrom").value() + self.assertAlmostEqual(_get_unit_factor("hartree/bohr", "force"), expected) + + def test_get_unit_factor_force_kcal_mol_ang(self): + from dpdata.formats.xyz._unit_convert import _get_unit_factor + from dpdata.unit import ForceConversion + + expected = ForceConversion("kcal_mol/angstrom", "eV/angstrom").value() + self.assertAlmostEqual(_get_unit_factor("kcal/mol/angstrom", "force"), expected) + + def test_get_unit_factor_stress_gpa(self): + from dpdata.formats.xyz._unit_convert import _get_unit_factor + from dpdata.unit import PressureConversion + + expected = PressureConversion("GPa", "eV/angstrom^3").value() + self.assertAlmostEqual(_get_unit_factor("GPa", "stress"), expected) + + def test_get_unit_factor_stress_kbar(self): + from dpdata.formats.xyz._unit_convert import _get_unit_factor + from dpdata.unit import PressureConversion + + expected = PressureConversion("kbar", "eV/angstrom^3").value() + self.assertAlmostEqual(_get_unit_factor("kbar", "stress"), expected) + + def test_unsupported_unit_raises(self): + from dpdata.formats.xyz._unit_convert import _get_unit_factor + + with self.assertRaises(ValueError): + _get_unit_factor("nonsense", "energy") + with self.assertRaises(ValueError): + _get_unit_factor("bad/unit", "force") + with self.assertRaises(ValueError): + _get_unit_factor("megapascal", "stress") + + def test_parse_force_unit(self): + from dpdata.formats.xyz._unit_convert import _parse_force_unit + + self.assertEqual( + _parse_force_unit("kcal/mol/angstrom"), ("kcal/mol", "angstrom") + ) + self.assertEqual(_parse_force_unit("hartree/bohr"), ("hartree", "bohr")) + self.assertEqual(_parse_force_unit("ev/ang"), ("ev", "ang")) + + def test_parse_force_unit_invalid(self): + from dpdata.formats.xyz._unit_convert import _parse_force_unit + + with self.assertRaises(ValueError): + _parse_force_unit("noslash") + + +class TestEnergyUnitHartree(unittest.TestCase): + """Test reading extxyz with energy-unit=hartree and force-unit=hartree/bohr.""" + + def test_conversion(self): + from dpdata.unit import EnergyConversion, ForceConversion + + system = dpdata.LabeledSystem("xyz/energy_hartree.xyz", fmt="extxyz") + e_factor = EnergyConversion("hartree", "eV").value() + f_factor = ForceConversion("hartree/bohr", "eV/angstrom").value() + + # energy: -0.5 hartree → eV + np.testing.assert_allclose( + system.data["energies"], [-0.5 * e_factor], rtol=1e-10 + ) + # forces: first atom [0.01, 0.02, 0.03] hartree/bohr → eV/angstrom + np.testing.assert_allclose( + system.data["forces"][0, 0], + np.array([0.01, 0.02, 0.03]) * f_factor, + rtol=1e-10, + ) + + +class TestEnergyUnitKcalMol(unittest.TestCase): + """Test reading extxyz with energy-unit=kcal/mol and force-unit=kcal/mol/angstrom.""" + + def test_conversion(self): + from dpdata.unit import EnergyConversion, ForceConversion + + system = dpdata.LabeledSystem("xyz/energy_kcal_mol.xyz", fmt="extxyz") + e_factor = EnergyConversion("kcal_mol", "eV").value() + f_factor = ForceConversion("kcal_mol/angstrom", "eV/angstrom").value() + + np.testing.assert_allclose( + system.data["energies"], [-10.0 * e_factor], rtol=1e-10 + ) + np.testing.assert_allclose( + system.data["forces"][0, 0], + np.array([1.0, 2.0, 3.0]) * f_factor, + rtol=1e-10, + ) + + +class TestStressUnitGPa(unittest.TestCase): + """Test reading extxyz with stress-unit=GPa.""" + + def test_conversion(self): + from dpdata.unit import PressureConversion + + system = dpdata.LabeledSystem("xyz/stress_gpa.xyz", fmt="extxyz") + s_factor = PressureConversion("GPa", "eV/angstrom^3").value() + volume = 5.0**3 # cubic cell 5x5x5 + + # stress is identity * 1.0 GPa → virial = -V * stress_internal + expected_virial_diag = -1.0 * volume * 1.0 * s_factor + np.testing.assert_allclose( + np.diag(system.data["virials"][0]), + [expected_virial_diag] * 3, + rtol=1e-10, + ) + + +class TestStressUnitKbar(unittest.TestCase): + """Test reading extxyz with stress-unit=kbar.""" + + def test_conversion(self): + from dpdata.unit import PressureConversion + + system = dpdata.LabeledSystem("xyz/stress_kbar.xyz", fmt="extxyz") + s_factor = PressureConversion("kbar", "eV/angstrom^3").value() + volume = 5.0**3 + + # stress is identity * 10.0 kbar → virial = -V * stress_internal + expected_virial_diag = -1.0 * volume * 10.0 * s_factor + np.testing.assert_allclose( + np.diag(system.data["virials"][0]), + [expected_virial_diag] * 3, + rtol=1e-10, + ) + + +class TestNoUnitHeaderDefaults(unittest.TestCase): + """Without unit headers, values should be unchanged (factor=1.0).""" + + def test_default_no_conversion(self): + # stress_only.xyz has no unit headers + system = dpdata.LabeledSystem("xyz/stress_only.xyz", fmt="extxyz") + # Energy should be exactly as in file + np.testing.assert_allclose(system.data["energies"], [-1.5]) + + +class TestUnsupportedUnitRaises(unittest.TestCase): + """Unsupported unit string should raise ValueError, not silently pass.""" + + def test_raises_on_bad_energy_unit(self): + with self.assertRaises(ValueError): + dpdata.LabeledSystem("xyz/unsupported_unit.xyz", fmt="extxyz") + + if __name__ == "__main__": unittest.main() diff --git a/tests/xyz/energy_hartree.xyz b/tests/xyz/energy_hartree.xyz new file mode 100644 index 00000000..8fb3956b --- /dev/null +++ b/tests/xyz/energy_hartree.xyz @@ -0,0 +1,4 @@ +2 +Lattice="5.0 0.0 0.0 0.0 5.0 0.0 0.0 0.0 5.0" Properties=species:S:1:pos:R:3:force:R:3 energy=-0.5 energy-unit=hartree force-unit=hartree/bohr pbc="T T T" +Si 0.0 0.0 0.0 0.01 0.02 0.03 +Si 1.0 1.0 1.0 -0.01 -0.02 -0.03 diff --git a/tests/xyz/energy_kcal_mol.xyz b/tests/xyz/energy_kcal_mol.xyz new file mode 100644 index 00000000..e62ec81f --- /dev/null +++ b/tests/xyz/energy_kcal_mol.xyz @@ -0,0 +1,4 @@ +2 +Lattice="5.0 0.0 0.0 0.0 5.0 0.0 0.0 0.0 5.0" Properties=species:S:1:pos:R:3:force:R:3 energy=-10.0 energy-unit=kcal/mol force-unit=kcal/mol/angstrom pbc="T T T" +Si 0.0 0.0 0.0 1.0 2.0 3.0 +Si 1.0 1.0 1.0 -1.0 -2.0 -3.0 diff --git a/tests/xyz/stress_gpa.xyz b/tests/xyz/stress_gpa.xyz new file mode 100644 index 00000000..bc45de0b --- /dev/null +++ b/tests/xyz/stress_gpa.xyz @@ -0,0 +1,4 @@ +2 +Lattice="5.0 0.0 0.0 0.0 5.0 0.0 0.0 0.0 5.0" Properties=species:S:1:pos:R:3:force:R:3 energy=-3.0 stress="1.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0" stress-unit=GPa pbc="T T T" +Si 0.0 0.0 0.0 0.1 0.2 0.3 +Si 1.0 1.0 1.0 -0.1 -0.2 -0.3 diff --git a/tests/xyz/stress_kbar.xyz b/tests/xyz/stress_kbar.xyz new file mode 100644 index 00000000..e04350bc --- /dev/null +++ b/tests/xyz/stress_kbar.xyz @@ -0,0 +1,4 @@ +2 +Lattice="5.0 0.0 0.0 0.0 5.0 0.0 0.0 0.0 5.0" Properties=species:S:1:pos:R:3:force:R:3 energy=-3.0 stress="10.0 0.0 0.0 0.0 10.0 0.0 0.0 0.0 10.0" stress-unit=kbar pbc="T T T" +Si 0.0 0.0 0.0 0.1 0.2 0.3 +Si 1.0 1.0 1.0 -0.1 -0.2 -0.3 diff --git a/tests/xyz/unsupported_unit.xyz b/tests/xyz/unsupported_unit.xyz new file mode 100644 index 00000000..a02702cf --- /dev/null +++ b/tests/xyz/unsupported_unit.xyz @@ -0,0 +1,4 @@ +2 +Lattice="5.0 0.0 0.0 0.0 5.0 0.0 0.0 0.0 5.0" Properties=species:S:1:pos:R:3:force:R:3 energy=-3.0 energy-unit=nonsense pbc="T T T" +Si 0.0 0.0 0.0 0.1 0.2 0.3 +Si 1.0 1.0 1.0 -0.1 -0.2 -0.3