diff --git a/.pylintdict b/.pylintdict index 639391e78..5db6904e0 100644 --- a/.pylintdict +++ b/.pylintdict @@ -676,3 +676,36 @@ zz φ_ij Δ π +annni +antiphase +antiferromagnetic +canonicalize +diagonalization +eigsh +ferromagnetic +haldane +hamiltonian +hamiltonians +heisenberg +kappa +lattice +paramagnetic +pauli +paulis +spt +topological +trivial +atol +ddt +eigh +geq +idata +namespace +rng +simulable +zxz +simulatable +bermejo +ceil +doctest +eq diff --git a/qiskit_machine_learning/datasets/__init__.py b/qiskit_machine_learning/datasets/__init__.py index ce9733430..3faf78de7 100644 --- a/qiskit_machine_learning/datasets/__init__.py +++ b/qiskit_machine_learning/datasets/__init__.py @@ -29,9 +29,11 @@ ad_hoc_data entanglement_concentration_data + phase_of_matter_data """ from .ad_hoc import ad_hoc_data from .entanglement_concentration import entanglement_concentration_data +from .phase_of_matter import phase_of_matter_data -__all__ = ["ad_hoc_data", "entanglement_concentration_data"] +__all__ = ["ad_hoc_data", "entanglement_concentration_data", "phase_of_matter_data"] diff --git a/qiskit_machine_learning/datasets/phase_of_matter/__init__.py b/qiskit_machine_learning/datasets/phase_of_matter/__init__.py new file mode 100644 index 000000000..6690d2c8b --- /dev/null +++ b/qiskit_machine_learning/datasets/phase_of_matter/__init__.py @@ -0,0 +1,39 @@ +# This code is part of a Qiskit project. +# +# (C) Copyright IBM 2019, 2026. +# (C) Copyright UKRI-STFC (Hartree Centre) 2024, 2026. +# +# This code is licensed under the Apache License, Version 2.0. You may +# obtain a copy of this license in the LICENSE.txt file in the root directory +# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0. +# +# Any modifications or derivative works of this code must retain this +# copyright notice, and modified files need to carry a notice indicating +# that they have been altered from the originals. 
+ +""" +Phase of Matter dataset (:mod:`phase_of_matter`) + +Quantum Phase of Matter classification dataset generator. + +Each supported model lives in its own module: + +* :mod:`._heisenberg` — Bond-alternating XXX Heisenberg chain +* :mod:`._haldane` — Haldane chain +* :mod:`._annni` — Axial Next-Nearest-Neighbor Ising (ANNNI) model +* :mod:`._cluster` — Cluster Hamiltonian + +The :func:`phase_of_matter_data` function is the single public entry point. + +.. currentmodule:: phase_of_matter + +.. autosummary:: + :toctree: ../stubs/ + :nosignatures: + + phase_of_matter_data +""" + +from .phase_of_matter import phase_of_matter_data + +__all__ = ["phase_of_matter_data"] diff --git a/qiskit_machine_learning/datasets/phase_of_matter/_annni.py b/qiskit_machine_learning/datasets/phase_of_matter/_annni.py new file mode 100644 index 000000000..1c15e9056 --- /dev/null +++ b/qiskit_machine_learning/datasets/phase_of_matter/_annni.py @@ -0,0 +1,105 @@ +# This code is part of a Qiskit project. +# +# (C) Copyright IBM 2019, 2026. +# (C) Copyright UKRI-STFC (Hartree Centre) 2024, 2026. +# +# This code is licensed under the Apache License, Version 2.0. You may +# obtain a copy of this license in the LICENSE.txt file in the root directory +# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0. +# +# Any modifications or derivative works of this code must retain this +# copyright notice, and modified files need to carry a notice indicating +# that they have been altered from the originals. + +"""Axial Next-Nearest-Neighbor Ising (ANNNI) Hamiltonian and phase sampler. + +Reference: Bermejo et al., arXiv:2408.12739, eq. (8). +""" + +from __future__ import annotations + +import numpy as np +from qiskit.quantum_info import SparsePauliOp + +from ._base import pauli_term + +#: Ordered list of phase labels for the ANNNI model. 
+PHASE_LABELS: list[str] = ["ferromagnetic", "paramagnetic", "floating", "antiphase"]
+
+
+def build_hamiltonian(n: int, kappa: float, h: float, j1: float = 1.0) -> SparsePauliOp:
+    r"""ANNNI Hamiltonian (Paper eq. 8).
+
+    .. math::
+
+        H = -J_1 \sum_{i=1}^{n-1} X_i X_{i+1}
+            - J_2 \sum_{i=1}^{n-2} X_i X_{i+2}
+            - B \sum_{i=1}^{n} Z_i
+
+    with :math:`J_2 = -\kappa J_1` and :math:`B = h J_1`.
+
+    Phase diagram (see Fig. 5 in the reference, axes :math:`\kappa` vs
+    :math:`h` with :math:`J_1 = 1`):
+
+    * **ferromagnetic** (I) -- small :math:`\kappa`, small :math:`h`
+    * **paramagnetic** (II) -- small :math:`\kappa`, large :math:`h`
+    * **floating** (III) -- large :math:`\kappa`, moderate :math:`h`
+    * **antiphase** (IV) -- large :math:`\kappa`, small :math:`h`
+
+    Args:
+        n: Number of lattice sites (qubits).
+        kappa: Dimensionless ratio :math:`\kappa = -J_2 / J_1`.
+        h: Dimensionless ratio :math:`h = B / J_1`.
+        j1: Nearest-neighbor coupling :math:`J_1`; sets the overall energy scale (default 1.0).
+
+    Returns:
+        SparsePauliOp for the Hamiltonian on *n* qubits.
+    """
+    j2 = -kappa * j1
+    b = h * j1
+    terms: list[SparsePauliOp] = []
+    for i in range(n - 1):
+        terms.append(-j1 * pauli_term([("X", i), ("X", i + 1)], n))
+    for i in range(n - 2):
+        terms.append(-j2 * pauli_term([("X", i), ("X", i + 2)], n))
+    for i in range(n):
+        terms.append(-b * pauli_term([("Z", i)], n))
+    return SparsePauliOp.sum(terms).simplify()
+
+
+def sample_parameters(n_samples: int, rng: np.random.Generator) -> list[tuple[dict, str]]:
+    """Sample coupling parameters uniformly from the interior of each phase.
+
+    Sampling regions (see Fig. 5 in the reference) are placed well inside
+    each phase to avoid mislabeled points near boundaries.
+
+    Args:
+        n_samples: Number of samples to draw *per class*.
+        rng: NumPy random Generator; draws kappa, then h, for each phase in turn.
+
+    Returns:
+        List of ``(params_dict, phase_label)`` tuples; ``params_dict`` has the keys
+        ``"kappa"`` and ``"h"``. *n_samples* entries per phase, in :data:`PHASE_LABELS` order.
+    """
+    samples: list[tuple[dict, str]] = []
+    # ferromagnetic (I): kappa in [0, 0.3), h in [0, 0.25)
+    ks = rng.uniform(0.0, 0.3, size=n_samples)
+    hs = rng.uniform(0.0, 0.25, size=n_samples)
+    for k, hv in zip(ks, hs):
+        samples.append(({"kappa": float(k), "h": float(hv)}, "ferromagnetic"))
+    # paramagnetic (II): kappa in [0, 0.45), h in [0.9, 1.5)
+    ks = rng.uniform(0.0, 0.45, size=n_samples)
+    hs = rng.uniform(0.9, 1.5, size=n_samples)
+    for k, hv in zip(ks, hs):
+        samples.append(({"kappa": float(k), "h": float(hv)}, "paramagnetic"))
+    # floating (III): kappa in [0.55, 0.9), h in [0.25, 0.65)
+    ks = rng.uniform(0.55, 0.9, size=n_samples)
+    hs = rng.uniform(0.25, 0.65, size=n_samples)
+    for k, hv in zip(ks, hs):
+        samples.append(({"kappa": float(k), "h": float(hv)}, "floating"))
+    # antiphase (IV): kappa in [0.55, 0.9), h in [0, 0.1)
+    ks = rng.uniform(0.55, 0.9, size=n_samples)
+    hs = rng.uniform(0.0, 0.1, size=n_samples)
+    for k, hv in zip(ks, hs):
+        samples.append(({"kappa": float(k), "h": float(hv)}, "antiphase"))
+    return samples
diff --git a/qiskit_machine_learning/datasets/phase_of_matter/_base.py b/qiskit_machine_learning/datasets/phase_of_matter/_base.py
new file mode 100644
index 000000000..4ca939575
--- /dev/null
+++ b/qiskit_machine_learning/datasets/phase_of_matter/_base.py
@@ -0,0 +1,120 @@
+# This code is part of a Qiskit project.
+#
+# (C) Copyright IBM 2019, 2026.
+# (C) Copyright UKRI-STFC (Hartree Centre) 2024, 2026.
+#
+# This code is licensed under the Apache License, Version 2.0. You may
+# obtain a copy of this license in the LICENSE.txt file in the root directory
+# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
+#
+# Any modifications or derivative works of this code must retain this
+# copyright notice, and modified files need to carry a notice indicating
+# that they have been altered from the originals.
+
+"""Shared utilities for Phase of Matter dataset generators."""
+
+from __future__ import annotations
+
+import numpy as np
+import scipy.sparse
+import scipy.sparse.linalg
+from qiskit.quantum_info import SparsePauliOp, Statevector
+
+
+def pauli_term(op_list: list[tuple[str, int]], n: int) -> SparsePauliOp:
+    """Build a single n-qubit Pauli term from a list of (pauli_char, site) pairs.
+
+    Sites not listed are identity. Uses Qiskit's little-endian convention:
+    site 0 is the rightmost character in the Pauli string.
+
+    Args:
+        op_list: List of (Pauli character, qubit site index) pairs; a repeated site keeps the last.
+        n: Total number of qubits.
+
+    Returns:
+        SparsePauliOp representing the term.
+    """
+    chars = ["I"] * n
+    for pauli_char, site in op_list:
+        chars[site] = pauli_char
+    return SparsePauliOp("".join(reversed(chars)))
+
+
+def _canonicalize_phase(vec: np.ndarray) -> np.ndarray:
+    """Fix the global phase so that the leading large-magnitude element is real positive.
+
+    Eigenvectors are defined only up to a global complex phase; this
+    phase-fixing makes repeated calls to ``eigsh`` return numerically
+    identical arrays for the same Hamiltonian.
+    """
+    threshold = 1e-10 * np.max(np.abs(vec))
+    for val in vec:
+        if abs(val) > threshold:
+            return vec * (np.conj(val) / abs(val))
+    return vec
+
+
+def get_ground_state_exact(hamiltonian: SparsePauliOp) -> np.ndarray:
+    """Return the ground-state vector via sparse exact diagonalization.
+
+    Uses ``scipy.sparse.linalg.eigsh`` with ``which='SA'`` (smallest algebraic
+    eigenvalue). Practical limit: n <= 16 qubits (2^16 x 2^16 matrix).
+    A seeded pseudo-random start vector ``v0`` (generic, so no symmetry sector is missed)
+    replaces ARPACK's random default; with the phase fix, repeated calls give equal arrays.
+
+    Args:
+        hamiltonian: Hamiltonian as a SparsePauliOp.
+
+    Returns:
+        Complex numpy array of shape ``(2**n,)`` -- the normalized ground state.
+    """
+    mat = hamiltonian.to_matrix(sparse=True).astype(complex)
+    v0 = np.random.default_rng(0).standard_normal(mat.shape[0]).astype(mat.dtype)
+    _, vecs = scipy.sparse.linalg.eigsh(mat, k=1, which="SA", v0=v0)
+    return _canonicalize_phase(vecs[:, 0])
+
+
+def get_ground_state_vqe(
+    hamiltonian: SparsePauliOp,
+    backend,  # pylint: disable=unused-argument
+) -> Statevector:
+    """Approximate the ground state via VQE using qiskit primitives.
+
+    .. warning::
+
+        VQE is provided for hardware-experiment workflows only. For reliable
+        phase labels, use the default exact diagonalization (``backend=None``).
+        VQE approximations near phase boundaries may produce incorrect labels.
+
+    Uses an ``EfficientSU2`` ansatz (1 repetition) with COBYLA optimization via
+    ``StatevectorEstimator`` from ``qiskit.primitives``. The ``backend``
+    argument is accepted for API consistency and future hardware integration;
+    the current implementation uses ``StatevectorEstimator`` unconditionally.
+
+    Args:
+        hamiltonian: Hamiltonian as a SparsePauliOp.
+        backend (object): Reserved for future hardware integration. Currently unused;
+            pass any non-``None`` value to activate this pathway.
+
+    Returns:
+        Qiskit ``Statevector`` of the approximate ground state.
+    """
+    # Deferred imports so the ansatz, estimator and optimizer load only when VQE is used.
+    from qiskit.circuit.library import EfficientSU2  # pylint: disable=import-outside-toplevel
+    from qiskit.primitives import StatevectorEstimator  # pylint: disable=import-outside-toplevel
+    from scipy.optimize import minimize  # pylint: disable=import-outside-toplevel
+
+    n = hamiltonian.num_qubits
+    ansatz = EfficientSU2(n, reps=1, entanglement="linear")
+    num_params = ansatz.num_parameters
+    estimator = StatevectorEstimator()
+
+    def cost(params: np.ndarray) -> float:
+        """Evaluate energy expectation value for given parameters."""
+        pub = (ansatz, [hamiltonian], [params])
+        return float(estimator.run([pub]).result()[0].data.evs[0])
+
+    rng = np.random.default_rng(0)
+    x0 = rng.uniform(-np.pi, np.pi, num_params)
+    result = minimize(cost, x0, method="COBYLA", options={"maxiter": 1000, "rhobeg": 0.5})
+    return Statevector(ansatz.assign_parameters(result.x))
diff --git a/qiskit_machine_learning/datasets/phase_of_matter/_cluster.py b/qiskit_machine_learning/datasets/phase_of_matter/_cluster.py
new file mode 100644
index 000000000..3b3d5e767
--- /dev/null
+++ b/qiskit_machine_learning/datasets/phase_of_matter/_cluster.py
@@ -0,0 +1,102 @@
+# This code is part of a Qiskit project.
+#
+# (C) Copyright IBM 2019, 2026.
+# (C) Copyright UKRI-STFC (Hartree Centre) 2024, 2026.
+#
+# This code is licensed under the Apache License, Version 2.0. You may
+# obtain a copy of this license in the LICENSE.txt file in the root directory
+# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
+#
+# Any modifications or derivative works of this code must retain this
+# copyright notice, and modified files need to carry a notice indicating
+# that they have been altered from the originals.
+
+"""Cluster Hamiltonian (periodic boundary) and phase sampler.
+
+Reference: Bermejo et al., arXiv:2408.12739, eq. (9).
+"""
+
+from __future__ import annotations
+
+import numpy as np
+from qiskit.quantum_info import SparsePauliOp
+
+from ._base import pauli_term
+
+#: Ordered list of phase labels for the Cluster model.
+PHASE_LABELS: list[str] = ["haldane", "ferromagnetic", "antiferromagnetic", "trivial"]
+
+
+def build_hamiltonian(n: int, j1: float, j2: float) -> SparsePauliOp:
+    r"""Cluster Hamiltonian with periodic boundary conditions (Paper eq. 9).
+
+    .. math::
+
+        H = \sum_{i=1}^{n}
+            \left( Z_i - J_1 X_i X_{i+1} - J_2 Z_{i-1} X_i Z_{i+1} \right)
+
+    with periodic identifications :math:`X_{n+1} \equiv X_1`,
+    :math:`Z_{n+1} \equiv Z_1` and :math:`Z_0 \equiv Z_n`.
+
+    Phase diagram (see Fig. 6 in the reference, axes :math:`J_1` vs
+    :math:`J_2`):
+
+    * **haldane** (I) — large positive :math:`J_1`, large negative :math:`J_2`
+    * **ferromagnetic** (II) — large positive :math:`J_1` and :math:`J_2`
+    * **antiferromagnetic** (III) — large negative :math:`J_1` and :math:`J_2`
+    * **trivial** (IV) — both :math:`|J_1|` and :math:`|J_2|` small
+
+    Args:
+        n: Number of lattice sites (qubits).
+        j1: Two-body coupling constant.
+        j2: Three-body cluster coupling constant.
+
+    Returns:
+        SparsePauliOp for the Hamiltonian on *n* qubits.
+    """
+    terms: list[SparsePauliOp] = []
+    for i in range(n):
+        terms.append(pauli_term([("Z", i)], n))
+        i_next = (i + 1) % n
+        i_prev = (i - 1) % n
+        terms.append(-j1 * pauli_term([("X", i), ("X", i_next)], n))
+        terms.append(-j2 * pauli_term([("Z", i_prev), ("X", i), ("Z", i_next)], n))
+    return SparsePauliOp.sum(terms).simplify()
+
+
+def sample_parameters(n_samples: int, rng: np.random.Generator) -> list[tuple[dict, str]]:
+    """Sample coupling parameters uniformly from the interior of each phase.
+
+    Sampling regions (see Fig. 6 in the reference) are placed well inside
+    each phase to avoid mislabeled points near boundaries.
+
+    Args:
+        n_samples: Number of samples to draw *per class*.
+        rng: NumPy random Generator instance.
+
+    Returns:
+        List of ``(params_dict, phase_label)`` tuples. The list contains
+        *n_samples* entries for each phase in :data:`PHASE_LABELS`, in order.
+    """
+    samples: list[tuple[dict, str]] = []
+    # haldane (I): J1 ∈ (0.8, 2.0), J2 ∈ (-2.0, -0.8)
+    j1s = rng.uniform(0.8, 2.0, size=n_samples)
+    j2s = rng.uniform(-2.0, -0.8, size=n_samples)
+    for j1, j2 in zip(j1s, j2s):
+        samples.append(({"j1": float(j1), "j2": float(j2)}, "haldane"))
+    # ferromagnetic (II): J1 ∈ (0.8, 2.5), J2 ∈ (0.8, 2.5)
+    j1s = rng.uniform(0.8, 2.5, size=n_samples)
+    j2s = rng.uniform(0.8, 2.5, size=n_samples)
+    for j1, j2 in zip(j1s, j2s):
+        samples.append(({"j1": float(j1), "j2": float(j2)}, "ferromagnetic"))
+    # antiferromagnetic (III): J1 ∈ (-2.5, -0.8), J2 ∈ (-2.5, -0.8)
+    j1s = rng.uniform(-2.5, -0.8, size=n_samples)
+    j2s = rng.uniform(-2.5, -0.8, size=n_samples)
+    for j1, j2 in zip(j1s, j2s):
+        samples.append(({"j1": float(j1), "j2": float(j2)}, "antiferromagnetic"))
+    # trivial (IV): |J1| < 0.15, |J2| < 0.15
+    j1s = rng.uniform(-0.15, 0.15, size=n_samples)
+    j2s = rng.uniform(-0.15, 0.15, size=n_samples)
+    for j1, j2 in zip(j1s, j2s):
+        samples.append(({"j1": float(j1), "j2": float(j2)}, "trivial"))
+    return samples
diff --git a/qiskit_machine_learning/datasets/phase_of_matter/_haldane.py b/qiskit_machine_learning/datasets/phase_of_matter/_haldane.py
new file mode 100644
index 000000000..858152d7b
--- /dev/null
+++ b/qiskit_machine_learning/datasets/phase_of_matter/_haldane.py
@@ -0,0 +1,99 @@
+# This code is part of a Qiskit project.
+#
+# (C) Copyright IBM 2019, 2026.
+# (C) Copyright UKRI-STFC (Hartree Centre) 2024, 2026.
+#
+# This code is licensed under the Apache License, Version 2.0. You may
+# obtain a copy of this license in the LICENSE.txt file in the root directory
+# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
+#
+# Any modifications or derivative works of this code must retain this
+# copyright notice, and modified files need to carry a notice indicating
+# that they have been altered from the originals.
+
+"""Haldane chain Hamiltonian and phase sampler.
+
+Reference: Bermejo et al., arXiv:2408.12739, eq. (7).
+"""
+
+from __future__ import annotations
+
+import numpy as np
+from qiskit.quantum_info import SparsePauliOp
+
+from ._base import pauli_term
+
+#: Ordered list of phase labels for the Haldane model.
+PHASE_LABELS: list[str] = ["antiferromagnetic", "paramagnetic", "spt"]
+
+
+def build_hamiltonian(n: int, h1: float, h2: float, j: float = 1.0) -> SparsePauliOp:
+    r"""Haldane chain Hamiltonian (Paper eq. 7).
+
+    .. math::
+
+        H = -J \sum_{i=1}^{n-2} Z_i X_{i+1} Z_{i+2}
+            - h_1 \sum_{i=1}^{n} X_i
+            - h_2 \sum_{i=1}^{n-1} X_i X_{i+1}
+
+    with :math:`J > 0`.
+
+    Phase diagram (see Fig. 4 in the reference, :math:`h_1/J` vs
+    :math:`h_2/J`):
+
+    * **antiferromagnetic** — small :math:`h_1`, negative :math:`h_2`
+    * **paramagnetic** — large :math:`h_1`
+    * **spt** (symmetry-protected topological) — small :math:`h_1`,
+      positive :math:`h_2 > 0.423` (at :math:`h_1 = 0.5`)
+
+    Args:
+        n: Number of lattice sites (qubits).
+        h1: Transverse-field strength (units of *J*).
+        h2: Nearest-neighbor XX coupling (units of *J*). Positive values
+            favor the SPT phase; negative values favor antiferromagnetic.
+        j: ZXZ cluster coupling :math:`J`, default 1.0; ``h1``/``h2`` are not rescaled by it.
+
+    Returns:
+        SparsePauliOp for the Hamiltonian on *n* qubits.
+    """
+    terms: list[SparsePauliOp] = []
+    for i in range(n - 2):
+        terms.append(-j * pauli_term([("Z", i), ("X", i + 1), ("Z", i + 2)], n))
+    for i in range(n):
+        terms.append(-h1 * pauli_term([("X", i)], n))
+    for i in range(n - 1):
+        terms.append(-h2 * pauli_term([("X", i), ("X", i + 1)], n))
+    return SparsePauliOp.sum(terms).simplify()
+
+
+def sample_parameters(n_samples: int, rng: np.random.Generator) -> list[tuple[dict, str]]:
+    """Sample coupling parameters uniformly from the interior of each phase.
+
+    Sampling regions are chosen well away from phase boundaries (see Fig. 4
+    in the reference) to ensure clean labels.
+
+    Args:
+        n_samples: Number of samples to draw *per class*.
+        rng: NumPy random Generator instance.
+
+    Returns:
+        List of ``(params_dict, phase_label)`` tuples. The list contains
+        *n_samples* entries for each phase in :data:`PHASE_LABELS`, in order.
+    """
+    samples: list[tuple[dict, str]] = []
+    # antiferromagnetic: small h1, negative h2
+    h1s = rng.uniform(0.0, 0.15, size=n_samples)
+    h2s = rng.uniform(-0.3, -0.05, size=n_samples)
+    for h1, h2 in zip(h1s, h2s):
+        samples.append(({"h1": float(h1), "h2": float(h2)}, "antiferromagnetic"))
+    # paramagnetic: large h1, mildly positive h2
+    h1s = rng.uniform(0.9, 1.5, size=n_samples)
+    h2s = rng.uniform(0.0, 0.35, size=n_samples)
+    for h1, h2 in zip(h1s, h2s):
+        samples.append(({"h1": float(h1), "h2": float(h2)}, "paramagnetic"))
+    # spt: small h1, h2 well above the ~0.423 boundary
+    h1s = rng.uniform(0.0, 0.3, size=n_samples)
+    h2s = rng.uniform(0.55, 1.0, size=n_samples)
+    for h1, h2 in zip(h1s, h2s):
+        samples.append(({"h1": float(h1), "h2": float(h2)}, "spt"))
+    return samples
diff --git a/qiskit_machine_learning/datasets/phase_of_matter/_heisenberg.py b/qiskit_machine_learning/datasets/phase_of_matter/_heisenberg.py
new file mode 100644
index 000000000..c89b8dabf
--- /dev/null
+++ b/qiskit_machine_learning/datasets/phase_of_matter/_heisenberg.py
@@ -0,0 +1,86 @@
+# This code is part of a Qiskit project.
+#
+# (C) Copyright IBM 2019, 2026.
+# (C) Copyright UKRI-STFC (Hartree Centre) 2024, 2026.
+#
+# This code is licensed under the Apache License, Version 2.0. You may
+# obtain a copy of this license in the LICENSE.txt file in the root directory
+# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
+#
+# Any modifications or derivative works of this code must retain this
+# copyright notice, and modified files need to carry a notice indicating
+# that they have been altered from the originals.
+
+"""Bond-alternating XXX Heisenberg Hamiltonian and phase sampler.
+
+Reference: Bermejo et al., arXiv:2408.12739, eq. (6).
+"""
+
+from __future__ import annotations
+
+import numpy as np
+from qiskit.quantum_info import SparsePauliOp
+
+from ._base import pauli_term
+
+#: Ordered list of phase labels for the Heisenberg model.
+PHASE_LABELS: list[str] = ["trivial", "topological"]
+
+
+def build_hamiltonian(n: int, j1: float, j2: float) -> SparsePauliOp:
+    r"""Bond-alternating XXX Heisenberg Hamiltonian (Paper eq. 6).
+
+    .. math::
+
+        H = \sum_{i=1}^{n-1} J_i
+            \left( X_i X_{i+1} + Y_i Y_{i+1} + Z_i Z_{i+1} \right)
+
+    where :math:`J_i = J_1` for odd *i* and :math:`J_i = J_2` for even *i* (1-indexed), so
+    the first bond (sites 1 and 2) carries :math:`J_1`; :math:`J_1, J_2 \geq 0`.
+
+    Phase diagram (thermodynamic limit):
+
+    * **trivial** — :math:`J_2 / J_1 < 1`
+    * **topological** — :math:`J_2 / J_1 > 1`
+
+    Args:
+        n: Number of lattice sites (qubits).
+        j1: Coupling on bonds 0, 2, 4, ... (0-indexed, first bond included), :math:`J_1 \geq 0`.
+        j2: Coupling on bonds 1, 3, 5, ... (0-indexed), :math:`J_2 \geq 0`.
+
+    Returns:
+        SparsePauliOp for the Hamiltonian on *n* qubits.
+    """
+    terms: list[SparsePauliOp] = []
+    for i in range(n - 1):
+        # Bond i is 0-indexed here: even i (the 1st, 3rd, ... bond of the chain) gets j1.
+        j = j1 if (i % 2 == 0) else j2
+        for pauli in ("X", "Y", "Z"):
+            terms.append(j * pauli_term([(pauli, i), (pauli, i + 1)], n))
+    return SparsePauliOp.sum(terms).simplify()
+
+
+def sample_parameters(n_samples: int, rng: np.random.Generator) -> list[tuple[dict, str]]:
+    """Sample coupling parameters uniformly from the interior of each phase.
+
+    Parameters are drawn well away from the phase boundary (:math:`J_2/J_1 = 1`)
+    to ensure clean labels.
+
+    Args:
+        n_samples: Number of samples to draw *per class*.
+        rng: NumPy random Generator instance.
+
+    Returns:
+        List of ``(params_dict, phase_label)`` tuples. The list contains
+        *n_samples* entries for each phase in :data:`PHASE_LABELS`, in order.
+    """
+    samples: list[tuple[dict, str]] = []
+    # trivial: J2/J1 ∈ (0.0, 0.8) — fix J1 = 1.0
+    ratios = rng.uniform(0.0, 0.8, size=n_samples)
+    for r in ratios:
+        samples.append(({"j1": 1.0, "j2": float(r)}, "trivial"))
+    # topological: J2/J1 ∈ (1.2, 3.0)
+    ratios = rng.uniform(1.2, 3.0, size=n_samples)
+    for r in ratios:
+        samples.append(({"j1": 1.0, "j2": float(r)}, "topological"))
+    return samples
diff --git a/qiskit_machine_learning/datasets/phase_of_matter/phase_of_matter.py b/qiskit_machine_learning/datasets/phase_of_matter/phase_of_matter.py
new file mode 100644
index 000000000..2d7052165
--- /dev/null
+++ b/qiskit_machine_learning/datasets/phase_of_matter/phase_of_matter.py
@@ -0,0 +1,271 @@
+# This code is part of a Qiskit project.
+#
+# (C) Copyright IBM 2019, 2026.
+# (C) Copyright UKRI-STFC (Hartree Centre) 2024, 2026.
+#
+# This code is licensed under the Apache License, Version 2.0. You may
+# obtain a copy of this license in the LICENSE.txt file in the root directory
+# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
+#
+# Any modifications or derivative works of this code must retain this
+# copyright notice, and modified files need to carry a notice indicating
+# that they have been altered from the originals.
+
+"""Public API for the Phase of Matter dataset generator."""
+
+from __future__ import annotations
+
+import math
+
+import numpy as np
+from qiskit.quantum_info import SparsePauliOp, Statevector
+
+from . import _annni, _cluster, _haldane, _heisenberg
+from ._base import get_ground_state_exact, get_ground_state_vqe
+
+# ---------------------------------------------------------------------------
+# Registry — maps model name to its module
+# ---------------------------------------------------------------------------
+
+_MODELS = {
+    "heisenberg": _heisenberg,
+    "haldane": _haldane,
+    "annni": _annni,
+    "cluster": _cluster,
+}
+
+_BUILDERS = {
+    "heisenberg": lambda n, p: _heisenberg.build_hamiltonian(n, p["j1"], p["j2"]),
+    "haldane": lambda n, p: _haldane.build_hamiltonian(n, p["h1"], p["h2"]),
+    "annni": lambda n, p: _annni.build_hamiltonian(n, p["kappa"], p["h"]),
+    "cluster": lambda n, p: _cluster.build_hamiltonian(n, p["j1"], p["j2"]),
+}
+
+
+# ---------------------------------------------------------------------------
+# Public function
+# ---------------------------------------------------------------------------
+
+
+def phase_of_matter_data(
+    training_size: int,
+    test_size: int,
+    n: int,
+    *,
+    model: str = "heisenberg",
+    one_hot: bool = True,
+    include_sample_total: bool = False,
+    class_labels: list | None = None,
+    formatting: str = "ndarray",
+    seed: int | None = None,
+    backend=None,
+) -> (
+    tuple[np.ndarray | list[Statevector], np.ndarray, np.ndarray | list[Statevector], np.ndarray]
+    | tuple[
+        np.ndarray | list[Statevector],
+        np.ndarray,
+        np.ndarray | list[Statevector],
+        np.ndarray,
+        np.ndarray,
+    ]
+):
+    r"""Generate a quantum Phase of Matter classification dataset.
+
+    For each sample, coupling parameters are drawn uniformly from the interior
+    of a known phase region, the corresponding Hamiltonian is built as a
+    :class:`~qiskit.quantum_info.SparsePauliOp`, and its ground state is
+    computed via sparse exact diagonalization. The ground-state vector forms
+    the feature, and the phase name forms the label.
+
+    Four spin-chain Hamiltonians are supported (see the reference for the
+    exact definitions and phase diagrams):
+
+    * ``"heisenberg"`` — Bond-alternating XXX Heisenberg model (eq. 6).
+      Phases: *trivial*, *topological*.
+    * ``"haldane"`` — Haldane chain (eq. 7).
+      Phases: *antiferromagnetic*, *paramagnetic*, *spt*.
+    * ``"annni"`` — Axial Next-Nearest-Neighbor Ising model (eq. 8).
+      Phases: *ferromagnetic*, *paramagnetic*, *floating*, *antiphase*.
+    * ``"cluster"`` — Cluster Hamiltonian with periodic boundary (eq. 9).
+      Phases: *haldane*, *ferromagnetic*, *antiferromagnetic*, *trivial*.
+
+    Args:
+        training_size: Total number of training samples, split evenly across
+            classes (any remainder goes to the first classes).
+        test_size: Total number of test samples (balanced across classes).
+        n: Number of lattice sites (qubits). Must be ≥ 4. The feature
+            dimension is :math:`2^n`; practical limit for exact
+            diagonalization is ``n ≤ 16``.
+        model: Hamiltonian to use. One of ``"heisenberg"``, ``"haldane"``,
+            ``"annni"``, ``"cluster"``.
+        one_hot: If ``True`` (default), labels are one-hot encoded numpy
+            arrays. If ``False``, string phase names are returned.
+        include_sample_total: If ``True``, a fifth element is appended to the
+            return tuple with the number of ground states computed per class.
+        class_labels: Optional list of custom label names that replace the
+            model's default phase names. Length must equal the number of
+            phases for the chosen model.
+        formatting: ``"ndarray"`` (default) returns features as a complex
+            numpy array of shape ``(num_samples, 2**n)``.
+            ``"statevector"`` returns a list of
+            :class:`~qiskit.quantum_info.Statevector` objects.
+        seed: Integer seed for the parameter-sampling random number generator,
+            enabling reproducible datasets.
+        backend (object): When ``None`` (default), exact diagonalization via
+            ``scipy.sparse.linalg.eigsh`` is used -- the recommended path for
+            reliable phase labels. When a Qiskit backend is provided, a
+            VQE-based approximation is used instead.
+
+            .. warning::
+
+                The VQE pathway is for hardware-experiment workflows only.
+                VQE approximations near phase boundaries may produce
+                incorrect labels. Use ``backend=None`` for dataset
+                generation.
+
+    Returns:
+        A tuple ``(training_features, training_labels, test_features,
+        test_labels)`` where:
+
+        * ``training_features`` / ``test_features`` — shape
+          ``(n_samples, 2**n)`` complex ndarray, or list of
+          :class:`~qiskit.quantum_info.Statevector` when
+          ``formatting="statevector"``.
+        * ``training_labels`` / ``test_labels`` — shape
+          ``(n_samples, n_classes)`` one-hot ndarray when ``one_hot=True``,
+          or 1-D ndarray of label strings when ``one_hot=False``.
+
+        If ``include_sample_total=True``, a fifth element — a numpy array of
+        shape ``(n_classes,)`` containing the number of ground states
+        computed per class — is appended.
+
+    Raises:
+        ValueError: If *model* is not one of the supported strings.
+        ValueError: If *formatting* is not ``"ndarray"`` or
+            ``"statevector"``.
+        ValueError: If ``n < 4``.
+        ValueError: If *class_labels* is provided but has the wrong length.
+
+    References:
+        [1] Bermejo et al., "Quantum Convolutional Neural Networks are
+            (Effectively) Classically Simulatable", arXiv:2408.12739 (2024).
+
+    Examples:
+
+        >>> x_tr, y_tr, x_te, y_te = phase_of_matter_data(  # doctest: +SKIP
+        ...     10, 5, 4, model="heisenberg", seed=0
+        ... )
+        >>> x_tr.shape  # doctest: +SKIP
+        (10, 16)
+        >>> y_tr.shape  # doctest: +SKIP
+        (10, 2)
+    """
+    if model not in _MODELS:
+        raise ValueError(f"Unknown model '{model}'. Choose from: {sorted(_MODELS.keys())}.")
+    if formatting not in ("ndarray", "statevector"):
+        raise ValueError(f"Unknown formatting '{formatting}'. Choose 'ndarray' or 'statevector'.")
+    if n < 4:
+        raise ValueError(f"n must be at least 4, got {n}.")
+
+    module = _MODELS[model]
+    default_labels: list[str] = module.PHASE_LABELS
+    n_classes = len(default_labels)
+
+    if class_labels is not None:
+        if len(class_labels) != n_classes:
+            raise ValueError(
+                f"class_labels has {len(class_labels)} entries but model '{model}' "
+                f"has {n_classes} phases."
+            )
+        label_names = list(class_labels)
+    else:
+        label_names = list(default_labels)
+
+    rng = np.random.default_rng(seed)
+
+    # ceil ensures every class gets at least the requested count even when
+    # training_size / test_size are not divisible by n_classes.
+    n_per_class_train = math.ceil(training_size / n_classes)
+    n_per_class_test = math.ceil(test_size / n_classes)
+    n_per_class = n_per_class_train + n_per_class_test
+
+    # Samplers return blocks of n_per_class per class, class order preserved.
+    raw_samples = module.sample_parameters(n_per_class, rng)
+
+    build_fn = _BUILDERS[model]
+    gs_fn = (
+        (lambda h: get_ground_state_vqe(h, backend))
+        if backend is not None
+        else get_ground_state_exact
+    )
+
+    # Compute ground states — preserve class-block order for the split below.
+    all_states: list[np.ndarray] = []
+    all_labels: list[str] = []
+    for params, phase in raw_samples:
+        ham: SparsePauliOp = build_fn(n, params)
+        gs = gs_fn(ham)
+        if isinstance(gs, Statevector):
+            gs = gs.data
+        all_states.append(gs)
+        idx = default_labels.index(phase)
+        all_labels.append(label_names[idx])
+
+    # Split per class into train / test.
+    train_states: list[np.ndarray] = []
+    train_labels_raw: list[str] = []
+    test_states: list[np.ndarray] = []
+    test_labels_raw: list[str] = []
+    sample_totals = np.zeros(n_classes, dtype=int)
+
+    # Spread any remainder over the leading classes: truncating the class-ordered
+    # lists instead would starve (or entirely drop) the last classes.
+    tr_base, tr_extra = divmod(training_size, n_classes)
+    te_base, te_extra = divmod(test_size, n_classes)
+    for cls_idx in range(n_classes):
+        start = cls_idx * n_per_class
+        cls_states = all_states[start : start + n_per_class]
+        cls_labels = all_labels[start : start + n_per_class]
+        n_tr = tr_base + int(cls_idx < tr_extra)
+        te_stop = n_per_class_train + te_base + int(cls_idx < te_extra)
+        train_states.extend(cls_states[:n_tr])
+        train_labels_raw.extend(cls_labels[:n_tr])
+        test_states.extend(cls_states[n_per_class_train:te_stop])
+        test_labels_raw.extend(cls_labels[n_per_class_train:te_stop])
+        sample_totals[cls_idx] = n_per_class
+
+    # Shuffle train and test independently to interleave classes.
+    tr_idx = np.arange(len(train_states))
+    rng.shuffle(tr_idx)
+    te_idx = np.arange(len(test_states))
+    rng.shuffle(te_idx)
+    train_states = [train_states[i] for i in tr_idx]
+    train_labels_raw = [train_labels_raw[i] for i in tr_idx]
+    test_states = [test_states[i] for i in te_idx]
+    test_labels_raw = [test_labels_raw[i] for i in te_idx]
+
+    # Format features.
+    if formatting == "ndarray":
+        x_train: np.ndarray | list[Statevector] = np.array(train_states)
+        x_test: np.ndarray | list[Statevector] = np.array(test_states)
+    else:
+        x_train = [Statevector(s) for s in train_states]
+        x_test = [Statevector(s) for s in test_states]
+
+    # Format labels.
+    label_to_idx = {lbl: i for i, lbl in enumerate(label_names)}
+
+    def _make_labels(raw: list[str]) -> np.ndarray:
+        if one_hot:
+            mat = np.zeros((len(raw), n_classes), dtype=float)
+            for row, lbl in enumerate(raw):
+                mat[row, label_to_idx[lbl]] = 1.0
+            return mat
+        return np.array(raw)
+
+    y_train = _make_labels(train_labels_raw)
+    y_test = _make_labels(test_labels_raw)
+
+    if include_sample_total:
+        return x_train, y_train, x_test, y_test, sample_totals
+    return x_train, y_train, x_test, y_test
diff --git a/test/datasets/test_phase_of_matter.py b/test/datasets/test_phase_of_matter.py
new file mode 100644
index 000000000..2de8c9721
--- /dev/null
+++ b/test/datasets/test_phase_of_matter.py
@@ -0,0 +1,423 @@
+# This code is part of a Qiskit project.
+#
+# (C) Copyright IBM 2019, 2026.
+# (C) Copyright UKRI-STFC (Hartree Centre) 2024, 2026.
+#
+# This code is licensed under the Apache License, Version 2.0. You may
+# obtain a copy of this license in the LICENSE.txt file in the root directory
+# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
+#
+# Any modifications or derivative works of this code must retain this
+# copyright notice, and modified files need to carry a notice indicating
+# that they have been altered from the originals.
+
+"""Tests for the Phase of Matter dataset generator.
+
+Follows qiskit-machine-learning test conventions:
+  - QiskitMachineLearningTestCase base class
+  - parameterized tests via the ddt library
+  - np.testing.assert_* for array assertions
+"""
+
+from __future__ import annotations
+
+import unittest
+from test import QiskitMachineLearningTestCase
+from typing import Callable
+
+import numpy as np
+from ddt import ddt, idata, unpack
+from qiskit.quantum_info import SparsePauliOp, Statevector
+
+from qiskit_machine_learning.datasets import phase_of_matter_data
+from qiskit_machine_learning.datasets.phase_of_matter._annni import (
+    build_hamiltonian as build_annni,
+)
+from qiskit_machine_learning.datasets.phase_of_matter._base import get_ground_state_exact
+from qiskit_machine_learning.datasets.phase_of_matter._cluster import (
+    build_hamiltonian as build_cluster,
+)
+from qiskit_machine_learning.datasets.phase_of_matter._haldane import (
+    build_hamiltonian as build_haldane,
+)
+from qiskit_machine_learning.datasets.phase_of_matter._heisenberg import (
+    build_hamiltonian as build_heisenberg,
+)
+
+# ---------------------------------------------------------------------------
+# Helper
+# ---------------------------------------------------------------------------
+
+
+def _is_hermitian(op: SparsePauliOp, tol: float = 1e-10) -> bool:
+    """Return True if the dense matrix of op equals its conjugate transpose within tol."""
+    mat = op.to_matrix()
+    return np.allclose(mat, mat.conj().T, atol=tol)
+
+
+# ---------------------------------------------------------------------------
+# TestHamiltonianBuilders
+# ---------------------------------------------------------------------------
+
+
+@ddt
+class TestHamiltonianBuilders(QiskitMachineLearningTestCase):
+    """Verify that each Hamiltonian builder returns a valid Hermitian operator."""
+
+    @idata([(4,), (6,)])
+    @unpack
+    def test_heisenberg_hermitian(self, n):
+        """Heisenberg Hamiltonian must be Hermitian."""
+        ham = build_heisenberg(n, j1=1.0, j2=0.5)
+        self.assertTrue(_is_hermitian(ham), f"Heisenberg n={n} is not Hermitian")
+
+    @idata([(4,), (6,)])
+    @unpack
+    def test_haldane_hermitian(self, n):
+        """Haldane Hamiltonian must be Hermitian."""
+        ham = build_haldane(n, h1=0.5, h2=0.3)
+        self.assertTrue(_is_hermitian(ham), f"Haldane n={n} is not Hermitian")
+
+    @idata([(4,), (6,)])
+    @unpack
+    def test_annni_hermitian(self, n):
+        """ANNNI Hamiltonian must be Hermitian."""
+        ham = build_annni(n, kappa=0.3, h=0.5)
+        self.assertTrue(_is_hermitian(ham), f"ANNNI n={n} is not Hermitian")
+
+    @idata([(4,), (6,)])
+    @unpack
+    def test_cluster_hermitian(self, n):
+        """Cluster Hamiltonian must be Hermitian."""
+        ham = build_cluster(n, j1=1.0, j2=-1.0)
+        self.assertTrue(_is_hermitian(ham), f"Cluster n={n} is not Hermitian")
+
+    def test_cluster_periodic_boundary(self):
+        """Cluster Hamiltonian must have more terms than the n Z terms alone."""
+        n = 4
+        ham = build_cluster(n, j1=1.0, j2=1.0)
+        # Expect n Z + n XX + n Z-X-Z terms (3n in total); only len(ham) > n is asserted.
+        self.assertGreater(len(ham), n)
+
+    def test_matrix_dimension(self):
+        """All models should produce a 2^n x 2^n matrix for n=4."""
+        n = 4
+        dim = 2**n
+        hamiltonians = [
+            build_heisenberg(n, 1.0, 0.5),
+            build_haldane(n, 0.5, 0.3),
+            build_annni(n, 0.3, 0.5),
+            build_cluster(n, 1.0, -1.0),
+        ]
+        for ham in hamiltonians:
+            mat = ham.to_matrix()
+            self.assertEqual(mat.shape, (dim, dim))
+
+
+# ---------------------------------------------------------------------------
+# TestGroundState
+# ---------------------------------------------------------------------------
+
+
+@ddt
+class TestGroundState(QiskitMachineLearningTestCase):
+    """Verify exact-diagonalization ground-state properties."""
+
+    def _fixed_hamiltonian(self, model: str, n: int) -> SparsePauliOp:
+        """Return the Hamiltonian of the given model built at fixed test parameters."""
+        params: dict[str, tuple] = {
+            "heisenberg": (n, 1.0, 0.5),
+            "haldane": (n, 0.5, 0.3),
+            "annni": (n, 0.3, 0.5),
+            "cluster": (n, 1.0, -1.0),
+        }
+        builders: dict[str, Callable[..., SparsePauliOp]] = {
+            "heisenberg": build_heisenberg,
+            "haldane": build_haldane,
+            "annni": build_annni,
+            "cluster": build_cluster,
+        }
+        return builders[model](*params[model])
+
+    @idata([("heisenberg",), ("haldane",), ("annni",), ("cluster",)])
+    @unpack
+    def test_normalization(self, model):
+        """Ground state must be normalized to unit norm."""
+        ham = self._fixed_hamiltonian(model, n=4)
+        gs = get_ground_state_exact(ham)
+        self.assertAlmostEqual(
+            np.linalg.norm(gs), 1.0, places=8, msg=f"{model} ground state is not normalized"
+        )
+
+    @idata([("heisenberg",), ("haldane",), ("annni",), ("cluster",)])
+    @unpack
+    def test_is_eigenstate(self, model):
+        """H|psi> must equal E|psi> up to numerical noise."""
+        ham = self._fixed_hamiltonian(model, n=4)
+        gs = get_ground_state_exact(ham)
+        mat = ham.to_matrix()
+        h_psi = mat @ gs
+        energy = np.dot(gs.conj(), h_psi).real
+        residual = np.linalg.norm(h_psi - energy * gs)
+        self.assertLess(residual, 1e-8, msg=f"{model} eigenstate residual {residual:.2e}")
+
+    def test_lowest_eigenvalue(self):
+        """Ground-state energy must match the minimum eigenvalue from dense diagonalization."""
+        ham = build_heisenberg(4, j1=1.0, j2=2.0)
+        gs = get_ground_state_exact(ham)
+        mat = ham.to_matrix()
+        e_eigsh = (gs.conj() @ mat @ gs).real
+        e_min = np.linalg.eigvalsh(mat).min()
+        self.assertAlmostEqual(e_eigsh, e_min, places=8)
+
+
+# ---------------------------------------------------------------------------
+# TestPhaseLabels
+# ---------------------------------------------------------------------------
+
+
+@ddt
+class TestPhaseLabels(QiskitMachineLearningTestCase):
+    """Verify that every expected phase label appears in the generated datasets."""
+
+    @idata(
+        [
+            (0.2, "trivial"),
+            (2.5, "topological"),
+        ]
+    )
+    @unpack
+    def test_heisenberg_phase_region(
+        self, j2_ratio, expected_label
+    ):  # pylint: disable=unused-argument
+        """Heisenberg labels sampled far from boundary must include expected phase."""
+        # j2_ratio is illustrative only and is never passed to the generator: the
+        # sampler draws from fixed interior regions, so we only verify that each
+        # expected phase label appears in a dataset generated with a fixed seed.
+        _, y, _, _ = phase_of_matter_data(20, 4, 4, model="heisenberg", one_hot=False, seed=0)
+        self.assertIn(expected_label, set(y), msg=f"Label '{expected_label}' missing from dataset")
+
+    @idata(
+        [
+            ("ferromagnetic",),
+            ("paramagnetic",),
+            ("floating",),
+            ("antiphase",),
+        ]
+    )
+    @unpack
+    def test_annni_all_phases_present(self, phase):
+        """All four ANNNI phases must appear in a sufficiently large dataset."""
+        _, y, _, _ = phase_of_matter_data(40, 8, 4, model="annni", one_hot=False, seed=42)
+        self.assertIn(phase, set(y), msg=f"ANNNI phase '{phase}' missing from dataset")
+
+    @idata(
+        [
+            ("haldane",),
+            ("ferromagnetic",),
+            ("antiferromagnetic",),
+            ("trivial",),
+        ]
+    )
+    @unpack
+    def test_cluster_all_phases_present(self, phase):
+        """All four Cluster phases must appear in a sufficiently large dataset."""
+        _, y, _, _ = phase_of_matter_data(40, 8, 4, model="cluster", one_hot=False, seed=42)
+        self.assertIn(phase, set(y), msg=f"Cluster phase '{phase}' missing from dataset")
+
+    @idata(
+        [
+            ("antiferromagnetic",),
+            ("paramagnetic",),
+            ("spt",),
+        ]
+    )
+    @unpack
+    def test_haldane_all_phases_present(self, phase):
+        """All three Haldane phases must appear in a sufficiently large dataset."""
+        _, y, _, _ = phase_of_matter_data(30, 6, 4, model="haldane", one_hot=False, seed=42)
+        self.assertIn(phase, set(y), msg=f"Haldane phase '{phase}' missing from dataset")
+
+
+# ---------------------------------------------------------------------------
+# TestPublicAPI
+# ---------------------------------------------------------------------------
+
+
+@ddt
+class TestPublicAPI(QiskitMachineLearningTestCase):
+    """Verify the shape and type contracts of phase_of_matter_data."""
+
+    @idata(
+        [
+            ("heisenberg", 2),
+            ("haldane", 3),
+            ("annni", 4),
+            ("cluster", 4),
+        ]
+    )
+    @unpack
+    def test_return_shapes_ndarray(self, model, n_classes):
+        """Feature and label arrays must have the correct shapes."""
+        x_tr, y_tr, x_te, y_te = phase_of_matter_data(8, 4, 4, model=model, one_hot=True, seed=0)
+        np.testing.assert_array_equal(x_tr.shape, (8, 16))
+        np.testing.assert_array_equal(y_tr.shape, (8, n_classes))
+        np.testing.assert_array_equal(x_te.shape, (4, 16))
+        np.testing.assert_array_equal(y_te.shape, (4, n_classes))
+
+    @idata([("heisenberg",), ("annni",)])
+    @unpack
+    def test_return_shapes_statevector(self, model):
+        """Statevector formatting must return normalized Statevector objects."""
+        x_tr, _, x_te, _ = phase_of_matter_data(
+            4, 2, 4, model=model, formatting="statevector", seed=0
+        )
+        self.assertEqual(len(x_tr), 4)
+        self.assertEqual(len(x_te), 2)
+        self.assertIsInstance(x_tr[0], Statevector)
+        self.assertAlmostEqual(np.linalg.norm(x_tr[0].data), 1.0, places=6)
+
+    def test_one_hot_true_sums_to_one(self):
+        """One-hot rows must each sum to exactly 1."""
+        _, y_tr, _, _ = phase_of_matter_data(8, 4, 4, model="heisenberg", one_hot=True, seed=0)
+        np.testing.assert_array_equal(y_tr.sum(axis=1), np.ones(8))
+
+    def test_one_hot_false_returns_strings(self):
+        """String labels must be a subset of the model's phase names."""
+        _, y_tr, _, _ = phase_of_matter_data(8, 4, 4, model="heisenberg", one_hot=False, seed=0)
+        self.assertTrue(all(isinstance(lbl, str) for lbl in y_tr))
+        self.assertTrue(set(y_tr).issubset({"trivial", "topological"}))
+
+    def test_include_sample_total_false(self):
+        """Default return must be a 4-tuple."""
+        result = phase_of_matter_data(4, 2, 4, model="heisenberg", seed=0)
+        self.assertEqual(len(result), 4)
+
+    def test_include_sample_total_true(self):
+        """include_sample_total=True must append a per-class count array."""
+        result = phase_of_matter_data(
+            4, 2, 4, model="heisenberg", include_sample_total=True, seed=0
+        )
+        self.assertEqual(len(result), 5)
+        totals = result[4]
+        self.assertEqual(totals.shape, (2,))  # 2 classes for heisenberg
+        self.assertTrue(np.all(totals > 0))
+
+    def test_custom_class_labels(self):
+        """Custom label names must replace the model defaults in string output."""
+        _, y_tr, _, _ = phase_of_matter_data(
+            8, 4, 4, model="heisenberg", one_hot=False, class_labels=["phase_A", "phase_B"], seed=0
+        )
+        self.assertTrue(set(y_tr).issubset({"phase_A", "phase_B"}))
+
+    def test_custom_class_labels_one_hot(self):
+        """Custom labels must not affect one-hot shape or values."""
+        _, y1, _, _ = phase_of_matter_data(8, 4, 4, model="heisenberg", one_hot=True, seed=0)
+        _, y2, _, _ = phase_of_matter_data(
+            8, 4, 4, model="heisenberg", one_hot=True, class_labels=["A", "B"], seed=0
+        )
+        np.testing.assert_array_equal(y1, y2)
+
+    def test_feature_normalization(self):
+        """All returned ground states must be normalized."""
+        x_tr, _, x_te, _ = phase_of_matter_data(8, 4, 4, model="annni", seed=1)
+        for states in (x_tr, x_te):
+            norms = np.linalg.norm(states, axis=1)
+            np.testing.assert_allclose(
+                norms, 1.0, atol=1e-8, err_msg="Ground states are not normalized"
+            )
+
+    def test_seed_reproducibility(self):
+        """Same seed must produce numerically identical outputs.
+
+        Features are complex floating-point arrays; we use allclose with a
+        tight tolerance (1e-10) to absorb floating-point noise from the
+        eigensolver while still catching meaningful differences.
+        """
+        kwargs = dict(model="heisenberg", seed=99)
+        x1, y1, xt1, yt1 = phase_of_matter_data(6, 3, 4, **kwargs)
+        x2, y2, xt2, yt2 = phase_of_matter_data(6, 3, 4, **kwargs)
+        np.testing.assert_allclose(
+            x1, x2, atol=1e-10, err_msg="train features differ across equal seeds"
+        )
+        np.testing.assert_array_equal(y1, y2)
+        np.testing.assert_allclose(
+            xt1, xt2, atol=1e-10, err_msg="test features differ across equal seeds"
+        )
+        np.testing.assert_array_equal(yt1, yt2)
+
+    def test_different_seeds_differ(self):
+        """Different seeds should (almost certainly) produce different data."""
+        x1, _, _, _ = phase_of_matter_data(8, 4, 4, model="heisenberg", seed=1)
+        x2, _, _, _ = phase_of_matter_data(8, 4, 4, model="heisenberg", seed=2)
+        self.assertFalse(np.allclose(x1, x2))
+
+    def test_train_test_sizes_respected(self):
+        """Exact training_size / test_size must be honored."""
+        for tr, te in [(10, 3), (7, 7), (1, 1)]:
+            x_tr, _, x_te, _ = phase_of_matter_data(tr, te, 4, model="heisenberg", seed=0)
+            self.assertEqual(len(x_tr), tr, f"train size mismatch (requested {tr})")
+            self.assertEqual(len(x_te), te, f"test size mismatch (requested {te})")
+
+    # -----------------------------------------------------------------------
+    # Error cases
+    # -----------------------------------------------------------------------
+
+    def test_invalid_model_raises(self):
+        """An unknown model name must raise ValueError."""
+        with self.assertRaises(ValueError):
+            phase_of_matter_data(4, 2, 4, model="invalid")
+
+    def test_invalid_formatting_raises(self):
+        """An unknown formatting string must raise ValueError."""
+        with self.assertRaises(ValueError):
+            phase_of_matter_data(4, 2, 4, model="heisenberg", formatting="bad")
+
+    def test_n_too_small_raises(self):
+        """n < 4 must raise ValueError."""
+        with self.assertRaises(ValueError):
+            phase_of_matter_data(4, 2, 3, model="heisenberg")
+
+    def test_wrong_class_labels_length_raises(self):
+        """class_labels with wrong length must raise ValueError."""
+        with self.assertRaises(ValueError):
+            phase_of_matter_data(4, 2, 4, model="heisenberg", class_labels=["only_one"])
+
+
+# ---------------------------------------------------------------------------
+# Integration -- import paths
+# ---------------------------------------------------------------------------
+
+
+class TestImportPaths(QiskitMachineLearningTestCase):
+    """Verify the package can be imported and is correctly wired up."""
+
+    def test_importable(self):
+        """phase_of_matter_data must be accessible from the datasets module."""
+        import qiskit_machine_learning.datasets as ds  # pylint: disable=import-outside-toplevel
+
+        self.assertIsNotNone(ds.phase_of_matter_data)
+
+    def test_in_all(self):
+        """phase_of_matter_data must be listed in datasets.__all__."""
+        import qiskit_machine_learning.datasets as ds  # pylint: disable=import-outside-toplevel
+
+        self.assertIn("phase_of_matter_data", ds.__all__)
+
+    def test_hamiltonian_modules_importable(self):
+        """All Hamiltonian sub-modules must expose the required attributes."""
+        from qiskit_machine_learning.datasets.phase_of_matter import (  # pylint: disable=import-outside-toplevel
+            _annni,
+            _cluster,
+            _haldane,
+            _heisenberg,
+        )
+
+        for mod in (_heisenberg, _haldane, _annni, _cluster):
+            self.assertTrue(hasattr(mod, "build_hamiltonian"))
+            self.assertTrue(hasattr(mod, "sample_parameters"))
+            self.assertTrue(hasattr(mod, "PHASE_LABELS"))
+
+
+if __name__ == "__main__":
+    unittest.main()