From 72d3a2856975c100c48a68a21605043ed1e677a4 Mon Sep 17 00:00:00 2001
From: Shailesh K <shailesh.mkvr@gmail.com>
Date: Wed, 13 May 2026 06:41:01 +0530
Subject: [PATCH 1/6] Add Phase of Matter quantum dataset generator

Implements phase_of_matter_data() as a new dataset in
qiskit_machine_learning/datasets/phase_of_matter/:

- Four spin-chain Hamiltonians built via SparsePauliOp (no extra deps):
  heisenberg (trivial/topological), haldane (afm/paramagnetic/spt),
  annni (ferromagnetic/paramagnetic/floating/antiphase),
  cluster (haldane/ferromagnetic/antiferromagnetic/trivial)
- Exact ground states via scipy.sparse.linalg.eigsh (default)
- Optional VQE pathway for hardware-experiment workflows
- Follows existing dataset API: training_size, test_size, one_hot,
  formatting, class_labels, include_sample_total, seed, backend
- 55-test suite covering Hermiticity, normalization, eigenstate
  residuals, phase label coverage, shape contracts, reproducibility

Reference: Bermejo et al., arXiv:2408.12739 (2024)
---
 qiskit_machine_learning/datasets/__init__.py  |   4 +-
 .../datasets/phase_of_matter/__init__.py      |  38 ++
 .../datasets/phase_of_matter/_annni.py        | 104 +++++
 .../datasets/phase_of_matter/_base.py         | 118 ++++++
 .../datasets/phase_of_matter/_cluster.py      | 101 +++++
 .../datasets/phase_of_matter/_haldane.py      |  98 +++++
 .../datasets/phase_of_matter/_heisenberg.py   |  85 ++++
 .../phase_of_matter/phase_of_matter.py        | 270 ++++++++++++
 test/datasets/test_phase_of_matter.py         | 397 ++++++++++++++++++
 9 files changed, 1214 insertions(+), 1 deletion(-)
 create mode 100644 qiskit_machine_learning/datasets/phase_of_matter/__init__.py
 create mode 100644 qiskit_machine_learning/datasets/phase_of_matter/_annni.py
 create mode 100644 qiskit_machine_learning/datasets/phase_of_matter/_base.py
 create mode 100644 qiskit_machine_learning/datasets/phase_of_matter/_cluster.py
 create mode 100644 qiskit_machine_learning/datasets/phase_of_matter/_haldane.py
 create mode 100644 qiskit_machine_learning/datasets/phase_of_matter/_heisenberg.py
 create mode 100644 qiskit_machine_learning/datasets/phase_of_matter/phase_of_matter.py
 create mode 100644 test/datasets/test_phase_of_matter.py

diff --git a/qiskit_machine_learning/datasets/__init__.py b/qiskit_machine_learning/datasets/__init__.py
index ce9733430..3faf78de7 100644
--- a/qiskit_machine_learning/datasets/__init__.py
+++ b/qiskit_machine_learning/datasets/__init__.py
@@ -29,9 +29,11 @@
 
    ad_hoc_data
    entanglement_concentration_data
+   phase_of_matter_data
 """
 
 from .ad_hoc import ad_hoc_data
 from .entanglement_concentration import entanglement_concentration_data
+from .phase_of_matter import phase_of_matter_data
 
-__all__ = ["ad_hoc_data", "entanglement_concentration_data"]
+__all__ = ["ad_hoc_data", "entanglement_concentration_data", "phase_of_matter_data"]
diff --git a/qiskit_machine_learning/datasets/phase_of_matter/__init__.py b/qiskit_machine_learning/datasets/phase_of_matter/__init__.py
new file mode 100644
index 000000000..70dd4dab8
--- /dev/null
+++ b/qiskit_machine_learning/datasets/phase_of_matter/__init__.py
@@ -0,0 +1,38 @@
+# This code is part of a Qiskit project.
+#
+# (C) Copyright IBM 2019, 2026.
+#
+# This code is licensed under the Apache License, Version 2.0. You may
+# obtain a copy of this license in the LICENSE.txt file in the root directory
+# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
+#
+# Any modifications or derivative works of this code must retain this
+# copyright notice, and modified files need to carry a notice indicating
+# that they have been altered from the originals.
+
+"""
+Phase of Matter dataset (:mod:`phase_of_matter`)
+
+Quantum Phase of Matter classification dataset generator.
+
+Each supported model lives in its own module:
+
+* :mod:`._heisenberg` — Bond-alternating XXX Heisenberg chain
+* :mod:`._haldane`    — Haldane chain
+* :mod:`._annni`      — Axial Next-Nearest-Neighbor Ising (ANNNI) model
+* :mod:`._cluster`    — Cluster Hamiltonian
+
+The :func:`phase_of_matter_data` function is the single public entry point.
+
+.. currentmodule:: phase_of_matter
+
+.. autosummary::
+   :toctree: ../stubs/
+   :nosignatures:
+
+   phase_of_matter_data
+"""
+
+from .phase_of_matter import phase_of_matter_data
+
+__all__ = ["phase_of_matter_data"]
diff --git a/qiskit_machine_learning/datasets/phase_of_matter/_annni.py b/qiskit_machine_learning/datasets/phase_of_matter/_annni.py
new file mode 100644
index 000000000..9b6c4e566
--- /dev/null
+++ b/qiskit_machine_learning/datasets/phase_of_matter/_annni.py
@@ -0,0 +1,104 @@
+# This code is part of a Qiskit project.
+#
+# (C) Copyright IBM 2019, 2026.
+#
+# This code is licensed under the Apache License, Version 2.0. You may
+# obtain a copy of this license in the LICENSE.txt file in the root directory
+# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
+#
+# Any modifications or derivative works of this code must retain this
+# copyright notice, and modified files need to carry a notice indicating
+# that they have been altered from the originals.
+
+"""Axial Next-Nearest-Neighbor Ising (ANNNI) Hamiltonian and phase sampler.
+
+Reference: Bermejo et al., arXiv:2408.12739, eq. (8).
+"""
+
+from __future__ import annotations
+
+import numpy as np
+from qiskit.quantum_info import SparsePauliOp
+
+from ._base import pauli_term
+
+#: Ordered list of phase labels for the ANNNI model.
+PHASE_LABELS: list[str] = ["ferromagnetic", "paramagnetic", "floating", "antiphase"]
+
+
+def build_hamiltonian(n: int, kappa: float, h: float, j1: float = 1.0) -> SparsePauliOp:
+    r"""Build the open-chain ANNNI Hamiltonian (Paper eq. 8).
+
+    .. math::
+
+        H = -J_1 \sum_{i=1}^{n-1} X_i X_{i+1}
+            - J_2 \sum_{i=1}^{n-2} X_i X_{i+2}
+            - B \sum_{i=1}^{n} Z_i
+
+    with :math:`J_2 = -\kappa J_1` and :math:`B = h J_1`.
+
+    Phase diagram (see Fig. 5 in the reference, axes :math:`\kappa` vs
+    :math:`h` with :math:`J_1 = 1`):
+
+    * **ferromagnetic** (I) — small :math:`\kappa`, small :math:`h`
+    * **paramagnetic** (II) — small :math:`\kappa`, large :math:`h`
+    * **floating** (III) — large :math:`\kappa`, moderate :math:`h`
+    * **antiphase** (IV) — large :math:`\kappa`, small :math:`h`
+
+    Args:
+        n: Number of lattice sites (qubits).
+        kappa: Dimensionless ratio :math:`\kappa = -J_2 / J_1`.
+        h: Dimensionless ratio :math:`h = B / J_1`.
+        j1: Overall energy scale (default 1.0).
+
+    Returns:
+        Simplified SparsePauliOp for the Hamiltonian on *n* qubits.
+    """
+    j2 = -kappa * j1  # next-nearest-neighbour coupling J_2
+    b = h * j1  # single-site field strength B
+    terms: list[SparsePauliOp] = []
+    for i in range(n - 1):  # nearest-neighbour XX bonds, no wrap-around
+        terms.append(-j1 * pauli_term([("X", i), ("X", i + 1)], n))
+    for i in range(n - 2):  # next-nearest-neighbour XX bonds, no wrap-around
+        terms.append(-j2 * pauli_term([("X", i), ("X", i + 2)], n))
+    for i in range(n):  # Z field on every site
+        terms.append(-b * pauli_term([("Z", i)], n))
+    return SparsePauliOp.sum(terms).simplify()
+
+
+def sample_parameters(n_samples: int, rng: np.random.Generator) -> list[tuple[dict, str]]:
+    """Sample coupling parameters uniformly from the interior of each phase.
+
+    Sampling regions (see Fig. 5 in the reference) are placed well inside
+    each phase to avoid mislabelled points near boundaries.
+
+    Args:
+        n_samples: Number of samples to draw *per class*.
+        rng: NumPy Generator, consumed in a fixed order (κ then h, phase by phase).
+
+    Returns:
+        List of ``({"kappa": ..., "h": ...}, phase_label)`` tuples: *n_samples*
+        consecutive entries for each phase in :data:`PHASE_LABELS`, in order.
+    """
+    samples: list[tuple[dict, str]] = []
+    # ferromagnetic (I): κ ∈ [0, 0.3), h ∈ [0, 0.25)
+    ks = rng.uniform(0.0, 0.3, size=n_samples)
+    hs = rng.uniform(0.0, 0.25, size=n_samples)
+    for k, hv in zip(ks, hs):
+        samples.append(({"kappa": float(k), "h": float(hv)}, "ferromagnetic"))
+    # paramagnetic (II): κ ∈ [0, 0.45), h ∈ [0.9, 1.5)
+    ks = rng.uniform(0.0, 0.45, size=n_samples)
+    hs = rng.uniform(0.9, 1.5, size=n_samples)
+    for k, hv in zip(ks, hs):
+        samples.append(({"kappa": float(k), "h": float(hv)}, "paramagnetic"))
+    # floating (III): κ ∈ [0.55, 0.9), h ∈ [0.25, 0.65)
+    ks = rng.uniform(0.55, 0.9, size=n_samples)
+    hs = rng.uniform(0.25, 0.65, size=n_samples)
+    for k, hv in zip(ks, hs):
+        samples.append(({"kappa": float(k), "h": float(hv)}, "floating"))
+    # antiphase (IV): κ ∈ [0.55, 0.9), h ∈ [0, 0.1)
+    ks = rng.uniform(0.55, 0.9, size=n_samples)
+    hs = rng.uniform(0.0, 0.1, size=n_samples)
+    for k, hv in zip(ks, hs):
+        samples.append(({"kappa": float(k), "h": float(hv)}, "antiphase"))
+    return samples
diff --git a/qiskit_machine_learning/datasets/phase_of_matter/_base.py b/qiskit_machine_learning/datasets/phase_of_matter/_base.py
new file mode 100644
index 000000000..8eebc8ca5
--- /dev/null
+++ b/qiskit_machine_learning/datasets/phase_of_matter/_base.py
@@ -0,0 +1,118 @@
+# This code is part of a Qiskit project.
+#
+# (C) Copyright IBM 2019, 2026.
+#
+# This code is licensed under the Apache License, Version 2.0. You may
+# obtain a copy of this license in the LICENSE.txt file in the root directory
+# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
+#
+# Any modifications or derivative works of this code must retain this
+# copyright notice, and modified files need to carry a notice indicating
+# that they have been altered from the originals.
+
+"""Shared utilities for Phase of Matter dataset generators."""
+
+from __future__ import annotations
+
+import numpy as np
+import scipy.sparse
+import scipy.sparse.linalg
+from qiskit.quantum_info import SparsePauliOp, Statevector
+
+
+def pauli_term(op_list: list[tuple[str, int]], n: int) -> SparsePauliOp:
+    """Build a single n-qubit Pauli term from a list of (pauli_char, site) pairs.
+
+    Sites not listed are identity. Uses Qiskit's little-endian convention:
+    site 0 is the rightmost character in the Pauli string.
+
+    Args:
+        op_list: (Pauli character, site index) pairs; a repeated site keeps the last one.
+        n: Total number of qubits.
+
+    Returns:
+        SparsePauliOp representing the term.
+    """
+    chars = ["I"] * n  # start from identity on every site
+    for pauli_char, site in op_list:
+        chars[site] = pauli_char  # plain list indexing: no range check beyond Python's
+    return SparsePauliOp("".join(reversed(chars)))  # reversed so site 0 ends up rightmost
+
+
+def _canonicalize_phase(vec: np.ndarray) -> np.ndarray:
+    """Rotate the global phase so the first non-negligible element is real positive.
+
+    "Non-negligible" means magnitude above ``1e-10 * max(abs(vec))``.  Eigenvectors
+    are defined only up to a global complex phase; fixing it removes that
+    ambiguity between ``eigsh`` calls.  An all-zero vector is returned unchanged.
+    """
+    threshold = 1e-10 * np.max(np.abs(vec))  # relative cut-off for "non-negligible"
+    for val in vec:
+        if abs(val) > threshold:
+            return vec * (np.conj(val) / abs(val))  # unit-modulus factor; norm unchanged
+    return vec
+
+
+def get_ground_state_exact(hamiltonian: SparsePauliOp) -> np.ndarray:
+    """Return the ground-state vector via sparse exact diagonalization.
+
+    Uses ``scipy.sparse.linalg.eigsh`` with ``which='SA'`` (smallest algebraic
+    eigenvalue).  Practical limit: n ≤ 16 qubits (2^16 × 2^16 matrix).
+
+    The returned vector is phase-canonicalized.  NOTE(review): ``eigsh`` starts
+    from a random vector by default, so a degenerate ground space may not be reproducible.
+
+    Args:
+        hamiltonian: Hamiltonian as a SparsePauliOp.
+
+    Returns:
+        Complex numpy array of shape ``(2**n,)`` — the normalised ground state.
+    """
+    mat = hamiltonian.to_matrix(sparse=True).astype(complex)
+    _, vecs = scipy.sparse.linalg.eigsh(mat, k=1, which="SA")  # eigenvalue is discarded
+    return _canonicalize_phase(vecs[:, 0])  # column 0 is the single requested eigenvector
+
+
+def get_ground_state_vqe(
+    hamiltonian: SparsePauliOp,
+    backend,  # pylint: disable=unused-argument
+) -> Statevector:
+    """Approximate the ground state via VQE using qiskit primitives.
+
+    .. warning::
+
+        VQE is provided for hardware-experiment workflows only.  For reliable
+        phase labels, use the default exact diagonalization (``backend=None``).
+        VQE approximations near phase boundaries may produce incorrect labels.
+
+    Uses an ``EfficientSU2`` ansatz (1 repetition, linear entanglement) with COBYLA
+    (at most 1000 iterations, fixed-seed random start) and ``StatevectorEstimator``
+    from ``qiskit.primitives``.  The ``backend`` argument is accepted for API
+    consistency and future hardware integration but is never used here.
+
+    Args:
+        hamiltonian: Hamiltonian as a SparsePauliOp.
+        backend: Reserved for future hardware integration.  Currently unused;
+            pass any non-``None`` value to activate this pathway.
+
+    Returns:
+        Qiskit ``Statevector`` of the ansatz at the optimiser's final parameters.
+    """
+    # Deferred imports: loaded only when the VQE pathway is actually used.
+    from qiskit.circuit.library import EfficientSU2  # pylint: disable=import-outside-toplevel
+    from qiskit.primitives import StatevectorEstimator  # pylint: disable=import-outside-toplevel
+    from scipy.optimize import minimize  # pylint: disable=import-outside-toplevel
+
+    n = hamiltonian.num_qubits
+    ansatz = EfficientSU2(n, reps=1, entanglement="linear")
+    num_params = ansatz.num_parameters
+    estimator = StatevectorEstimator()
+
+    def cost(params: np.ndarray) -> float:
+        pub = (ansatz, [hamiltonian], [params])  # one circuit, one observable, one point
+        return float(estimator.run([pub]).result()[0].data.evs[0])
+
+    rng = np.random.default_rng(0)  # fixed seed: same starting point on every call
+    x0 = rng.uniform(-np.pi, np.pi, num_params)
+    result = minimize(cost, x0, method="COBYLA", options={"maxiter": 1000, "rhobeg": 0.5})
+    return Statevector(ansatz.assign_parameters(result.x))
diff --git a/qiskit_machine_learning/datasets/phase_of_matter/_cluster.py b/qiskit_machine_learning/datasets/phase_of_matter/_cluster.py
new file mode 100644
index 000000000..f3f48d85e
--- /dev/null
+++ b/qiskit_machine_learning/datasets/phase_of_matter/_cluster.py
@@ -0,0 +1,101 @@
+# This code is part of a Qiskit project.
+#
+# (C) Copyright IBM 2019, 2026.
+#
+# This code is licensed under the Apache License, Version 2.0. You may
+# obtain a copy of this license in the LICENSE.txt file in the root directory
+# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
+#
+# Any modifications or derivative works of this code must retain this
+# copyright notice, and modified files need to carry a notice indicating
+# that they have been altered from the originals.
+
+"""Cluster Hamiltonian (periodic boundary) and phase sampler.
+
+Reference: Bermejo et al., arXiv:2408.12739, eq. (9).
+"""
+
+from __future__ import annotations
+
+import numpy as np
+from qiskit.quantum_info import SparsePauliOp
+
+from ._base import pauli_term
+
+#: Ordered list of phase labels for the Cluster model.
+PHASE_LABELS: list[str] = ["haldane", "ferromagnetic", "antiferromagnetic", "trivial"]
+
+
+def build_hamiltonian(n: int, j1: float, j2: float) -> SparsePauliOp:
+    r"""Cluster Hamiltonian with periodic boundary conditions (Paper eq. 9).
+
+    .. math::
+
+        H = \sum_{i=1}^{n}
+            \left( Z_i - J_1 X_i X_{i+1} - J_2 Z_{i-1} X_i Z_{i+1} \right)
+
+    with periodic identifications :math:`X_{n+1} \equiv X_1`,
+    :math:`Z_0 \equiv Z_n` and :math:`Z_{n+1} \equiv Z_1`.
+
+    Phase diagram (see Fig. 6 in the reference, axes :math:`J_1` vs
+    :math:`J_2`):
+
+    * **haldane** (I) — large positive :math:`J_1`, large negative :math:`J_2`
+    * **ferromagnetic** (II) — large positive :math:`J_1` and :math:`J_2`
+    * **antiferromagnetic** (III) — large negative :math:`J_1` and :math:`J_2`
+    * **trivial** (IV) — both :math:`|J_1|` and :math:`|J_2|` small
+
+    Args:
+        n: Number of lattice sites (qubits).
+        j1: Two-body coupling constant.
+        j2: Three-body cluster coupling constant.
+
+    Returns:
+        Simplified SparsePauliOp for the Hamiltonian on *n* qubits.
+    """
+    terms: list[SparsePauliOp] = []
+    for i in range(n):
+        terms.append(pauli_term([("Z", i)], n))  # single-site Z with coefficient +1
+        i_next = (i + 1) % n  # wraps n-1 -> 0 (periodic boundary)
+        i_prev = (i - 1) % n  # wraps 0 -> n-1 (periodic boundary)
+        terms.append(-j1 * pauli_term([("X", i), ("X", i_next)], n))
+        terms.append(-j2 * pauli_term([("Z", i_prev), ("X", i), ("Z", i_next)], n))
+    return SparsePauliOp.sum(terms).simplify()
+
+
+def sample_parameters(n_samples: int, rng: np.random.Generator) -> list[tuple[dict, str]]:
+    """Sample coupling parameters uniformly from the interior of each phase.
+
+    Sampling regions (see Fig. 6 in the reference) are placed well inside
+    each phase to avoid mislabelled points near boundaries.
+
+    Args:
+        n_samples: Number of samples to draw *per class*.
+        rng: NumPy Generator, consumed in a fixed order (J1 then J2, phase by phase).
+
+    Returns:
+        List of ``({"j1": ..., "j2": ...}, phase_label)`` tuples: *n_samples*
+        consecutive entries for each phase in :data:`PHASE_LABELS`, in order.
+    """
+    samples: list[tuple[dict, str]] = []
+    # haldane (I): J1 ∈ [0.8, 2.0), J2 ∈ [-2.0, -0.8)
+    j1s = rng.uniform(0.8, 2.0, size=n_samples)
+    j2s = rng.uniform(-2.0, -0.8, size=n_samples)
+    for j1, j2 in zip(j1s, j2s):
+        samples.append(({"j1": float(j1), "j2": float(j2)}, "haldane"))
+    # ferromagnetic (II): J1 ∈ [0.8, 2.5), J2 ∈ [0.8, 2.5)
+    j1s = rng.uniform(0.8, 2.5, size=n_samples)
+    j2s = rng.uniform(0.8, 2.5, size=n_samples)
+    for j1, j2 in zip(j1s, j2s):
+        samples.append(({"j1": float(j1), "j2": float(j2)}, "ferromagnetic"))
+    # antiferromagnetic (III): J1 ∈ [-2.5, -0.8), J2 ∈ [-2.5, -0.8)
+    j1s = rng.uniform(-2.5, -0.8, size=n_samples)
+    j2s = rng.uniform(-2.5, -0.8, size=n_samples)
+    for j1, j2 in zip(j1s, j2s):
+        samples.append(({"j1": float(j1), "j2": float(j2)}, "antiferromagnetic"))
+    # trivial (IV): J1 ∈ [-0.15, 0.15), J2 ∈ [-0.15, 0.15)
+    j1s = rng.uniform(-0.15, 0.15, size=n_samples)
+    j2s = rng.uniform(-0.15, 0.15, size=n_samples)
+    for j1, j2 in zip(j1s, j2s):
+        samples.append(({"j1": float(j1), "j2": float(j2)}, "trivial"))
+    return samples
diff --git a/qiskit_machine_learning/datasets/phase_of_matter/_haldane.py b/qiskit_machine_learning/datasets/phase_of_matter/_haldane.py
new file mode 100644
index 000000000..055d72886
--- /dev/null
+++ b/qiskit_machine_learning/datasets/phase_of_matter/_haldane.py
@@ -0,0 +1,98 @@
+# This code is part of a Qiskit project.
+#
+# (C) Copyright IBM 2019, 2026.
+#
+# This code is licensed under the Apache License, Version 2.0. You may
+# obtain a copy of this license in the LICENSE.txt file in the root directory
+# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
+#
+# Any modifications or derivative works of this code must retain this
+# copyright notice, and modified files need to carry a notice indicating
+# that they have been altered from the originals.
+
+"""Haldane chain Hamiltonian and phase sampler.
+
+Reference: Bermejo et al., arXiv:2408.12739, eq. (7).
+"""
+
+from __future__ import annotations
+
+import numpy as np
+from qiskit.quantum_info import SparsePauliOp
+
+from ._base import pauli_term
+
+#: Ordered list of phase labels for the Haldane model.
+PHASE_LABELS: list[str] = ["antiferromagnetic", "paramagnetic", "spt"]
+
+
+def build_hamiltonian(n: int, h1: float, h2: float, j: float = 1.0) -> SparsePauliOp:
+    r"""Build the open-chain Haldane Hamiltonian (Paper eq. 7).
+
+    .. math::
+
+        H = -J \sum_{i=1}^{n-2} Z_i X_{i+1} Z_{i+2}
+            - h_1 \sum_{i=1}^{n} X_i
+            - h_2 \sum_{i=1}^{n-1} X_i X_{i+1}
+
+    with :math:`J > 0`.
+
+    Phase labels as assigned by this module (NOTE(review): verify against
+    Fig. 4 in the reference — the cluster point :math:`h_1 = h_2 = 0` is SPT):
+
+    * **antiferromagnetic** — small :math:`h_1`, negative :math:`h_2`
+    * **paramagnetic** — large :math:`h_1`
+    * **spt** (symmetry-protected topological) — small :math:`h_1`,
+      positive :math:`h_2 > 0.423` (at :math:`h_1 = 0.5`)
+
+    Args:
+        n: Number of lattice sites (qubits).
+        h1: Coefficient of the single-site X field (not rescaled by *j*).
+        h2: Coefficient of the nearest-neighbour XX coupling (not rescaled
+            by *j*); see the phase list above for how its sign is labelled.
+        j: Coefficient of the three-site ZXZ term, default 1.0.
+
+    Returns:
+        Simplified SparsePauliOp for the Hamiltonian on *n* qubits.
+    """
+    terms: list[SparsePauliOp] = []
+    for i in range(n - 2):  # three-site ZXZ terms, no wrap-around
+        terms.append(-j * pauli_term([("Z", i), ("X", i + 1), ("Z", i + 2)], n))
+    for i in range(n):  # X field on every site
+        terms.append(-h1 * pauli_term([("X", i)], n))
+    for i in range(n - 1):  # nearest-neighbour XX bonds, no wrap-around
+        terms.append(-h2 * pauli_term([("X", i), ("X", i + 1)], n))
+    return SparsePauliOp.sum(terms).simplify()
+
+
+def sample_parameters(n_samples: int, rng: np.random.Generator) -> list[tuple[dict, str]]:
+    """Sample coupling parameters uniformly from one fixed box per phase label.
+
+    NOTE(review): verify these boxes against Fig. 4 in the reference — the
+    "antiferromagnetic" box sits right next to the SPT cluster point h1 = h2 = 0.
+
+    Args:
+        n_samples: Number of samples to draw *per class*.
+        rng: NumPy Generator, consumed in a fixed order (h1 then h2, phase by phase).
+
+    Returns:
+        List of ``({"h1": ..., "h2": ...}, phase_label)`` tuples: *n_samples*
+        consecutive entries for each phase in :data:`PHASE_LABELS`, in order.
+    """
+    samples: list[tuple[dict, str]] = []
+    # antiferromagnetic: h1 ∈ [0, 0.15), h2 ∈ [-0.3, -0.05)
+    h1s = rng.uniform(0.0, 0.15, size=n_samples)
+    h2s = rng.uniform(-0.3, -0.05, size=n_samples)
+    for h1, h2 in zip(h1s, h2s):
+        samples.append(({"h1": float(h1), "h2": float(h2)}, "antiferromagnetic"))
+    # paramagnetic: h1 ∈ [0.9, 1.5), h2 ∈ [0, 0.35)
+    h1s = rng.uniform(0.9, 1.5, size=n_samples)
+    h2s = rng.uniform(0.0, 0.35, size=n_samples)
+    for h1, h2 in zip(h1s, h2s):
+        samples.append(({"h1": float(h1), "h2": float(h2)}, "paramagnetic"))
+    # spt: small h1, h2 above ~0.423 (NOTE(review): check which side of it is SPT)
+    h1s = rng.uniform(0.0, 0.3, size=n_samples)
+    h2s = rng.uniform(0.55, 1.0, size=n_samples)
+    for h1, h2 in zip(h1s, h2s):
+        samples.append(({"h1": float(h1), "h2": float(h2)}, "spt"))
+    return samples
diff --git a/qiskit_machine_learning/datasets/phase_of_matter/_heisenberg.py b/qiskit_machine_learning/datasets/phase_of_matter/_heisenberg.py
new file mode 100644
index 000000000..1a9c4b020
--- /dev/null
+++ b/qiskit_machine_learning/datasets/phase_of_matter/_heisenberg.py
@@ -0,0 +1,85 @@
+# This code is part of a Qiskit project.
+#
+# (C) Copyright IBM 2019, 2026.
+#
+# This code is licensed under the Apache License, Version 2.0. You may
+# obtain a copy of this license in the LICENSE.txt file in the root directory
+# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
+#
+# Any modifications or derivative works of this code must retain this
+# copyright notice, and modified files need to carry a notice indicating
+# that they have been altered from the originals.
+
+"""Bond-alternating XXX Heisenberg Hamiltonian and phase sampler.
+
+Reference: Bermejo et al., arXiv:2408.12739, eq. (6).
+"""
+
+from __future__ import annotations
+
+import numpy as np
+from qiskit.quantum_info import SparsePauliOp
+
+from ._base import pauli_term
+
+#: Ordered list of phase labels for the Heisenberg model.
+PHASE_LABELS: list[str] = ["trivial", "topological"]
+
+
+def build_hamiltonian(n: int, j1: float, j2: float) -> SparsePauliOp:
+    r"""Bond-alternating XXX Heisenberg Hamiltonian (Paper eq. 6).
+
+    .. math::
+
+        H = \sum_{i=1}^{n-1} J_i
+            \left( X_i X_{i+1} + Y_i Y_{i+1} + Z_i Z_{i+1} \right)
+
+    where :math:`J_i = J_1` for odd *i* and :math:`J_i = J_2` for even *i*
+    (1-indexed, so the first bond carries :math:`J_1`), with :math:`J_1, J_2 \geq 0`.
+
+    Phase diagram (thermodynamic limit):
+
+    * **trivial** — :math:`J_2 / J_1 < 1`
+    * **topological** — :math:`J_2 / J_1 > 1`
+
+    Args:
+        n: Number of lattice sites (qubits).
+        j1: Coupling on 0-indexed bonds (0,1), (2,3), ... (:math:`J_1 \geq 0`).
+        j2: Coupling on 0-indexed bonds (1,2), (3,4), ... (:math:`J_2 \geq 0`).
+
+    Returns:
+        SparsePauliOp for the Hamiltonian on *n* qubits.
+    """
+    terms: list[SparsePauliOp] = []
+    for i in range(n - 1):
+        # 0-indexed bond i (sites i, i+1): even i -> j1.  NOTE(review): confirm vs eq. (6)
+        j = j1 if (i % 2 == 0) else j2
+        for pauli in ("X", "Y", "Z"):
+            terms.append(j * pauli_term([(pauli, i), (pauli, i + 1)], n))
+    return SparsePauliOp.sum(terms).simplify()
+
+
+def sample_parameters(n_samples: int, rng: np.random.Generator) -> list[tuple[dict, str]]:
+    """Sample coupling parameters uniformly from the interior of each phase.
+
+    Parameters are drawn well away from the phase boundary (:math:`J_2/J_1 = 1`)
+    to ensure clean labels.  ``j1`` is fixed at 1.0, so ``j2`` equals the ratio.
+
+    Args:
+        n_samples: Number of samples to draw *per class*.
+        rng: NumPy Generator, consumed in a fixed order (trivial, then topological).
+
+    Returns:
+        List of ``({"j1": 1.0, "j2": ...}, phase_label)`` tuples: *n_samples*
+        consecutive entries for each phase in :data:`PHASE_LABELS`, in order.
+    """
+    samples: list[tuple[dict, str]] = []
+    # trivial: J2/J1 ∈ [0.0, 0.8)  —  fix J1 = 1.0
+    ratios = rng.uniform(0.0, 0.8, size=n_samples)
+    for r in ratios:
+        samples.append(({"j1": 1.0, "j2": float(r)}, "trivial"))
+    # topological: J2/J1 ∈ [1.2, 3.0)  —  fix J1 = 1.0
+    ratios = rng.uniform(1.2, 3.0, size=n_samples)
+    for r in ratios:
+        samples.append(({"j1": 1.0, "j2": float(r)}, "topological"))
+    return samples
diff --git a/qiskit_machine_learning/datasets/phase_of_matter/phase_of_matter.py b/qiskit_machine_learning/datasets/phase_of_matter/phase_of_matter.py
new file mode 100644
index 000000000..236688e9d
--- /dev/null
+++ b/qiskit_machine_learning/datasets/phase_of_matter/phase_of_matter.py
@@ -0,0 +1,270 @@
+# This code is part of a Qiskit project.
+#
+# (C) Copyright IBM 2019, 2026.
+#
+# This code is licensed under the Apache License, Version 2.0. You may
+# obtain a copy of this license in the LICENSE.txt file in the root directory
+# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
+#
+# Any modifications or derivative works of this code must retain this
+# copyright notice, and modified files need to carry a notice indicating
+# that they have been altered from the originals.
+
+"""Public API for the Phase of Matter dataset generator."""
+
+from __future__ import annotations
+
+import math
+
+import numpy as np
+from qiskit.quantum_info import SparsePauliOp, Statevector
+
+from . import _annni, _cluster, _haldane, _heisenberg
+from ._base import get_ground_state_exact, get_ground_state_vqe
+
+# ---------------------------------------------------------------------------
+# Registry — model name -> sampler module, and model name -> Hamiltonian builder
+# ---------------------------------------------------------------------------
+
+_MODELS = {  # each module provides PHASE_LABELS and sample_parameters
+    "heisenberg": _heisenberg,
+    "haldane": _haldane,
+    "annni": _annni,
+    "cluster": _cluster,
+}
+
+_BUILDERS = {  # (n, params_dict) -> SparsePauliOp; optional scale args keep defaults
+    "heisenberg": lambda n, p: _heisenberg.build_hamiltonian(n, p["j1"], p["j2"]),
+    "haldane": lambda n, p: _haldane.build_hamiltonian(n, p["h1"], p["h2"]),
+    "annni": lambda n, p: _annni.build_hamiltonian(n, p["kappa"], p["h"]),
+    "cluster": lambda n, p: _cluster.build_hamiltonian(n, p["j1"], p["j2"]),
+}
+
+
+# ---------------------------------------------------------------------------
+# Public function
+# ---------------------------------------------------------------------------
+
+
+def phase_of_matter_data(
+    training_size: int,
+    test_size: int,
+    n: int,
+    *,
+    model: str = "heisenberg",
+    one_hot: bool = True,
+    include_sample_total: bool = False,
+    class_labels: list | None = None,
+    formatting: str = "ndarray",
+    seed: int | None = None,
+    backend=None,
+) -> (
+    tuple[np.ndarray | list[Statevector], np.ndarray, np.ndarray | list[Statevector], np.ndarray]
+    | tuple[
+        np.ndarray | list[Statevector],
+        np.ndarray,
+        np.ndarray | list[Statevector],
+        np.ndarray,
+        np.ndarray,
+    ]
+):
+    r"""Generate a quantum Phase of Matter classification dataset.
+
+    For each sample, coupling parameters are drawn uniformly from the interior
+    of a known phase region, the corresponding Hamiltonian is built as a
+    :class:`~qiskit.quantum_info.SparsePauliOp`, and its ground state is
+    computed via sparse exact diagonalization.  The ground-state vector forms
+    the feature, and the phase name forms the label.
+
+    Four spin-chain Hamiltonians are supported (see the reference for the
+    exact definitions and phase diagrams):
+
+    * ``"heisenberg"`` — Bond-alternating XXX Heisenberg model (eq. 6).
+      Phases: *trivial*, *topological*.
+    * ``"haldane"`` — Haldane chain (eq. 7).
+      Phases: *antiferromagnetic*, *paramagnetic*, *spt*.
+    * ``"annni"`` — Axial Next-Nearest-Neighbor Ising model (eq. 8).
+      Phases: *ferromagnetic*, *paramagnetic*, *floating*, *antiphase*.
+    * ``"cluster"`` — Cluster Hamiltonian with periodic boundary (eq. 9).
+      Phases: *haldane*, *ferromagnetic*, *antiferromagnetic*, *trivial*.
+
+    Args:
+        training_size: Total number of training samples (balanced across
+            classes).
+        test_size: Total number of test samples (balanced across classes).
+        n: Number of lattice sites (qubits).  Must be ≥ 4.  The feature
+            dimension is :math:`2^n`; practical limit for exact
+            diagonalization is ``n ≤ 16``.
+        model: Hamiltonian to use.  One of ``"heisenberg"``, ``"haldane"``,
+            ``"annni"``, ``"cluster"``.
+        one_hot: If ``True`` (default), labels are one-hot encoded numpy
+            arrays.  If ``False``, string phase names are returned.
+        include_sample_total: If ``True``, a fifth element is appended to the
+            return tuple with the number of ground states computed per class.
+        class_labels: Optional list of custom label names that replace the
+            model's default phase names.  Length must equal the number of
+            phases for the chosen model.
+        formatting: ``"ndarray"`` (default) returns features as a complex
+            numpy array of shape ``(num_samples, 2**n)``.
+            ``"statevector"`` returns a list of
+            :class:`~qiskit.quantum_info.Statevector` objects.
+        seed: Integer seed for the parameter-sampling RNG, enabling
+            reproducible datasets.
+        backend: When ``None`` (default), exact diagonalization via
+            ``scipy.sparse.linalg.eigsh`` is used — the recommended path for
+            reliable phase labels.  When a Qiskit backend is provided, a
+            VQE-based approximation is used instead.
+
+            .. warning::
+
+                The VQE pathway is for hardware-experiment workflows only.
+                VQE approximations near phase boundaries may produce
+                incorrect labels.  Use ``backend=None`` for dataset
+                generation.
+
+    Returns:
+        A tuple ``(training_features, training_labels, test_features,
+        test_labels)`` where:
+
+        * ``training_features`` / ``test_features`` — shape
+          ``(n_samples, 2**n)`` complex ndarray, or list of
+          :class:`~qiskit.quantum_info.Statevector` when
+          ``formatting="statevector"``.
+        * ``training_labels`` / ``test_labels`` — shape
+          ``(n_samples, n_classes)`` one-hot ndarray when ``one_hot=True``,
+          or list of strings when ``one_hot=False``.
+
+        If ``include_sample_total=True``, a fifth element — a numpy array of
+        shape ``(n_classes,)`` containing the number of ground states
+        computed per class — is appended.
+
+    Raises:
+        ValueError: If *model* is not one of the supported strings.
+        ValueError: If *formatting* is not ``"ndarray"`` or
+            ``"statevector"``.
+        ValueError: If ``n < 4``.
+        ValueError: If *class_labels* is provided but has the wrong length.
+
+    References:
+        [1] Bermejo et al., "Quantum Convolutional Neural Networks are
+        (Effectively) Classically Simulable", arXiv:2408.12739 (2024).
+
+    Examples:
+
+        >>> x_tr, y_tr, x_te, y_te = phase_of_matter_data(
+        ...     10, 5, 4, model="heisenberg", seed=0
+        ... )
+        >>> x_tr.shape
+        (10, 16)
+        >>> y_tr.shape
+        (10, 2)
+    """
+    if model not in _MODELS:
+        raise ValueError(f"Unknown model '{model}'. Choose from: {sorted(_MODELS.keys())}.")
+    if formatting not in ("ndarray", "statevector"):
+        raise ValueError(f"Unknown formatting '{formatting}'. Choose 'ndarray' or 'statevector'.")
+    if n < 4:
+        raise ValueError(f"n must be at least 4, got {n}.")
+
+    module = _MODELS[model]
+    default_labels: list[str] = module.PHASE_LABELS
+    n_classes = len(default_labels)
+
+    if class_labels is not None:
+        if len(class_labels) != n_classes:
+            raise ValueError(
+                f"class_labels has {len(class_labels)} entries but model '{model}' "
+                f"has {n_classes} phases."
+            )
+        label_names = list(class_labels)
+    else:
+        label_names = list(default_labels)
+
+    rng = np.random.default_rng(seed)
+
+    # ceil makes the per-class blocks cover at least the requested total even
+    # when training_size / test_size are not divisible by n_classes.
+    n_per_class_train = math.ceil(training_size / n_classes)
+    n_per_class_test = math.ceil(test_size / n_classes)
+    n_per_class = n_per_class_train + n_per_class_test
+
+    # Samplers return blocks of n_per_class per class, class order preserved.
+    raw_samples = module.sample_parameters(n_per_class, rng)
+
+    build_fn = _BUILDERS[model]
+    gs_fn = (
+        (lambda h: get_ground_state_vqe(h, backend))
+        if backend is not None
+        else get_ground_state_exact
+    )
+
+    # Compute ground states — preserve class-block order for the split below.
+    all_states: list[np.ndarray] = []
+    all_labels: list[str] = []
+    for params, phase in raw_samples:
+        H: SparsePauliOp = build_fn(n, params)
+        gs = gs_fn(H)
+        if isinstance(gs, Statevector):
+            gs = gs.data
+        all_states.append(gs)
+        idx = default_labels.index(phase)
+        all_labels.append(label_names[idx])
+
+    # Split per class into train / test.
+    train_states: list[np.ndarray] = []
+    train_labels_raw: list[str] = []
+    test_states: list[np.ndarray] = []
+    test_labels_raw: list[str] = []
+    sample_totals = np.zeros(n_classes, dtype=int)
+
+    for cls_idx in range(n_classes):
+        start = cls_idx * n_per_class
+        cls_states = all_states[start : start + n_per_class]
+        cls_labels = all_labels[start : start + n_per_class]
+        train_states.extend(cls_states[:n_per_class_train])
+        train_labels_raw.extend(cls_labels[:n_per_class_train])
+        test_states.extend(cls_states[n_per_class_train:])
+        test_labels_raw.extend(cls_labels[n_per_class_train:])
+        sample_totals[cls_idx] = n_per_class
+
+    # Trim to exact requested sizes (ceil may over-allocate by up to n_classes-1).
+    train_states = train_states[:training_size]
+    train_labels_raw = train_labels_raw[:training_size]
+    test_states = test_states[:test_size]
+    test_labels_raw = test_labels_raw[:test_size]
+
+    # Shuffle train and test independently to interleave classes.
+    tr_idx = np.arange(len(train_states))
+    rng.shuffle(tr_idx)
+    te_idx = np.arange(len(test_states))
+    rng.shuffle(te_idx)
+    train_states = [train_states[i] for i in tr_idx]
+    train_labels_raw = [train_labels_raw[i] for i in tr_idx]
+    test_states = [test_states[i] for i in te_idx]
+    test_labels_raw = [test_labels_raw[i] for i in te_idx]
+
+    # Format features.
+    if formatting == "ndarray":
+        x_train: np.ndarray | list[Statevector] = np.array(train_states)
+        x_test: np.ndarray | list[Statevector] = np.array(test_states)
+    else:
+        x_train = [Statevector(s) for s in train_states]
+        x_test = [Statevector(s) for s in test_states]
+
+    # Format labels.
+    label_to_idx = {lbl: i for i, lbl in enumerate(label_names)}
+
+    def _make_labels(raw: list[str]) -> np.ndarray:
+        if one_hot:
+            mat = np.zeros((len(raw), n_classes), dtype=float)
+            for row, lbl in enumerate(raw):
+                mat[row, label_to_idx[lbl]] = 1.0
+            return mat
+        return np.array(raw)
+
+    y_train = _make_labels(train_labels_raw)
+    y_test = _make_labels(test_labels_raw)
+
+    if include_sample_total:
+        return x_train, y_train, x_test, y_test, sample_totals
+    return x_train, y_train, x_test, y_test
diff --git a/test/datasets/test_phase_of_matter.py b/test/datasets/test_phase_of_matter.py
new file mode 100644
index 000000000..ab0e9e988
--- /dev/null
+++ b/test/datasets/test_phase_of_matter.py
@@ -0,0 +1,397 @@
+# This code is part of a Qiskit project.
+#
+# (C) Copyright IBM 2019, 2026.
+#
+# This code is licensed under the Apache License, Version 2.0. You may
+# obtain a copy of this license in the LICENSE.txt file in the root directory
+# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
+#
+# Any modifications or derivative works of this code must retain this
+# copyright notice, and modified files need to carry a notice indicating
+# that they have been altered from the originals.
+
+"""Tests for the Phase of Matter dataset generator.
+
+Follows qiskit-machine-learning test conventions:
+  - QiskitMachineLearningTestCase base class
+  - ddt / @idata / @unpack for parameterised tests
+  - np.testing.assert_* for array assertions
+"""
+
+from __future__ import annotations
+
+import unittest
+
+import numpy as np
+from ddt import ddt, idata, unpack
+from qiskit.quantum_info import Statevector
+from test import QiskitMachineLearningTestCase
+
+from qiskit_machine_learning.datasets import phase_of_matter_data
+from qiskit_machine_learning.datasets.phase_of_matter._annni import (
+    build_hamiltonian as build_annni,
+)
+from qiskit_machine_learning.datasets.phase_of_matter._base import get_ground_state_exact
+from qiskit_machine_learning.datasets.phase_of_matter._cluster import (
+    build_hamiltonian as build_cluster,
+)
+from qiskit_machine_learning.datasets.phase_of_matter._haldane import (
+    build_hamiltonian as build_haldane,
+)
+from qiskit_machine_learning.datasets.phase_of_matter._heisenberg import (
+    build_hamiltonian as build_heisenberg,
+)
+
+# ---------------------------------------------------------------------------
+# Helper
+# ---------------------------------------------------------------------------
+
+
+def _is_hermitian(op, atol: float = 1e-10) -> bool:
+    mat = op.to_matrix()
+    return np.allclose(mat, mat.conj().T, atol=atol)
+
+
+# ---------------------------------------------------------------------------
+# TestHamiltonianBuilders
+# ---------------------------------------------------------------------------
+
+
+@ddt
+class TestHamiltonianBuilders(QiskitMachineLearningTestCase):
+    """Verify that each Hamiltonian builder returns a valid Hermitian operator."""
+
+    @idata([(4,), (6,)])
+    @unpack
+    def test_heisenberg_hermitian(self, n):
+        H = build_heisenberg(n, j1=1.0, j2=0.5)
+        self.assertTrue(_is_hermitian(H), f"Heisenberg n={n} is not Hermitian")
+
+    @idata([(4,), (6,)])
+    @unpack
+    def test_haldane_hermitian(self, n):
+        H = build_haldane(n, h1=0.5, h2=0.3)
+        self.assertTrue(_is_hermitian(H), f"Haldane n={n} is not Hermitian")
+
+    @idata([(4,), (6,)])
+    @unpack
+    def test_annni_hermitian(self, n):
+        H = build_annni(n, kappa=0.3, h=0.5)
+        self.assertTrue(_is_hermitian(H), f"ANNNI n={n} is not Hermitian")
+
+    @idata([(4,), (6,)])
+    @unpack
+    def test_cluster_hermitian(self, n):
+        H = build_cluster(n, j1=1.0, j2=-1.0)
+        self.assertTrue(_is_hermitian(H), f"Cluster n={n} is not Hermitian")
+
+    def test_cluster_periodic_boundary(self):
+        """Cluster Hamiltonian must have more terms than diagonal Z terms alone."""
+        n = 4
+        H = build_cluster(n, j1=1.0, j2=1.0)
+        # n Z terms + n XX two-body terms + n ZXZ three-body terms = 3n unique terms minimum
+        self.assertGreater(len(H), n)
+
+    def test_matrix_dimension(self):
+        """All models should produce a 2^n × 2^n matrix for n=4."""
+        n = 4
+        dim = 2**n
+        builders = [
+            build_heisenberg(n, 1.0, 0.5),
+            build_haldane(n, 0.5, 0.3),
+            build_annni(n, 0.3, 0.5),
+            build_cluster(n, 1.0, -1.0),
+        ]
+        for H in builders:
+            mat = H.to_matrix()
+            self.assertEqual(mat.shape, (dim, dim))
+
+
+# ---------------------------------------------------------------------------
+# TestGroundState
+# ---------------------------------------------------------------------------
+
+
+@ddt
+class TestGroundState(QiskitMachineLearningTestCase):
+    """Verify exact-diagonalization ground-state properties."""
+
+    def _fixed_hamiltonian(self, model: str, n: int):
+        params = {
+            "heisenberg": (n, 1.0, 0.5),
+            "haldane": (n, 0.5, 0.3),
+            "annni": (n, 0.3, 0.5),
+            "cluster": (n, 1.0, -1.0),
+        }
+        builders = {
+            "heisenberg": build_heisenberg,
+            "haldane": build_haldane,
+            "annni": build_annni,
+            "cluster": build_cluster,
+        }
+        return builders[model](*params[model])
+
+    @idata([("heisenberg",), ("haldane",), ("annni",), ("cluster",)])
+    @unpack
+    def test_normalization(self, model):
+        H = self._fixed_hamiltonian(model, n=4)
+        gs = get_ground_state_exact(H)
+        self.assertAlmostEqual(
+            np.linalg.norm(gs), 1.0, places=8, msg=f"{model} ground state is not normalised"
+        )
+
+    @idata([("heisenberg",), ("haldane",), ("annni",), ("cluster",)])
+    @unpack
+    def test_is_eigenstate(self, model):
+        """H|ψ⟩ must equal E|ψ⟩ up to numerical noise."""
+        H = self._fixed_hamiltonian(model, n=4)
+        gs = get_ground_state_exact(H)
+        mat = H.to_matrix()
+        h_psi = mat @ gs
+        energy = np.dot(gs.conj(), h_psi).real
+        residual = np.linalg.norm(h_psi - energy * gs)
+        self.assertLess(residual, 1e-8, msg=f"{model} eigenstate residual {residual:.2e}")
+
+    def test_lowest_eigenvalue(self):
+        """Energy from eigsh must match the minimum from np.linalg.eigh."""
+        H = build_heisenberg(4, j1=1.0, j2=2.0)
+        gs = get_ground_state_exact(H)
+        mat = H.to_matrix()
+        e_eigsh = (gs.conj() @ mat @ gs).real
+        e_min = np.linalg.eigvalsh(mat).min()
+        self.assertAlmostEqual(e_eigsh, e_min, places=8)
+
+
+# ---------------------------------------------------------------------------
+# TestPhaseLabels
+# ---------------------------------------------------------------------------
+
+
+@ddt
+class TestPhaseLabels(QiskitMachineLearningTestCase):
+    """Verify that phase-sampling regions produce correct labels."""
+
+    @idata(
+        [
+            (0.2, "trivial"),
+            (2.5, "topological"),
+        ]
+    )
+    @unpack
+    def test_heisenberg_phase_region(
+        self, j2_ratio, expected_label
+    ):  # pylint: disable=unused-argument
+        """Heisenberg labels sampled far from boundary must include expected phase."""
+        # j2_ratio is the parameter value used in the docstring example but the
+        # sampler draws from fixed interior regions; we verify that both phases
+        # appear across a dataset generated from the full interior.
+        _, y, _, _ = phase_of_matter_data(20, 4, 4, model="heisenberg", one_hot=False, seed=0)
+        self.assertIn(expected_label, set(y), msg=f"Label '{expected_label}' missing from dataset")
+
+    @idata(
+        [
+            ("ferromagnetic",),
+            ("paramagnetic",),
+            ("floating",),
+            ("antiphase",),
+        ]
+    )
+    @unpack
+    def test_annni_all_phases_present(self, phase):
+        _, y, _, _ = phase_of_matter_data(40, 8, 4, model="annni", one_hot=False, seed=42)
+        self.assertIn(phase, set(y), msg=f"ANNNI phase '{phase}' missing from dataset")
+
+    @idata(
+        [
+            ("haldane",),
+            ("ferromagnetic",),
+            ("antiferromagnetic",),
+            ("trivial",),
+        ]
+    )
+    @unpack
+    def test_cluster_all_phases_present(self, phase):
+        _, y, _, _ = phase_of_matter_data(40, 8, 4, model="cluster", one_hot=False, seed=42)
+        self.assertIn(phase, set(y), msg=f"Cluster phase '{phase}' missing from dataset")
+
+    @idata(
+        [
+            ("antiferromagnetic",),
+            ("paramagnetic",),
+            ("spt",),
+        ]
+    )
+    @unpack
+    def test_haldane_all_phases_present(self, phase):
+        _, y, _, _ = phase_of_matter_data(30, 6, 4, model="haldane", one_hot=False, seed=42)
+        self.assertIn(phase, set(y), msg=f"Haldane phase '{phase}' missing from dataset")
+
+
+# ---------------------------------------------------------------------------
+# TestPublicAPI
+# ---------------------------------------------------------------------------
+
+
+@ddt
+class TestPublicAPI(QiskitMachineLearningTestCase):
+    """Verify the shape and type contracts of phase_of_matter_data."""
+
+    @idata(
+        [
+            ("heisenberg", 2),
+            ("haldane", 3),
+            ("annni", 4),
+            ("cluster", 4),
+        ]
+    )
+    @unpack
+    def test_return_shapes_ndarray(self, model, n_classes):
+        x_tr, y_tr, x_te, y_te = phase_of_matter_data(8, 4, 4, model=model, one_hot=True, seed=0)
+        np.testing.assert_array_equal(x_tr.shape, (8, 16))
+        np.testing.assert_array_equal(y_tr.shape, (8, n_classes))
+        np.testing.assert_array_equal(x_te.shape, (4, 16))
+        np.testing.assert_array_equal(y_te.shape, (4, n_classes))
+
+    @idata([("heisenberg",), ("annni",)])
+    @unpack
+    def test_return_shapes_statevector(self, model):
+        x_tr, _, x_te, _ = phase_of_matter_data(
+            4, 2, 4, model=model, formatting="statevector", seed=0
+        )
+        self.assertEqual(len(x_tr), 4)
+        self.assertEqual(len(x_te), 2)
+        self.assertIsInstance(x_tr[0], Statevector)
+        self.assertAlmostEqual(np.linalg.norm(x_tr[0].data), 1.0, places=6)
+
+    def test_one_hot_true_sums_to_one(self):
+        _, y_tr, _, _ = phase_of_matter_data(8, 4, 4, model="heisenberg", one_hot=True, seed=0)
+        np.testing.assert_array_equal(y_tr.sum(axis=1), np.ones(8))
+
+    def test_one_hot_false_returns_strings(self):
+        _, y_tr, _, _ = phase_of_matter_data(8, 4, 4, model="heisenberg", one_hot=False, seed=0)
+        self.assertTrue(all(isinstance(lbl, str) for lbl in y_tr))
+        self.assertTrue(set(y_tr).issubset({"trivial", "topological"}))
+
+    def test_include_sample_total_false(self):
+        result = phase_of_matter_data(4, 2, 4, model="heisenberg", seed=0)
+        self.assertEqual(len(result), 4)
+
+    def test_include_sample_total_true(self):
+        result = phase_of_matter_data(
+            4, 2, 4, model="heisenberg", include_sample_total=True, seed=0
+        )
+        self.assertEqual(len(result), 5)
+        totals = result[4]
+        self.assertEqual(totals.shape, (2,))  # 2 classes for heisenberg
+        self.assertTrue(np.all(totals > 0))
+
+    def test_custom_class_labels(self):
+        _, y_tr, _, _ = phase_of_matter_data(
+            8, 4, 4, model="heisenberg", one_hot=False, class_labels=["phase_A", "phase_B"], seed=0
+        )
+        self.assertTrue(set(y_tr).issubset({"phase_A", "phase_B"}))
+
+    def test_custom_class_labels_one_hot(self):
+        """Custom labels must not affect one-hot shape or values."""
+        _, y1, _, _ = phase_of_matter_data(8, 4, 4, model="heisenberg", one_hot=True, seed=0)
+        _, y2, _, _ = phase_of_matter_data(
+            8, 4, 4, model="heisenberg", one_hot=True, class_labels=["A", "B"], seed=0
+        )
+        np.testing.assert_array_equal(y1, y2)
+
+    def test_feature_normalization(self):
+        """All returned ground states must be normalised."""
+        x_tr, _, x_te, _ = phase_of_matter_data(8, 4, 4, model="annni", seed=1)
+        for states in (x_tr, x_te):
+            norms = np.linalg.norm(states, axis=1)
+            np.testing.assert_allclose(
+                norms, 1.0, atol=1e-8, err_msg="Ground states are not normalised"
+            )
+
+    def test_seed_reproducibility(self):
+        """Same seed must produce numerically identical outputs.
+
+        Features are complex floating-point arrays; we use allclose with a
+        tight tolerance (1e-10) to allow for sub-machine-precision noise in
+        the ARPACK eigensolver while still catching meaningful differences.
+        """
+        kwargs = dict(model="heisenberg", seed=99)
+        x1, y1, xt1, yt1 = phase_of_matter_data(6, 3, 4, **kwargs)
+        x2, y2, xt2, yt2 = phase_of_matter_data(6, 3, 4, **kwargs)
+        np.testing.assert_allclose(
+            x1, x2, atol=1e-10, err_msg="train features differ across equal seeds"
+        )
+        np.testing.assert_array_equal(y1, y2)
+        np.testing.assert_allclose(
+            xt1, xt2, atol=1e-10, err_msg="test features differ across equal seeds"
+        )
+        np.testing.assert_array_equal(yt1, yt2)
+
+    def test_different_seeds_differ(self):
+        """Different seeds should (almost certainly) produce different data."""
+        x1, _, _, _ = phase_of_matter_data(8, 4, 4, model="heisenberg", seed=1)
+        x2, _, _, _ = phase_of_matter_data(8, 4, 4, model="heisenberg", seed=2)
+        self.assertFalse(np.allclose(x1, x2))
+
+    def test_train_test_sizes_respected(self):
+        """Exact training_size / test_size must be honoured."""
+        for tr, te in [(10, 3), (7, 7), (1, 1)]:
+            x_tr, _, x_te, _ = phase_of_matter_data(tr, te, 4, model="heisenberg", seed=0)
+            self.assertEqual(len(x_tr), tr, f"train size mismatch (requested {tr})")
+            self.assertEqual(len(x_te), te, f"test size mismatch (requested {te})")
+
+    # -----------------------------------------------------------------------
+    # Error cases
+    # -----------------------------------------------------------------------
+
+    def test_invalid_model_raises(self):
+        with self.assertRaises(ValueError):
+            phase_of_matter_data(4, 2, 4, model="invalid")
+
+    def test_invalid_formatting_raises(self):
+        with self.assertRaises(ValueError):
+            phase_of_matter_data(4, 2, 4, model="heisenberg", formatting="bad")
+
+    def test_n_too_small_raises(self):
+        with self.assertRaises(ValueError):
+            phase_of_matter_data(4, 2, 3, model="heisenberg")
+
+    def test_wrong_class_labels_length_raises(self):
+        with self.assertRaises(ValueError):
+            phase_of_matter_data(4, 2, 4, model="heisenberg", class_labels=["only_one"])
+
+
+# ---------------------------------------------------------------------------
+# Integration — import paths
+# ---------------------------------------------------------------------------
+
+
+class TestImportPaths(QiskitMachineLearningTestCase):
+    """Verify the package can be imported and is correctly wired up."""
+
+    def test_importable(self):
+        import qiskit_machine_learning.datasets as ds  # pylint: disable=import-outside-toplevel
+
+        self.assertIsNotNone(ds.phase_of_matter_data)
+
+    def test_in_all(self):
+        import qiskit_machine_learning.datasets as ds  # pylint: disable=import-outside-toplevel
+
+        self.assertIn("phase_of_matter_data", ds.__all__)
+
+    def test_hamiltonian_modules_importable(self):
+        from qiskit_machine_learning.datasets.phase_of_matter import (  # pylint: disable=import-outside-toplevel
+            _annni,
+            _cluster,
+            _haldane,
+            _heisenberg,
+        )
+
+        for mod in (_heisenberg, _haldane, _annni, _cluster):
+            self.assertTrue(hasattr(mod, "build_hamiltonian"))
+            self.assertTrue(hasattr(mod, "sample_parameters"))
+            self.assertTrue(hasattr(mod, "PHASE_LABELS"))
+
+
+if __name__ == "__main__":
+    unittest.main()

From 63a03e5ebfa57c55b280da053b5705225f16cc76 Mon Sep 17 00:00:00 2001
From: Shailesh K <shailesh.mkvr@gmail.com>
Date: Wed, 13 May 2026 06:58:36 +0530
Subject: [PATCH 2/6] Fix CI: copyright, pylint naming/docstrings/spelling,
 mypy, import order

- Add STFC copyright header to all 8 new files
- Rename variable H -> ham throughout (C0103 invalid-name)
- Add docstrings to all test methods (C0116 missing-function-docstring)
- Move 'from test import' before third-party imports (C0411 wrong-import-order)
- Add :type backend: object to fix W9016 missing-type-doc in two functions
- Replace British spellings with American: normalised, optimisation, mislabelled
- Replace Greek chars in inline comments with ASCII equivalents
- Add Callable type annotation to _fixed_hamiltonian to fix mypy operator error
- Add domain-specific words to .pylintdict for spell check
---
 .pylintdict                                   | 19 ++++
 .../datasets/phase_of_matter/__init__.py      |  1 +
 .../datasets/phase_of_matter/_annni.py        | 19 ++--
 .../datasets/phase_of_matter/_base.py         | 14 ++-
 .../datasets/phase_of_matter/_cluster.py      |  3 +-
 .../datasets/phase_of_matter/_haldane.py      |  1 +
 .../datasets/phase_of_matter/_heisenberg.py   |  1 +
 .../phase_of_matter/phase_of_matter.py        |  9 +-
 test/datasets/test_phase_of_matter.py         | 92 ++++++++++++-------
 9 files changed, 108 insertions(+), 51 deletions(-)

diff --git a/.pylintdict b/.pylintdict
index 639391e78..9c9c22b3f 100644
--- a/.pylintdict
+++ b/.pylintdict
@@ -676,3 +676,22 @@ zz
 φ_ij
 Δ
 π
+annni
+antiphase
+antiferromagnetic
+canonicalize
+diagonalization
+eigsh
+ferromagnetic
+haldane
+hamiltonian
+hamiltonians
+heisenberg
+kappa
+lattice
+paramagnetic
+pauli
+paulis
+spt
+topological
+trivial
diff --git a/qiskit_machine_learning/datasets/phase_of_matter/__init__.py b/qiskit_machine_learning/datasets/phase_of_matter/__init__.py
index 70dd4dab8..6690d2c8b 100644
--- a/qiskit_machine_learning/datasets/phase_of_matter/__init__.py
+++ b/qiskit_machine_learning/datasets/phase_of_matter/__init__.py
@@ -1,6 +1,7 @@
 # This code is part of a Qiskit project.
 #
 # (C) Copyright IBM 2019, 2026.
+# (C) Copyright UKRI-STFC (Hartree Centre) 2024, 2026.
 #
 # This code is licensed under the Apache License, Version 2.0. You may
 # obtain a copy of this license in the LICENSE.txt file in the root directory
diff --git a/qiskit_machine_learning/datasets/phase_of_matter/_annni.py b/qiskit_machine_learning/datasets/phase_of_matter/_annni.py
index 9b6c4e566..1c15e9056 100644
--- a/qiskit_machine_learning/datasets/phase_of_matter/_annni.py
+++ b/qiskit_machine_learning/datasets/phase_of_matter/_annni.py
@@ -1,6 +1,7 @@
 # This code is part of a Qiskit project.
 #
 # (C) Copyright IBM 2019, 2026.
+# (C) Copyright UKRI-STFC (Hartree Centre) 2024, 2026.
 #
 # This code is licensed under the Apache License, Version 2.0. You may
 # obtain a copy of this license in the LICENSE.txt file in the root directory
@@ -40,10 +41,10 @@ def build_hamiltonian(n: int, kappa: float, h: float, j1: float = 1.0) -> Sparse
     Phase diagram (see Fig. 5 in the reference, axes :math:`\kappa` vs
     :math:`h` with :math:`J_1 = 1`):
 
-    * **ferromagnetic** (I) — small :math:`\kappa`, small :math:`h`
-    * **paramagnetic** (II) — small :math:`\kappa`, large :math:`h`
-    * **floating** (III) — large :math:`\kappa`, moderate :math:`h`
-    * **antiphase** (IV) — large :math:`\kappa`, small :math:`h`
+    * **ferromagnetic** (I) -- small :math:`\kappa`, small :math:`h`
+    * **paramagnetic** (II) -- small :math:`\kappa`, large :math:`h`
+    * **floating** (III) -- large :math:`\kappa`, moderate :math:`h`
+    * **antiphase** (IV) -- large :math:`\kappa`, small :math:`h`
 
     Args:
         n: Number of lattice sites (qubits).
@@ -70,7 +71,7 @@ def sample_parameters(n_samples: int, rng: np.random.Generator) -> list[tuple[di
     """Sample coupling parameters uniformly from the interior of each phase.
 
     Sampling regions (see Fig. 5 in the reference) are placed well inside
-    each phase to avoid mislabelled points near boundaries.
+    each phase to avoid mislabeled points near boundaries.
 
     Args:
         n_samples: Number of samples to draw *per class*.
@@ -81,22 +82,22 @@ def sample_parameters(n_samples: int, rng: np.random.Generator) -> list[tuple[di
         *n_samples* entries for each phase in :data:`PHASE_LABELS`, in order.
     """
     samples: list[tuple[dict, str]] = []
-    # ferromagnetic (I): κ ∈ (0, 0.3), h ∈ (0, 0.25)
+    # ferromagnetic (I): kappa in (0, 0.3), h in (0, 0.25)
     ks = rng.uniform(0.0, 0.3, size=n_samples)
     hs = rng.uniform(0.0, 0.25, size=n_samples)
     for k, hv in zip(ks, hs):
         samples.append(({"kappa": float(k), "h": float(hv)}, "ferromagnetic"))
-    # paramagnetic (II): κ ∈ (0, 0.45), h ∈ (0.9, 1.5)
+    # paramagnetic (II): kappa in (0, 0.45), h in (0.9, 1.5)
     ks = rng.uniform(0.0, 0.45, size=n_samples)
     hs = rng.uniform(0.9, 1.5, size=n_samples)
     for k, hv in zip(ks, hs):
         samples.append(({"kappa": float(k), "h": float(hv)}, "paramagnetic"))
-    # floating (III): κ ∈ (0.55, 0.9), h ∈ (0.25, 0.65)
+    # floating (III): kappa in (0.55, 0.9), h in (0.25, 0.65)
     ks = rng.uniform(0.55, 0.9, size=n_samples)
     hs = rng.uniform(0.25, 0.65, size=n_samples)
     for k, hv in zip(ks, hs):
         samples.append(({"kappa": float(k), "h": float(hv)}, "floating"))
-    # antiphase (IV): κ ∈ (0.55, 0.9), h ∈ (0, 0.1)
+    # antiphase (IV): kappa in (0.55, 0.9), h in (0, 0.1)
     ks = rng.uniform(0.55, 0.9, size=n_samples)
     hs = rng.uniform(0.0, 0.1, size=n_samples)
     for k, hv in zip(ks, hs):
diff --git a/qiskit_machine_learning/datasets/phase_of_matter/_base.py b/qiskit_machine_learning/datasets/phase_of_matter/_base.py
index 8eebc8ca5..5a45053a0 100644
--- a/qiskit_machine_learning/datasets/phase_of_matter/_base.py
+++ b/qiskit_machine_learning/datasets/phase_of_matter/_base.py
@@ -1,6 +1,7 @@
 # This code is part of a Qiskit project.
 #
 # (C) Copyright IBM 2019, 2026.
+# (C) Copyright UKRI-STFC (Hartree Centre) 2024, 2026.
 #
 # This code is licensed under the Apache License, Version 2.0. You may
 # obtain a copy of this license in the LICENSE.txt file in the root directory
@@ -43,7 +44,7 @@ def _canonicalize_phase(vec: np.ndarray) -> np.ndarray:
     """Fix the global phase so that the leading large-magnitude element is real positive.
 
     Eigenvectors are defined only up to a global complex phase; this
-    canonicalization makes repeated calls to ``eigsh`` return numerically
+    phase-fixing makes repeated calls to ``eigsh`` return numerically
     identical arrays for the same Hamiltonian.
     """
     threshold = 1e-10 * np.max(np.abs(vec))
@@ -57,16 +58,16 @@ def get_ground_state_exact(hamiltonian: SparsePauliOp) -> np.ndarray:
     """Return the ground-state vector via sparse exact diagonalization.
 
     Uses ``scipy.sparse.linalg.eigsh`` with ``which='SA'`` (smallest algebraic
-    eigenvalue).  Practical limit: n ≤ 16 qubits (2^16 × 2^16 matrix).
+    eigenvalue).  Practical limit: n <= 16 qubits (2^16 x 2^16 matrix).
 
-    The returned vector is phase-canonicalized so that repeated calls for the
+    The returned vector is phase-fixed so that repeated calls for the
     same Hamiltonian yield identical arrays.
 
     Args:
         hamiltonian: Hamiltonian as a SparsePauliOp.
 
     Returns:
-        Complex numpy array of shape ``(2**n,)`` — the normalised ground state.
+        Complex numpy array of shape ``(2**n,)`` -- the normalized ground state.
     """
     mat = hamiltonian.to_matrix(sparse=True).astype(complex)
     _, vecs = scipy.sparse.linalg.eigsh(mat, k=1, which="SA")
@@ -85,7 +86,7 @@ def get_ground_state_vqe(
         phase labels, use the default exact diagonalization (``backend=None``).
         VQE approximations near phase boundaries may produce incorrect labels.
 
-    Uses an ``EfficientSU2`` ansatz (1 repetition) with COBYLA optimisation via
+    Uses an ``EfficientSU2`` ansatz (1 repetition) with COBYLA optimization via
     ``StatevectorEstimator`` from ``qiskit.primitives``.  The ``backend``
     argument is accepted for API consistency and future hardware integration;
     the current implementation uses ``StatevectorEstimator`` unconditionally.
@@ -95,6 +96,8 @@ def get_ground_state_vqe(
         backend: Reserved for future hardware integration.  Currently unused;
             pass any non-``None`` value to activate this pathway.
 
+    :type backend: object
+
     Returns:
         Qiskit ``Statevector`` of the approximate ground state.
     """
@@ -109,6 +112,7 @@ def get_ground_state_vqe(
     estimator = StatevectorEstimator()
 
     def cost(params: np.ndarray) -> float:
+        """Evaluate energy expectation value for given parameters."""
         pub = (ansatz, [hamiltonian], [params])
         return float(estimator.run([pub]).result()[0].data.evs[0])
 
diff --git a/qiskit_machine_learning/datasets/phase_of_matter/_cluster.py b/qiskit_machine_learning/datasets/phase_of_matter/_cluster.py
index f3f48d85e..3b3d5e767 100644
--- a/qiskit_machine_learning/datasets/phase_of_matter/_cluster.py
+++ b/qiskit_machine_learning/datasets/phase_of_matter/_cluster.py
@@ -1,6 +1,7 @@
 # This code is part of a Qiskit project.
 #
 # (C) Copyright IBM 2019, 2026.
+# (C) Copyright UKRI-STFC (Hartree Centre) 2024, 2026.
 #
 # This code is licensed under the Apache License, Version 2.0. You may
 # obtain a copy of this license in the LICENSE.txt file in the root directory
@@ -67,7 +68,7 @@ def sample_parameters(n_samples: int, rng: np.random.Generator) -> list[tuple[di
     """Sample coupling parameters uniformly from the interior of each phase.
 
     Sampling regions (see Fig. 6 in the reference) are placed well inside
-    each phase to avoid mislabelled points near boundaries.
+    each phase to avoid mislabeled points near boundaries.
 
     Args:
         n_samples: Number of samples to draw *per class*.
diff --git a/qiskit_machine_learning/datasets/phase_of_matter/_haldane.py b/qiskit_machine_learning/datasets/phase_of_matter/_haldane.py
index 055d72886..03dd265eb 100644
--- a/qiskit_machine_learning/datasets/phase_of_matter/_haldane.py
+++ b/qiskit_machine_learning/datasets/phase_of_matter/_haldane.py
@@ -1,6 +1,7 @@
 # This code is part of a Qiskit project.
 #
 # (C) Copyright IBM 2019, 2026.
+# (C) Copyright UKRI-STFC (Hartree Centre) 2024, 2026.
 #
 # This code is licensed under the Apache License, Version 2.0. You may
 # obtain a copy of this license in the LICENSE.txt file in the root directory
diff --git a/qiskit_machine_learning/datasets/phase_of_matter/_heisenberg.py b/qiskit_machine_learning/datasets/phase_of_matter/_heisenberg.py
index 1a9c4b020..c89b8dabf 100644
--- a/qiskit_machine_learning/datasets/phase_of_matter/_heisenberg.py
+++ b/qiskit_machine_learning/datasets/phase_of_matter/_heisenberg.py
@@ -1,6 +1,7 @@
 # This code is part of a Qiskit project.
 #
 # (C) Copyright IBM 2019, 2026.
+# (C) Copyright UKRI-STFC (Hartree Centre) 2024, 2026.
 #
 # This code is licensed under the Apache License, Version 2.0. You may
 # obtain a copy of this license in the LICENSE.txt file in the root directory
diff --git a/qiskit_machine_learning/datasets/phase_of_matter/phase_of_matter.py b/qiskit_machine_learning/datasets/phase_of_matter/phase_of_matter.py
index 236688e9d..19e12d0a9 100644
--- a/qiskit_machine_learning/datasets/phase_of_matter/phase_of_matter.py
+++ b/qiskit_machine_learning/datasets/phase_of_matter/phase_of_matter.py
@@ -1,6 +1,7 @@
 # This code is part of a Qiskit project.
 #
 # (C) Copyright IBM 2019, 2026.
+# (C) Copyright UKRI-STFC (Hartree Centre) 2024, 2026.
 #
 # This code is licensed under the Apache License, Version 2.0. You may
 # obtain a copy of this license in the LICENSE.txt file in the root directory
@@ -111,7 +112,7 @@ def phase_of_matter_data(
         seed: Integer seed for the parameter-sampling RNG, enabling
             reproducible datasets.
         backend: When ``None`` (default), exact diagonalization via
-            ``scipy.sparse.linalg.eigsh`` is used — the recommended path for
+            ``scipy.sparse.linalg.eigsh`` is used -- the recommended path for
             reliable phase labels.  When a Qiskit backend is provided, a
             VQE-based approximation is used instead.
 
@@ -122,6 +123,8 @@ def phase_of_matter_data(
                 incorrect labels.  Use ``backend=None`` for dataset
                 generation.
 
+    :type backend: object
+
     Returns:
         A tuple ``(training_features, training_labels, test_features,
         test_labels)`` where:
@@ -202,8 +205,8 @@ def phase_of_matter_data(
     all_states: list[np.ndarray] = []
     all_labels: list[str] = []
     for params, phase in raw_samples:
-        H: SparsePauliOp = build_fn(n, params)
-        gs = gs_fn(H)
+        ham: SparsePauliOp = build_fn(n, params)
+        gs = gs_fn(ham)
         if isinstance(gs, Statevector):
             gs = gs.data
         all_states.append(gs)
diff --git a/test/datasets/test_phase_of_matter.py b/test/datasets/test_phase_of_matter.py
index ab0e9e988..768612a99 100644
--- a/test/datasets/test_phase_of_matter.py
+++ b/test/datasets/test_phase_of_matter.py
@@ -1,6 +1,7 @@
 # This code is part of a Qiskit project.
 #
 # (C) Copyright IBM 2019, 2026.
+# (C) Copyright UKRI-STFC (Hartree Centre) 2024, 2026.
 #
 # This code is licensed under the Apache License, Version 2.0. You may
 # obtain a copy of this license in the LICENSE.txt file in the root directory
@@ -21,11 +22,12 @@
 from __future__ import annotations
 
 import unittest
+from test import QiskitMachineLearningTestCase
+from typing import Callable
 
 import numpy as np
 from ddt import ddt, idata, unpack
-from qiskit.quantum_info import Statevector
-from test import QiskitMachineLearningTestCase
+from qiskit.quantum_info import SparsePauliOp, Statevector
 
 from qiskit_machine_learning.datasets import phase_of_matter_data
 from qiskit_machine_learning.datasets.phase_of_matter._annni import (
@@ -47,7 +49,8 @@
 # ---------------------------------------------------------------------------
 
 
-def _is_hermitian(op, atol: float = 1e-10) -> bool:
+def _is_hermitian(op: SparsePauliOp, atol: float = 1e-10) -> bool:
+    """Return True if op is Hermitian within atol."""
     mat = op.to_matrix()
     return np.allclose(mat, mat.conj().T, atol=atol)
 
@@ -64,46 +67,50 @@ class TestHamiltonianBuilders(QiskitMachineLearningTestCase):
     @idata([(4,), (6,)])
     @unpack
     def test_heisenberg_hermitian(self, n):
-        H = build_heisenberg(n, j1=1.0, j2=0.5)
-        self.assertTrue(_is_hermitian(H), f"Heisenberg n={n} is not Hermitian")
+        """Heisenberg Hamiltonian must be Hermitian."""
+        ham = build_heisenberg(n, j1=1.0, j2=0.5)
+        self.assertTrue(_is_hermitian(ham), f"Heisenberg n={n} is not Hermitian")
 
     @idata([(4,), (6,)])
     @unpack
     def test_haldane_hermitian(self, n):
-        H = build_haldane(n, h1=0.5, h2=0.3)
-        self.assertTrue(_is_hermitian(H), f"Haldane n={n} is not Hermitian")
+        """Haldane Hamiltonian must be Hermitian."""
+        ham = build_haldane(n, h1=0.5, h2=0.3)
+        self.assertTrue(_is_hermitian(ham), f"Haldane n={n} is not Hermitian")
 
     @idata([(4,), (6,)])
     @unpack
     def test_annni_hermitian(self, n):
-        H = build_annni(n, kappa=0.3, h=0.5)
-        self.assertTrue(_is_hermitian(H), f"ANNNI n={n} is not Hermitian")
+        """ANNNI Hamiltonian must be Hermitian."""
+        ham = build_annni(n, kappa=0.3, h=0.5)
+        self.assertTrue(_is_hermitian(ham), f"ANNNI n={n} is not Hermitian")
 
     @idata([(4,), (6,)])
     @unpack
     def test_cluster_hermitian(self, n):
-        H = build_cluster(n, j1=1.0, j2=-1.0)
-        self.assertTrue(_is_hermitian(H), f"Cluster n={n} is not Hermitian")
+        """Cluster Hamiltonian must be Hermitian."""
+        ham = build_cluster(n, j1=1.0, j2=-1.0)
+        self.assertTrue(_is_hermitian(ham), f"Cluster n={n} is not Hermitian")
 
     def test_cluster_periodic_boundary(self):
         """Cluster Hamiltonian must have more terms than diagonal Z terms alone."""
         n = 4
-        H = build_cluster(n, j1=1.0, j2=1.0)
+        ham = build_cluster(n, j1=1.0, j2=1.0)
         # n Z terms + n XX two-body terms + n ZXZ three-body terms = 3n unique terms minimum
-        self.assertGreater(len(H), n)
+        self.assertGreater(len(ham), n)
 
     def test_matrix_dimension(self):
-        """All models should produce a 2^n × 2^n matrix for n=4."""
+        """All models should produce a 2^n x 2^n matrix for n=4."""
         n = 4
         dim = 2**n
-        builders = [
+        hamiltonians = [
             build_heisenberg(n, 1.0, 0.5),
             build_haldane(n, 0.5, 0.3),
             build_annni(n, 0.3, 0.5),
             build_cluster(n, 1.0, -1.0),
         ]
-        for H in builders:
-            mat = H.to_matrix()
+        for ham in hamiltonians:
+            mat = ham.to_matrix()
             self.assertEqual(mat.shape, (dim, dim))
 
 
@@ -116,14 +123,15 @@ def test_matrix_dimension(self):
 class TestGroundState(QiskitMachineLearningTestCase):
     """Verify exact-diagonalization ground-state properties."""
 
-    def _fixed_hamiltonian(self, model: str, n: int):
-        params = {
+    def _fixed_hamiltonian(self, model: str, n: int) -> SparsePauliOp:
+        """Return a Hamiltonian at fixed safe parameters for the given model."""
+        params: dict[str, tuple] = {
             "heisenberg": (n, 1.0, 0.5),
             "haldane": (n, 0.5, 0.3),
             "annni": (n, 0.3, 0.5),
             "cluster": (n, 1.0, -1.0),
         }
-        builders = {
+        builders: dict[str, Callable[..., SparsePauliOp]] = {
             "heisenberg": build_heisenberg,
             "haldane": build_haldane,
             "annni": build_annni,
@@ -134,19 +142,20 @@ def _fixed_hamiltonian(self, model: str, n: int):
     @idata([("heisenberg",), ("haldane",), ("annni",), ("cluster",)])
     @unpack
     def test_normalization(self, model):
-        H = self._fixed_hamiltonian(model, n=4)
-        gs = get_ground_state_exact(H)
+        """Ground state must be normalized to unit norm."""
+        ham = self._fixed_hamiltonian(model, n=4)
+        gs = get_ground_state_exact(ham)
         self.assertAlmostEqual(
-            np.linalg.norm(gs), 1.0, places=8, msg=f"{model} ground state is not normalised"
+            np.linalg.norm(gs), 1.0, places=8, msg=f"{model} ground state is not normalized"
         )
 
     @idata([("heisenberg",), ("haldane",), ("annni",), ("cluster",)])
     @unpack
     def test_is_eigenstate(self, model):
-        """H|ψ⟩ must equal E|ψ⟩ up to numerical noise."""
-        H = self._fixed_hamiltonian(model, n=4)
-        gs = get_ground_state_exact(H)
-        mat = H.to_matrix()
+        """H|psi> must equal E|psi> up to numerical noise."""
+        ham = self._fixed_hamiltonian(model, n=4)
+        gs = get_ground_state_exact(ham)
+        mat = ham.to_matrix()
         h_psi = mat @ gs
         energy = np.dot(gs.conj(), h_psi).real
         residual = np.linalg.norm(h_psi - energy * gs)
@@ -154,9 +163,9 @@ def test_is_eigenstate(self, model):
 
     def test_lowest_eigenvalue(self):
         """Energy from eigsh must match the minimum from np.linalg.eigh."""
-        H = build_heisenberg(4, j1=1.0, j2=2.0)
-        gs = get_ground_state_exact(H)
-        mat = H.to_matrix()
+        ham = build_heisenberg(4, j1=1.0, j2=2.0)
+        gs = get_ground_state_exact(ham)
+        mat = ham.to_matrix()
         e_eigsh = (gs.conj() @ mat @ gs).real
         e_min = np.linalg.eigvalsh(mat).min()
         self.assertAlmostEqual(e_eigsh, e_min, places=8)
@@ -198,6 +207,7 @@ def test_heisenberg_phase_region(
     )
     @unpack
     def test_annni_all_phases_present(self, phase):
+        """All four ANNNI phases must appear in a sufficiently large dataset."""
         _, y, _, _ = phase_of_matter_data(40, 8, 4, model="annni", one_hot=False, seed=42)
         self.assertIn(phase, set(y), msg=f"ANNNI phase '{phase}' missing from dataset")
 
@@ -211,6 +221,7 @@ def test_annni_all_phases_present(self, phase):
     )
     @unpack
     def test_cluster_all_phases_present(self, phase):
+        """All four Cluster phases must appear in a sufficiently large dataset."""
         _, y, _, _ = phase_of_matter_data(40, 8, 4, model="cluster", one_hot=False, seed=42)
         self.assertIn(phase, set(y), msg=f"Cluster phase '{phase}' missing from dataset")
 
@@ -223,6 +234,7 @@ def test_cluster_all_phases_present(self, phase):
     )
     @unpack
     def test_haldane_all_phases_present(self, phase):
+        """All three Haldane phases must appear in a sufficiently large dataset."""
         _, y, _, _ = phase_of_matter_data(30, 6, 4, model="haldane", one_hot=False, seed=42)
         self.assertIn(phase, set(y), msg=f"Haldane phase '{phase}' missing from dataset")
 
@@ -246,6 +258,7 @@ class TestPublicAPI(QiskitMachineLearningTestCase):
     )
     @unpack
     def test_return_shapes_ndarray(self, model, n_classes):
+        """Feature and label arrays must have the correct shapes."""
         x_tr, y_tr, x_te, y_te = phase_of_matter_data(8, 4, 4, model=model, one_hot=True, seed=0)
         np.testing.assert_array_equal(x_tr.shape, (8, 16))
         np.testing.assert_array_equal(y_tr.shape, (8, n_classes))
@@ -255,6 +268,7 @@ def test_return_shapes_ndarray(self, model, n_classes):
     @idata([("heisenberg",), ("annni",)])
     @unpack
     def test_return_shapes_statevector(self, model):
+        """Statevector formatting must return normalized Statevector objects."""
         x_tr, _, x_te, _ = phase_of_matter_data(
             4, 2, 4, model=model, formatting="statevector", seed=0
         )
@@ -264,19 +278,23 @@ def test_return_shapes_statevector(self, model):
         self.assertAlmostEqual(np.linalg.norm(x_tr[0].data), 1.0, places=6)
 
     def test_one_hot_true_sums_to_one(self):
+        """One-hot rows must each sum to exactly 1."""
         _, y_tr, _, _ = phase_of_matter_data(8, 4, 4, model="heisenberg", one_hot=True, seed=0)
         np.testing.assert_array_equal(y_tr.sum(axis=1), np.ones(8))
 
     def test_one_hot_false_returns_strings(self):
+        """String labels must be a subset of the model's phase names."""
         _, y_tr, _, _ = phase_of_matter_data(8, 4, 4, model="heisenberg", one_hot=False, seed=0)
         self.assertTrue(all(isinstance(lbl, str) for lbl in y_tr))
         self.assertTrue(set(y_tr).issubset({"trivial", "topological"}))
 
     def test_include_sample_total_false(self):
+        """Default return must be a 4-tuple."""
         result = phase_of_matter_data(4, 2, 4, model="heisenberg", seed=0)
         self.assertEqual(len(result), 4)
 
     def test_include_sample_total_true(self):
+        """include_sample_total=True must append a per-class count array."""
         result = phase_of_matter_data(
             4, 2, 4, model="heisenberg", include_sample_total=True, seed=0
         )
@@ -286,6 +304,7 @@ def test_include_sample_total_true(self):
         self.assertTrue(np.all(totals > 0))
 
     def test_custom_class_labels(self):
+        """Custom label names must replace the model defaults in string output."""
         _, y_tr, _, _ = phase_of_matter_data(
             8, 4, 4, model="heisenberg", one_hot=False, class_labels=["phase_A", "phase_B"], seed=0
         )
@@ -300,12 +319,12 @@ def test_custom_class_labels_one_hot(self):
         np.testing.assert_array_equal(y1, y2)
 
     def test_feature_normalization(self):
-        """All returned ground states must be normalised."""
+        """All returned ground states must be normalized."""
         x_tr, _, x_te, _ = phase_of_matter_data(8, 4, 4, model="annni", seed=1)
         for states in (x_tr, x_te):
             norms = np.linalg.norm(states, axis=1)
             np.testing.assert_allclose(
-                norms, 1.0, atol=1e-8, err_msg="Ground states are not normalised"
+                norms, 1.0, atol=1e-8, err_msg="Ground states are not normalized"
             )
 
     def test_seed_reproducibility(self):
@@ -345,24 +364,28 @@ def test_train_test_sizes_respected(self):
     # -----------------------------------------------------------------------
 
     def test_invalid_model_raises(self):
+        """An unknown model name must raise ValueError."""
         with self.assertRaises(ValueError):
             phase_of_matter_data(4, 2, 4, model="invalid")
 
     def test_invalid_formatting_raises(self):
+        """An unknown formatting string must raise ValueError."""
         with self.assertRaises(ValueError):
             phase_of_matter_data(4, 2, 4, model="heisenberg", formatting="bad")
 
     def test_n_too_small_raises(self):
+        """n < 4 must raise ValueError."""
         with self.assertRaises(ValueError):
             phase_of_matter_data(4, 2, 3, model="heisenberg")
 
     def test_wrong_class_labels_length_raises(self):
+        """class_labels with wrong length must raise ValueError."""
         with self.assertRaises(ValueError):
             phase_of_matter_data(4, 2, 4, model="heisenberg", class_labels=["only_one"])
 
 
 # ---------------------------------------------------------------------------
-# Integration — import paths
+# Integration -- import paths
 # ---------------------------------------------------------------------------
 
 
@@ -370,16 +393,19 @@ class TestImportPaths(QiskitMachineLearningTestCase):
     """Verify the package can be imported and is correctly wired up."""
 
     def test_importable(self):
+        """phase_of_matter_data must be accessible from the datasets namespace."""
         import qiskit_machine_learning.datasets as ds  # pylint: disable=import-outside-toplevel
 
         self.assertIsNotNone(ds.phase_of_matter_data)
 
     def test_in_all(self):
+        """phase_of_matter_data must be listed in datasets.__all__."""
         import qiskit_machine_learning.datasets as ds  # pylint: disable=import-outside-toplevel
 
         self.assertIn("phase_of_matter_data", ds.__all__)
 
     def test_hamiltonian_modules_importable(self):
+        """All Hamiltonian sub-modules must expose the required attributes."""
         from qiskit_machine_learning.datasets.phase_of_matter import (  # pylint: disable=import-outside-toplevel
             _annni,
             _cluster,

From 02efe7a9b8e98790f453e995c1b57aa913d6e276 Mon Sep 17 00:00:00 2001
From: Shailesh K <shailesh.mkvr@gmail.com>
Date: Wed, 13 May 2026 08:17:34 +0530
Subject: [PATCH 3/6] Fix CI round 2: spelling, W9016 type doc, doctest skip

- Add atol, ddt, eigh, geq, idata, namespace, rng, simulable, zxz and
  Simulatable to .pylintdict
- Fix neighbour/favour -> neighbor/favor in _haldane.py
- Fix parameterised/honoured -> parameterized/honored in test file
- Reword 'RNG' -> 'random number generator' in phase_of_matter.py docstring
- Fix Simulable -> Simulatable in paper reference
- Fix W9016: use 'backend (object):' Google-style in both _base.py and
  phase_of_matter.py; remove incorrect ':type backend:' RST lines
- Add '# doctest: +SKIP' to docstring examples (consistent with existing
  datasets which have no running doctests)
- Replace 'atol' param name with 'tol' in _is_hermitian helper
- Replace 'ZXZ' in comment with 'Z-X-Z'
- Reword 'namespace' and 'eigh' references in test docstrings
---
 .pylintdict                                      | 10 ++++++++++
 .../datasets/phase_of_matter/_base.py            |  4 +---
 .../datasets/phase_of_matter/_haldane.py         |  4 ++--
 .../datasets/phase_of_matter/phase_of_matter.py  | 16 +++++++---------
 test/datasets/test_phase_of_matter.py            | 16 ++++++++--------
 5 files changed, 28 insertions(+), 22 deletions(-)

diff --git a/.pylintdict b/.pylintdict
index 9c9c22b3f..cb7ecbc81 100644
--- a/.pylintdict
+++ b/.pylintdict
@@ -695,3 +695,13 @@ paulis
 spt
 topological
 trivial
+atol
+ddt
+eigh
+geq
+idata
+namespace
+rng
+simulable
+zxz
+Simulatable
diff --git a/qiskit_machine_learning/datasets/phase_of_matter/_base.py b/qiskit_machine_learning/datasets/phase_of_matter/_base.py
index 5a45053a0..4ca939575 100644
--- a/qiskit_machine_learning/datasets/phase_of_matter/_base.py
+++ b/qiskit_machine_learning/datasets/phase_of_matter/_base.py
@@ -93,11 +93,9 @@ def get_ground_state_vqe(
 
     Args:
         hamiltonian: Hamiltonian as a SparsePauliOp.
-        backend: Reserved for future hardware integration.  Currently unused;
+        backend (object): Reserved for future hardware integration.  Currently unused;
             pass any non-``None`` value to activate this pathway.
 
-    :type backend: object
-
     Returns:
         Qiskit ``Statevector`` of the approximate ground state.
     """
diff --git a/qiskit_machine_learning/datasets/phase_of_matter/_haldane.py b/qiskit_machine_learning/datasets/phase_of_matter/_haldane.py
index 03dd265eb..858152d7b 100644
--- a/qiskit_machine_learning/datasets/phase_of_matter/_haldane.py
+++ b/qiskit_machine_learning/datasets/phase_of_matter/_haldane.py
@@ -49,8 +49,8 @@ def build_hamiltonian(n: int, h1: float, h2: float, j: float = 1.0) -> SparsePau
     Args:
         n: Number of lattice sites (qubits).
         h1: Transverse-field strength (units of *J*).
-        h2: Nearest-neighbour XX coupling (units of *J*).  Positive values
-            favour the SPT phase; negative values favour antiferromagnetic.
+        h2: Nearest-neighbor XX coupling (units of *J*).  Positive values
+            favor the SPT phase; negative values favor antiferromagnetic.
         j: Overall energy scale, default 1.0.
 
     Returns:
diff --git a/qiskit_machine_learning/datasets/phase_of_matter/phase_of_matter.py b/qiskit_machine_learning/datasets/phase_of_matter/phase_of_matter.py
index 19e12d0a9..2d7052165 100644
--- a/qiskit_machine_learning/datasets/phase_of_matter/phase_of_matter.py
+++ b/qiskit_machine_learning/datasets/phase_of_matter/phase_of_matter.py
@@ -109,9 +109,9 @@ def phase_of_matter_data(
             numpy array of shape ``(num_samples, 2**n)``.
             ``"statevector"`` returns a list of
             :class:`~qiskit.quantum_info.Statevector` objects.
-        seed: Integer seed for the parameter-sampling RNG, enabling
-            reproducible datasets.
-        backend: When ``None`` (default), exact diagonalization via
+        seed: Integer seed for the parameter-sampling random number generator,
+            enabling reproducible datasets.
+        backend (object): When ``None`` (default), exact diagonalization via
             ``scipy.sparse.linalg.eigsh`` is used -- the recommended path for
             reliable phase labels.  When a Qiskit backend is provided, a
             VQE-based approximation is used instead.
@@ -123,8 +123,6 @@ def phase_of_matter_data(
                 incorrect labels.  Use ``backend=None`` for dataset
                 generation.
 
-    :type backend: object
-
     Returns:
         A tuple ``(training_features, training_labels, test_features,
         test_labels)`` where:
@@ -150,16 +148,16 @@ def phase_of_matter_data(
 
     References:
         [1] Bermejo et al., "Quantum Convolutional Neural Networks are
-        (Effectively) Classically Simulable", arXiv:2408.12739 (2024).
+        (Effectively) Classically Simulatable", arXiv:2408.12739 (2024).
 
     Examples:
 
-        >>> x_tr, y_tr, x_te, y_te = phase_of_matter_data(
+        >>> x_tr, y_tr, x_te, y_te = phase_of_matter_data(  # doctest: +SKIP
         ...     10, 5, 4, model="heisenberg", seed=0
         ... )
-        >>> x_tr.shape
+        >>> x_tr.shape  # doctest: +SKIP
         (10, 16)
-        >>> y_tr.shape
+        >>> y_tr.shape  # doctest: +SKIP
         (10, 2)
     """
     if model not in _MODELS:
diff --git a/test/datasets/test_phase_of_matter.py b/test/datasets/test_phase_of_matter.py
index 768612a99..2de8c9721 100644
--- a/test/datasets/test_phase_of_matter.py
+++ b/test/datasets/test_phase_of_matter.py
@@ -15,7 +15,7 @@
 
 Follows qiskit-machine-learning test conventions:
   - QiskitMachineLearningTestCase base class
-  - ddt / @idata / @unpack for parameterised tests
+  - parameterized tests via the ddt library
   - np.testing.assert_* for array assertions
 """
 
@@ -49,10 +49,10 @@
 # ---------------------------------------------------------------------------
 
 
-def _is_hermitian(op: SparsePauliOp, atol: float = 1e-10) -> bool:
-    """Return True if op is Hermitian within atol."""
+def _is_hermitian(op: SparsePauliOp, tol: float = 1e-10) -> bool:
+    """Return True if op is Hermitian within the given tolerance."""
     mat = op.to_matrix()
-    return np.allclose(mat, mat.conj().T, atol=atol)
+    return np.allclose(mat, mat.conj().T, atol=tol)
 
 
 # ---------------------------------------------------------------------------
@@ -96,7 +96,7 @@ def test_cluster_periodic_boundary(self):
         """Cluster Hamiltonian must have more terms than diagonal Z terms alone."""
         n = 4
         ham = build_cluster(n, j1=1.0, j2=1.0)
-        # n Z terms + n XX two-body terms + n ZXZ three-body terms = 3n unique terms minimum
+        # n Z terms + n XX two-body terms + n Z-X-Z three-body terms = 3n unique terms minimum
         self.assertGreater(len(ham), n)
 
     def test_matrix_dimension(self):
@@ -162,7 +162,7 @@ def test_is_eigenstate(self, model):
         self.assertLess(residual, 1e-8, msg=f"{model} eigenstate residual {residual:.2e}")
 
     def test_lowest_eigenvalue(self):
-        """Energy from eigsh must match the minimum from np.linalg.eigh."""
+        """Energy from eigsh must match the minimum eigenvalue from dense diagonalization."""
         ham = build_heisenberg(4, j1=1.0, j2=2.0)
         gs = get_ground_state_exact(ham)
         mat = ham.to_matrix()
@@ -353,7 +353,7 @@ def test_different_seeds_differ(self):
         self.assertFalse(np.allclose(x1, x2))
 
     def test_train_test_sizes_respected(self):
-        """Exact training_size / test_size must be honoured."""
+        """Exact training_size / test_size must be honored."""
         for tr, te in [(10, 3), (7, 7), (1, 1)]:
             x_tr, _, x_te, _ = phase_of_matter_data(tr, te, 4, model="heisenberg", seed=0)
             self.assertEqual(len(x_tr), tr, f"train size mismatch (requested {tr})")
@@ -393,7 +393,7 @@ class TestImportPaths(QiskitMachineLearningTestCase):
     """Verify the package can be imported and is correctly wired up."""
 
     def test_importable(self):
-        """phase_of_matter_data must be accessible from the datasets namespace."""
+        """phase_of_matter_data must be accessible from the datasets module."""
         import qiskit_machine_learning.datasets as ds  # pylint: disable=import-outside-toplevel
 
         self.assertIsNotNone(ds.phase_of_matter_data)

From bc84307ae93545bdfab69f1f69426d772a690b94 Mon Sep 17 00:00:00 2001
From: Shailesh K <shailesh.mkvr@gmail.com>
Date: Wed, 13 May 2026 08:24:13 +0530
Subject: [PATCH 4/6] Fix spell check: add Bermejo, eq, ceil, doctest to
 .pylintdict

---
 .pylintdict | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/.pylintdict b/.pylintdict
index cb7ecbc81..933690cd8 100644
--- a/.pylintdict
+++ b/.pylintdict
@@ -705,3 +705,7 @@ rng
 simulable
 zxz
 Simulatable
+Bermejo
+ceil
+doctest
+eq

From 2e50b4615f75d8d40768db253dd2e3d5ff4cbb6f Mon Sep 17 00:00:00 2001
From: Shailesh K <shailesh.mkvr@gmail.com>
Date: Wed, 13 May 2026 08:35:06 +0530
Subject: [PATCH 5/6] Fix sphinx spell check: lowercase Bermejo and Simulatable
 in .pylintdict

The sphinx LowercaseFilter downcases checked words before dict lookup,
so dictionary entries must be lowercase. Bermejo and Simulatable were
capitalised, causing sphinx-build -M spelling to fail despite pylint
spell check passing.
---
 .pylintdict | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.pylintdict b/.pylintdict
index 933690cd8..5db6904e0 100644
--- a/.pylintdict
+++ b/.pylintdict
@@ -704,8 +704,8 @@ namespace
 rng
 simulable
 zxz
-Simulatable
-Bermejo
+simulatable
+bermejo
 ceil
 doctest
 eq

From d410e17082cb0cf9847e0f49d7a439fba7a0d5a6 Mon Sep 17 00:00:00 2001
From: Shailesh K <shailesh.mkvr@gmail.com>
Date: Sun, 14 Jun 2026 03:22:28 +0530
Subject: [PATCH 6/6] Address maintainer review: backend docs, bond labeling,
 class balance, autosummary

- Clarify in phase_of_matter_data() docstring that the VQE pathway uses
  StatevectorEstimator unconditionally; backend arg is reserved for
  future hardware integration (resolves OkuyanBoga comment on _base.py)
- Fix _heisenberg.py docstring to accurately reflect that J1 applies to
  odd-indexed bonds (1-indexed: 1,3,5,...) and J2 to even-indexed bonds;
  code logic is unchanged (resolves even/odd mismatch comment)
- Replace ceil-based class allocation with exact per-class counts using
  base+remainder distribution; dataset is now balanced to within 1 sample
  even when size is not divisible by n_classes; add test for this
  (resolves class imbalance comment)
- Remove redundant autosummary block from phase_of_matter/__init__.py;
  phase_of_matter_data is already listed in the parent datasets/__init__.py
  autosummary which is the canonical location (resolves Sphinx comment)
---
 .../datasets/phase_of_matter/__init__.py      | 11 +----
 .../datasets/phase_of_matter/_heisenberg.py   | 12 +++--
 .../phase_of_matter/phase_of_matter.py        | 48 +++++++++++--------
 test/datasets/test_phase_of_matter.py         | 12 +++++
 4 files changed, 49 insertions(+), 34 deletions(-)

diff --git a/qiskit_machine_learning/datasets/phase_of_matter/__init__.py b/qiskit_machine_learning/datasets/phase_of_matter/__init__.py
index 6690d2c8b..63775fe20 100644
--- a/qiskit_machine_learning/datasets/phase_of_matter/__init__.py
+++ b/qiskit_machine_learning/datasets/phase_of_matter/__init__.py
@@ -11,8 +11,7 @@
 # copyright notice, and modified files need to carry a notice indicating
 # that they have been altered from the originals.
 
-"""
-Phase of Matter dataset (:mod:`phase_of_matter`)
+"""Phase of Matter dataset (:mod:`phase_of_matter`)
 
 Quantum Phase of Matter classification dataset generator.
 
@@ -24,14 +23,6 @@
 * :mod:`._cluster`    — Cluster Hamiltonian
 
 The :func:`phase_of_matter_data` function is the single public entry point.
-
-.. currentmodule:: phase_of_matter
-
-.. autosummary::
-   :toctree: ../stubs/
-   :nosignatures:
-
-   phase_of_matter_data
 """
 
 from .phase_of_matter import phase_of_matter_data
diff --git a/qiskit_machine_learning/datasets/phase_of_matter/_heisenberg.py b/qiskit_machine_learning/datasets/phase_of_matter/_heisenberg.py
index c89b8dabf..3c8c7e135 100644
--- a/qiskit_machine_learning/datasets/phase_of_matter/_heisenberg.py
+++ b/qiskit_machine_learning/datasets/phase_of_matter/_heisenberg.py
@@ -35,7 +35,7 @@ def build_hamiltonian(n: int, j1: float, j2: float) -> SparsePauliOp:
         H = \sum_{i=1}^{n-1} J_i
             \left( X_i X_{i+1} + Y_i Y_{i+1} + Z_i Z_{i+1} \right)
 
-    where :math:`J_i = J_1` for even *i* and :math:`J_i = J_2` for odd *i*
+    where :math:`J_i = J_1` for odd *i* and :math:`J_i = J_2` for even *i*
     (1-indexed), with :math:`J_1, J_2 \geq 0`.
 
     Phase diagram (thermodynamic limit):
@@ -45,16 +45,18 @@ def build_hamiltonian(n: int, j1: float, j2: float) -> SparsePauliOp:
 
     Args:
         n: Number of lattice sites (qubits).
-        j1: Coupling constant on even bonds (:math:`J_1 \geq 0`).
-        j2: Coupling constant on odd bonds (:math:`J_2 \geq 0`).
+        j1: Coupling constant on odd-indexed bonds (1-indexed: bonds 1, 3, 5, ...),
+            with :math:`J_1 \geq 0`.
+        j2: Coupling constant on even-indexed bonds (1-indexed: bonds 2, 4, 6, ...),
+            with :math:`J_2 \geq 0`.
 
     Returns:
         SparsePauliOp for the Hamiltonian on *n* qubits.
     """
     terms: list[SparsePauliOp] = []
     for i in range(n - 1):
-        # i is 0-indexed; the paper is 1-indexed, so even bond = i % 2 == 0
-        j = j1 if (i % 2 == 0) else j2
+        # i is the 0-indexed bond; even i corresponds to odd 1-indexed bonds (1, 3, 5, ...)
+        j = j1 if not i % 2 else j2
         for pauli in ("X", "Y", "Z"):
             terms.append(j * pauli_term([(pauli, i), (pauli, i + 1)], n))
     return SparsePauliOp.sum(terms).simplify()
diff --git a/qiskit_machine_learning/datasets/phase_of_matter/phase_of_matter.py b/qiskit_machine_learning/datasets/phase_of_matter/phase_of_matter.py
index 2d7052165..0342f8a84 100644
--- a/qiskit_machine_learning/datasets/phase_of_matter/phase_of_matter.py
+++ b/qiskit_machine_learning/datasets/phase_of_matter/phase_of_matter.py
@@ -15,8 +15,6 @@
 
 from __future__ import annotations
 
-import math
-
 import numpy as np
 from qiskit.quantum_info import SparsePauliOp, Statevector
 
@@ -42,6 +40,16 @@
 }
 
 
+def _per_class_counts(total: int, n_classes: int) -> list[int]:
+    """Distribute *total* samples across *n_classes* as evenly as possible.
+
+    Classes with index < (total % n_classes) receive one extra sample so that
+    sum(result) == total and max(result) - min(result) <= 1.
+    """
+    base, rem = divmod(total, n_classes)
+    return [base + (1 if i < rem else 0) for i in range(n_classes)]
+
+
 # ---------------------------------------------------------------------------
 # Public function
 # ---------------------------------------------------------------------------
@@ -113,7 +121,7 @@ def phase_of_matter_data(
             enabling reproducible datasets.
         backend (object): When ``None`` (default), exact diagonalization via
             ``scipy.sparse.linalg.eigsh`` is used -- the recommended path for
-            reliable phase labels.  When a Qiskit backend is provided, a
+            reliable phase labels.  When any non-``None`` value is passed, a
             VQE-based approximation is used instead.
 
             .. warning::
@@ -123,6 +131,12 @@ def phase_of_matter_data(
                 incorrect labels.  Use ``backend=None`` for dataset
                 generation.
 
+                The current VQE implementation uses
+                ``StatevectorEstimator`` from ``qiskit.primitives``
+                unconditionally, regardless of the backend object passed.
+                The ``backend`` argument is accepted for API consistency
+                and is reserved for future hardware integration.
+
     Returns:
         A tuple ``(training_features, training_labels, test_features,
         test_labels)`` where:
@@ -183,11 +197,11 @@ def phase_of_matter_data(
 
     rng = np.random.default_rng(seed)
 
-    # ceil ensures every class gets at least the requested count even when
-    # training_size / test_size are not divisible by n_classes.
-    n_per_class_train = math.ceil(training_size / n_classes)
-    n_per_class_test = math.ceil(test_size / n_classes)
-    n_per_class = n_per_class_train + n_per_class_test
+    # Compute exact per-class counts so the dataset is balanced to within 1 sample
+    # even when training_size / test_size are not divisible by n_classes.
+    train_counts = _per_class_counts(training_size, n_classes)
+    test_counts = _per_class_counts(test_size, n_classes)
+    n_per_class = max(tr + te for tr, te in zip(train_counts, test_counts))
 
     # Samplers return blocks of n_per_class per class, class order preserved.
     raw_samples = module.sample_parameters(n_per_class, rng)
@@ -222,17 +236,13 @@ def phase_of_matter_data(
         start = cls_idx * n_per_class
         cls_states = all_states[start : start + n_per_class]
         cls_labels = all_labels[start : start + n_per_class]
-        train_states.extend(cls_states[:n_per_class_train])
-        train_labels_raw.extend(cls_labels[:n_per_class_train])
-        test_states.extend(cls_states[n_per_class_train:])
-        test_labels_raw.extend(cls_labels[n_per_class_train:])
-        sample_totals[cls_idx] = n_per_class
-
-    # Trim to exact requested sizes (ceil may over-allocate by up to n_classes-1).
-    train_states = train_states[:training_size]
-    train_labels_raw = train_labels_raw[:training_size]
-    test_states = test_states[:test_size]
-    test_labels_raw = test_labels_raw[:test_size]
+        tr = train_counts[cls_idx]
+        te = test_counts[cls_idx]
+        train_states.extend(cls_states[:tr])
+        train_labels_raw.extend(cls_labels[:tr])
+        test_states.extend(cls_states[tr : tr + te])
+        test_labels_raw.extend(cls_labels[tr : tr + te])
+        sample_totals[cls_idx] = tr + te
 
     # Shuffle train and test independently to interleave classes.
     tr_idx = np.arange(len(train_states))
diff --git a/test/datasets/test_phase_of_matter.py b/test/datasets/test_phase_of_matter.py
index 2de8c9721..3d21756f0 100644
--- a/test/datasets/test_phase_of_matter.py
+++ b/test/datasets/test_phase_of_matter.py
@@ -359,6 +359,18 @@ def test_train_test_sizes_respected(self):
             self.assertEqual(len(x_tr), tr, f"train size mismatch (requested {tr})")
             self.assertEqual(len(x_te), te, f"test size mismatch (requested {te})")
 
+    def test_balanced_classes_non_divisible(self):
+        """Class counts must be balanced when size is not divisible by n_classes."""
+        # annni has 4 classes: 5 train -> counts {2, 1, 1, 1}; 3 test -> {1, 1, 1, 0} (any order)
+        _, y_tr, _, y_te = phase_of_matter_data(5, 3, 4, model="annni", one_hot=True, seed=0)
+        train_class_counts = y_tr.sum(axis=0)
+        test_class_counts = y_te.sum(axis=0)
+        self.assertEqual(int(train_class_counts.sum()), 5)
+        self.assertEqual(int(test_class_counts.sum()), 3)
+        # Largest and smallest per-class counts may differ by at most 1 sample
+        self.assertLessEqual(int(train_class_counts.max() - train_class_counts.min()), 1)
+        self.assertLessEqual(int(test_class_counts.max() - test_class_counts.min()), 1)
+
     # -----------------------------------------------------------------------
     # Error cases
     # -----------------------------------------------------------------------