Add files via upload

ss0832 · web-flow · commit 4480f4f11b79 · 2026-03-08T17:15:01.000+09:00
diff --git a/multioptpy/Wrapper/mapper.py b/multioptpy/Wrapper/mapper.py
@@ -10,6 +10,7 @@
 
 import bisect
 import glob
+import heapq
 import json
 import logging
 import os
@@ -92,8 +93,8 @@ def parse_xyz(filepath: str) -> tuple[list[str], np.ndarray]:
     return symbols, np.array(coords_raw, dtype=float)
 
 def distance_matrix(coords: np.ndarray) -> np.ndarray:
-    diff = coords[:, np.newaxis, :] - coords[np.newaxis, :, :]
-    return np.sqrt((diff ** 2).sum(axis=-1))
+    # cdist avoids the (N,N,3) intermediate array produced by manual broadcasting.
+    return cdist(coords, coords)
 
 
 # ===========================================================================
@@ -110,6 +111,15 @@ class StructureChecker:
     # Relative tolerance for declaring two eigenvalues degenerate.
     _DEGENERACY_REL_TOL: float = 0.02
 
+    # The 4 proper rotations (det=+1) from PCA sign-flip ambiguity.
+    # Built once at class definition time instead of on every call.
+    _SIGN_FLIP_MATS: tuple[np.ndarray, ...] = (
+        np.diag([ 1.0,  1.0,  1.0]),
+        np.diag([-1.0, -1.0,  1.0]),
+        np.diag([-1.0,  1.0, -1.0]),
+        np.diag([ 1.0, -1.0, -1.0]),
+    )
+
     def __init__(self, rmsd_threshold: float = 0.30) -> None:
         self.rmsd_threshold = rmsd_threshold
 
@@ -194,16 +204,34 @@ def _try_candidates(
         sym_a: list[str], ca: np.ndarray,
         sym_b: list[str], cb: np.ndarray,
     ) -> float:
-        """Evaluate every rotation candidate and return the minimum RMSD found."""
+        """Evaluate every rotation candidate and return the minimum RMSD found.
+
+        Element-to-index groupings are precomputed once before the rotation
+        loop so that the O(N) list comprehensions inside ``_optimal_mapping``
+        are not repeated for every candidate.
+        """
+        # --- Precompute element groups once (not per rotation) ---
+        elem_groups_a: dict[str, np.ndarray] = {}
+        for i, s in enumerate(sym_a):
+            elem_groups_a.setdefault(s, []).append(i)  # type: ignore[arg-type]
+        groups_a = {e: np.array(v, dtype=np.intp) for e, v in elem_groups_a.items()}
+
+        elem_groups_b: dict[str, np.ndarray] = {}
+        for i, s in enumerate(sym_b):
+            elem_groups_b.setdefault(s, []).append(i)  # type: ignore[arg-type]
+        groups_b = {e: np.array(v, dtype=np.intp) for e, v in elem_groups_b.items()}
+
         min_rmsd = float("inf")
         for R in candidates:
             cb_rot = cb @ R.T
-            perm = self._optimal_mapping(sym_a, ca, sym_b, cb_rot)
+            perm = self._optimal_mapping_fast(ca, cb_rot, groups_a, groups_b)
             if perm is None:
                 continue
             rmsd = self._kabsch_rmsd(ca, cb_rot[perm])
             if rmsd < min_rmsd:
                 min_rmsd = rmsd
+                if min_rmsd < self.rmsd_threshold:
+                    return min_rmsd  # Early exit: threshold already met
         return min_rmsd
 
     # ------------------------------------------------------------------ #
@@ -246,19 +274,14 @@ def _pca_align(coords: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
     #  Rotation candidates                                                 #
     # ------------------------------------------------------------------ #
 
-    @staticmethod
-    def _sign_flip_candidates() -> list[np.ndarray]:
+    @classmethod
+    def _sign_flip_candidates(cls) -> list[np.ndarray]:
         """
         The 4 proper rotations (det = +1) arising from sign-flip ambiguity
         of PCA eigenvectors.  Always necessary; sufficient when no
         eigenvalue degeneracy is present.
         """
-        return [
-            np.diag([ 1.0,  1.0,  1.0]),
-            np.diag([-1.0, -1.0,  1.0]),
-            np.diag([-1.0,  1.0, -1.0]),
-            np.diag([ 1.0, -1.0, -1.0]),
-        ]
+        return list(cls._SIGN_FLIP_MATS)
 
     @classmethod
     def _build_planar_candidates(
@@ -323,25 +346,54 @@ def _so3_grid(n: int) -> list[np.ndarray]:
         so that grid points are roughly uniformly distributed on S².
 
         Total: n³ rotation matrices (512 for n = 8).
-        """
-        rotations: list[np.ndarray] = []
-        for i in range(n):
-            alpha = 2 * np.pi * i / n
-            ca, sa = np.cos(alpha), np.sin(alpha)
-            Rz_alpha = np.array([[ca, -sa, 0.0], [sa, ca, 0.0], [0.0, 0.0, 1.0]])
-
-            for j in range(n):
-                beta = np.arccos(np.clip(1.0 - 2.0 * (j + 0.5) / n, -1.0, 1.0))
-                cb, sb = np.cos(beta), np.sin(beta)
-                Ry_beta = np.array([[cb, 0.0, sb], [0.0, 1.0, 0.0], [-sb, 0.0, cb]])
 
-                for k in range(n):
-                    gamma = 2 * np.pi * k / n
-                    cg, sg = np.cos(gamma), np.sin(gamma)
-                    Rz_gamma = np.array([[cg, -sg, 0.0], [sg, cg, 0.0], [0.0, 0.0, 1.0]])
-                    rotations.append(Rz_alpha @ Ry_beta @ Rz_gamma)
-
-        return rotations
+        Vectorised: all n³ matrix products are computed with batched numpy
+        operations instead of a triple Python for-loop.
+        """
+        # --- Angle arrays ---
+        k = np.arange(n)
+        alphas = 2.0 * np.pi * k / n                                          # (n,)
+        betas  = np.arccos(np.clip(1.0 - 2.0 * (k + 0.5) / n, -1.0, 1.0))   # (n,)
+        gammas = 2.0 * np.pi * k / n                                          # (n,)
+
+        # --- Component trig values ---
+        ca, sa = np.cos(alphas), np.sin(alphas)   # (n,)
+        cb, sb = np.cos(betas),  np.sin(betas)    # (n,)
+        cg, sg = np.cos(gammas), np.sin(gammas)   # (n,)
+
+        # --- Batched Rz(alpha): shape (n, 3, 3) ---
+        zero = np.zeros(n)
+        one  = np.ones(n)
+        Rz_a = np.stack([
+            np.stack([ ca, -sa, zero], axis=-1),
+            np.stack([ sa,  ca, zero], axis=-1),
+            np.stack([zero, zero, one], axis=-1),
+        ], axis=1)  # (n, 3, 3)
+
+        # --- Batched Ry(beta): shape (n, 3, 3) ---
+        Ry_b = np.stack([
+            np.stack([ cb, zero,  sb], axis=-1),
+            np.stack([zero,  one, zero], axis=-1),
+            np.stack([-sb, zero,  cb], axis=-1),
+        ], axis=1)  # (n, 3, 3)
+
+        # --- Batched Rz(gamma): shape (n, 3, 3) ---
+        Rz_g = np.stack([
+            np.stack([ cg, -sg, zero], axis=-1),
+            np.stack([ sg,  cg, zero], axis=-1),
+            np.stack([zero, zero, one], axis=-1),
+        ], axis=1)  # (n, 3, 3)
+
+        # --- ZYZ product over all (n, n, n) combinations ---
+        # Rz_a[:, None, None] @ Ry_b[None, :, None] @ Rz_g[None, None, :]
+        # Broadcasting shapes: (n,1,1,3,3) @ (1,n,1,3,3) @ (1,1,n,3,3)
+        Rza = Rz_a[:, None, None]     # (n, 1, 1, 3, 3)
+        Ryb = Ry_b[None, :, None]     # (1, n, 1, 3, 3)
+        Rzg = Rz_g[None, None, :]     # (1, 1, n, 3, 3)
+        R_all = Rza @ Ryb @ Rzg       # (n, n, n, 3, 3)
+
+        # Flatten to list of (3,3) matrices.
+        return list(R_all.reshape(-1, 3, 3))
 
     @staticmethod
     def _Rx(t: float) -> np.ndarray:
@@ -357,13 +409,36 @@ def _Rz(t: float) -> np.ndarray:
     #  Atom mapping (Hungarian algorithm)                                  #
     # ------------------------------------------------------------------ #
 
+    @staticmethod
+    def _optimal_mapping_fast(
+        coords_a: np.ndarray,
+        coords_b: np.ndarray,
+        groups_a: dict[str, np.ndarray],
+        groups_b: dict[str, np.ndarray],
+    ) -> list[int] | None:
+        """Find the atom permutation of B minimising total squared distance to A.
+
+        Accepts precomputed element-to-index arrays (``groups_a``, ``groups_b``)
+        so that the grouping step is not repeated for every rotation candidate.
+        Returns ``None`` if an element of A is absent from B or its count differs.
+        """
+        perm: list[int | None] = [None] * sum(len(v) for v in groups_a.values())
+        for elem, idx_a in groups_a.items():
+            idx_b = groups_b.get(elem)
+            if idx_b is None or len(idx_a) != len(idx_b):
+                return None
+            cost = cdist(coords_a[idx_a], coords_b[idx_b], metric="sqeuclidean")
+            row_ind, col_ind = linear_sum_assignment(cost)
+            for r, c in zip(row_ind, col_ind):
+                perm[idx_a[r]] = idx_b[c]
+        return None if None in perm else perm  # type: ignore[return-value]
+
     @staticmethod
     def _optimal_mapping(
         sym_a: list[str], coords_a: np.ndarray,
         sym_b: list[str], coords_b: np.ndarray,
     ) -> list[int] | None:
-        """
-        Find the permutation of B's atoms that minimises the total
+        """Find the permutation of B's atoms that minimises the total
         squared distance to A, solved independently per element.
         Returns None if stoichiometry is inconsistent.
         """
@@ -443,17 +518,41 @@ def fingerprint(
         Each key is a ``(elem_a, elem_b)`` tuple with elements in sorted
         order (so C–H and H–C map to the same key).  The value is the
         number of such bonds.
+
+        Covalent radii are looked up once per unique element and the pair
+        thresholds are built from them directly, replacing the per-atom-pair
+        ``_bond_threshold`` calls.  Distances and threshold tests are
+        vectorised; only the bonded pairs are visited in a Python loop.
         """
         n = len(symbols)
-        dmat = distance_matrix(coords)
-        counts: dict[tuple[str, str], int] = {}
+        # Precompute covalent radius for each unique element.
+        unique_elems = set(symbols)
+        elem_radius: dict[str, float] = {}
+        for elem in unique_elems:
+            if covalent_radii_lib is not None:
+                try:
+                    elem_radius[elem] = covalent_radii_lib(elem) * _BOHR2ANG
+                except KeyError:
+                    elem_radius[elem] = 0.75  # generic fallback [Å]
+            else:
+                elem_radius[elem] = 0.75
+
+        radii_arr = np.array([elem_radius[s] for s in symbols], dtype=np.float64)
+
+        # Pairwise distances — vectorised via cdist.
+        dmat = cdist(coords, coords)
+        ii, jj = np.triu_indices(n, k=1)
+        dists = dmat[ii, jj]
 
-        for i in range(n):
-            for j in range(i + 1, n):
-                threshold = self._bond_threshold(symbols[i], symbols[j])
-                if dmat[i, j] <= threshold:
-                    key = (min(symbols[i], symbols[j]), max(symbols[i], symbols[j]))
-                    counts[key] = counts.get(key, 0) + 1
+        # Per-pair bonding threshold.
+        thresholds = self.covalent_margin * (radii_arr[ii] + radii_arr[jj])
+        bonded_idx = np.where(dists <= thresholds)[0]
+
+        counts: dict[tuple[str, str], int] = {}
+        for k in bonded_idx:
+            si, sj = symbols[ii[k]], symbols[jj[k]]
+            key = (si, sj) if si <= sj else (sj, si)
+            counts[key] = counts.get(key, 0) + 1
 
         return counts
 
@@ -531,11 +630,15 @@ def compute_priority(self, task):
     """
 
     def __init__(self, rng_seed: int = 42) -> None:
+        # _tasks: canonical list of ExplorationTask objects.
+        # Kept as a real list so that subclasses (e.g. RCMCQueue) can call
+        # .sort() and .pop(0) on it directly without breaking.
         self._tasks: list[ExplorationTask] = []
-        # Parallel list of negated priorities kept in ascending order so that
-        # bisect can locate the insertion point in O(log n) without an O(n)
-        # list comprehension on every push().
-        self._neg_priorities: list[float] = []
+        # _heap: parallel min-heap of (-priority, counter, task) used by the
+        # base-class push()/pop() for O(log n) insertion and extraction.
+        # RCMCQueue overrides pop() entirely and never touches _heap.
+        self._heap: list[tuple[float, int, ExplorationTask]] = []
+        self._push_counter: int = 0
         self._submitted: set[tuple] = set()
         self._rng = np.random.default_rng(rng_seed)
 
@@ -545,21 +648,27 @@ def push(self, task: ExplorationTask) -> bool:
             return False
 
         task.priority = self.compute_priority(task)
-        # _tasks is maintained in descending priority order. _neg_priorities
-        # mirrors it as ascending negated values so bisect can find the correct
-        # insertion index in O(log n) without rebuilding the list each call.
-        neg_p = -task.priority
-        idx = bisect.bisect_right(self._neg_priorities, neg_p)
-        self._tasks.insert(idx, task)
-        self._neg_priorities.insert(idx, neg_p)
+        # Update both _tasks (for subclass access) and _heap (for base-class pop).
+        self._tasks.append(task)
+        heapq.heappush(self._heap, (-task.priority, self._push_counter, task))
+        self._push_counter += 1
         self._submitted.add(key)
         return True
 
     def pop(self) -> ExplorationTask | None:
-        if not self._tasks:
+        """Pop and return the highest-priority task, or ``None`` if the heap is empty.
+
+        The heap extraction is O(log n); the task is then also removed from
+        ``_tasks`` (an O(n) scan) so subclasses iterating it stay consistent.
+        """
+        if not self._heap:
             return None
-        self._neg_priorities.pop(0)
-        return self._tasks.pop(0)
+        _, _, task = heapq.heappop(self._heap)
+        try:
+            self._tasks.remove(task)  # O(n) but only called in base-class path
+        except ValueError:
+            pass
+        return task
 
     def should_add(self, node: "EQNode", reference_energy: float, **kwargs) -> bool:
         """Decide probabilistically whether to enqueue a node.
@@ -631,9 +740,10 @@ def refresh_priorities(self, ref_e: float | None) -> None:
                 task.metadata["delta_E_hartree"] = eff_e - ref_e
             task.priority = self.compute_priority(task)
 
-        self._tasks.sort(key=lambda t: t.priority, reverse=True)
-        # Rebuild the parallel negated-priority list to stay in sync after sort.
-        self._neg_priorities = [-t.priority for t in self._tasks]
+        # Rebuild the heap from _tasks; _tasks itself is left in its current order.
+        self._heap = [(-t.priority, i, t) for i, t in enumerate(self._tasks)]
+        heapq.heapify(self._heap)
+        self._push_counter = len(self._heap)
 
     def export_queue_status(self) -> list[dict]:
         return [
@@ -1047,15 +1157,23 @@ def to_dict(self) -> dict:
                 data[k] = str(v)
         return data
 
+# Sentinel object used by NetworkGraph to distinguish "not yet computed"
+# from a cached value of None (meaning no energy is available).
+_UNSET = object()
+
+
 class NetworkGraph:
     def __init__(self) -> None:
         self._nodes: dict[int, EQNode] = {}
         self._edges: dict[int, TSEdge] = {}
         self._node_counter: int = 0
         self._edge_counter: int = 0
+        # Cached reference energy; set to _UNSET when invalidated.
+        self._ref_energy_cache: float | None = _UNSET  # type: ignore[assignment]
 
     def add_node(self, node: EQNode) -> None:
         self._nodes[node.node_id] = node
+        self._ref_energy_cache = _UNSET  # type: ignore[assignment]
 
     def get_node(self, node_id: int) -> EQNode | None:
         return self._nodes.get(node_id)
@@ -1091,15 +1209,25 @@ def reference_energy(self) -> float | None:
           intended mixed-mode behaviour (see Q2 design decision).
         * When **no** node has free energy, fall back to the minimum
           electronic energy, preserving the original behaviour.
+
+        The result is cached.  :meth:`add_node` resets the cache; code that
+        changes node energies in place must reset ``_ref_energy_cache`` itself.
         """
+        if self._ref_energy_cache is not _UNSET:
+            return self._ref_energy_cache  # type: ignore[return-value]
+
         free_energies = [
             n.free_energy for n in self._nodes.values()
             if n.free_energy is not None
         ]
         if free_energies:
-            return min(free_energies)
-        real_energies = [n.energy for n in self._nodes.values() if n.has_real_energy]
-        return min(real_energies) if real_energies else None
+            result: float | None = min(free_energies)
+        else:
+            real_energies = [n.energy for n in self._nodes.values() if n.has_real_energy]
+            result = min(real_energies) if real_energies else None
+
+        self._ref_energy_cache = result  # type: ignore[assignment]
+        return result
 
     def save(self, filepath: str) -> None:
         data = {
@@ -2285,6 +2413,8 @@ def _flush_node_energy_updates(self) -> None:
                 )
 
         self._pending_node_updates.clear()
+        # Node energies may have changed; invalidate the cached reference energy.
+        self.graph._ref_energy_cache = _UNSET  # type: ignore[assignment]
 
     def _find_or_register_node(
         self,