Refactor get_beads to remove padding and simplify force/torque matrix storage

harryswift01 · harryswift01 · commit aa42de480837 · 2025-07-21T16:05:24.000+01:00
- Removed the `_pad_and_add` helper: `get_beads` returns only the actual bead selections without added padding, and `get_matrices` now raises `ValueError` on a shape mismatch instead of zero-padding the matrices.
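- Changed united-atom force matrix, torque matrix, and conformational state storage from large pre-allocated lists with `None` placeholders to dictionaries keyed by `(molecule_id, residue_id)`; residue- and polymer-level storage remains a list indexed by molecule.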
- These changes eliminate unnecessary data padding and improve direct access to relevant matrices.
- The result is cleaner data structures and simpler, more reliable indexing.
diff --git a/CodeEntropy/entropy.py b/CodeEntropy/entropy.py
@@ -61,9 +61,10 @@ def execute(self):
                 self._data_logger.residue_data,
             )
 
+        # Get reduced universe and initialize values
         reduced_atom = self._get_reduced_universe()
         number_molecules, levels = self._level_manager.select_levels(reduced_atom)
-        number_residues = len(reduced_atom.residues)
+        # number_residues = len(reduced_atom.residues)
 
         ve = VibrationalEntropy(
             self._run_manager,
@@ -81,14 +82,14 @@ def execute(self):
         )
 
         # Initialise force and torque matrices
-        force_matrix_ua = [None for _ in range(number_residues)]
+        force_matrix_ua = {}
+        torque_matrix_ua = {}
         force_matrix_res = [None for _ in range(number_molecules)]
-        force_matrix_poly = [None for _ in range(number_molecules)]
-        torque_matrix_ua = [None for _ in range(number_residues)]
         torque_matrix_res = [None for _ in range(number_molecules)]
+        force_matrix_poly = [None for _ in range(number_molecules)]
         torque_matrix_poly = [None for _ in range(number_molecules)]
 
-        states_ua = [None for _ in range(number_residues)]
+        states_ua = {}
         states_res = [None for _ in range(number_molecules)]
 
         for timestep in reduced_atom.trajectory[start:end:step]:
@@ -104,6 +105,8 @@ def execute(self):
                     # Get matrices for vibrational entropy calculations
                     if level == "united_atom":
                         for res_id, residue in enumerate(mol_container.residues):
+                            key = (molecule_id, res_id)
+
                             res_container = self._run_manager.new_U_select_atom(
                                 mol_container,
                                 (
@@ -117,12 +120,13 @@ def execute(self):
                                     level,
                                     number_frames,
                                     highest_level,
-                                    force_matrix_ua[res_id],
-                                    torque_matrix_ua[res_id],
+                                    force_matrix_ua.get(key),
+                                    torque_matrix_ua.get(key),
                                 )
                             )
-                            force_matrix_ua[res_id] = force_matrix
-                            torque_matrix_ua[res_id] = torque_matrix
+
+                            force_matrix_ua[key] = force_matrix
+                            torque_matrix_ua[key] = torque_matrix
 
                     elif level == "residue":
                         force_matrix, torque_matrix = self._level_manager.get_matrices(
@@ -148,18 +152,15 @@ def execute(self):
                         force_matrix_poly[molecule_id] = force_matrix
                         torque_matrix_poly[molecule_id] = torque_matrix
 
-                    # TODO When function is ready
-                    # Get environment information for orientational entropy
-                    # if highest_level:
-                    # number_neighbours = get_neighbours()
-
         # Get states for conformational entropy calculation
         bin_width = self._args.bin_width
         for molecule_id in range(number_molecules):
             mol_container = self._get_molecule_container(reduced_atom, molecule_id)
             for level in levels[molecule_id]:
                 if level == "united_atom":
                     for res_id, residue in enumerate(mol_container.residues):
+                        key = (molecule_id, res_id)
+
                         res_container = self._run_manager.new_U_select_atom(
                             mol_container,
                             (
@@ -173,7 +174,7 @@ def execute(self):
 
                         dihedrals = self._level_manager.get_dihedrals(heavy_res, level)
                         for dihedral in dihedrals:
-                            states_ua[res_id] = ce.assign_conformation(
+                            states_ua[key] = ce.assign_conformation(
                                 heavy_res,
                                 dihedral,
                                 number_frames,
@@ -183,7 +184,7 @@ def execute(self):
                                 step,
                             )
 
-                if level == "residue":
+                elif level == "residue":
                     dihedrals = self._level_manager.get_dihedrals(mol_container, level)
                     for dihedral in dihedrals:
                         states_res[molecule_id] = ce.assign_conformation(
@@ -200,21 +201,21 @@ def execute(self):
         for molecule_id in range(number_molecules):
             mol_container = self._get_molecule_container(reduced_atom, molecule_id)
             for level in levels[molecule_id]:
-                # Identify if level is the highest (molecule) level
                 highest_level = level == levels[molecule_id][-1]
 
                 if level == "united_atom":
                     for res_id, residue in enumerate(mol_container.residues):
+                        key = (molecule_id, res_id)
                         self._process_united_atom_entropy(
                             molecule_id,
                             mol_container,
                             res_id,
                             ve,
                             ce,
                             level,
-                            force_matrix_ua[res_id],
-                            torque_matrix_ua[res_id],
-                            states_ua[res_id],
+                            force_matrix_ua[key],
+                            torque_matrix_ua[key],
+                            states_ua[key],
                             highest_level,
                         )
 
diff --git a/CodeEntropy/levels.py b/CodeEntropy/levels.py
@@ -127,100 +127,64 @@ def get_matrices(
                 data_container, list_of_beads[bead_index], rot_axes
             )
 
-        # Make covariance matrices - looping over pairs of beads
-        # list of pairs of indices
-        pair_list = [(i, j) for i in range(number_beads) for j in range(number_beads)]
-
+        # Create covariance submatrices
         force_submatrix = [
             [0 for _ in range(number_beads)] for _ in range(number_beads)
         ]
         torque_submatrix = [
             [0 for _ in range(number_beads)] for _ in range(number_beads)
         ]
 
-        for i, j in pair_list:
-            # for each pair of beads
-            # reducing effort because the matrix for [i][j] is the transpose of the one
-            # for [j][i]
-            if i <= j:
-                # calculate the force covariance segment of the matrix
+        for i in range(number_beads):
+            for j in range(i, number_beads):
                 f_sub = self.create_submatrix(
                     weighted_forces[i], weighted_forces[j], number_frames
                 )
                 t_sub = self.create_submatrix(
                     weighted_torques[i], weighted_torques[j], number_frames
                 )
-
                 force_submatrix[i][j] = f_sub
                 force_submatrix[j][i] = f_sub.T
                 torque_submatrix[i][j] = t_sub
                 torque_submatrix[j][i] = t_sub.T
 
-        # use np.block to make submatrices into one matrix
+        # Convert block matrices to full matrix
         force_block = np.block(
             [
                 [force_submatrix[i][j] for j in range(number_beads)]
                 for i in range(number_beads)
             ]
         )
-
         torque_block = np.block(
             [
                 [torque_submatrix[i][j] for j in range(number_beads)]
                 for i in range(number_beads)
             ]
         )
 
-        # Accumulate into full matrices, with shape-safe padding if needed
+        # Enforce consistent shape before accumulation
         if force_matrix is None:
             force_matrix = np.zeros_like(force_block)
         elif force_matrix.shape != force_block.shape:
-            force_matrix = self._pad_and_add(force_matrix, force_block)
+            raise ValueError(
+                f"Inconsistent force matrix shape: existing "
+                f"{force_matrix.shape}, new {force_block.shape}"
+            )
         else:
             force_matrix += force_block
 
         if torque_matrix is None:
             torque_matrix = np.zeros_like(torque_block)
         elif torque_matrix.shape != torque_block.shape:
-            torque_matrix = self._pad_and_add(torque_matrix, torque_block)
+            raise ValueError(
+                f"Inconsistent torque matrix shape: existing "
+                f"{torque_matrix.shape}, new {torque_block.shape}"
+            )
         else:
             torque_matrix += torque_block
 
         return force_matrix, torque_matrix
 
-    def _pad_and_add(self, A, B):
-        """
-        Pads two 2D numpy arrays with zeros to match their largest dimensions
-        and returns their element-wise sum.
-
-        Both input arrays A and B can have different shapes. This function
-        creates zero-padded versions of A and B with the shape equal to the
-        maximum number of rows and columns from both arrays, then adds them.
-
-        Parameters
-        ----------
-        A : np.ndarray
-            First 2D array to pad and add.
-        B : np.ndarray
-            Second 2D array to pad and add.
-
-        Returns
-        -------
-        np.ndarray
-            A new 2D array containing the element-wise sum of the padded A and B,
-            with shape (max_rows, max_cols).
-        """
-        max_rows = max(A.shape[0], B.shape[0])
-        max_cols = max(A.shape[1], B.shape[1])
-
-        A_pad = np.zeros((max_rows, max_cols))
-        B_pad = np.zeros((max_rows, max_cols))
-
-        A_pad[: A.shape[0], : A.shape[1]] = A
-        B_pad[: B.shape[0], : B.shape[1]] = B
-
-        return A_pad + B_pad
-
     def get_dihedrals(self, data_container, level):
         """
         Define the set of dihedrals for use in the conformational entropy function.