Skip to content

Commit a4cad67

Browse files
committed
Additional refinments to ensure consistency across CodeEntropy
- Remove duplicated adding and logging results, using `DataLogger` class to exclusivly handle all of the data handling - Restructured how results are diplayed, users will now see the `resname` rather than an arbitary internal counter to differentiate the molecules - Changing of `_args.selection_string` to ensure user's input is not overwritten if selection is not `all` - Updated test cases to reflect the changes made within this commit
1 parent f5c201b commit a4cad67

4 files changed

Lines changed: 191 additions & 203 deletions

File tree

CodeEntropy/config/data_logger.py

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import json
22
import logging
3+
import re
34

45
from tabulate import tabulate
56

@@ -23,13 +24,19 @@ def save_dataframes_as_json(self, molecule_df, residue_df, output_file):
2324
with open(output_file, "w") as out:
2425
json.dump(data, out, indent=4)
2526

26-
def add_results_data(self, molecule, level, type, S_molecule):
27+
def clean_residue_name(self, resname):
28+
"""Ensures residue names are stripped and cleaned before being stored"""
29+
return re.sub(r"[-–—]", "", str(resname))
30+
31+
def add_results_data(self, resname, level, entropy_type, value):
2732
"""Add data for molecule-level entries"""
28-
self.molecule_data.append([molecule, level, type, f"{S_molecule}"])
33+
resname = self.clean_residue_name(resname)
34+
self.molecule_data.append((resname, level, entropy_type, value))
2935

30-
def add_residue_data(self, molecule, residue, type, S_trans_residue):
36+
def add_residue_data(self, resid, resname, level, entropy_type, value):
3137
"""Add data for residue-level entries"""
32-
self.residue_data.append([molecule, residue, type, f"{S_trans_residue}"])
38+
resname = self.clean_residue_name(resname)
39+
self.residue_data.append([resid, resname, level, entropy_type, value])
3340

3441
def log_tables(self):
3542
"""Log both tables at once"""
@@ -38,8 +45,10 @@ def log_tables(self):
3845
logger.info("Molecule Data Table:")
3946
table_str = tabulate(
4047
self.molecule_data,
41-
headers=["Molecule ID", "Level", "Type", "Result (J/mol/K)"],
48+
headers=["Residue Name", "Level", "Type", "Result (J/mol/K)"],
4249
tablefmt="grid",
50+
numalign="center",
51+
stralign="center",
4352
)
4453
logger.info(f"\n{table_str}")
4554

@@ -48,7 +57,15 @@ def log_tables(self):
4857
logger.info("Residue Data Table:")
4958
table_str = tabulate(
5059
self.residue_data,
51-
headers=["Molecule ID", "Residue", "Type", "Result (J/mol/K)"],
60+
headers=[
61+
"Residue ID",
62+
"Residue Name",
63+
"Level",
64+
"Type",
65+
"Result (J/mol/K)",
66+
],
5267
tablefmt="grid",
68+
numalign="center",
69+
stralign="center",
5370
)
5471
logger.info(f"\n{table_str}")

CodeEntropy/entropy.py

Lines changed: 102 additions & 105 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import logging
22
import math
3+
from collections import defaultdict
34

45
import numpy as np
56
import pandas as pd
@@ -33,25 +34,6 @@ def __init__(self, run_manager, args, universe, data_logger, level_manager):
3334
self._level_manager = level_manager
3435
self._GAS_CONST = 8.3144598484848
3536

36-
self._results_df = pd.DataFrame(
37-
columns=["Molecule ID", "Level", "Type", "Result"]
38-
)
39-
self._residue_results_df = pd.DataFrame(
40-
columns=["Molecule ID", "Residue", "Type", "Result"]
41-
)
42-
43-
@property
44-
def results_df(self):
45-
"""Returns the dataframe containing entropy results at all levels."""
46-
return self._results_df
47-
48-
@property
49-
def residue_results_df(self):
50-
"""
51-
Returns the dataframe containing united-atom level results for each residue.
52-
"""
53-
return self._residue_results_df
54-
5537
def execute(self):
5638
"""
5739
Executes the full entropy computation workflow over selected molecules and
@@ -63,7 +45,11 @@ def execute(self):
6345

6446
if self._universe.select_atoms("water").n_atoms > 0:
6547
self._calculate_water_entropy(self._universe, start, end, step)
66-
self._args.selection_string = "not water"
48+
49+
if self._args.selection_string != "all":
50+
self._args.selection_string += " and not water"
51+
else:
52+
self._args.selection_string = "not water"
6753

6854
reduced_atom = self._get_reduced_universe()
6955
number_molecules, levels = self._level_manager.select_levels(reduced_atom)
@@ -246,19 +232,25 @@ def _process_united_atom_level(
246232
S_rot += S_rot_res
247233
S_conf += S_conf_res
248234

249-
self._log_residue_data(
250-
residue.resid, residue.resname, "Transvibrational", S_trans_res
235+
self._data_logger.add_residue_data(
236+
mol_id, residue.resname, level, "Transvibrational", S_trans_res
251237
)
252-
self._log_residue_data(
253-
residue.resid, residue.resname, "Rovibrational", S_rot_res
238+
self._data_logger.add_residue_data(
239+
mol_id, residue.resname, level, "Rovibrational", S_rot_res
254240
)
255-
self._log_residue_data(
256-
residue.resid, residue.resname, "Conformational", S_conf_res
241+
self._data_logger.add_residue_data(
242+
mol_id, residue.resname, level, "Conformational", S_conf_res
257243
)
258244

259-
self._log_result(mol_id, level, "Transvibrational", S_trans)
260-
self._log_result(mol_id, level, "Rovibrational", S_rot)
261-
self._log_result(mol_id, level, "Conformational", S_conf)
245+
self._data_logger.add_results_data(
246+
residue.resname, level, "Transvibrational", S_trans
247+
)
248+
self._data_logger.add_results_data(
249+
residue.resname, level, "Rovibrational", S_rot
250+
)
251+
self._data_logger.add_results_data(
252+
residue.resname, level, "Conformational", S_conf
253+
)
262254

263255
def _process_vibrational_only_levels(
264256
self, mol_id, mol_container, ve, level, start, end, step, n_frames, highest
@@ -286,9 +278,13 @@ def _process_vibrational_only_levels(
286278
S_rot = ve.vibrational_entropy_calculation(
287279
torque_matrix, "torque", self._args.temperature, highest
288280
)
289-
290-
self._log_result(mol_id, level, "Transvibrational", S_trans)
291-
self._log_result(mol_id, level, "Rovibrational", S_rot)
281+
residue = mol_container.residues[mol_id]
282+
self._data_logger.add_results_data(
283+
residue.resname, level, "Transvibrational", S_trans
284+
)
285+
self._data_logger.add_results_data(
286+
residue.resname, level, "Rovibrational", S_rot
287+
)
292288

293289
def _process_conformational_residue_level(
294290
self, mol_id, mol_container, ce, level, start, end, step, n_frames
@@ -310,71 +306,48 @@ def _process_conformational_residue_level(
310306
S_conf = ce.conformational_entropy_calculation(
311307
mol_container, dihedrals, bin_width, start, end, step, n_frames
312308
)
313-
self._log_result(mol_id, level, "Conformational", S_conf)
309+
residue = mol_container.residues[mol_id]
310+
self._data_logger.add_results_data(
311+
residue.resname, level, "Conformational", S_conf
312+
)
314313

315314
def _finalize_molecule_results(self):
316315
"""
317-
Summarizes entropy for a molecule and saves results to file.
318-
319-
Args:
320-
mol_id (int): ID of the molecule.
321-
"""
322-
logger.info(f"len(self._results_df) {len(self._results_df)}")
323-
for mol_id in range(len(self._results_df)):
324-
S_total = self._results_df[self._results_df["Molecule ID"] == mol_id][
325-
"Result"
326-
].sum()
327-
self._log_result(
328-
mol_id, "Molecule Total", "Molecule Total Entropy", S_total
329-
)
330-
self._data_logger.save_dataframes_as_json(
331-
self._results_df, self._residue_results_df, self._args.output_file
332-
)
333-
334-
def _log_result(self, mol_id, level, entropy_type, value):
316+
Aggregates and logs total entropy per molecule using residue_data grouped by
317+
resid.
335318
"""
336-
Logs and stores a single entropy value in the global results dataframe.
319+
entropy_by_molecule = defaultdict(float)
337320

338-
Args:
339-
mol_id (int): Molecule ID.
340-
level (str): Entropy level or type.
341-
entropy_type (str): Type of entropy (e.g., 'Transvibrational').
342-
value (float): Entropy value.
343-
"""
344-
row = pd.DataFrame(
345-
{
346-
"Molecule ID": [mol_id],
347-
"Level": [level],
348-
"Type": [f"{entropy_type} (J/mol/K)"],
349-
"Result": [value],
350-
}
351-
)
352-
if self.results_df.empty:
353-
self._results_df = pd.concat([self._results_df, row], ignore_index=True)
354-
self._data_logger.add_results_data(mol_id, level, entropy_type, value)
321+
for mol_id, level, entropy_type, result in self._data_logger.molecule_data:
322+
if level != "Molecule Total":
323+
try:
324+
entropy_by_molecule[mol_id] += float(result)
325+
except ValueError:
326+
logger.warning(f"Skipping invalid entry: {mol_id}, {result}")
355327

356-
def _log_residue_data(self, mol_id, residue_id, entropy_type, value):
357-
"""
358-
Logs and stores per-residue entropy data.
328+
for mol_id, total_entropy in entropy_by_molecule.items():
329+
self._data_logger.molecule_data.append(
330+
(mol_id, "Molecule Total", "Molecule Total Entropy", total_entropy)
331+
)
359332

360-
Args:
361-
mol_id (int): Molecule ID.
362-
residue_id (int): Residue index within the molecule.
363-
entropy_type (str): Entropy category.
364-
value (float): Entropy value.
365-
"""
366-
row = pd.DataFrame(
367-
{
368-
"Molecule ID": [mol_id],
369-
"Residue": [residue_id],
370-
"Type": [f"{entropy_type} (J/mol/K)"],
371-
"Result": [value],
372-
}
373-
)
374-
self._residue_results_df = pd.concat(
375-
[self._residue_results_df, row], ignore_index=True
333+
# Save to file
334+
self._data_logger.save_dataframes_as_json(
335+
pd.DataFrame(
336+
self._data_logger.molecule_data,
337+
columns=["Molecule ID", "Level", "Type", "Result (J/mol/K)"],
338+
),
339+
pd.DataFrame(
340+
self._data_logger.residue_data,
341+
columns=[
342+
"Residue ID",
343+
"Residue Name",
344+
"Level",
345+
"Type",
346+
"Result (J/mol/K)",
347+
],
348+
),
349+
self._args.output_file,
376350
)
377-
self._data_logger.add_residue_data(mol_id, residue_id, entropy_type, value)
378351

379352
def _calculate_water_entropy(self, universe, start, end, step):
380353
"""
@@ -398,14 +371,10 @@ def _calculate_water_entropy(self, universe, start, end, step):
398371
# Aggregate entropy components per molecule
399372
results = {}
400373

401-
for _, row in self._residue_results_df.iterrows():
402-
entropy_type = row["Type"].split()[0]
403-
value = row["Result"]
404-
405-
if entropy_type == "Orientational":
406-
mol_id = row["Molecule ID"]
407-
else:
408-
mol_id = row["Residue"].split("_")[0]
374+
for row in self._data_logger.residue_data:
375+
mol_id = row[1]
376+
entropy_type = row[3].split()[0]
377+
value = float(row[4])
409378

410379
if mol_id not in results:
411380
results[mol_id] = {
@@ -421,11 +390,11 @@ def _calculate_water_entropy(self, universe, start, end, step):
421390
total = 0.0
422391
for entropy_type in ["Orientational", "Transvibrational", "Rovibrational"]:
423392
S_component = components[entropy_type]
424-
self._log_result(mol_id, "water", entropy_type, S_component)
393+
self._data_logger.add_results_data(
394+
mol_id, "water", entropy_type, S_component
395+
)
425396
total += S_component
426397

427-
self._log_result(mol_id, "Molecule Total", "Molecule Total Entropy", total)
428-
429398
def _calculate_water_orientational_entropy(self, Sorient_dict):
430399
"""
431400
Logs orientational entropy values directly from Sorient_dict.
@@ -434,25 +403,53 @@ def _calculate_water_orientational_entropy(self, Sorient_dict):
434403
for resname, values in resname_dict.items():
435404
if isinstance(values, list) and len(values) == 2:
436405
Sor, count = values
437-
self._log_residue_data(resname, resid, "Orientational", Sor)
406+
self._data_logger.add_residue_data(
407+
resid, resname, "Water", "Orientational", Sor
408+
)
438409

439410
def _calculate_water_vibrational_translational_entropy(self, vibrations):
440411
"""
441412
Logs summed translational entropy values per residue-solvent pair.
442413
"""
443-
for (resid, mol_id), entropy in vibrations.translational_S.items():
414+
for (solute_id, _), entropy in vibrations.translational_S.items():
444415
if isinstance(entropy, (list, np.ndarray)):
445416
entropy = float(np.sum(entropy))
446-
self._log_residue_data(mol_id, f"{resid}", "Transvibrational", entropy)
417+
418+
if "_" in solute_id:
419+
resname, resid_str = solute_id.rsplit("_", 1)
420+
try:
421+
resid = int(resid_str)
422+
except ValueError:
423+
resid = -1
424+
else:
425+
resname = solute_id
426+
resid = -1
427+
428+
self._data_logger.add_residue_data(
429+
resid, resname, "Water", "Transvibrational", entropy
430+
)
447431

448432
def _calculate_water_vibrational_rotational_entropy(self, vibrations):
449433
"""
450434
Logs summed rotational entropy values per residue-solvent pair.
451435
"""
452-
for (resid, mol_id), entropy in vibrations.rotational_S.items():
436+
for (solute_id, _), entropy in vibrations.rotational_S.items():
453437
if isinstance(entropy, (list, np.ndarray)):
454438
entropy = float(np.sum(entropy))
455-
self._log_residue_data(mol_id, f"{resid}", "Rovibrational", entropy)
439+
440+
if "_" in solute_id:
441+
resname, resid_str = solute_id.rsplit("_", 1)
442+
try:
443+
resid = int(resid_str)
444+
except ValueError:
445+
resid = -1
446+
else:
447+
resname = solute_id
448+
resid = -1
449+
450+
self._data_logger.add_residue_data(
451+
resid, resname, "Water", "Rovibrational", entropy
452+
)
456453

457454

458455
class VibrationalEntropy(EntropyManager):

tests/test_CodeEntropy/test_data_logger.py

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -53,20 +53,23 @@ def test_add_results_data(self):
5353
Test that add_results_data correctly appends a molecule-level entry.
5454
"""
5555
self.logger.add_results_data(
56-
0, "united_atom", "Transvibrational (J/mol/K)", 653.404
56+
0, "united_atom", "Transvibrational", 653.4041220313459
5757
)
5858
self.assertEqual(
5959
self.logger.molecule_data,
60-
[[0, "united_atom", "Transvibrational (J/mol/K)", "653.404"]],
60+
[("0", "united_atom", "Transvibrational", 653.4041220313459)],
6161
)
6262

6363
def test_add_residue_data(self):
6464
"""
6565
Test that add_residue_data correctly appends a residue-level entry.
6666
"""
67-
self.logger.add_residue_data(0, 0, "Transvibrational (J/mol/K)", 122.612)
67+
self.logger.add_residue_data(
68+
0, "DA", "united_atom", "Transvibrational", 122.61216935211893
69+
)
6870
self.assertEqual(
69-
self.logger.residue_data, [[0, 0, "Transvibrational (J/mol/K)", "122.612"]]
71+
self.logger.residue_data,
72+
[[0, "DA", "united_atom", "Transvibrational", 122.61216935211893]],
7073
)
7174

7275
def test_save_dataframes_as_json(self):
@@ -124,9 +127,11 @@ def test_log_tables(self, mock_logger):
124127
logger.
125128
"""
126129
self.logger.add_results_data(
127-
0, "united_atom", "Transvibrational (J/mol/K)", 653.404
130+
0, "united_atom", "Transvibrational", 653.4041220313459
131+
)
132+
self.logger.add_residue_data(
133+
0, "DA", "united_atom", "Transvibrational", 122.61216935211893
128134
)
129-
self.logger.add_residue_data(0, 0, "Transvibrational (J/mol/K)", 122.612)
130135

131136
self.logger.log_tables()
132137

0 commit comments

Comments
 (0)