Skip to content

Commit 39e763a

Browse files
committed
Revert all functionality based around top-level dataframe access
Let's not reinvent the wheel when accessing the final merged dataframe with all the data. Instead, give the user direct access to that dataframe through the `data` property and let them use the pandas API to do whatever they need from there.
1 parent f1eb793 commit 39e763a

2 files changed

Lines changed: 6 additions & 222 deletions

File tree

src/nuclearmasses/mass_table.py

Lines changed: 5 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
import importlib.resources
2-
import typing
32

43
import pandas as pd
54

@@ -13,48 +12,8 @@ class MassTable:
1312
Internally there are separate dataframes for the NUBASE and AME data as well as a combined one for all data
1413
"""
1514

16-
def __init__(self, df: pd.DataFrame | None = None, filters: list[tuple[str, str, typing.Any]] | None = None):
17-
self._original_df: pd.DataFrame = self._parse_files() if df is None else df
18-
self._filters: list[tuple[str, str, typing.Any]] = filters or []
19-
self._create_dynamic_getters()
20-
21-
def __repr__(self) -> str:
22-
"""Make printing the class object show the DataFrame nicely"""
23-
return repr(self.df)
24-
25-
def __str__(self) -> str:
26-
"""Make printing the class object show the DataFrame nicely"""
27-
return str(self.df)
28-
29-
def __getattr__(self, attr: str) -> typing.Any:
30-
"""Delegate pandas methods for deeper chaining"""
31-
if hasattr(self.df, attr):
32-
return getattr(self.df, attr)
33-
raise AttributeError(f"'{type(self).__name__}' object has no attribute '{attr}'")
34-
35-
def __dir__(self):
36-
"""Pass the pandas api through so we can get autocomplete"""
37-
return sorted(set(list(self.__dict__.keys()) + dir(type(self)) + dir(self.df)))
38-
39-
# def __len__(self) -> int:
40-
# return len(self.df)
41-
#
42-
# def __iter__(self):
43-
# return iter(self.df)
44-
#
45-
def __getitem__(self, key):
46-
return type(self)(df=self.df[key])
47-
48-
def get(self, column: str, value: typing.Any) -> typing.Self:
49-
"""Generic getter: all_data.get('A', 123)"""
50-
new_filters = self._filters + [(column, "==", value)]
51-
return type(self)(df=self._original_df, filters=new_filters)
52-
53-
def filter(self, expr: str) -> typing.Self:
54-
"""Allow arbitrary pandas .query() expressions."""
55-
# For simplicity we still store as tuples; you could store raw expressions too
56-
new_filters = self._filters + [("query", expr, None)]
57-
return type(self)(df=self._original_df, filters=new_filters)
15+
def __init__(self) -> None:
16+
self._complete_df: pd.DataFrame = self._parse_files()
5817

5918
def _parse_files(self) -> pd.DataFrame:
6019
data_path = importlib.resources.files("nuclearmasses").joinpath("data")
@@ -64,32 +23,6 @@ def _parse_files(self) -> pd.DataFrame:
6423
return pd.merge(AME(data_path).ame_df, NUBASE(data_path).nubase_df, on=common_columns, how="outer")
6524

6625
@property
67-
def df(self) -> pd.DataFrame:
68-
"""Apply all filters only when .df is accessed"""
69-
result = self._original_df
70-
for key, op, val in self._filters:
71-
if op == "==":
72-
# Filter on the index
73-
if key == self._original_df.index.name:
74-
result = result[result.index == val]
75-
# Filter on a regular column
76-
else:
77-
result = result[result[key] == val]
78-
elif key == "query":
79-
result = result.query(op)
80-
return result
81-
82-
def _create_dynamic_getters(self):
83-
"""Automatically create get_colname(value) methods for every column."""
84-
for col in self._original_df.columns:
85-
method_name = f"get_{col}"
86-
87-
def make_getter(column: str):
88-
def getter(self, value: typing.Any) -> MassTable:
89-
new_filters = self._filters + [(column, "==", value)]
90-
return type(self)(df=self._original_df, filters=new_filters)
91-
92-
return getter
93-
94-
# Attach the method to the class/instance
95-
setattr(self, method_name, make_getter(col).__get__(self, MassTable))
26+
def data(self) -> pd.DataFrame:
27+
"""Access the complete mass table dataframe"""
28+
return self._complete_df

tests/test_mass_table.py

Lines changed: 1 addition & 150 deletions
Original file line numberDiff line numberDiff line change
@@ -1,157 +1,8 @@
1-
import pandas as pd
2-
import pandas.testing as pdt
3-
import pytest
4-
51
from nuclearmasses.mass_table import MassTable
62

73

8-
@pytest.fixture
9-
def empty_frame():
10-
return MassTable(df=pd.DataFrame())
11-
12-
134
def test_initial_complete_parse():
14-
data = MassTable().df
5+
data = MassTable().data
156
expected_shape = (21421, 50)
167

178
assert expected_shape == data.shape
18-
19-
20-
def test_getter_creation():
21-
cols = ["Mass", "Error", "Param", "RandomLongerString"]
22-
test_frame = pd.DataFrame.from_dict(data=dict.fromkeys(cols, [0]))
23-
24-
df = MassTable(df=test_frame)
25-
26-
for name in cols:
27-
f = f"get_{name}"
28-
assert hasattr(df, f)
29-
assert callable(getattr(df, f))
30-
31-
32-
def test_getter_not_created(empty_frame):
33-
with pytest.raises(AttributeError):
34-
empty_frame.get_Nothing()
35-
36-
37-
def test_empty_filter(empty_frame):
38-
assert len(empty_frame._filters) == 0
39-
40-
41-
def test_unique_and_sorted_dir(empty_frame):
42-
output = dir(empty_frame)
43-
assert output == sorted(output)
44-
assert len(output) == len(set(output))
45-
46-
47-
def test_dir_includes_class_attributes(empty_frame):
48-
output = dir(empty_frame)
49-
50-
assert "_parse_files" in output
51-
assert "get" in output
52-
53-
54-
def test_dir_includes_pandas_attributes(empty_frame):
55-
output = dir(empty_frame)
56-
57-
assert "describe" in output
58-
assert "head" in output
59-
60-
61-
def test_manually_populated_filter():
62-
cols = ["ManualParameter"]
63-
test_frame = pd.DataFrame.from_dict(data=dict.fromkeys(cols, [0]))
64-
65-
my_filter = [(cols[0], "==", 5)]
66-
67-
df = MassTable(df=test_frame, filters=my_filter)
68-
69-
assert len(df._filters) == 1
70-
assert df._filters == my_filter
71-
72-
73-
def test_auto_populated_filter():
74-
cols = ["AutoParameter"]
75-
test_frame = pd.DataFrame.from_dict(data=dict.fromkeys(cols, [0]))
76-
77-
df = MassTable(df=test_frame)
78-
79-
val = 2
80-
f_df = df.get_AutoParameter(val)
81-
82-
assert len(f_df._filters) == 1
83-
assert f_df._filters == [(cols[0], "==", val)]
84-
85-
86-
def test_getter_on_index():
87-
cols = ["Mass", "Error", "Param", "RandomLongerString"]
88-
test_frame = pd.DataFrame.from_dict(data=dict.fromkeys(cols, [0]))
89-
test_frame.set_index("Param")
90-
91-
m_df = MassTable(df=test_frame)
92-
m_df = m_df.get_Param(0).df
93-
94-
expected = pd.DataFrame(
95-
{
96-
"Mass": [0],
97-
"Error": [0],
98-
"Param": [0],
99-
"RandomLongerString": [0],
100-
}
101-
)
102-
103-
pdt.assert_frame_equal(m_df, expected, check_like=True)
104-
105-
106-
def test_access_property():
107-
cols = ["Mass", "Error", "Param", "RandomLongerString"]
108-
test_frame = pd.DataFrame.from_dict(data=dict.fromkeys(cols, [0]))
109-
110-
m_df = MassTable(df=test_frame).df
111-
112-
expected = pd.DataFrame(
113-
{
114-
"Mass": [0],
115-
"Error": [0],
116-
"Param": [0],
117-
"RandomLongerString": [0],
118-
}
119-
)
120-
121-
pdt.assert_frame_equal(m_df, expected, check_like=True)
122-
123-
124-
def test_generic_getter():
125-
cols = ["Mass", "Error", "Param", "RandomLongerString"]
126-
test_frame = pd.DataFrame.from_dict(data=dict.fromkeys(cols, [0]))
127-
128-
m_df = MassTable(df=test_frame).get("Error", 0).df
129-
130-
expected = pd.DataFrame(
131-
{
132-
"Mass": [0],
133-
"Error": [0],
134-
"Param": [0],
135-
"RandomLongerString": [0],
136-
}
137-
)
138-
139-
pdt.assert_frame_equal(m_df, expected, check_like=True)
140-
141-
142-
def test_generic_filter():
143-
cols = ["Mass", "Error", "Param", "RandomLongerString"]
144-
test_frame = pd.DataFrame.from_dict(data=dict.fromkeys(cols, [0]))
145-
146-
m_df = MassTable(df=test_frame).filter("Param == 0").df
147-
148-
expected = pd.DataFrame(
149-
{
150-
"Mass": [0],
151-
"Error": [0],
152-
"Param": [0],
153-
"RandomLongerString": [0],
154-
}
155-
)
156-
157-
pdt.assert_frame_equal(m_df, expected, check_like=True)

0 commit comments

Comments
 (0)