From 101b11ef60078837d58e1f826c8889b63581e325 Mon Sep 17 00:00:00 2001 From: Ian Cullen Date: Sun, 29 Mar 2026 17:42:40 +0100 Subject: [PATCH 01/26] Refactor filtering to follow DRY --- src/nuclearmasses/mass_table.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/nuclearmasses/mass_table.py b/src/nuclearmasses/mass_table.py index 4b410e9..a86515c 100644 --- a/src/nuclearmasses/mass_table.py +++ b/src/nuclearmasses/mass_table.py @@ -106,8 +106,9 @@ def _combine_all_data(self) -> pd.DataFrame: # 12C has a 0.0 +/ 0.0 mass excess by definition so calculating relative error -> NaN # Set the value to 0.0 as that's what it is - df.loc[(df.Symbol == "C") & (df.A == 12), "NUBASERelativeError"] = 0.0 - df.loc[(df.Symbol == "C") & (df.A == 12), "AMERelativeError"] = 0.0 + mask = (df.Symbol == "C") & (df.A == 12) + df.loc[mask, "NUBASERelativeError"] = 0.0 + df.loc[mask, "AMERelativeError"] = 0.0 # 198Au has a typo in it's decay mode in the 2012 table. It is recorded as '-' df.loc[(df.A == 198) & (df.Z == 79) & (df.TableYear == 2012), "DecayModes"] = "B-" From 292c4f059bc9f0d4aa6e2869558d19f73491be85 Mon Sep 17 00:00:00 2001 From: Ian Cullen Date: Sun, 29 Mar 2026 18:03:31 +0100 Subject: [PATCH 02/26] Use multiple inheritance rather than a chain Applying to the NUBASE set of data to avoid unnecessary dependencies. --- src/nuclearmasses/io/nubase_file.py | 9 +++------ src/nuclearmasses/io/nubase_parse.py | 5 +++-- src/nuclearmasses/utils/converter.py | 4 +++- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/nuclearmasses/io/nubase_file.py b/src/nuclearmasses/io/nubase_file.py index 82035c9..157a3e9 100644 --- a/src/nuclearmasses/io/nubase_file.py +++ b/src/nuclearmasses/io/nubase_file.py @@ -1,7 +1,4 @@ -from nuclearmasses.utils.converter import Converter - - -class NUBASEFile(Converter): +class NUBASEFile: """Easy access to the variables in the NUBASE file. 
The NUBASE data file is formatted by location in the line, values exist @@ -10,9 +7,9 @@ class NUBASEFile(Converter): magic numbers. """ - def __init__(self, year: int): + def __init__(self, year: int, **kwargs): """Setup the values that locate the variable.""" - super().__init__() + super().__init__(**kwargs) match year: case 1995: self.HEADER = 0 diff --git a/src/nuclearmasses/io/nubase_parse.py b/src/nuclearmasses/io/nubase_parse.py index b70eac4..1f695f8 100644 --- a/src/nuclearmasses/io/nubase_parse.py +++ b/src/nuclearmasses/io/nubase_parse.py @@ -5,9 +5,10 @@ import pandas as pd from nuclearmasses.io.nubase_file import NUBASEFile +from nuclearmasses.utils.converter import Converter -class NUBASEParser(NUBASEFile): +class NUBASEParser(NUBASEFile, Converter): """Parse the NUBASE data file. A collection of functions to parse the weird format of the NUBASE file. @@ -15,7 +16,7 @@ class NUBASEParser(NUBASEFile): def __init__(self, filename: pathlib.Path, year: int): """Set the file to read and the table year.""" - super().__init__(year) + super().__init__(year=year) self.filename: pathlib.Path = filename self.year: int = year self.unit_replacements: dict[str, str] = { diff --git a/src/nuclearmasses/utils/converter.py b/src/nuclearmasses/utils/converter.py index 527f3da..1134797 100644 --- a/src/nuclearmasses/utils/converter.py +++ b/src/nuclearmasses/utils/converter.py @@ -10,8 +10,10 @@ class Converter: and the other symbol to Z. 
""" - def __init__(self) -> None: + def __init__(self, **kwargs) -> None: """Construct the symbol -> Z and Z -> symbol dictionaries.""" + # We are using multiple inheritance, so need this for MRO + super().__init__(**kwargs) # fmt: off # Formatter wants to put each item on it's own line, I don't self.z_to_symbol: dict[int, str] = { From e81454c7d8456aed407ccee2cc861257bd6f89d6 Mon Sep 17 00:00:00 2001 From: Ian Cullen Date: Sun, 29 Mar 2026 18:16:27 +0100 Subject: [PATCH 03/26] Tidy up the NUBASE column labelling Columns have been added over time. Rather than have copies of very similar lists, create one that contains all columns and remove as appropriate for each year. --- src/nuclearmasses/io/nubase_parse.py | 125 +++++++++++---------------- 1 file changed, 50 insertions(+), 75 deletions(-) diff --git a/src/nuclearmasses/io/nubase_parse.py b/src/nuclearmasses/io/nubase_parse.py index 1f695f8..63d800e 100644 --- a/src/nuclearmasses/io/nubase_parse.py +++ b/src/nuclearmasses/io/nubase_parse.py @@ -27,92 +27,67 @@ def __init__(self, filename: pathlib.Path, year: int): def _column_names(self) -> list[str]: """Set the column name depending on the year""" - match self.year: - case 1995 | 2003: - return [ - "A", - "Z", - "State", - "NUBASEMassExcess", - "NUBASEMassExcessError", - "IsomerEnergy", - "IsomerEnergyError", - "HalfLifeValue", - "HalfLifeUnit", - "HalfLifeError", - "Spin", - "DecayModes", - ] - case _: - return [ - "A", - "Z", - "State", - "NUBASEMassExcess", - "NUBASEMassExcessError", - "IsomerEnergy", - "IsomerEnergyError", - "HalfLifeValue", - "HalfLifeUnit", - "HalfLifeError", - "Spin", - "DiscoveryYear", - "DecayModes", - ] + col_names = [ + "A", + "Z", + "State", + "NUBASEMassExcess", + "NUBASEMassExcessError", + "IsomerEnergy", + "IsomerEnergyError", + "HalfLifeValue", + "HalfLifeUnit", + "HalfLifeError", + "Spin", + "DiscoveryYear", + "DecayModes", + ] + + # The discovery year was added after 2003, and I assume it will be there in the future, so we 
will set up + # as if it is always present and delete for the first two tables. + if self.year == 1995 or self.year == 2003: + col_names.remove("DiscoveryYear") + + return col_names def _data_types(self) -> dict: """Set the data type depending on the year""" - match self.year: - case 1995 | 2003: - return { - "Symbol": "string", - "A": "Int64", - "Z": "Int64", - "N": "Int64", - "Experimental": "boolean", - # "State": "Int64", - "NUBASEMassExcess": "float64", - "NUBASEMassExcessError": "float64", - # "IsomerEnergy": "float64", - # "IsomerEnergyError": "float64", - "HalfLifeValue": "float64", - "HalfLifeUnit": "string", - "HalfLifeError": "float64", - "HalfLifeSeconds": "float64", - "HalfLifeErrorSeconds": "float64", - "Spin": "string", - "DecayModes": "string", - } - case _: - return { - "Symbol": "string", - "A": "Int64", - "Z": "Int64", - "N": "Int64", - "Experimental": "boolean", - # "State": "Int64", - "NUBASEMassExcess": "float64", - "NUBASEMassExcessError": "float64", - # "IsomerEnergy": "float64", - # "IsomerEnergyError": "float64", - "HalfLifeValue": "float64", - "HalfLifeUnit": "string", - "HalfLifeError": "float64", - "HalfLifeSeconds": "float64", - "HalfLifeErrorSeconds": "float64", - "Spin": "string", - "DiscoveryYear": "Int64", - "DecayModes": "string", - } + data_types = { + "Symbol": "string", + "A": "Int64", + "Z": "Int64", + "N": "Int64", + "Experimental": "boolean", + # "State": "Int64", + "NUBASEMassExcess": "float64", + "NUBASEMassExcessError": "float64", + # "IsomerEnergy": "float64", + # "IsomerEnergyError": "float64", + "HalfLifeValue": "float64", + "HalfLifeUnit": "string", + "HalfLifeError": "float64", + "HalfLifeSeconds": "float64", + "HalfLifeErrorSeconds": "float64", + "Spin": "string", + "DiscoveryYear": "Int64", + "DecayModes": "string", + } + + # The discovery year was added after 2003, and I assume it will be there in the future, so we will set up + # as if it is always present and delete for the first two tables. 
+ if self.year == 1995 or self.year == 2003: + data_types.pop("DiscoveryYear") + + return data_types def _na_values(self) -> dict: """Set the columns that have placeholder values""" match self.year: case 1995: return { + "State": [""], "NUBASEMassExcess": [""], "NUBASEMassExcessError": [""], - "State": [""], "HalfLifeValue": [""], "HalfLifeUnit": [""], "HalfLifeError": [""], From 9547733ee9c63ff29c2a06338748e8197dca4211 Mon Sep 17 00:00:00 2001 From: Ian Cullen Date: Sun, 29 Mar 2026 18:27:15 +0100 Subject: [PATCH 04/26] AME mass file inheritance refactor --- src/nuclearmasses/io/ame_mass_file.py | 9 +++------ src/nuclearmasses/io/ame_mass_parse.py | 5 +++-- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/src/nuclearmasses/io/ame_mass_file.py b/src/nuclearmasses/io/ame_mass_file.py index 3acb33d..0e9b7fc 100644 --- a/src/nuclearmasses/io/ame_mass_file.py +++ b/src/nuclearmasses/io/ame_mass_file.py @@ -1,11 +1,8 @@ -from nuclearmasses.utils.converter import Converter - - -class AMEMassFile(Converter): +class AMEMassFile: """Easy access to the variables in the AME mass file.""" - def __init__(self, year: int): - super().__init__() + def __init__(self, year: int, **kwargs): + super().__init__(**kwargs) match year: case 1983: self.HEADER = 35 diff --git a/src/nuclearmasses/io/ame_mass_parse.py b/src/nuclearmasses/io/ame_mass_parse.py index 371cb1d..8c11616 100644 --- a/src/nuclearmasses/io/ame_mass_parse.py +++ b/src/nuclearmasses/io/ame_mass_parse.py @@ -4,9 +4,10 @@ import pandas as pd from nuclearmasses.io.ame_mass_file import AMEMassFile +from nuclearmasses.utils.converter import Converter -class AMEMassParser(AMEMassFile): +class AMEMassParser(AMEMassFile, Converter): """Parse the AME mass file. The format is known but the provided string does not match all lines. 
@@ -16,9 +17,9 @@ class AMEMassParser(AMEMassFile): def __init__(self, filename: pathlib.Path, year: int): """Set the file to read and table year""" + super().__init__(year=year) self.filename: pathlib.Path = filename self.year: int = year - super().__init__(self.year) logging.info(f"Reading {self.filename} from {self.year}") def _column_names(self) -> list[str]: From 132073d7d18539018a3ddc5d7d00683a8b71ea8f Mon Sep 17 00:00:00 2001 From: Ian Cullen Date: Sun, 29 Mar 2026 18:29:45 +0100 Subject: [PATCH 05/26] AME reaction file 1 inheritance refactor --- src/nuclearmasses/io/ame_reaction_1_file.py | 9 +++------ src/nuclearmasses/io/ame_reaction_1_parse.py | 5 +++-- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/src/nuclearmasses/io/ame_reaction_1_file.py b/src/nuclearmasses/io/ame_reaction_1_file.py index 0e0ca64..9a0de75 100644 --- a/src/nuclearmasses/io/ame_reaction_1_file.py +++ b/src/nuclearmasses/io/ame_reaction_1_file.py @@ -1,12 +1,9 @@ -from nuclearmasses.utils.converter import Converter - - -class AMEReactionFileOne(Converter): +class AMEReactionFileOne: """Easy access to the variables in the first AME reaction file.""" - def __init__(self, year: int): + def __init__(self, year: int, **kwargs): """Setup the values that locate the variable.""" - super().__init__() + super().__init__(**kwargs) match year: case 1983: self.HEADER = 30 diff --git a/src/nuclearmasses/io/ame_reaction_1_parse.py b/src/nuclearmasses/io/ame_reaction_1_parse.py index 943be64..f8bc57f 100644 --- a/src/nuclearmasses/io/ame_reaction_1_parse.py +++ b/src/nuclearmasses/io/ame_reaction_1_parse.py @@ -4,9 +4,10 @@ import pandas as pd from nuclearmasses.io.ame_reaction_1_file import AMEReactionFileOne +from nuclearmasses.utils.converter import Converter -class AMEReactionParserOne(AMEReactionFileOne): +class AMEReactionParserOne(AMEReactionFileOne, Converter): """Parse the first AME reaction file. The format is known but I don't think python can easily parse it. 
@@ -14,9 +15,9 @@ class AMEReactionParserOne(AMEReactionFileOne): def __init__(self, filename: pathlib.Path, year: int): """Set the file to read and table year.""" + super().__init__(year=year) self.filename = filename self.year = year - super().__init__(self.year) logging.info(f"Reading {self.filename} from {self.year}") def _column_names(self) -> list[str]: From 5f98806fa0553877b7d6a1f5d7e5bdd8050c749d Mon Sep 17 00:00:00 2001 From: Ian Cullen Date: Sun, 29 Mar 2026 18:33:31 +0100 Subject: [PATCH 06/26] AME reaction file 2 inheritance refactor --- src/nuclearmasses/io/ame_reaction_2_file.py | 9 +++------ src/nuclearmasses/io/ame_reaction_2_parse.py | 5 +++-- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/src/nuclearmasses/io/ame_reaction_2_file.py b/src/nuclearmasses/io/ame_reaction_2_file.py index 871d95b..8aa5263 100644 --- a/src/nuclearmasses/io/ame_reaction_2_file.py +++ b/src/nuclearmasses/io/ame_reaction_2_file.py @@ -1,11 +1,8 @@ -from nuclearmasses.utils.converter import Converter - - -class AMEReactionFileTwo(Converter): +class AMEReactionFileTwo: """Easy access to the variables in the second AME reaction file.""" - def __init__(self, year: int): - super().__init__() + def __init__(self, year: int, **kwargs): + super().__init__(**kwargs) match year: case 1983: self.HEADER = 30 diff --git a/src/nuclearmasses/io/ame_reaction_2_parse.py b/src/nuclearmasses/io/ame_reaction_2_parse.py index 27a2f51..5c273f0 100644 --- a/src/nuclearmasses/io/ame_reaction_2_parse.py +++ b/src/nuclearmasses/io/ame_reaction_2_parse.py @@ -4,9 +4,10 @@ import pandas as pd from nuclearmasses.io.ame_reaction_2_file import AMEReactionFileTwo +from nuclearmasses.utils.converter import Converter -class AMEReactionParserTwo(AMEReactionFileTwo): +class AMEReactionParserTwo(AMEReactionFileTwo, Converter): """Parse the second AME reaction file. The format is known but I don't think python can easily parse it. 
@@ -14,9 +15,9 @@ class AMEReactionParserTwo(AMEReactionFileTwo): def __init__(self, filename: pathlib.Path, year: int): """Set the file to read and table year.""" + super().__init__(year=year) self.filename = filename self.year = year - super().__init__(self.year) logging.info(f"Reading {self.filename} from {self.year}") def _column_names(self) -> list[str]: From c229570be777dcfc9d704ce46e4f9eed1cc1ccc1 Mon Sep 17 00:00:00 2001 From: Ian Cullen Date: Sun, 29 Mar 2026 18:39:46 +0100 Subject: [PATCH 07/26] Remove redundant year checks We set this up to help our future selves if columns ever changed. We know they currently don't so there is no need to check the year if we will always do the same thing. Use the helper function we created to convert Z to symbol, rather than the raw dictionary access. --- src/nuclearmasses/io/ame_mass_parse.py | 84 ++++++++++++-------------- 1 file changed, 39 insertions(+), 45 deletions(-) diff --git a/src/nuclearmasses/io/ame_mass_parse.py b/src/nuclearmasses/io/ame_mass_parse.py index 8c11616..5b476a2 100644 --- a/src/nuclearmasses/io/ame_mass_parse.py +++ b/src/nuclearmasses/io/ame_mass_parse.py @@ -24,56 +24,50 @@ def __init__(self, filename: pathlib.Path, year: int): def _column_names(self) -> list[str]: """Set the column name depending on the year""" - match self.year: - case _: - return [ - "Z", - "A", - "AMEMassExcess", - "AMEMassExcessError", - "BindingEnergyPerA", - "BindingEnergyPerAError", - "BetaDecayEnergy", - "BetaDecayEnergyError", - "AtomicNumber", - "AtomicMass", - "AtomicMassError", - ] + return [ + "Z", + "A", + "AMEMassExcess", + "AMEMassExcessError", + "BindingEnergyPerA", + "BindingEnergyPerAError", + "BetaDecayEnergy", + "BetaDecayEnergyError", + "AtomicNumber", + "AtomicMass", + "AtomicMassError", + ] def _data_types(self) -> dict: """Set the data type depending on the year""" - match self.year: - case _: - return { - "TableYear": "Int64", - "Symbol": "string", - "N": "Int64", - "Z": "Int64", - "A": "Int64", - 
"AMEMassExcess": "float64", - "AMEMassExcessError": "float64", - "BindingEnergyPerA": "float64", - "BindingEnergyPerAError": "float64", - "BetaDecayEnergy": "float64", - "BetaDecayEnergyError": "float64", - "AtomicMass": "float64", - "AtomicMassError": "float64", - } + return { + "TableYear": "Int64", + "Symbol": "string", + "N": "Int64", + "Z": "Int64", + "A": "Int64", + "AMEMassExcess": "float64", + "AMEMassExcessError": "float64", + "BindingEnergyPerA": "float64", + "BindingEnergyPerAError": "float64", + "BetaDecayEnergy": "float64", + "BetaDecayEnergyError": "float64", + "AtomicMass": "float64", + "AtomicMassError": "float64", + } def _na_values(self) -> dict: """Set the columns that have placeholder values""" - match self.year: - case 1983: - return { - "A": [""], - "BetaDecayEnergy": ["", "*"], - "BetaDecayEnergyError": ["", "*"], - } - case _: - return { - "BetaDecayEnergy": ["", "*"], - "BetaDecayEnergyError": ["", "*"], - } + na_vals = { + "A": [""], + "BetaDecayEnergy": ["", "*"], + "BetaDecayEnergyError": ["", "*"], + } + + if self.year != 1983: + na_vals.pop("A") + + return na_vals def read_file(self) -> pd.DataFrame: """Read the file using it's known format @@ -123,6 +117,6 @@ def read_file(self) -> pd.DataFrame: df["TableYear"] = self.year df["N"] = pd.to_numeric(df["A"]) - pd.to_numeric(df["Z"]) - df["Symbol"] = pd.to_numeric(df["Z"]).map(self.z_to_symbol) + df["Symbol"] = pd.to_numeric(df["Z"]).map(self.get_symbol) return df.astype(self._data_types()) From f182c82872eebf6af321ee6af99b1be85425beb2 Mon Sep 17 00:00:00 2001 From: Ian Cullen Date: Sun, 29 Mar 2026 18:43:17 +0100 Subject: [PATCH 08/26] Use the function rather than raw dictionary access --- src/nuclearmasses/io/ame_reaction_1_parse.py | 2 +- src/nuclearmasses/io/ame_reaction_2_parse.py | 2 +- src/nuclearmasses/io/nubase_parse.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/nuclearmasses/io/ame_reaction_1_parse.py b/src/nuclearmasses/io/ame_reaction_1_parse.py 
index f8bc57f..ff08ba0 100644 --- a/src/nuclearmasses/io/ame_reaction_1_parse.py +++ b/src/nuclearmasses/io/ame_reaction_1_parse.py @@ -114,6 +114,6 @@ def read_file(self) -> pd.DataFrame: df["TableYear"] = self.year df["N"] = pd.to_numeric(df["A"]) - pd.to_numeric(df["Z"]) - df["Symbol"] = pd.to_numeric(df["Z"]).map(self.z_to_symbol) + df["Symbol"] = pd.to_numeric(df["Z"]).map(self.get_symbol) return df.astype(self._data_types()) diff --git a/src/nuclearmasses/io/ame_reaction_2_parse.py b/src/nuclearmasses/io/ame_reaction_2_parse.py index 5c273f0..97c13af 100644 --- a/src/nuclearmasses/io/ame_reaction_2_parse.py +++ b/src/nuclearmasses/io/ame_reaction_2_parse.py @@ -118,6 +118,6 @@ def read_file(self) -> pd.DataFrame: # Repeated column heading also means we have to cast to create new columns df["TableYear"] = self.year df["N"] = pd.to_numeric(df["A"]) - pd.to_numeric(df["Z"]) - df["Symbol"] = pd.to_numeric(df["Z"]).map(self.z_to_symbol) + df["Symbol"] = pd.to_numeric(df["Z"]).map(self.get_symbol) return df.astype(self._data_types()) diff --git a/src/nuclearmasses/io/nubase_parse.py b/src/nuclearmasses/io/nubase_parse.py index 63d800e..f04cbd7 100644 --- a/src/nuclearmasses/io/nubase_parse.py +++ b/src/nuclearmasses/io/nubase_parse.py @@ -182,6 +182,6 @@ def read_file(self) -> pd.DataFrame: df["TableYear"] = self.year df["N"] = pd.to_numeric(df["A"]) - pd.to_numeric(df["Z"]) - df["Symbol"] = pd.to_numeric(df["Z"]).map(self.z_to_symbol) + df["Symbol"] = pd.to_numeric(df["Z"]).map(self.get_symbol) return df.astype(self._data_types()) From 181b92244da74259c40e9a3dd391e1d9b26f837a Mon Sep 17 00:00:00 2001 From: Ian Cullen Date: Sun, 29 Mar 2026 18:47:22 +0100 Subject: [PATCH 09/26] As with c229570 remove redundant checks --- src/nuclearmasses/io/ame_reaction_1_parse.py | 106 +++++++++---------- src/nuclearmasses/io/ame_reaction_2_parse.py | 106 +++++++++---------- 2 files changed, 100 insertions(+), 112 deletions(-) diff --git 
a/src/nuclearmasses/io/ame_reaction_1_parse.py b/src/nuclearmasses/io/ame_reaction_1_parse.py index ff08ba0..f432b65 100644 --- a/src/nuclearmasses/io/ame_reaction_1_parse.py +++ b/src/nuclearmasses/io/ame_reaction_1_parse.py @@ -22,68 +22,62 @@ def __init__(self, filename: pathlib.Path, year: int): def _column_names(self) -> list[str]: """Set the column name depending on the year""" - match self.year: - case _: - return [ - "A", - "Z", - "TwoNeutronSeparationEnergy", - "TwoNeutronSeparationEnergyError", - "TwoProtonSeparationEnergy", - "TwoProtonSeparationEnergyError", - "QAlpha", - "QAlphaError", - "QTwoBeta", - "QTwoBetaError", - "QEpsilon", - "QEpsilonError", - "QBetaNeutron", - "QBetaNeutronError", - ] + return [ + "A", + "Z", + "TwoNeutronSeparationEnergy", + "TwoNeutronSeparationEnergyError", + "TwoProtonSeparationEnergy", + "TwoProtonSeparationEnergyError", + "QAlpha", + "QAlphaError", + "QTwoBeta", + "QTwoBetaError", + "QEpsilon", + "QEpsilonError", + "QBetaNeutron", + "QBetaNeutronError", + ] def _data_types(self) -> dict: """Set the data type depending on the year""" - match self.year: - case _: - return { - "TableYear": "Int64", - "Symbol": "string", - "A": "Int64", - "Z": "Int64", - "N": "Int64", - "TwoNeutronSeparationEnergy": "float64", - "TwoNeutronSeparationEnergyError": "float64", - "TwoProtonSeparationEnergy": "float64", - "TwoProtonSeparationEnergyError": "float64", - "QAlpha": "float64", - "QAlphaError": "float64", - "QTwoBeta": "float64", - "QTwoBetaError": "float64", - "QEpsilon": "float64", - "QEpsilonError": "float64", - "QBetaNeutron": "float64", - "QBetaNeutronError": "float64", - } + return { + "TableYear": "Int64", + "Symbol": "string", + "A": "Int64", + "Z": "Int64", + "N": "Int64", + "TwoNeutronSeparationEnergy": "float64", + "TwoNeutronSeparationEnergyError": "float64", + "TwoProtonSeparationEnergy": "float64", + "TwoProtonSeparationEnergyError": "float64", + "QAlpha": "float64", + "QAlphaError": "float64", + "QTwoBeta": "float64", + 
"QTwoBetaError": "float64", + "QEpsilon": "float64", + "QEpsilonError": "float64", + "QBetaNeutron": "float64", + "QBetaNeutronError": "float64", + } def _na_values(self) -> dict: """Set the columns that have placeholder values""" - match self.year: - case _: - return { - "A": [""], - "TwoNeutronSeparationEnergy": ["", "*"], - "TwoNeutronSeparationEnergyError": ["", "*"], - "TwoProtonSeparationEnergy": ["", "*"], - "TwoProtonSeparationEnergyError": ["", "*"], - "QAlpha": ["", "*"], - "QAlphaError": ["", "*"], - "QTwoBeta": ["", "*"], - "QTwoBetaError": ["", "*"], - "QEpsilon": ["", "*"], - "QEpsilonError": ["", "*"], - "QBetaNeutron": ["", "*"], - "QBetaNeutronError": ["", "*"], - } + return { + "A": [""], + "TwoNeutronSeparationEnergy": ["", "*"], + "TwoNeutronSeparationEnergyError": ["", "*"], + "TwoProtonSeparationEnergy": ["", "*"], + "TwoProtonSeparationEnergyError": ["", "*"], + "QAlpha": ["", "*"], + "QAlphaError": ["", "*"], + "QTwoBeta": ["", "*"], + "QTwoBetaError": ["", "*"], + "QEpsilon": ["", "*"], + "QEpsilonError": ["", "*"], + "QBetaNeutron": ["", "*"], + "QBetaNeutronError": ["", "*"], + } def read_file(self) -> pd.DataFrame: """Read the file using it's known format diff --git a/src/nuclearmasses/io/ame_reaction_2_parse.py b/src/nuclearmasses/io/ame_reaction_2_parse.py index 97c13af..7d03811 100644 --- a/src/nuclearmasses/io/ame_reaction_2_parse.py +++ b/src/nuclearmasses/io/ame_reaction_2_parse.py @@ -22,68 +22,62 @@ def __init__(self, filename: pathlib.Path, year: int): def _column_names(self) -> list[str]: """Set the column name depending on the year""" - match self.year: - case _: - return [ - "A", - "Z", - "OneNeutronSeparationEnergy", - "OneNeutronSeparationEnergyError", - "OneProtonSeparationEnergy", - "OneProtonSeparationEnergyError", - "QFourBeta", - "QFourBetaError", - "QDeuteronAlpha", - "QDeuteronAlphaError", - "QProtonAlpha", - "QProtonAlphaError", - "QNeutronAlpha", - "QNeutronAlphaError", - ] + return [ + "A", + "Z", + 
"OneNeutronSeparationEnergy", + "OneNeutronSeparationEnergyError", + "OneProtonSeparationEnergy", + "OneProtonSeparationEnergyError", + "QFourBeta", + "QFourBetaError", + "QDeuteronAlpha", + "QDeuteronAlphaError", + "QProtonAlpha", + "QProtonAlphaError", + "QNeutronAlpha", + "QNeutronAlphaError", + ] def _data_types(self) -> dict: """Set the data type depending on the year""" - match self.year: - case _: - return { - "TableYear": "Int64", - "Symbol": "string", - "A": "Int64", - "Z": "Int64", - "N": "Int64", - "OneNeutronSeparationEnergy": "float64", - "OneNeutronSeparationEnergyError": "float64", - "OneProtonSeparationEnergy": "float64", - "OneProtonSeparationEnergyError": "float64", - "QFourBeta": "float64", - "QFourBetaError": "float64", - "QDeuteronAlpha": "float64", - "QDeuteronAlphaError": "float64", - "QProtonAlpha": "float64", - "QProtonAlphaError": "float64", - "QNeutronAlpha": "float64", - "QNeutronAlphaError": "float64", - } + return { + "TableYear": "Int64", + "Symbol": "string", + "A": "Int64", + "Z": "Int64", + "N": "Int64", + "OneNeutronSeparationEnergy": "float64", + "OneNeutronSeparationEnergyError": "float64", + "OneProtonSeparationEnergy": "float64", + "OneProtonSeparationEnergyError": "float64", + "QFourBeta": "float64", + "QFourBetaError": "float64", + "QDeuteronAlpha": "float64", + "QDeuteronAlphaError": "float64", + "QProtonAlpha": "float64", + "QProtonAlphaError": "float64", + "QNeutronAlpha": "float64", + "QNeutronAlphaError": "float64", + } def _na_values(self) -> dict: """Set the columns that have placeholder values""" - match self.year: - case _: - return { - "A": [""], - "OneNeutronSeparationEnergy": ["", "*"], - "OneNeutronSeparationEnergyError": ["", "*"], - "OneProtonSeparationEnergy": ["", "*"], - "OneProtonSeparationEnergyError": ["", "*"], - "QFourBeta": ["", "*"], - "QFourBetaError": ["", "*"], - "QDeuteronAlpha": ["", "*"], - "QDeuteronAlphaError": ["", "*"], - "QProtonAlpha": ["", "*"], - "QProtonAlphaError": ["", "*"], - 
"QNeutronAlpha": ["", "*"], - "QNeutronAlphaError": ["", "*"], - } + return { + "A": [""], + "OneNeutronSeparationEnergy": ["", "*"], + "OneNeutronSeparationEnergyError": ["", "*"], + "OneProtonSeparationEnergy": ["", "*"], + "OneProtonSeparationEnergyError": ["", "*"], + "QFourBeta": ["", "*"], + "QFourBetaError": ["", "*"], + "QDeuteronAlpha": ["", "*"], + "QDeuteronAlphaError": ["", "*"], + "QProtonAlpha": ["", "*"], + "QProtonAlphaError": ["", "*"], + "QNeutronAlpha": ["", "*"], + "QNeutronAlphaError": ["", "*"], + } def read_file(self) -> pd.DataFrame: """Read the file using it's known format From ff2cf90aa6d66120e741ef1dae8f9c1bd4d56db2 Mon Sep 17 00:00:00 2001 From: Ian Cullen Date: Sun, 29 Mar 2026 19:41:44 +0100 Subject: [PATCH 10/26] Refactor NUBASE parsing into a dedicated class The original parser class still reads a year's worth of data, with this new class doing the aggregation. --- src/nuclearmasses/io/nubase.py | 38 +++++++++++++++++++++++++++++++++ src/nuclearmasses/mass_table.py | 29 +++---------------------- 2 files changed, 41 insertions(+), 26 deletions(-) create mode 100644 src/nuclearmasses/io/nubase.py diff --git a/src/nuclearmasses/io/nubase.py b/src/nuclearmasses/io/nubase.py new file mode 100644 index 0000000..6ce6526 --- /dev/null +++ b/src/nuclearmasses/io/nubase.py @@ -0,0 +1,38 @@ +import pathlib + +import pandas as pd + +from nuclearmasses.io.nubase_parse import NUBASEParser + + +class NUBASE: + """Top level storage and functionality for NUBASE data""" + + def __init__(self, data_path: pathlib.Path): + super().__init__() + self.data_path = data_path + self.years = [1995, 2003, 2012, 2016, 2020] + self.nubase_df = pd.concat([self.parse_year(y) for y in self.years], ignore_index=True) + + def get_datafile(self, year: int) -> pathlib.Path: + """Use the given year to locate the NUBASE mass table file and return the absolute path.""" + nubase_mass = self.data_path / pathlib.Path(str(year)) + nubase_mass = nubase_mass.resolve() + + match 
year: + case 1995: + nubase_mass = nubase_mass / "nubtab97.asc" + case 2003: + nubase_mass = nubase_mass / "nubtab03.asc" + case 2012: + nubase_mass = nubase_mass / "nubtab12.asc" + case 2016: + nubase_mass = nubase_mass / "nubase2016.txt" + case 2020: + nubase_mass = nubase_mass / "nubase_1.mas20" + + return nubase_mass + + def parse_year(self, year: int) -> pd.DataFrame: + """Parse the file of the given ``year``""" + return NUBASEParser(filename=self.get_datafile(year), year=year).read_file() diff --git a/src/nuclearmasses/mass_table.py b/src/nuclearmasses/mass_table.py index a86515c..a5bb8d8 100644 --- a/src/nuclearmasses/mass_table.py +++ b/src/nuclearmasses/mass_table.py @@ -6,7 +6,8 @@ from nuclearmasses.io.ame_mass_parse import AMEMassParser from nuclearmasses.io.ame_reaction_1_parse import AMEReactionParserOne from nuclearmasses.io.ame_reaction_2_parse import AMEReactionParserTwo -from nuclearmasses.io.nubase_parse import NUBASEParser + +from nuclearmasses.io.nubase import NUBASE class MassTable: @@ -18,32 +19,12 @@ class MassTable: def __init__(self): """Do all of the work at construction.""" self.data_path = importlib.resources.files("nuclearmasses.data") - self.nubase_years = [1995, 2003, 2012, 2016, 2020] - self.nubase = pd.concat([self._parse_nubase_data(y) for y in self.nubase_years], ignore_index=True) + self.nubase = NUBASE(self.data_path).nubase_df self.ame_years = [1983, 1993, 1995, 2003, 2012, 2016, 2020] self.ame = pd.concat([self._parse_ame_data(y) for y in self.ame_years], ignore_index=True) self.full_data = self._combine_all_data() self._do_indexing() - def _get_nubase_datafile(self, year: int) -> pathlib.Path: - """Use the given year to locate the NUBASE mass table file and return the absolute path.""" - nubase_mass = self.data_path / pathlib.Path(str(year)) - nubase_mass = nubase_mass.resolve() - - match year: - case 1995: - nubase_mass = nubase_mass / "nubtab97.asc" - case 2003: - nubase_mass = nubase_mass / "nubtab03.asc" - case 2012: - 
nubase_mass = nubase_mass / "nubtab12.asc" - case 2016: - nubase_mass = nubase_mass / "nubase2016.txt" - case 2020: - nubase_mass = nubase_mass / "nubase_1.mas20" - - return nubase_mass - def _get_ame_datafiles(self, year: int) -> tuple[pathlib.Path, pathlib.Path, pathlib.Path]: """Use the given year to locate the 3 AME data file and return the absolute paths.""" data_dir = self.data_path / pathlib.Path(str(year)) @@ -81,10 +62,6 @@ def _get_ame_datafiles(self, year: int) -> tuple[pathlib.Path, pathlib.Path, pat return ame_mass, ame_reaction_1, ame_reaction_2 - def _parse_nubase_data(self, year: int) -> pd.DataFrame: - """Get the NUBASE for the given year as a pandas.DataFrame.""" - return NUBASEParser(self._get_nubase_datafile(year), year).read_file() - def _parse_ame_data(self, year: int) -> pd.DataFrame: """Combine all the AME files from the given year into a pandas.DataFrame.""" ame_mass, ame_reaction_1, ame_reaction_2 = self._get_ame_datafiles(year) From a48f11a4082035f31d5a9aebf57584d47a2f2189 Mon Sep 17 00:00:00 2001 From: Ian Cullen Date: Wed, 1 Apr 2026 15:24:22 +0100 Subject: [PATCH 11/26] Remove isort config in favour of ruff Not sure why they are both present, and some changes applied in the current branch were causing them to argue, so let's stop using isort. 
--- pyproject.toml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index da1a893..0d481d7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -75,9 +75,6 @@ omit = [ "tests/*" ] -[tool.isort] -known_first_party = ["nuclearmasses"] - [tool.ruff] line-length = 120 @@ -86,6 +83,8 @@ line-length = 120 [tool.ruff.lint.isort] known-first-party = ["nuclearmasses"] +from-first = false +order-by-type = true force-sort-within-sections = true [tool.ruff.lint] From 3cfc7de51e3d9dde09d0f52681951eed78dece34 Mon Sep 17 00:00:00 2001 From: Ian Cullen Date: Wed, 1 Apr 2026 18:48:57 +0100 Subject: [PATCH 12/26] Update ruff sorting config Convention seems to be to use the src directory so we will follow that. --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 0d481d7..73c9cc9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -82,7 +82,7 @@ line-length = 120 # Nothing different from defaults [tool.ruff.lint.isort] -known-first-party = ["nuclearmasses"] +known-first-party = ["src"] from-first = false order-by-type = true force-sort-within-sections = true From 70fd5e5c2395cc26d45caac381c7b4ac5444f588 Mon Sep 17 00:00:00 2001 From: Ian Cullen Date: Wed, 1 Apr 2026 18:52:16 +0100 Subject: [PATCH 13/26] Generalise read_fwf for multiple input formats A refactor somewhere else opened up quite the can of worms in relation to the data types output by importlib.resources and mypy checking. We can now use read_fwf for all the types that are used by the core parsing and are hopefully robust if a user tries to parse their own file. 
--- src/nuclearmasses/io/ame_mass_parse.py | 22 +++++++++++---- src/nuclearmasses/io/ame_reaction_1_parse.py | 9 +++--- src/nuclearmasses/io/ame_reaction_2_parse.py | 9 +++--- src/nuclearmasses/io/nubase_parse.py | 29 +++++++++++++++----- src/nuclearmasses/utils/converter.py | 28 +++++++++++++++++++ 5 files changed, 75 insertions(+), 22 deletions(-) diff --git a/src/nuclearmasses/io/ame_mass_parse.py b/src/nuclearmasses/io/ame_mass_parse.py index 5b476a2..1ce5f43 100644 --- a/src/nuclearmasses/io/ame_mass_parse.py +++ b/src/nuclearmasses/io/ame_mass_parse.py @@ -1,10 +1,9 @@ import logging -import pathlib import pandas as pd from nuclearmasses.io.ame_mass_file import AMEMassFile -from nuclearmasses.utils.converter import Converter +from nuclearmasses.utils.converter import Converter, DataInput class AMEMassParser(AMEMassFile, Converter): @@ -15,10 +14,10 @@ class AMEMassParser(AMEMassFile, Converter): read the columns are interested in. """ - def __init__(self, filename: pathlib.Path, year: int): + def __init__(self, filename: DataInput, year: int): """Set the file to read and table year""" super().__init__(year=year) - self.filename: pathlib.Path = filename + self.filename: DataInput = filename self.year: int = year logging.info(f"Reading {self.filename} from {self.year}") @@ -69,6 +68,18 @@ def _na_values(self) -> dict: return na_vals + def calculate_relative_error(self, raw_df) -> pd.DataFrame: + """Calculate the relative error of the mass excess + + 12C has a 0.0 +/- 0.0 mass excess by definition, so ensure that is still true. 
+ """ + raw_df["AMERelativeError"] = abs( + raw_df["AMEMassExcessError"].astype(float) / raw_df["AMEMassExcess"].astype(float) + ) + raw_df.loc[(raw_df.Z == 6) & (raw_df.A == 12), "AMERelativeError"] = 0.0 + + return raw_df + def read_file(self) -> pd.DataFrame: """Read the file using it's known format @@ -76,7 +87,7 @@ def read_file(self) -> pd.DataFrame: column names, data types and locations of the date so we can now make the generic call to parse the file. """ - df = pd.read_fwf( + df = Converter.read_fwf( self.filename, colspecs=self.column_limits, names=self._column_names(), @@ -114,6 +125,7 @@ def read_file(self) -> pd.DataFrame: # We need to rescale the error value because we combined the two columns above df = df.assign(AtomicMassError=df["AtomicMassError"].astype(float) / 1.0e6) + df = self.calculate_relative_error(df) df["TableYear"] = self.year df["N"] = pd.to_numeric(df["A"]) - pd.to_numeric(df["Z"]) diff --git a/src/nuclearmasses/io/ame_reaction_1_parse.py b/src/nuclearmasses/io/ame_reaction_1_parse.py index f432b65..c4be262 100644 --- a/src/nuclearmasses/io/ame_reaction_1_parse.py +++ b/src/nuclearmasses/io/ame_reaction_1_parse.py @@ -1,10 +1,9 @@ import logging -import pathlib import pandas as pd from nuclearmasses.io.ame_reaction_1_file import AMEReactionFileOne -from nuclearmasses.utils.converter import Converter +from nuclearmasses.utils.converter import Converter, DataInput class AMEReactionParserOne(AMEReactionFileOne, Converter): @@ -13,10 +12,10 @@ class AMEReactionParserOne(AMEReactionFileOne, Converter): The format is known but I don't think python can easily parse it. 
""" - def __init__(self, filename: pathlib.Path, year: int): + def __init__(self, filename: DataInput, year: int): """Set the file to read and table year.""" super().__init__(year=year) - self.filename = filename + self.filename: DataInput = filename self.year = year logging.info(f"Reading {self.filename} from {self.year}") @@ -86,7 +85,7 @@ def read_file(self) -> pd.DataFrame: column names, data types and locations of the date so we can now make the generic call to parse the file. """ - df = pd.read_fwf( + df = Converter.read_fwf( self.filename, colspecs=self.column_limits, names=self._column_names(), diff --git a/src/nuclearmasses/io/ame_reaction_2_parse.py b/src/nuclearmasses/io/ame_reaction_2_parse.py index 7d03811..d6990d8 100644 --- a/src/nuclearmasses/io/ame_reaction_2_parse.py +++ b/src/nuclearmasses/io/ame_reaction_2_parse.py @@ -1,10 +1,9 @@ import logging -import pathlib import pandas as pd from nuclearmasses.io.ame_reaction_2_file import AMEReactionFileTwo -from nuclearmasses.utils.converter import Converter +from nuclearmasses.utils.converter import Converter, DataInput class AMEReactionParserTwo(AMEReactionFileTwo, Converter): @@ -13,10 +12,10 @@ class AMEReactionParserTwo(AMEReactionFileTwo, Converter): The format is known but I don't think python can easily parse it. """ - def __init__(self, filename: pathlib.Path, year: int): + def __init__(self, filename: DataInput, year: int): """Set the file to read and table year.""" super().__init__(year=year) - self.filename = filename + self.filename: DataInput = filename self.year = year logging.info(f"Reading {self.filename} from {self.year}") @@ -86,7 +85,7 @@ def read_file(self) -> pd.DataFrame: column names, data types and locations of the date so we can now make the generic call to parse the file. 
""" - df = pd.read_fwf( + df = Converter.read_fwf( self.filename, colspecs=self.column_limits, names=self._column_names(), diff --git a/src/nuclearmasses/io/nubase_parse.py b/src/nuclearmasses/io/nubase_parse.py index f04cbd7..3fbe04b 100644 --- a/src/nuclearmasses/io/nubase_parse.py +++ b/src/nuclearmasses/io/nubase_parse.py @@ -1,11 +1,10 @@ import logging -import pathlib import typing import pandas as pd from nuclearmasses.io.nubase_file import NUBASEFile -from nuclearmasses.utils.converter import Converter +from nuclearmasses.utils.converter import Converter, DataInput class NUBASEParser(NUBASEFile, Converter): @@ -14,10 +13,10 @@ class NUBASEParser(NUBASEFile, Converter): A collection of functions to parse the weird format of the NUBASE file. """ - def __init__(self, filename: pathlib.Path, year: int): + def __init__(self, filename: DataInput, year: int): """Set the file to read and the table year.""" super().__init__(year=year) - self.filename: pathlib.Path = filename + self.filename: DataInput = filename self.year: int = year self.unit_replacements: dict[str, str] = { r"y$": "yr", @@ -113,8 +112,6 @@ def parse_half_life(self, raw_df) -> pd.DataFrame: any type of sorting or algorithm. Convert to the SI unit of seconds, but don't overwrite original columns. """ # Convert stable isotopes into ones with enormous lifetimes with zero error so we can cast - # pandas v3 became much stricter with type conversions so convert to object (from string) so - # we can assign a float without breaking other parts of the code raw_df["HalfLifeValue"] = raw_df["HalfLifeValue"].astype("object") raw_df["HalfLifeError"] = raw_df["HalfLifeError"].astype("object") @@ -127,6 +124,7 @@ def parse_half_life(self, raw_df) -> pd.DataFrame: # Use the 3 half-life columns to create 2 new columns with units of seconds raw_df["HalfLifeUnit"] = raw_df["HalfLifeUnit"].astype("string") + # Bookkeeping: Tidy up know unusual units, i.e. 
y for years and m for minutes for pattern, replacement in self.unit_replacements.items(): raw_df["HalfLifeUnit"] = raw_df["HalfLifeUnit"].str.replace(pattern, replacement, regex=True) @@ -153,6 +151,18 @@ def parse_state(self, raw_df) -> pd.DataFrame: return raw_df + def calculate_relative_error(self, raw_df) -> pd.DataFrame: + """Calculate the relative error of the mass excess + + 12C has a 0.0 +/- 0.0 mass excess definition by definition so ensure that is still true. + """ + raw_df["NUBASERelativeError"] = abs( + raw_df["NUBASEMassExcessError"].astype(float) / raw_df["NUBASEMassExcess"].astype(float) + ) + raw_df.loc[(raw_df.Z == 6) & (raw_df.A == 12), "NUBASERelativeError"] = 0.0 + + return raw_df + def read_file(self) -> pd.DataFrame: """Read the file using it's known format @@ -160,7 +170,7 @@ def read_file(self) -> pd.DataFrame: column names, data types and locations of the date so we can now make the generic call to parse the file. """ - df = pd.read_fwf( + df = Converter.read_fwf( self.filename, colspecs=typing.cast(typing.Sequence[tuple[int, int]], self.column_limits), # appease mypy names=self._column_names(), @@ -179,6 +189,11 @@ def read_file(self) -> pd.DataFrame: df.replace("#", "", regex=True, inplace=True) df = self.parse_half_life(df) + df = self.calculate_relative_error(df) + + if self.year == 2012: + # 198Au has a typo in it's decay mode in the 2012 table. 
It is recorded as '-' + df.loc[(df.A == 198) & (df.Z == 79), "DecayModes"] = "B-" df["TableYear"] = self.year df["N"] = pd.to_numeric(df["A"]) - pd.to_numeric(df["Z"]) diff --git a/src/nuclearmasses/utils/converter.py b/src/nuclearmasses/utils/converter.py index 1134797..effd9f4 100644 --- a/src/nuclearmasses/utils/converter.py +++ b/src/nuclearmasses/utils/converter.py @@ -1,7 +1,15 @@ +import importlib +from importlib.resources.abc import Traversable +import os +import typing + import astropy # type: ignore[import-untyped] import numpy as np import pandas as pd +# Typing hint Union for the different ways a file or data can be represented +DataInput = Traversable | os.PathLike[str] | str | typing.TextIO + class Converter: """A utility class for converting between symbol and Z value @@ -94,3 +102,23 @@ def unit_to_seconds(self, unit_str: str) -> float: return np.nan return float(unit.to(astropy.units.s)) + + @staticmethod + def read_fwf(base: DataInput, **kwargs): + """Overloaded version of pandas.read_fwf() that accepts more types + + Our use of importlib.resource means we have types that the pandas version of read_fwf does not accept. + It can still be used but some work is required. This function does that work, as well as some other checking + to make sure we can pass the necessary types into our parser classes. 
+ """ + # A file like object + if hasattr(base, "read"): + return pd.read_fwf(base, **kwargs) # type: ignore[arg-type] + + # importlib.resource Traversable + if isinstance(base, Traversable): + with importlib.resources.as_file(base) as the_file: + return pd.read_fwf(the_file, **kwargs) + + # Filesystem path + return pd.read_fwf(base, **kwargs) From 0356c0d7e52e37e8e399f8d08082f2831d434c5e Mon Sep 17 00:00:00 2001 From: Ian Cullen Date: Wed, 1 Apr 2026 18:57:35 +0100 Subject: [PATCH 14/26] Add tests against the relative error calculations --- tests/test_ame_mass_parse.py | 7 +++++++ tests/test_nubase_parse.py | 5 +++++ 2 files changed, 12 insertions(+) diff --git a/tests/test_ame_mass_parse.py b/tests/test_ame_mass_parse.py index 5ebba62..f2078e5 100644 --- a/tests/test_ame_mass_parse.py +++ b/tests/test_ame_mass_parse.py @@ -25,6 +25,7 @@ def test_1983_mass(): "N": [39], "AMEMassExcess": [-63742.471], "AMEMassExcessError": [19.056], + "AMERelativeError": [19.056 / 63742.471], "BindingEnergyPerA": [582618.683 / 67], "BindingEnergyPerAError": [19.066 / 67], "BetaDecayEnergy": [3560.871], @@ -56,6 +57,7 @@ def test_1993_mass(): "N": [41], "AMEMassExcess": [-46574.693], "AMEMassExcessError": [465.747], + "AMERelativeError": [465.747 / 46574.693], "BindingEnergyPerA": [567012.133 / 67], "BindingEnergyPerAError": [465.747 / 67], "BetaDecayEnergy": [8746.727], @@ -88,6 +90,7 @@ def test_1995_mass(): "N": [41], "AMEMassExcess": [-46574.693], "AMEMassExcessError": [465.747], + "AMERelativeError": [465.747 / 46574.693], "BindingEnergyPerA": [567012.133 / 67], "BindingEnergyPerAError": [465.747 / 67], "BetaDecayEnergy": [8746.727], @@ -119,6 +122,7 @@ def test_2003_mass(): "N": [41], "AMEMassExcess": [-45692.348], "AMEMassExcessError": [415.570], + "AMERelativeError": [415.570 / 45692.348], "BindingEnergyPerA": [8449.695], "BindingEnergyPerAError": [6.203], "BetaDecayEnergy": [9368.702], @@ -150,6 +154,7 @@ def test_2012_mass(): "N": [41], "AMEMassExcess": [-46068.530], 
"AMEMassExcessError": [217.972], + "AMERelativeError": [217.972 / 46068.530], "BindingEnergyPerA": [8455.310], "BindingEnergyPerAError": [3.253], "BetaDecayEnergy": [9253.245], @@ -181,6 +186,7 @@ def test_2016_mass(): "N": [41], "AMEMassExcess": [-45610.155], "AMEMassExcessError": [270.285], + "AMERelativeError": [270.285 / 45610.155], "BindingEnergyPerA": [8448.469], "BindingEnergyPerAError": [4.034], "BetaDecayEnergy": [9711.620], @@ -212,6 +218,7 @@ def test_2020_mass(): "N": [41], "AMEMassExcess": [-45708.416], "AMEMassExcessError": [3.819], + "AMERelativeError": [3.819 / 45708.416], "BindingEnergyPerA": [8449.9359], "BindingEnergyPerAError": [0.0570], "BetaDecayEnergy": [9613.3678], diff --git a/tests/test_nubase_parse.py b/tests/test_nubase_parse.py index 1989a66..08740f7 100644 --- a/tests/test_nubase_parse.py +++ b/tests/test_nubase_parse.py @@ -23,6 +23,7 @@ def test_1995_nubase(): "N": [101], "NUBASEMassExcess": [-60085], "NUBASEMassExcessError": [29], + "NUBASERelativeError": [29 / 60085], "HalfLifeValue": [2.99], "HalfLifeUnit": ["min"], "HalfLifeError": [0.07], @@ -55,6 +56,7 @@ def test_2003_nubase(): "N": [101], "NUBASEMassExcess": [-60070], "NUBASEMassExcessError": [30], + "NUBASERelativeError": [30 / 60070], "HalfLifeValue": [2.99], "HalfLifeUnit": ["min"], "HalfLifeError": [0.07], @@ -87,6 +89,7 @@ def test_2012_nubase(): "N": [101], "NUBASEMassExcess": [-60060], "NUBASEMassExcessError": [30], + "NUBASERelativeError": [30 / 60060], "HalfLifeValue": [2.99], "HalfLifeUnit": ["min"], "HalfLifeError": [0.07], @@ -120,6 +123,7 @@ def test_2016_nubase(): "N": [101], "NUBASEMassExcess": [-60060], "NUBASEMassExcessError": [30], + "NUBASERelativeError": [30 / 60060], "HalfLifeValue": [2.99], "HalfLifeUnit": ["min"], "HalfLifeError": [0.07], @@ -154,6 +158,7 @@ def test_2020_nubase(): "N": [101], "NUBASEMassExcess": [-60060], "NUBASEMassExcessError": [30], + "NUBASERelativeError": [30 / 60060], "HalfLifeValue": [2.99], "HalfLifeUnit": ["min"], 
"HalfLifeError": [0.07], From 602b43acaad94ffaea2dad4eed7809ed4fb13f60 Mon Sep 17 00:00:00 2001 From: Ian Cullen Date: Wed, 1 Apr 2026 20:57:29 +0100 Subject: [PATCH 15/26] Top level classes to deal with all AME and NUBASE data To make things as modular as possible, there is now the class to parse an individual file as well as these new classes to manage the different data sets as a whole. --- src/nuclearmasses/io/ame.py | 49 ++++++++++++++++++++++++++++++++++ src/nuclearmasses/io/nubase.py | 44 ++++++++++++++---------------- 2 files changed, 69 insertions(+), 24 deletions(-) create mode 100644 src/nuclearmasses/io/ame.py diff --git a/src/nuclearmasses/io/ame.py b/src/nuclearmasses/io/ame.py new file mode 100644 index 0000000..089cbc0 --- /dev/null +++ b/src/nuclearmasses/io/ame.py @@ -0,0 +1,49 @@ +from importlib.resources.abc import Traversable + +import pandas as pd + +from nuclearmasses.io.ame_mass_parse import AMEMassParser +from nuclearmasses.io.ame_reaction_1_parse import AMEReactionParserOne +from nuclearmasses.io.ame_reaction_2_parse import AMEReactionParserTwo + + +class AME: + """Top level storage and functionality for AME data""" + + def __init__(self, data_path: Traversable): + self.data_path = data_path + self.years: list[int] = [1983, 1993, 1995, 2003, 2012, 2016, 2020] + self.ame_files: list[tuple[str, str, str]] = [ + ("mass.mas83", "rct1.mas83", "rct2.mas83"), + ("mass_exp.mas93", "rct1_exp.mas93", "rct2_exp.mas93"), + ("mass_exp.mas95", "rct1_exp.mas95", "rct2_exp.mas95"), + ("mass.mas03", "rct1.mas03", "rct2.mas03"), + ("mass.mas12", "rct1.mas12", "rct2.mas12"), + ("mass16.txt", "rct1-16.txt", "rct2-16.txt"), + ("mass.mas20", "rct1.mas20", "rct2.mas20"), + ] + self.files: dict[int, tuple[str, str, str]] = dict(zip(self.years, self.ame_files, strict=True)) + self.ame_df: pd.DataFrame = self.parse_all_years() + + def get_datafiles(self, year: int) -> tuple[Traversable, Traversable, Traversable]: + """Use the given year to locate the 3 AME data
file and return the absolute paths.""" + root = self.data_path / str(year) + mass, rct1, rct2 = self.files[year] + + return root / mass, root / rct1, root / rct2 + + def parse_year(self, year: int) -> pd.DataFrame: + """Combine all the AME files from the given ``year``""" + ame_mass, ame_reaction_1, ame_reaction_2 = self.get_datafiles(year) + + mass_df = AMEMassParser(filename=ame_mass, year=year).read_file() + rct1_df = AMEReactionParserOne(filename=ame_reaction_1, year=year).read_file() + rct2_df = AMEReactionParserTwo(filename=ame_reaction_2, year=year).read_file() + + # Merge all 3 of the AME dataframes into one + common_columns = ["A", "Z", "N", "TableYear", "Symbol"] + return mass_df.merge(rct1_df, on=common_columns, how="outer").merge(rct2_df, on=common_columns, how="outer") + + def parse_all_years(self) -> pd.DataFrame: + """Parse the files for all available years""" + return pd.concat((self.parse_year(y) for y in self.years), ignore_index=True) diff --git a/src/nuclearmasses/io/nubase.py b/src/nuclearmasses/io/nubase.py index 6ce6526..98d8e00 100644 --- a/src/nuclearmasses/io/nubase.py +++ b/src/nuclearmasses/io/nubase.py @@ -1,4 +1,4 @@ -import pathlib +from importlib.resources.abc import Traversable import pandas as pd @@ -8,31 +8,27 @@ class NUBASE: """Top level storage and functionality for NUBASE data""" - def __init__(self, data_path: pathlib.Path): - super().__init__() + def __init__(self, data_path: Traversable): self.data_path = data_path - self.years = [1995, 2003, 2012, 2016, 2020] - self.nubase_df = pd.concat([self.parse_year(y) for y in self.years], ignore_index=True) - - def get_datafile(self, year: int) -> pathlib.Path: - """Use the given year to locate the NUBASE mass table file and return the absolute path.""" - nubase_mass = self.data_path / pathlib.Path(str(year)) - nubase_mass = nubase_mass.resolve() - - match year: - case 1995: - nubase_mass = nubase_mass / "nubtab97.asc" - case 2003: - nubase_mass = nubase_mass / "nubtab03.asc" - case 
2012: - nubase_mass = nubase_mass / "nubtab12.asc" - case 2016: - nubase_mass = nubase_mass / "nubase2016.txt" - case 2020: - nubase_mass = nubase_mass / "nubase_1.mas20" - - return nubase_mass + self.years: list[int] = [1995, 2003, 2012, 2016, 2020] + self.nubase_files: list[str] = [ + "nubtab97.asc", + "nubtab03.asc", + "nubtab12.asc", + "nubase2016.txt", + "nubase_1.mas20", + ] + self.files: dict[int, str] = dict(zip(self.years, self.nubase_files, strict=True)) + self.nubase_df: pd.DataFrame = self.parse_all_files() + + def get_datafile(self, year: int) -> Traversable: + """Use the given ``year`` to locate the NUBASE mass table file and return the absolute path.""" + return self.data_path / str(year) / self.files[year] def parse_year(self, year: int) -> pd.DataFrame: """Parse the file of the given ``year``""" return NUBASEParser(filename=self.get_datafile(year), year=year).read_file() + + def parse_all_files(self) -> pd.DataFrame: + """Parse the files for all available years""" + return pd.concat((self.parse_year(y) for y in self.years), ignore_index=True) From 31f5b6a68296190d97e33184b84730dfb1ad6a76 Mon Sep 17 00:00:00 2001 From: Ian Cullen Date: Wed, 1 Apr 2026 21:00:21 +0100 Subject: [PATCH 16/26] Update the MassTable class to make use of the new structure --- src/nuclearmasses/mass_table.py | 168 +++++++++++++++----------------- tests/test_mass_table.py | 66 +------------ 2 files changed, 81 insertions(+), 153 deletions(-) diff --git a/src/nuclearmasses/mass_table.py b/src/nuclearmasses/mass_table.py index a5bb8d8..77c4755 100644 --- a/src/nuclearmasses/mass_table.py +++ b/src/nuclearmasses/mass_table.py @@ -1,12 +1,9 @@ import importlib.resources -import pathlib +import typing import pandas as pd -from nuclearmasses.io.ame_mass_parse import AMEMassParser -from nuclearmasses.io.ame_reaction_1_parse import AMEReactionParserOne -from nuclearmasses.io.ame_reaction_2_parse import AMEReactionParserTwo - +from nuclearmasses.io.ame import AME from 
nuclearmasses.io.nubase import NUBASE @@ -16,90 +13,83 @@ class MassTable: Internally there are separate dataframes for the NUBASE and AME data as well as a combined one for all data """ - def __init__(self): - """Do all of the work at construction.""" - self.data_path = importlib.resources.files("nuclearmasses.data") - self.nubase = NUBASE(self.data_path).nubase_df - self.ame_years = [1983, 1993, 1995, 2003, 2012, 2016, 2020] - self.ame = pd.concat([self._parse_ame_data(y) for y in self.ame_years], ignore_index=True) - self.full_data = self._combine_all_data() - self._do_indexing() - - def _get_ame_datafiles(self, year: int) -> tuple[pathlib.Path, pathlib.Path, pathlib.Path]: - """Use the given year to locate the 3 AME data file and return the absolute paths.""" - data_dir = self.data_path / pathlib.Path(str(year)) - data_dir = data_dir.resolve() - - match year: - case 1983: - ame_mass = data_dir / "mass.mas83" - ame_reaction_1 = data_dir / "rct1.mas83" - ame_reaction_2 = data_dir / "rct2.mas83" - case 1993: - ame_mass = data_dir / "mass_exp.mas93" - ame_reaction_1 = data_dir / "rct1_exp.mas93" - ame_reaction_2 = data_dir / "rct2_exp.mas93" - case 1995: - ame_mass = data_dir / "mass_exp.mas95" - ame_reaction_1 = data_dir / "rct1_exp.mas95" - ame_reaction_2 = data_dir / "rct2_exp.mas95" - case 2003: - ame_mass = data_dir / "mass.mas03" - ame_reaction_1 = data_dir / "rct1.mas03" - ame_reaction_2 = data_dir / "rct2.mas03" - case 2012: - ame_mass = data_dir / "mass.mas12" - ame_reaction_1 = data_dir / "rct1.mas12" - ame_reaction_2 = data_dir / "rct2.mas12" - case 2016: - ame_mass = data_dir / "mass16.txt" - ame_reaction_1 = data_dir / "rct1-16.txt" - ame_reaction_2 = data_dir / "rct2-16.txt" - case 2020: - ame_mass = data_dir / "mass.mas20" - ame_reaction_1 = data_dir / "rct1.mas20" - ame_reaction_2 = data_dir / "rct2.mas20" - - return ame_mass, ame_reaction_1, ame_reaction_2 - - def _parse_ame_data(self, year: int) -> pd.DataFrame: - """Combine all the AME files from 
the given year into a pandas.DataFrame.""" - ame_mass, ame_reaction_1, ame_reaction_2 = self._get_ame_datafiles(year) - - ame_mass_df = AMEMassParser(ame_mass, year).read_file() - - # Merge all 3 of the AME files/data frames into one - common_columns = ["A", "Z", "N", "TableYear", "Symbol"] - temp_df = ame_mass_df.merge(AMEReactionParserOne(ame_reaction_1, year).read_file(), on=common_columns) - return temp_df.merge(AMEReactionParserTwo(ame_reaction_2, year).read_file(), on=common_columns) + def __init__(self, df: pd.DataFrame | None = None, filters: list[tuple[str, str, typing.Any]] | None = None): + self._original_df: pd.DataFrame = self._parse_files() if df is None else df + self._filters: list[tuple[str, str, typing.Any]] = filters or [] + self._create_dynamic_getters() + + def __repr__(self) -> str: + """Make printing the class object show the DataFrame nicely""" + return repr(self.df) + + def __str__(self) -> str: + """Make printing the class object show the DataFrame nicely""" + return str(self.df) + + def __getattr__(self, attr: str) -> typing.Any: + """Delegate pandas methods for deeper chaining""" + if hasattr(self.df, attr): + return getattr(self.df, attr) + raise AttributeError(f"'{type(self).__name__}' object has no attribute '{attr}'") + + def __dir__(self): + """Pass the pandas api through so we can get autocomplete""" + return sorted(set(list(self.__dict__.keys()) + dir(type(self)) + dir(self.df))) + + # def __len__(self) -> int: + # return len(self.df) + # + # def __iter__(self): + # return iter(self.df) + # + # def __getitem__(self, key): + # return type(self)(df=self.df[key]) + + def get(self, column: str, value: typing.Any) -> typing.Self: + """Generic getter: all_data.get('A', 123)""" + new_filters = self._filters + [(column, "==", value)] + return type(self)(df=self._original_df, filters=new_filters) + + def filter(self, expr: str) -> typing.Self: + """Allow arbitrary pandas .query() expressions.""" + # For simplicity we still store as tuples; 
you could store raw expressions too + new_filters = self._filters + [("query", expr, None)] + return type(self)(df=self._original_df, filters=new_filters) + + def _parse_files(self) -> pd.DataFrame: + data_path = importlib.resources.files("nuclearmasses").joinpath("data") - def _combine_all_data(self) -> pd.DataFrame: - """Combine all NUBASE and AME data into a single pandas DataFrame.""" common_columns = ["A", "Z", "N", "TableYear", "Symbol"] - df = pd.merge(self.ame, self.nubase, on=common_columns, how="outer") - - df["NUBASERelativeError"] = abs(df["NUBASEMassExcessError"] / df["NUBASEMassExcess"]) - df["AMERelativeError"] = abs(df["AMEMassExcessError"] / df["AMEMassExcess"]) - - # 12C has a 0.0 +/ 0.0 mass excess by definition so calculating relative error -> NaN - # Set the value to 0.0 as that's what it is - mask = (df.Symbol == "C") & (df.A == 12) - df.loc[mask, "NUBASERelativeError"] = 0.0 - df.loc[mask, "AMERelativeError"] = 0.0 - - # 198Au has a typo in it's decay mode in the 2012 table. It is recorded as '-' - df.loc[(df.A == 198) & (df.Z == 79) & (df.TableYear == 2012), "DecayModes"] = "B-" - - return df - - def _do_indexing(self) -> None: - """ - Set the index of the dataframe to the table year. This is done in place so nothing is returned. 
- - param: Nothing - :return: Nothing - """ - self.nubase.set_index("TableYear", inplace=True) - self.ame.set_index("TableYear", inplace=True) - self.full_data.set_index("TableYear", inplace=True) + return pd.merge(AME(data_path).ame_df, NUBASE(data_path).nubase_df, on=common_columns, how="outer") + + @property + def df(self) -> pd.DataFrame: + """Apply all filters only when .df is accessed""" + result = self._original_df + for key, op, val in self._filters: + if op == "==": + # Filter on the index + if key == self._original_df.index.name: + result = result[result.index == val] + # Filter on a regular column + else: + result = result[result[key] == val] + elif key == "query": + result = result.query(op) + return result + + def _create_dynamic_getters(self): + """Automatically create get_colname(value) methods for every column.""" + for col in self._original_df.columns: + method_name = f"get_{col}" + + def make_getter(column: str): + def getter(self, value: typing.Any) -> MassTable: + new_filters = self._filters + [(column, "==", value)] + return type(self)(df=self._original_df, filters=new_filters) + + return getter + + # Attach the method to the class/instance + setattr(self, method_name, make_getter(col).__get__(self, MassTable)) diff --git a/tests/test_mass_table.py b/tests/test_mass_table.py index 931fb46..c5a0db3 100644 --- a/tests/test_mass_table.py +++ b/tests/test_mass_table.py @@ -1,65 +1,3 @@ -import pytest +# from nuclearmasses.mass_table import MassTable -from nuclearmasses.mass_table import MassTable - - -@pytest.fixture -def mt(): - return MassTable() - - -def test_get_nubase_datafile(mt): - year = 1995 - assert mt._get_nubase_datafile(year) == mt.data_path / str(year) / "nubtab97.asc" - year = 2003 - assert mt._get_nubase_datafile(year) == mt.data_path / str(year) / "nubtab03.asc" - year = 2012 - assert mt._get_nubase_datafile(year) == mt.data_path / str(year) / "nubtab12.asc" - year = 2016 - assert mt._get_nubase_datafile(year) == mt.data_path / 
str(year) / "nubase2016.txt" - year = 2020 - assert mt._get_nubase_datafile(year) == mt.data_path / str(year) / "nubase_1.mas20" - - -def test_get_ame_datafiles(mt): - year = 1983 - data_path = mt.data_path / str(year) - mass, reaction01, reaction02 = mt._get_ame_datafiles(year) - assert mass == data_path / "mass.mas83" - assert reaction01 == data_path / "rct1.mas83" - assert reaction02 == data_path / "rct2.mas83" - - year = 1993 - data_path = mt.data_path / str(year) - mass, reaction01, reaction02 = mt._get_ame_datafiles(year) - assert mass == data_path / "mass_exp.mas93" - assert reaction01 == data_path / "rct1_exp.mas93" - assert reaction02 == data_path / "rct2_exp.mas93" - - year = 1995 - data_path = mt.data_path / str(year) - mass, reaction01, reaction02 = mt._get_ame_datafiles(year) - assert mass == data_path / "mass_exp.mas95" - assert reaction01 == data_path / "rct1_exp.mas95" - assert reaction02 == data_path / "rct2_exp.mas95" - - year = 2012 - data_path = mt.data_path / str(year) - mass, reaction01, reaction02 = mt._get_ame_datafiles(year) - assert mass == data_path / "mass.mas12" - assert reaction01 == data_path / "rct1.mas12" - assert reaction02 == data_path / "rct2.mas12" - - year = 2016 - data_path = mt.data_path / str(year) - mass, reaction01, reaction02 = mt._get_ame_datafiles(year) - assert mass == data_path / "mass16.txt" - assert reaction01 == data_path / "rct1-16.txt" - assert reaction02 == data_path / "rct2-16.txt" - - year = 2020 - data_path = mt.data_path / str(year) - mass, reaction01, reaction02 = mt._get_ame_datafiles(year) - assert mass == data_path / "mass.mas20" - assert reaction01 == data_path / "rct1.mas20" - assert reaction02 == data_path / "rct2.mas20" +# TODO: Write some sensible tests From 5e8bb70336c98e1fde1d8a34336fef8a445540c7 Mon Sep 17 00:00:00 2001 From: Ian Cullen Date: Wed, 1 Apr 2026 21:00:41 +0100 Subject: [PATCH 17/26] Add tests for the new top level classes --- tests/test_ame.py | 55 
++++++++++++++++++++++++++++++++++++++++++++ tests/test_nubase.py | 24 +++++++++++++++++++ 2 files changed, 79 insertions(+) create mode 100644 tests/test_ame.py create mode 100644 tests/test_nubase.py diff --git a/tests/test_ame.py b/tests/test_ame.py new file mode 100644 index 0000000..1f111c8 --- /dev/null +++ b/tests/test_ame.py @@ -0,0 +1,55 @@ +import importlib.resources + +import pytest + +from nuclearmasses.io.ame import AME + + +@pytest.fixture +def ame(): + data_path = importlib.resources.files("nuclearmasses.data") + return AME(data_path=data_path) + + +def test_get_ame_datafiles(ame): + year = 1983 + data_path = ame.data_path / str(year) + mass, reaction01, reaction02 = ame.get_datafiles(year) + assert mass == data_path / "mass.mas83" + assert reaction01 == data_path / "rct1.mas83" + assert reaction02 == data_path / "rct2.mas83" + + year = 1993 + data_path = ame.data_path / str(year) + mass, reaction01, reaction02 = ame.get_datafiles(year) + assert mass == data_path / "mass_exp.mas93" + assert reaction01 == data_path / "rct1_exp.mas93" + assert reaction02 == data_path / "rct2_exp.mas93" + + year = 1995 + data_path = ame.data_path / str(year) + mass, reaction01, reaction02 = ame.get_datafiles(year) + assert mass == data_path / "mass_exp.mas95" + assert reaction01 == data_path / "rct1_exp.mas95" + assert reaction02 == data_path / "rct2_exp.mas95" + + year = 2012 + data_path = ame.data_path / str(year) + mass, reaction01, reaction02 = ame.get_datafiles(year) + assert mass == data_path / "mass.mas12" + assert reaction01 == data_path / "rct1.mas12" + assert reaction02 == data_path / "rct2.mas12" + + year = 2016 + data_path = ame.data_path / str(year) + mass, reaction01, reaction02 = ame.get_datafiles(year) + assert mass == data_path / "mass16.txt" + assert reaction01 == data_path / "rct1-16.txt" + assert reaction02 == data_path / "rct2-16.txt" + + year = 2020 + data_path = ame.data_path / str(year) + mass, reaction01, reaction02 = ame.get_datafiles(year) + 
assert mass == data_path / "mass.mas20" + assert reaction01 == data_path / "rct1.mas20" + assert reaction02 == data_path / "rct2.mas20" diff --git a/tests/test_nubase.py b/tests/test_nubase.py new file mode 100644 index 0000000..2efaf3a --- /dev/null +++ b/tests/test_nubase.py @@ -0,0 +1,24 @@ +import importlib.resources + +import pytest + +from nuclearmasses.io.nubase import NUBASE + + +@pytest.fixture +def nubase(): + data_path = importlib.resources.files("nuclearmasses.data") + return NUBASE(data_path=data_path) + + +def test_get_nubase_datafile(nubase): + year = 1995 + assert nubase.get_datafile(year) == nubase.data_path / str(year) / "nubtab97.asc" + year = 2003 + assert nubase.get_datafile(year) == nubase.data_path / str(year) / "nubtab03.asc" + year = 2012 + assert nubase.get_datafile(year) == nubase.data_path / str(year) / "nubtab12.asc" + year = 2016 + assert nubase.get_datafile(year) == nubase.data_path / str(year) / "nubase2016.txt" + year = 2020 + assert nubase.get_datafile(year) == nubase.data_path / str(year) / "nubase_1.mas20" From 40abc4d061dfb4a9e6a6947ae21a39c825090a94 Mon Sep 17 00:00:00 2001 From: Ian Cullen Date: Sat, 4 Apr 2026 15:14:00 +0100 Subject: [PATCH 18/26] Add some tests to the MassTable after the refactor --- tests/test_mass_table.py | 75 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 73 insertions(+), 2 deletions(-) diff --git a/tests/test_mass_table.py b/tests/test_mass_table.py index c5a0db3..8d4d812 100644 --- a/tests/test_mass_table.py +++ b/tests/test_mass_table.py @@ -1,3 +1,74 @@ -# from nuclearmasses.mass_table import MassTable +import pandas as pd +import pandas.testing as pdt +import pytest -# TODO: Write some sensible tests +from nuclearmasses.mass_table import MassTable + + +@pytest.fixture +def empty_frame(): + return MassTable(df=pd.DataFrame()) + + +def test_getter_creation(): + cols = ["Mass", "Error", "Param", "RandomLongerString"] + test_frame = pd.DataFrame.from_dict(data=dict.fromkeys(cols, [0])) + + df
= MassTable(df=test_frame) + + for name in cols: + f = f"get_{name}" + assert hasattr(df, f) + assert callable(getattr(df, f)) + + +def test_getter_not_created(empty_frame): + with pytest.raises(AttributeError): + empty_frame.get_Nothing() + + +def test_empty_filter(empty_frame): + assert len(empty_frame._filters) == 0 + + +def test_manually_populated_filter(): + cols = ["ManualParameter"] + test_frame = pd.DataFrame.from_dict(data=dict.fromkeys(cols, [0])) + + my_filter = [(cols[0], "==", 5)] + + df = MassTable(df=test_frame, filters=my_filter) + + assert len(df._filters) == 1 + assert df._filters == my_filter + + +def test_auto_populated_filter(): + cols = ["AutoParameter"] + test_frame = pd.DataFrame.from_dict(data=dict.fromkeys(cols, [0])) + + df = MassTable(df=test_frame) + + val = 2 + f_df = df.get_AutoParameter(val) + + assert len(f_df._filters) == 1 + assert f_df._filters == [(cols[0], "==", val)] + + +def test_access_property(): + cols = ["Mass", "Error", "Param", "RandomLongerString"] + test_frame = pd.DataFrame.from_dict(data=dict.fromkeys(cols, [0])) + + m_df = MassTable(df=test_frame).df + + expected = pd.DataFrame( + { + "Mass": [0], + "Error": [0], + "Param": [0], + "RandomLongerString": [0], + } + ) + + pdt.assert_frame_equal(m_df, expected, check_like=True) From bdb16e6c16f1bc6e61a4d4844d076a0b0dd84cb0 Mon Sep 17 00:00:00 2001 From: Ian Cullen Date: Sat, 4 Apr 2026 15:50:11 +0100 Subject: [PATCH 19/26] Add more test coverage for MassTable --- tests/test_mass_table.py | 63 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/tests/test_mass_table.py b/tests/test_mass_table.py index 8d4d812..66fdc54 100644 --- a/tests/test_mass_table.py +++ b/tests/test_mass_table.py @@ -10,6 +10,13 @@ def empty_frame(): return MassTable(df=pd.DataFrame()) +def test_initial_complete_parse(): + data = MassTable().df + expected_shape = (21421, 50) + + assert expected_shape == data.shape + + def test_getter_creation(): cols = ["Mass", 
"Error", "Param", "RandomLongerString"] test_frame = pd.DataFrame.from_dict(data=dict.fromkeys(cols, [0])) @@ -56,6 +63,26 @@ def test_auto_populated_filter(): assert f_df._filters == [(cols[0], "==", val)] +def test_getter_on_index(): + cols = ["Mass", "Error", "Param", "RandomLongerString"] + test_frame = pd.DataFrame.from_dict(data=dict.fromkeys(cols, [0])) + + m_df = MassTable(df=test_frame) + m_df.set_index("Param") + m_df = m_df.get_Param(0).df + + expected = pd.DataFrame( + { + "Mass": [0], + "Error": [0], + "Param": [0], + "RandomLongerString": [0], + } + ) + + pdt.assert_frame_equal(m_df, expected, check_like=True) + + def test_access_property(): cols = ["Mass", "Error", "Param", "RandomLongerString"] test_frame = pd.DataFrame.from_dict(data=dict.fromkeys(cols, [0])) @@ -72,3 +99,39 @@ def test_access_property(): ) pdt.assert_frame_equal(m_df, expected, check_like=True) + + +def test_generic_getter(): + cols = ["Mass", "Error", "Param", "RandomLongerString"] + test_frame = pd.DataFrame.from_dict(data=dict.fromkeys(cols, [0])) + + m_df = MassTable(df=test_frame).get("Error", 0).df + + expected = pd.DataFrame( + { + "Mass": [0], + "Error": [0], + "Param": [0], + "RandomLongerString": [0], + } + ) + + pdt.assert_frame_equal(m_df, expected, check_like=True) + + +def test_generic_filter(): + cols = ["Mass", "Error", "Param", "RandomLongerString"] + test_frame = pd.DataFrame.from_dict(data=dict.fromkeys(cols, [0])) + + m_df = MassTable(df=test_frame).filter("Param == 0").df + + expected = pd.DataFrame( + { + "Mass": [0], + "Error": [0], + "Param": [0], + "RandomLongerString": [0], + } + ) + + pdt.assert_frame_equal(m_df, expected, check_like=True) From 2ea5d988fd73f52852fd319831209f98b326ed43 Mon Sep 17 00:00:00 2001 From: Ian Cullen Date: Sun, 5 Apr 2026 08:19:18 +0100 Subject: [PATCH 20/26] Correct index testing Add some tests related to the `dir()` method. 
--- tests/test_mass_table.py | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/tests/test_mass_table.py b/tests/test_mass_table.py index 66fdc54..4e8bc38 100644 --- a/tests/test_mass_table.py +++ b/tests/test_mass_table.py @@ -38,6 +38,26 @@ def test_empty_filter(empty_frame): assert len(empty_frame._filters) == 0 +def test_unique_and_sorted_dir(empty_frame): + output = dir(empty_frame) + assert output == sorted(output) + assert len(output) == len(set(output)) + + +def test_dir_includes_class_attributes(empty_frame): + output = dir(empty_frame) + + assert "_parse_files" in output + assert "get" in output + + +def test_dir_includes_pandas_attributes(empty_frame): + output = dir(empty_frame) + + assert "describe" in output + assert "head" in output + + def test_manually_populated_filter(): cols = ["ManualParameter"] test_frame = pd.DataFrame.from_dict(data=dict.fromkeys(cols, [0])) @@ -66,9 +86,9 @@ def test_auto_populated_filter(): def test_getter_on_index(): cols = ["Mass", "Error", "Param", "RandomLongerString"] test_frame = pd.DataFrame.from_dict(data=dict.fromkeys(cols, [0])) + test_frame.set_index("Param") m_df = MassTable(df=test_frame) - m_df.set_index("Param") m_df = m_df.get_Param(0).df expected = pd.DataFrame( From f0b35473faaa5dd18ff2ba881551d4e9038cb76b Mon Sep 17 00:00:00 2001 From: Ian Cullen Date: Sun, 5 Apr 2026 08:42:50 +0100 Subject: [PATCH 21/26] Update unit checking with more non-time units Make use of parametrization for simpler code. 
--- tests/test_parse.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/test_parse.py b/tests/test_parse.py index 4dc596e..cbe2aa8 100644 --- a/tests/test_parse.py +++ b/tests/test_parse.py @@ -40,8 +40,8 @@ def test_units_to_seconds(converter): assert converter.unit_to_seconds("d") == 86400.0 assert converter.unit_to_seconds("year") == 31557600.0 + +@pytest.mark.parametrize("unit", [5, "m", "Hz", "", " "]) +def test_nontime_unit_return_nan(converter, unit): # Don't use == on np.nan. Floating point numbers are complicated! - assert np.isnan(converter.unit_to_seconds(5)) - assert np.isnan(converter.unit_to_seconds("keV")) - assert np.isnan(converter.unit_to_seconds("")) - assert np.isnan(converter.unit_to_seconds(" ")) + assert np.isnan(converter.unit_to_seconds(unit)) From a2d317ec0766f4d5f07b7646402fccef9b58c7f4 Mon Sep 17 00:00:00 2001 From: Ian Cullen Date: Sun, 5 Apr 2026 15:12:18 +0100 Subject: [PATCH 22/26] Remove isort check from linting command We use the inbuilt import sort checking in ruff so have removed the use of isort. --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 429c554..88e69c5 100644 --- a/README.md +++ b/README.md @@ -127,11 +127,11 @@ TableYear If you have ideas for additional functionality or find bugs please create an [issue](https://github.com/php1ic/nuclearmasses/issues) or better yet a [pull request](https://github.com/php1ic/nuclearmasses/pulls). -We use a combination of [isort](https://pycqa.github.io/isort/), [ruff](https://docs.astral.sh/ruff/) and [mypy](https://www.mypy-lang.org/) to keep things tidy and hopefully catch errors and bugs before they happen. +We use a combination of [ruff](https://docs.astral.sh/ruff/) and [mypy](https://www.mypy-lang.org/) to keep things tidy and hopefully catch errors and bugs before they happen. The command below returns no errors or issues so should be run after any code changes. 
We might add a CI pipeline in the future, but for the moment, it's a manual process. ```bash -isort . && ruff format && ruff check && mypy src +ruff format && ruff check && mypy src ``` ## Known issues From f1eb7936df9dea270f41a7f053c42f816b7a4152 Mon Sep 17 00:00:00 2001 From: Ian Cullen Date: Sun, 5 Apr 2026 15:50:42 +0100 Subject: [PATCH 23/26] Allow a MassTable instance to be subscriptable We need to be careful to not simply replicate the functionality of a pandas dataframe, however I think a user may expect to be able to access columns with square brackets so we will make use of those. --- src/nuclearmasses/mass_table.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/nuclearmasses/mass_table.py b/src/nuclearmasses/mass_table.py index 77c4755..9983d39 100644 --- a/src/nuclearmasses/mass_table.py +++ b/src/nuclearmasses/mass_table.py @@ -42,8 +42,8 @@ def __dir__(self): # def __iter__(self): # return iter(self.df) # - # def __getitem__(self, key): - # return type(self)(df=self.df[key]) + def __getitem__(self, key): + return type(self)(df=self.df[key]) def get(self, column: str, value: typing.Any) -> typing.Self: """Generic getter: all_data.get('A', 123)""" From 39e763af309484d75f73cfa3f5406554f8e24437 Mon Sep 17 00:00:00 2001 From: Ian Cullen Date: Sun, 5 Apr 2026 17:57:28 +0100 Subject: [PATCH 24/26] Revert all functionality based around top level dataframe access Let's not reinvent the wheel when accessing the final merged dataframe with all the data. Give the user access to the dataframe and let them do what they need from there. 
--- src/nuclearmasses/mass_table.py | 77 ++-------------- tests/test_mass_table.py | 151 +------------------------------- 2 files changed, 6 insertions(+), 222 deletions(-) diff --git a/src/nuclearmasses/mass_table.py b/src/nuclearmasses/mass_table.py index 9983d39..9d74028 100644 --- a/src/nuclearmasses/mass_table.py +++ b/src/nuclearmasses/mass_table.py @@ -1,5 +1,4 @@ import importlib.resources -import typing import pandas as pd @@ -13,48 +12,8 @@ class MassTable: Internally there are separate dataframes for the NUBASE and AME data as well as a combined one for all data """ - def __init__(self, df: pd.DataFrame | None = None, filters: list[tuple[str, str, typing.Any]] | None = None): - self._original_df: pd.DataFrame = self._parse_files() if df is None else df - self._filters: list[tuple[str, str, typing.Any]] = filters or [] - self._create_dynamic_getters() - - def __repr__(self) -> str: - """Make printing the class object show the DataFrame nicely""" - return repr(self.df) - - def __str__(self) -> str: - """Make printing the class object show the DataFrame nicely""" - return str(self.df) - - def __getattr__(self, attr: str) -> typing.Any: - """Delegate pandas methods for deeper chaining""" - if hasattr(self.df, attr): - return getattr(self.df, attr) - raise AttributeError(f"'{type(self).__name__}' object has no attribute '{attr}'") - - def __dir__(self): - """Pass the pandas api through so we can get autocomplete""" - return sorted(set(list(self.__dict__.keys()) + dir(type(self)) + dir(self.df))) - - # def __len__(self) -> int: - # return len(self.df) - # - # def __iter__(self): - # return iter(self.df) - # - def __getitem__(self, key): - return type(self)(df=self.df[key]) - - def get(self, column: str, value: typing.Any) -> typing.Self: - """Generic getter: all_data.get('A', 123)""" - new_filters = self._filters + [(column, "==", value)] - return type(self)(df=self._original_df, filters=new_filters) - - def filter(self, expr: str) -> typing.Self: - """Allow 
arbitrary pandas .query() expressions.""" - # For simplicity we still store as tuples; you could store raw expressions too - new_filters = self._filters + [("query", expr, None)] - return type(self)(df=self._original_df, filters=new_filters) + def __init__(self) -> None: + self._complete_df: pd.DataFrame = self._parse_files() def _parse_files(self) -> pd.DataFrame: data_path = importlib.resources.files("nuclearmasses").joinpath("data") @@ -64,32 +23,6 @@ def _parse_files(self) -> pd.DataFrame: return pd.merge(AME(data_path).ame_df, NUBASE(data_path).nubase_df, on=common_columns, how="outer") @property - def df(self) -> pd.DataFrame: - """Apply all filters only when .df is accessed""" - result = self._original_df - for key, op, val in self._filters: - if op == "==": - # Filter on the index - if key == self._original_df.index.name: - result = result[result.index == val] - # Filter on a regular column - else: - result = result[result[key] == val] - elif key == "query": - result = result.query(op) - return result - - def _create_dynamic_getters(self): - """Automatically create get_colname(value) methods for every column.""" - for col in self._original_df.columns: - method_name = f"get_{col}" - - def make_getter(column: str): - def getter(self, value: typing.Any) -> MassTable: - new_filters = self._filters + [(column, "==", value)] - return type(self)(df=self._original_df, filters=new_filters) - - return getter - - # Attach the method to the class/instance - setattr(self, method_name, make_getter(col).__get__(self, MassTable)) + def data(self) -> pd.DataFrame: + """Access the complete mass table dataframe""" + return self._complete_df diff --git a/tests/test_mass_table.py b/tests/test_mass_table.py index 4e8bc38..2331d5f 100644 --- a/tests/test_mass_table.py +++ b/tests/test_mass_table.py @@ -1,157 +1,8 @@ -import pandas as pd -import pandas.testing as pdt -import pytest - from nuclearmasses.mass_table import MassTable -@pytest.fixture -def empty_frame(): - return 
MassTable(df=pd.DataFrame()) - - def test_initial_complete_parse(): - data = MassTable().df + data = MassTable().data expected_shape = (21421, 50) assert expected_shape == data.shape - - -def test_getter_creation(): - cols = ["Mass", "Error", "Param", "RandomLongerString"] - test_frame = pd.DataFrame.from_dict(data=dict.fromkeys(cols, [0])) - - df = MassTable(df=test_frame) - - for name in cols: - f = f"get_{name}" - assert hasattr(df, f) - assert callable(getattr(df, f)) - - -def test_getter_not_created(empty_frame): - with pytest.raises(AttributeError): - empty_frame.get_Nothing() - - -def test_empty_filter(empty_frame): - assert len(empty_frame._filters) == 0 - - -def test_unique_and_sorted_dir(empty_frame): - output = dir(empty_frame) - assert output == sorted(output) - assert len(output) == len(set(output)) - - -def test_dir_includes_class_attributes(empty_frame): - output = dir(empty_frame) - - assert "_parse_files" in output - assert "get" in output - - -def test_dir_includes_pandas_attributes(empty_frame): - output = dir(empty_frame) - - assert "describe" in output - assert "head" in output - - -def test_manually_populated_filter(): - cols = ["ManualParameter"] - test_frame = pd.DataFrame.from_dict(data=dict.fromkeys(cols, [0])) - - my_filter = [(cols[0], "==", 5)] - - df = MassTable(df=test_frame, filters=my_filter) - - assert len(df._filters) == 1 - assert df._filters == my_filter - - -def test_auto_populated_filter(): - cols = ["AutoParameter"] - test_frame = pd.DataFrame.from_dict(data=dict.fromkeys(cols, [0])) - - df = MassTable(df=test_frame) - - val = 2 - f_df = df.get_AutoParameter(val) - - assert len(f_df._filters) == 1 - assert f_df._filters == [(cols[0], "==", val)] - - -def test_getter_on_index(): - cols = ["Mass", "Error", "Param", "RandomLongerString"] - test_frame = pd.DataFrame.from_dict(data=dict.fromkeys(cols, [0])) - test_frame.set_index("Param") - - m_df = MassTable(df=test_frame) - m_df = m_df.get_Param(0).df - - expected = 
pd.DataFrame( - { - "Mass": [0], - "Error": [0], - "Param": [0], - "RandomLongerString": [0], - } - ) - - pdt.assert_frame_equal(m_df, expected, check_like=True) - - -def test_access_property(): - cols = ["Mass", "Error", "Param", "RandomLongerString"] - test_frame = pd.DataFrame.from_dict(data=dict.fromkeys(cols, [0])) - - m_df = MassTable(df=test_frame).df - - expected = pd.DataFrame( - { - "Mass": [0], - "Error": [0], - "Param": [0], - "RandomLongerString": [0], - } - ) - - pdt.assert_frame_equal(m_df, expected, check_like=True) - - -def test_generic_getter(): - cols = ["Mass", "Error", "Param", "RandomLongerString"] - test_frame = pd.DataFrame.from_dict(data=dict.fromkeys(cols, [0])) - - m_df = MassTable(df=test_frame).get("Error", 0).df - - expected = pd.DataFrame( - { - "Mass": [0], - "Error": [0], - "Param": [0], - "RandomLongerString": [0], - } - ) - - pdt.assert_frame_equal(m_df, expected, check_like=True) - - -def test_generic_filter(): - cols = ["Mass", "Error", "Param", "RandomLongerString"] - test_frame = pd.DataFrame.from_dict(data=dict.fromkeys(cols, [0])) - - m_df = MassTable(df=test_frame).filter("Param == 0").df - - expected = pd.DataFrame( - { - "Mass": [0], - "Error": [0], - "Param": [0], - "RandomLongerString": [0], - } - ) - - pdt.assert_frame_equal(m_df, expected, check_like=True) From ad48107b7d3a788931064eabc51d3e37312bfbcf Mon Sep 17 00:00:00 2001 From: Ian Cullen Date: Sun, 5 Apr 2026 18:08:01 +0100 Subject: [PATCH 25/26] Update usage examples in the README We no longer index on the year and the main dataframe is accessed via a different member name. 
--- README.md | 110 +++++++++++++++++++++++++++--------------------------- 1 file changed, 54 insertions(+), 56 deletions(-) diff --git a/README.md b/README.md index 88e69c5..f84e40f 100644 --- a/README.md +++ b/README.md @@ -54,73 +54,71 @@ git clone https://github.com/php1ic/nuclearmasses > While every effort is made to maintain a stable API, this module is relatively new so users should not be surprised if there are changes between versions. > If a breaking change has been introduced, it will always be highlighted in the [CHANGELOG](CHANGELOG.md). -Once installed or cloned, the data is available as a single dataframe indexed on the mass table year +The combination of AME and NUBASE values from all years is available as a single dataframe ```python >>> from nuclearmasses.mass_table import MassTable ->>> df = MassTable().full_data +>>> df = MassTable().data ``` You can then interrogate, or extract, whatever information you want. For example, how has the mass excess and it's accuracy changed overtime for 190Re according to the AME ```python >>> df[(df['A'] == 190) & (df['Symbol'] == 'Re')][['AMEMassExcess', 'AMEMassExcessError']] - AMEMassExcess AMEMassExcessError -TableYear -1983 -35536.605 200.029 -1993 -35557.789 145.549 -1995 -35568.032 212.151 -2003 -35566.326 149.248 -2012 -35634.992 70.542 -2016 -35635.830 70.852 -2020 -35583.015 4.870 + AMEMassExcess AMEMassExcessError +16054 -35536.605 200.029 +16055 -35557.789 145.549 +16056 -35568.032 212.151 +16057 -35566.326 149.248 +16058 -35634.992 70.542 +16059 -35635.830 70.852 +16060 -35583.015 4.870 ``` Or how does the mass excess of gold vary across the isotopic chain according to NUBASE in the most recent table for both experimentally measured and theoretical values ```python >>> df.query("TableYear == 2020 and Symbol == 'Au'")[['A', 'NUBASEMassExcess', 'NUBASEMassExcessError', 'Experimental']] - A NUBASEMassExcess NUBASEMassExcessError Experimental -TableYear -2020 168 2530.0 400.0 False -2020 169 -1790.0 
300.0 False -2020 170 -3700.0 200.0 False -2020 171 -7562.0 21.0 True -2020 172 -9320.0 60.0 True -2020 173 -12832.0 23.0 True -2020 174 -14060.0 100.0 False -2020 175 -17400.0 40.0 True -2020 176 -18520.0 30.0 True -2020 177 -21546.0 10.0 True -2020 178 -22303.0 10.0 True -2020 179 -24989.0 12.0 True -2020 180 -25626.0 5.0 True -2020 181 -27871.0 20.0 True -2020 182 -28304.0 19.0 True -2020 183 -30191.0 9.0 True -2020 184 -30319.0 22.0 True -2020 185 -31858.1 2.6 True -2020 186 -31715.0 21.0 True -2020 187 -33029.0 22.0 True -2020 188 -32371.3 2.7 True -2020 189 -33582.0 20.0 True -2020 190 -32834.0 3.0 True -2020 191 -33798.0 5.0 True -2020 192 -32772.0 16.0 True -2020 193 -33405.0 9.0 True -2020 194 -32211.9 2.1 True -2020 195 -32567.1 1.1 True -2020 196 -31138.7 3.0 True -2020 197 -31139.8 0.5 True -2020 198 -29580.8 0.5 True -2020 199 -29093.8 0.5 True -2020 200 -27240.0 27.0 True -2020 201 -26401.0 3.0 True -2020 202 -24353.0 23.0 True -2020 203 -23143.0 3.0 True -2020 204 -20390.0 200.0 False -2020 205 -18570.0 200.0 False -2020 206 -14190.0 300.0 False -2020 207 -10640.0 300.0 False -2020 208 -5910.0 300.0 False -2020 209 -2230.0 400.0 False -2020 210 2680.0 400.0 False + A NUBASEMassExcess NUBASEMassExcessError Experimental +14084 168 2530.0 400.0 False +14189 169 -1790.0 300.0 False +14291 170 -3700.0 200.0 False +14391 171 -7562.0 21.0 True +14492 172 -9320.0 60.0 True +14591 173 -12832.0 23.0 True +14687 174 -14060.0 100.0 False +14781 175 -17400.0 40.0 True +14874 176 -18520.0 30.0 True +14968 177 -21546.0 10.0 True +15060 178 -22303.0 10.0 True +15153 179 -24989.0 12.0 True +15244 180 -25626.0 5.0 True +15334 181 -27871.0 20.0 True +15419 182 -28304.0 19.0 True +15503 183 -30191.0 9.0 True +15588 184 -30319.0 22.0 True +15673 185 -31858.1 2.6 True +15757 186 -31715.0 21.0 True +15842 187 -33029.0 22.0 True +15926 188 -32371.3 2.7 True +16007 189 -33582.0 20.0 True +16088 190 -32834.0 3.0 True +16164 191 -33798.0 5.0 True +16243 192 -32772.0 16.0 True 
+16320 193 -33405.0 9.0 True +16401 194 -32211.9 2.1 True +16480 195 -32567.1 1.1 True +16560 196 -31138.7 3.0 True +16637 197 -31139.8 0.5 True +16713 198 -29580.8 0.5 True +16788 199 -29093.8 0.5 True +16861 200 -27240.0 27.0 True +16935 201 -26401.0 3.0 True +17012 202 -24353.0 23.0 True +17089 203 -23143.0 3.0 True +17163 204 -20390.0 200.0 False +17237 205 -18570.0 200.0 False +17308 206 -14190.0 300.0 False +17382 207 -10640.0 300.0 False +17456 208 -5910.0 300.0 False +17528 209 -2230.0 400.0 False +17603 210 2680.0 400.0 False ``` ## Contributing From 1658d0245d12aec88bd220fc44326823949d83f5 Mon Sep 17 00:00:00 2001 From: Ian Cullen Date: Sun, 5 Apr 2026 18:10:25 +0100 Subject: [PATCH 26/26] Rename file so it matches the class within --- tests/{test_parse.py => test_converter.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/{test_parse.py => test_converter.py} (100%) diff --git a/tests/test_parse.py b/tests/test_converter.py similarity index 100% rename from tests/test_parse.py rename to tests/test_converter.py