diff --git a/README.md b/README.md index 429c554..f84e40f 100644 --- a/README.md +++ b/README.md @@ -54,84 +54,82 @@ git clone https://github.com/php1ic/nuclearmasses > While every effort is made to maintain a stable API, this module is relatively new so users should not be surprised if there are changes between versions. > If a breaking change has been introduced, it will always be highlighted in the [CHANGELOG](CHANGELOG.md). -Once installed or cloned, the data is available as a single dataframe indexed on the mass table year +The combination of AME and NUBASE values from all years is available as a single dataframe ```python >>> from nuclearmasses.mass_table import MassTable ->>> df = MassTable().full_data +>>> df = MassTable().data ``` You can then interrogate, or extract, whatever information you want. For example, how has the mass excess and it's accuracy changed overtime for 190Re according to the AME ```python >>> df[(df['A'] == 190) & (df['Symbol'] == 'Re')][['AMEMassExcess', 'AMEMassExcessError']] - AMEMassExcess AMEMassExcessError -TableYear -1983 -35536.605 200.029 -1993 -35557.789 145.549 -1995 -35568.032 212.151 -2003 -35566.326 149.248 -2012 -35634.992 70.542 -2016 -35635.830 70.852 -2020 -35583.015 4.870 + AMEMassExcess AMEMassExcessError +16054 -35536.605 200.029 +16055 -35557.789 145.549 +16056 -35568.032 212.151 +16057 -35566.326 149.248 +16058 -35634.992 70.542 +16059 -35635.830 70.852 +16060 -35583.015 4.870 ``` Or how does the mass excess of gold vary across the isotopic chain according to NUBASE in the most recent table for both experimentally measured and theoretical values ```python >>> df.query("TableYear == 2020 and Symbol == 'Au'")[['A', 'NUBASEMassExcess', 'NUBASEMassExcessError', 'Experimental']] - A NUBASEMassExcess NUBASEMassExcessError Experimental -TableYear -2020 168 2530.0 400.0 False -2020 169 -1790.0 300.0 False -2020 170 -3700.0 200.0 False -2020 171 -7562.0 21.0 True -2020 172 -9320.0 60.0 True -2020 173 -12832.0 23.0 True 
-2020 174 -14060.0 100.0 False -2020 175 -17400.0 40.0 True -2020 176 -18520.0 30.0 True -2020 177 -21546.0 10.0 True -2020 178 -22303.0 10.0 True -2020 179 -24989.0 12.0 True -2020 180 -25626.0 5.0 True -2020 181 -27871.0 20.0 True -2020 182 -28304.0 19.0 True -2020 183 -30191.0 9.0 True -2020 184 -30319.0 22.0 True -2020 185 -31858.1 2.6 True -2020 186 -31715.0 21.0 True -2020 187 -33029.0 22.0 True -2020 188 -32371.3 2.7 True -2020 189 -33582.0 20.0 True -2020 190 -32834.0 3.0 True -2020 191 -33798.0 5.0 True -2020 192 -32772.0 16.0 True -2020 193 -33405.0 9.0 True -2020 194 -32211.9 2.1 True -2020 195 -32567.1 1.1 True -2020 196 -31138.7 3.0 True -2020 197 -31139.8 0.5 True -2020 198 -29580.8 0.5 True -2020 199 -29093.8 0.5 True -2020 200 -27240.0 27.0 True -2020 201 -26401.0 3.0 True -2020 202 -24353.0 23.0 True -2020 203 -23143.0 3.0 True -2020 204 -20390.0 200.0 False -2020 205 -18570.0 200.0 False -2020 206 -14190.0 300.0 False -2020 207 -10640.0 300.0 False -2020 208 -5910.0 300.0 False -2020 209 -2230.0 400.0 False -2020 210 2680.0 400.0 False + A NUBASEMassExcess NUBASEMassExcessError Experimental +14084 168 2530.0 400.0 False +14189 169 -1790.0 300.0 False +14291 170 -3700.0 200.0 False +14391 171 -7562.0 21.0 True +14492 172 -9320.0 60.0 True +14591 173 -12832.0 23.0 True +14687 174 -14060.0 100.0 False +14781 175 -17400.0 40.0 True +14874 176 -18520.0 30.0 True +14968 177 -21546.0 10.0 True +15060 178 -22303.0 10.0 True +15153 179 -24989.0 12.0 True +15244 180 -25626.0 5.0 True +15334 181 -27871.0 20.0 True +15419 182 -28304.0 19.0 True +15503 183 -30191.0 9.0 True +15588 184 -30319.0 22.0 True +15673 185 -31858.1 2.6 True +15757 186 -31715.0 21.0 True +15842 187 -33029.0 22.0 True +15926 188 -32371.3 2.7 True +16007 189 -33582.0 20.0 True +16088 190 -32834.0 3.0 True +16164 191 -33798.0 5.0 True +16243 192 -32772.0 16.0 True +16320 193 -33405.0 9.0 True +16401 194 -32211.9 2.1 True +16480 195 -32567.1 1.1 True +16560 196 -31138.7 3.0 True +16637 197 
-31139.8 0.5 True +16713 198 -29580.8 0.5 True +16788 199 -29093.8 0.5 True +16861 200 -27240.0 27.0 True +16935 201 -26401.0 3.0 True +17012 202 -24353.0 23.0 True +17089 203 -23143.0 3.0 True +17163 204 -20390.0 200.0 False +17237 205 -18570.0 200.0 False +17308 206 -14190.0 300.0 False +17382 207 -10640.0 300.0 False +17456 208 -5910.0 300.0 False +17528 209 -2230.0 400.0 False +17603 210 2680.0 400.0 False ``` ## Contributing If you have ideas for additional functionality or find bugs please create an [issue](https://github.com/php1ic/nuclearmasses/issues) or better yet a [pull request](https://github.com/php1ic/nuclearmasses/pulls). -We use a combination of [isort](https://pycqa.github.io/isort/), [ruff](https://docs.astral.sh/ruff/) and [mypy](https://www.mypy-lang.org/) to keep things tidy and hopefully catch errors and bugs before they happen. +We use a combination of [ruff](https://docs.astral.sh/ruff/) and [mypy](https://www.mypy-lang.org/) to keep things tidy and hopefully catch errors and bugs before they happen. The command below returns no errors or issues so should be run after any code changes. We might add a CI pipeline in the future, but for the moment, it's a manual process. ```bash -isort . 
&& ruff format && ruff check && mypy src +ruff format && ruff check && mypy src ``` ## Known issues diff --git a/pyproject.toml b/pyproject.toml index da1a893..73c9cc9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -75,9 +75,6 @@ omit = [ "tests/*" ] -[tool.isort] -known_first_party = ["nuclearmasses"] - [tool.ruff] line-length = 120 @@ -85,7 +82,9 @@ line-length = 120 # Nothing different from defaults [tool.ruff.lint.isort] -known-first-party = ["nuclearmasses"] +known-first-party = ["src"] +from-first = false +order-by-type = true force-sort-within-sections = true [tool.ruff.lint] diff --git a/src/nuclearmasses/io/ame.py b/src/nuclearmasses/io/ame.py new file mode 100644 index 0000000..089cbc0 --- /dev/null +++ b/src/nuclearmasses/io/ame.py @@ -0,0 +1,49 @@ +from importlib.resources.abc import Traversable + +import pandas as pd + +from nuclearmasses.io.ame_mass_parse import AMEMassParser +from nuclearmasses.io.ame_reaction_1_parse import AMEReactionParserOne +from nuclearmasses.io.ame_reaction_2_parse import AMEReactionParserTwo + + +class AME: + """Top level storage and functionality for AME data""" + + def __init__(self, data_path: Traversable): + self.data_path = data_path + self.years: list[int] = [1983, 1993, 1995, 2003, 2012, 2016, 2020] + self.ame_files: list[tuple[str, str, str]] = [ + ("mass.mas83", "rct1.mas83", "rct2.mas83"), + ("mass_exp.mas93", "rct1_exp.mas93", "rct2_exp.mas93"), + ("mass_exp.mas95", "rct1_exp.mas95", "rct2_exp.mas95"), + ("mass.mas03", "rct1.mas03", "rct2.mas03"), + ("mass.mas12", "rct1.mas12", "rct2.mas12"), + ("mass16.txt", "rct1-16.txt", "rct2-16.txt"), + ("mass.mas20", "rct1.mas20", "rct2.mas20"), + ] + self.files: dict[int, tuple[str, str, str]] = dict(zip(self.years, self.ame_files, strict=True)) + self.ame_df: pd.DataFrame = self.parse_all_years() + + def get_datafiles(self, year: int) -> tuple[Traversable, Traversable, Traversable]: + """Use the given year to locate the 3 AME data files and return the absolute paths."""
+ root = self.data_path / str(year) + mass, rct1, rct2 = self.files[year] + + return root / mass, root / rct1, root / rct2 + + def parse_year(self, year: int) -> pd.DataFrame: + """Combine all the AME files from the given ``year``""" + ame_mass, ame_reaction_1, ame_reaction_2 = self.get_datafiles(year) + + mass_df = AMEMassParser(filename=ame_mass, year=year).read_file() + rct1_df = AMEReactionParserOne(filename=ame_reaction_1, year=year).read_file() + rct2_df = AMEReactionParserTwo(filename=ame_reaction_2, year=year).read_file() + + # Merge all 3 of the AME dataframes into one + common_columns = ["A", "Z", "N", "TableYear", "Symbol"] + return mass_df.merge(rct1_df, on=common_columns, how="outer").merge(rct2_df, on=common_columns, how="outer") + + def parse_all_years(self) -> pd.DataFrame: + """Parse the files for all available years""" + return pd.concat((self.parse_year(y) for y in self.years), ignore_index=True) diff --git a/src/nuclearmasses/io/ame_mass_file.py b/src/nuclearmasses/io/ame_mass_file.py index 3acb33d..0e9b7fc 100644 --- a/src/nuclearmasses/io/ame_mass_file.py +++ b/src/nuclearmasses/io/ame_mass_file.py @@ -1,11 +1,8 @@ -from nuclearmasses.utils.converter import Converter - - -class AMEMassFile(Converter): +class AMEMassFile: """Easy access to the variables in the AME mass file.""" - def __init__(self, year: int): - super().__init__() + def __init__(self, year: int, **kwargs): + super().__init__(**kwargs) match year: case 1983: self.HEADER = 35 diff --git a/src/nuclearmasses/io/ame_mass_parse.py b/src/nuclearmasses/io/ame_mass_parse.py index 371cb1d..1ce5f43 100644 --- a/src/nuclearmasses/io/ame_mass_parse.py +++ b/src/nuclearmasses/io/ame_mass_parse.py @@ -1,12 +1,12 @@ import logging -import pathlib import pandas as pd from nuclearmasses.io.ame_mass_file import AMEMassFile +from nuclearmasses.utils.converter import Converter, DataInput -class AMEMassParser(AMEMassFile): +class AMEMassParser(AMEMassFile, Converter): """Parse the AME mass file. 
The format is known but the provided string does not match all lines. @@ -14,65 +14,71 @@ class AMEMassParser(AMEMassFile): read the columns are interested in. """ - def __init__(self, filename: pathlib.Path, year: int): + def __init__(self, filename: DataInput, year: int): """Set the file to read and table year""" - self.filename: pathlib.Path = filename + super().__init__(year=year) + self.filename: DataInput = filename self.year: int = year - super().__init__(self.year) logging.info(f"Reading {self.filename} from {self.year}") def _column_names(self) -> list[str]: """Set the column name depending on the year""" - match self.year: - case _: - return [ - "Z", - "A", - "AMEMassExcess", - "AMEMassExcessError", - "BindingEnergyPerA", - "BindingEnergyPerAError", - "BetaDecayEnergy", - "BetaDecayEnergyError", - "AtomicNumber", - "AtomicMass", - "AtomicMassError", - ] + return [ + "Z", + "A", + "AMEMassExcess", + "AMEMassExcessError", + "BindingEnergyPerA", + "BindingEnergyPerAError", + "BetaDecayEnergy", + "BetaDecayEnergyError", + "AtomicNumber", + "AtomicMass", + "AtomicMassError", + ] def _data_types(self) -> dict: """Set the data type depending on the year""" - match self.year: - case _: - return { - "TableYear": "Int64", - "Symbol": "string", - "N": "Int64", - "Z": "Int64", - "A": "Int64", - "AMEMassExcess": "float64", - "AMEMassExcessError": "float64", - "BindingEnergyPerA": "float64", - "BindingEnergyPerAError": "float64", - "BetaDecayEnergy": "float64", - "BetaDecayEnergyError": "float64", - "AtomicMass": "float64", - "AtomicMassError": "float64", - } + return { + "TableYear": "Int64", + "Symbol": "string", + "N": "Int64", + "Z": "Int64", + "A": "Int64", + "AMEMassExcess": "float64", + "AMEMassExcessError": "float64", + "BindingEnergyPerA": "float64", + "BindingEnergyPerAError": "float64", + "BetaDecayEnergy": "float64", + "BetaDecayEnergyError": "float64", + "AtomicMass": "float64", + "AtomicMassError": "float64", + } def _na_values(self) -> dict: """Set the 
columns that have placeholder values""" - match self.year: - case 1983: - return { - "A": [""], - "BetaDecayEnergy": ["", "*"], - "BetaDecayEnergyError": ["", "*"], - } - case _: - return { - "BetaDecayEnergy": ["", "*"], - "BetaDecayEnergyError": ["", "*"], - } + na_vals = { + "A": [""], + "BetaDecayEnergy": ["", "*"], + "BetaDecayEnergyError": ["", "*"], + } + + if self.year != 1983: + na_vals.pop("A") + + return na_vals + + def calculate_relative_error(self, raw_df) -> pd.DataFrame: + """Calculate the relative error of the mass excess + + 12C has a 0.0 +/- 0.0 mass excess by definition so ensure that is still true. + """ + raw_df["AMERelativeError"] = abs( + raw_df["AMEMassExcessError"].astype(float) / raw_df["AMEMassExcess"].astype(float) + ) + raw_df.loc[(raw_df.Z == 6) & (raw_df.A == 12), "AMERelativeError"] = 0.0 + + return raw_df def read_file(self) -> pd.DataFrame: """Read the file using it's known format @@ -81,7 +87,7 @@ def read_file(self) -> pd.DataFrame: column names, data types and locations of the date so we can now make the generic call to parse the file.
""" - df = pd.read_fwf( + df = Converter.read_fwf( self.filename, colspecs=self.column_limits, names=self._column_names(), @@ -119,9 +125,10 @@ def read_file(self) -> pd.DataFrame: # We need to rescale the error value because we combined the two columns above df = df.assign(AtomicMassError=df["AtomicMassError"].astype(float) / 1.0e6) + df = self.calculate_relative_error(df) df["TableYear"] = self.year df["N"] = pd.to_numeric(df["A"]) - pd.to_numeric(df["Z"]) - df["Symbol"] = pd.to_numeric(df["Z"]).map(self.z_to_symbol) + df["Symbol"] = pd.to_numeric(df["Z"]).map(self.get_symbol) return df.astype(self._data_types()) diff --git a/src/nuclearmasses/io/ame_reaction_1_file.py b/src/nuclearmasses/io/ame_reaction_1_file.py index 0e0ca64..9a0de75 100644 --- a/src/nuclearmasses/io/ame_reaction_1_file.py +++ b/src/nuclearmasses/io/ame_reaction_1_file.py @@ -1,12 +1,9 @@ -from nuclearmasses.utils.converter import Converter - - -class AMEReactionFileOne(Converter): +class AMEReactionFileOne: """Easy access to the variables in the first AME reaction file.""" - def __init__(self, year: int): + def __init__(self, year: int, **kwargs): """Setup the values that locate the variable.""" - super().__init__() + super().__init__(**kwargs) match year: case 1983: self.HEADER = 30 diff --git a/src/nuclearmasses/io/ame_reaction_1_parse.py b/src/nuclearmasses/io/ame_reaction_1_parse.py index 943be64..c4be262 100644 --- a/src/nuclearmasses/io/ame_reaction_1_parse.py +++ b/src/nuclearmasses/io/ame_reaction_1_parse.py @@ -1,88 +1,82 @@ import logging -import pathlib import pandas as pd from nuclearmasses.io.ame_reaction_1_file import AMEReactionFileOne +from nuclearmasses.utils.converter import Converter, DataInput -class AMEReactionParserOne(AMEReactionFileOne): +class AMEReactionParserOne(AMEReactionFileOne, Converter): """Parse the first AME reaction file. The format is known but I don't think python can easily parse it. 
""" - def __init__(self, filename: pathlib.Path, year: int): + def __init__(self, filename: DataInput, year: int): """Set the file to read and table year.""" - self.filename = filename + super().__init__(year=year) + self.filename: DataInput = filename self.year = year - super().__init__(self.year) logging.info(f"Reading {self.filename} from {self.year}") def _column_names(self) -> list[str]: """Set the column name depending on the year""" - match self.year: - case _: - return [ - "A", - "Z", - "TwoNeutronSeparationEnergy", - "TwoNeutronSeparationEnergyError", - "TwoProtonSeparationEnergy", - "TwoProtonSeparationEnergyError", - "QAlpha", - "QAlphaError", - "QTwoBeta", - "QTwoBetaError", - "QEpsilon", - "QEpsilonError", - "QBetaNeutron", - "QBetaNeutronError", - ] + return [ + "A", + "Z", + "TwoNeutronSeparationEnergy", + "TwoNeutronSeparationEnergyError", + "TwoProtonSeparationEnergy", + "TwoProtonSeparationEnergyError", + "QAlpha", + "QAlphaError", + "QTwoBeta", + "QTwoBetaError", + "QEpsilon", + "QEpsilonError", + "QBetaNeutron", + "QBetaNeutronError", + ] def _data_types(self) -> dict: """Set the data type depending on the year""" - match self.year: - case _: - return { - "TableYear": "Int64", - "Symbol": "string", - "A": "Int64", - "Z": "Int64", - "N": "Int64", - "TwoNeutronSeparationEnergy": "float64", - "TwoNeutronSeparationEnergyError": "float64", - "TwoProtonSeparationEnergy": "float64", - "TwoProtonSeparationEnergyError": "float64", - "QAlpha": "float64", - "QAlphaError": "float64", - "QTwoBeta": "float64", - "QTwoBetaError": "float64", - "QEpsilon": "float64", - "QEpsilonError": "float64", - "QBetaNeutron": "float64", - "QBetaNeutronError": "float64", - } + return { + "TableYear": "Int64", + "Symbol": "string", + "A": "Int64", + "Z": "Int64", + "N": "Int64", + "TwoNeutronSeparationEnergy": "float64", + "TwoNeutronSeparationEnergyError": "float64", + "TwoProtonSeparationEnergy": "float64", + "TwoProtonSeparationEnergyError": "float64", + "QAlpha": 
"float64", + "QAlphaError": "float64", + "QTwoBeta": "float64", + "QTwoBetaError": "float64", + "QEpsilon": "float64", + "QEpsilonError": "float64", + "QBetaNeutron": "float64", + "QBetaNeutronError": "float64", + } def _na_values(self) -> dict: """Set the columns that have placeholder values""" - match self.year: - case _: - return { - "A": [""], - "TwoNeutronSeparationEnergy": ["", "*"], - "TwoNeutronSeparationEnergyError": ["", "*"], - "TwoProtonSeparationEnergy": ["", "*"], - "TwoProtonSeparationEnergyError": ["", "*"], - "QAlpha": ["", "*"], - "QAlphaError": ["", "*"], - "QTwoBeta": ["", "*"], - "QTwoBetaError": ["", "*"], - "QEpsilon": ["", "*"], - "QEpsilonError": ["", "*"], - "QBetaNeutron": ["", "*"], - "QBetaNeutronError": ["", "*"], - } + return { + "A": [""], + "TwoNeutronSeparationEnergy": ["", "*"], + "TwoNeutronSeparationEnergyError": ["", "*"], + "TwoProtonSeparationEnergy": ["", "*"], + "TwoProtonSeparationEnergyError": ["", "*"], + "QAlpha": ["", "*"], + "QAlphaError": ["", "*"], + "QTwoBeta": ["", "*"], + "QTwoBetaError": ["", "*"], + "QEpsilon": ["", "*"], + "QEpsilonError": ["", "*"], + "QBetaNeutron": ["", "*"], + "QBetaNeutronError": ["", "*"], + } def read_file(self) -> pd.DataFrame: """Read the file using it's known format @@ -91,7 +85,7 @@ def read_file(self) -> pd.DataFrame: column names, data types and locations of the date so we can now make the generic call to parse the file. 
""" - df = pd.read_fwf( + df = Converter.read_fwf( self.filename, colspecs=self.column_limits, names=self._column_names(), @@ -113,6 +107,6 @@ def read_file(self) -> pd.DataFrame: df["TableYear"] = self.year df["N"] = pd.to_numeric(df["A"]) - pd.to_numeric(df["Z"]) - df["Symbol"] = pd.to_numeric(df["Z"]).map(self.z_to_symbol) + df["Symbol"] = pd.to_numeric(df["Z"]).map(self.get_symbol) return df.astype(self._data_types()) diff --git a/src/nuclearmasses/io/ame_reaction_2_file.py b/src/nuclearmasses/io/ame_reaction_2_file.py index 871d95b..8aa5263 100644 --- a/src/nuclearmasses/io/ame_reaction_2_file.py +++ b/src/nuclearmasses/io/ame_reaction_2_file.py @@ -1,11 +1,8 @@ -from nuclearmasses.utils.converter import Converter - - -class AMEReactionFileTwo(Converter): +class AMEReactionFileTwo: """Easy access to the variables in the second AME reaction file.""" - def __init__(self, year: int): - super().__init__() + def __init__(self, year: int, **kwargs): + super().__init__(**kwargs) match year: case 1983: self.HEADER = 30 diff --git a/src/nuclearmasses/io/ame_reaction_2_parse.py b/src/nuclearmasses/io/ame_reaction_2_parse.py index 27a2f51..d6990d8 100644 --- a/src/nuclearmasses/io/ame_reaction_2_parse.py +++ b/src/nuclearmasses/io/ame_reaction_2_parse.py @@ -1,88 +1,82 @@ import logging -import pathlib import pandas as pd from nuclearmasses.io.ame_reaction_2_file import AMEReactionFileTwo +from nuclearmasses.utils.converter import Converter, DataInput -class AMEReactionParserTwo(AMEReactionFileTwo): +class AMEReactionParserTwo(AMEReactionFileTwo, Converter): """Parse the second AME reaction file. The format is known but I don't think python can easily parse it. 
""" - def __init__(self, filename: pathlib.Path, year: int): + def __init__(self, filename: DataInput, year: int): """Set the file to read and table year.""" - self.filename = filename + super().__init__(year=year) + self.filename: DataInput = filename self.year = year - super().__init__(self.year) logging.info(f"Reading {self.filename} from {self.year}") def _column_names(self) -> list[str]: """Set the column name depending on the year""" - match self.year: - case _: - return [ - "A", - "Z", - "OneNeutronSeparationEnergy", - "OneNeutronSeparationEnergyError", - "OneProtonSeparationEnergy", - "OneProtonSeparationEnergyError", - "QFourBeta", - "QFourBetaError", - "QDeuteronAlpha", - "QDeuteronAlphaError", - "QProtonAlpha", - "QProtonAlphaError", - "QNeutronAlpha", - "QNeutronAlphaError", - ] + return [ + "A", + "Z", + "OneNeutronSeparationEnergy", + "OneNeutronSeparationEnergyError", + "OneProtonSeparationEnergy", + "OneProtonSeparationEnergyError", + "QFourBeta", + "QFourBetaError", + "QDeuteronAlpha", + "QDeuteronAlphaError", + "QProtonAlpha", + "QProtonAlphaError", + "QNeutronAlpha", + "QNeutronAlphaError", + ] def _data_types(self) -> dict: """Set the data type depending on the year""" - match self.year: - case _: - return { - "TableYear": "Int64", - "Symbol": "string", - "A": "Int64", - "Z": "Int64", - "N": "Int64", - "OneNeutronSeparationEnergy": "float64", - "OneNeutronSeparationEnergyError": "float64", - "OneProtonSeparationEnergy": "float64", - "OneProtonSeparationEnergyError": "float64", - "QFourBeta": "float64", - "QFourBetaError": "float64", - "QDeuteronAlpha": "float64", - "QDeuteronAlphaError": "float64", - "QProtonAlpha": "float64", - "QProtonAlphaError": "float64", - "QNeutronAlpha": "float64", - "QNeutronAlphaError": "float64", - } + return { + "TableYear": "Int64", + "Symbol": "string", + "A": "Int64", + "Z": "Int64", + "N": "Int64", + "OneNeutronSeparationEnergy": "float64", + "OneNeutronSeparationEnergyError": "float64", + 
"OneProtonSeparationEnergy": "float64", + "OneProtonSeparationEnergyError": "float64", + "QFourBeta": "float64", + "QFourBetaError": "float64", + "QDeuteronAlpha": "float64", + "QDeuteronAlphaError": "float64", + "QProtonAlpha": "float64", + "QProtonAlphaError": "float64", + "QNeutronAlpha": "float64", + "QNeutronAlphaError": "float64", + } def _na_values(self) -> dict: """Set the columns that have placeholder values""" - match self.year: - case _: - return { - "A": [""], - "OneNeutronSeparationEnergy": ["", "*"], - "OneNeutronSeparationEnergyError": ["", "*"], - "OneProtonSeparationEnergy": ["", "*"], - "OneProtonSeparationEnergyError": ["", "*"], - "QFourBeta": ["", "*"], - "QFourBetaError": ["", "*"], - "QDeuteronAlpha": ["", "*"], - "QDeuteronAlphaError": ["", "*"], - "QProtonAlpha": ["", "*"], - "QProtonAlphaError": ["", "*"], - "QNeutronAlpha": ["", "*"], - "QNeutronAlphaError": ["", "*"], - } + return { + "A": [""], + "OneNeutronSeparationEnergy": ["", "*"], + "OneNeutronSeparationEnergyError": ["", "*"], + "OneProtonSeparationEnergy": ["", "*"], + "OneProtonSeparationEnergyError": ["", "*"], + "QFourBeta": ["", "*"], + "QFourBetaError": ["", "*"], + "QDeuteronAlpha": ["", "*"], + "QDeuteronAlphaError": ["", "*"], + "QProtonAlpha": ["", "*"], + "QProtonAlphaError": ["", "*"], + "QNeutronAlpha": ["", "*"], + "QNeutronAlphaError": ["", "*"], + } def read_file(self) -> pd.DataFrame: """Read the file using it's known format @@ -91,7 +85,7 @@ def read_file(self) -> pd.DataFrame: column names, data types and locations of the date so we can now make the generic call to parse the file. 
""" - df = pd.read_fwf( + df = Converter.read_fwf( self.filename, colspecs=self.column_limits, names=self._column_names(), @@ -117,6 +111,6 @@ def read_file(self) -> pd.DataFrame: # Repeated column heading also means we have to cast to create new columns df["TableYear"] = self.year df["N"] = pd.to_numeric(df["A"]) - pd.to_numeric(df["Z"]) - df["Symbol"] = pd.to_numeric(df["Z"]).map(self.z_to_symbol) + df["Symbol"] = pd.to_numeric(df["Z"]).map(self.get_symbol) return df.astype(self._data_types()) diff --git a/src/nuclearmasses/io/nubase.py b/src/nuclearmasses/io/nubase.py new file mode 100644 index 0000000..98d8e00 --- /dev/null +++ b/src/nuclearmasses/io/nubase.py @@ -0,0 +1,34 @@ +from importlib.resources.abc import Traversable + +import pandas as pd + +from nuclearmasses.io.nubase_parse import NUBASEParser + + +class NUBASE: + """Top level storage and functionality for NUBASE data""" + + def __init__(self, data_path: Traversable): + self.data_path = data_path + self.years: list[int] = [1995, 2003, 2012, 2016, 2020] + self.nubase_files: list[str] = [ + "nubtab97.asc", + "nubtab03.asc", + "nubtab12.asc", + "nubase2016.txt", + "nubase_1.mas20", + ] + self.files: dict[int, str] = dict(zip(self.years, self.nubase_files, strict=True)) + self.nubase_df: pd.DataFrame = self.parse_all_files() + + def get_datafile(self, year: int) -> Traversable: + """Use the given ``year`` to locate the NUBASE mass table file and return the absolute path.""" + return self.data_path / str(year) / self.files[year] + + def parse_year(self, year: int) -> pd.DataFrame: + """Parse the file of the given ``year``""" + return NUBASEParser(filename=self.get_datafile(year), year=year).read_file() + + def parse_all_files(self) -> pd.DataFrame: + """Parse the files for all available years""" + return pd.concat((self.parse_year(y) for y in self.years), ignore_index=True) diff --git a/src/nuclearmasses/io/nubase_file.py b/src/nuclearmasses/io/nubase_file.py index 82035c9..157a3e9 100644 --- 
a/src/nuclearmasses/io/nubase_file.py +++ b/src/nuclearmasses/io/nubase_file.py @@ -1,7 +1,4 @@ -from nuclearmasses.utils.converter import Converter - - -class NUBASEFile(Converter): +class NUBASEFile: """Easy access to the variables in the NUBASE file. The NUBASE data file is formatted by location in the line, values exist @@ -10,9 +7,9 @@ class NUBASEFile(Converter): magic numbers. """ - def __init__(self, year: int): + def __init__(self, year: int, **kwargs): """Setup the values that locate the variable.""" - super().__init__() + super().__init__(**kwargs) match year: case 1995: self.HEADER = 0 diff --git a/src/nuclearmasses/io/nubase_parse.py b/src/nuclearmasses/io/nubase_parse.py index b70eac4..3fbe04b 100644 --- a/src/nuclearmasses/io/nubase_parse.py +++ b/src/nuclearmasses/io/nubase_parse.py @@ -1,22 +1,22 @@ import logging -import pathlib import typing import pandas as pd from nuclearmasses.io.nubase_file import NUBASEFile +from nuclearmasses.utils.converter import Converter, DataInput -class NUBASEParser(NUBASEFile): +class NUBASEParser(NUBASEFile, Converter): """Parse the NUBASE data file. A collection of functions to parse the weird format of the NUBASE file. 
""" - def __init__(self, filename: pathlib.Path, year: int): + def __init__(self, filename: DataInput, year: int): """Set the file to read and the table year.""" - super().__init__(year) - self.filename: pathlib.Path = filename + super().__init__(year=year) + self.filename: DataInput = filename self.year: int = year self.unit_replacements: dict[str, str] = { r"y$": "yr", @@ -26,92 +26,67 @@ def __init__(self, filename: pathlib.Path, year: int): def _column_names(self) -> list[str]: """Set the column name depending on the year""" - match self.year: - case 1995 | 2003: - return [ - "A", - "Z", - "State", - "NUBASEMassExcess", - "NUBASEMassExcessError", - "IsomerEnergy", - "IsomerEnergyError", - "HalfLifeValue", - "HalfLifeUnit", - "HalfLifeError", - "Spin", - "DecayModes", - ] - case _: - return [ - "A", - "Z", - "State", - "NUBASEMassExcess", - "NUBASEMassExcessError", - "IsomerEnergy", - "IsomerEnergyError", - "HalfLifeValue", - "HalfLifeUnit", - "HalfLifeError", - "Spin", - "DiscoveryYear", - "DecayModes", - ] + col_names = [ + "A", + "Z", + "State", + "NUBASEMassExcess", + "NUBASEMassExcessError", + "IsomerEnergy", + "IsomerEnergyError", + "HalfLifeValue", + "HalfLifeUnit", + "HalfLifeError", + "Spin", + "DiscoveryYear", + "DecayModes", + ] + + # The discovery year was added after 2003, and I assume it will be there in the future, so we will set up + # as if it is always present and delete for the first two tables. 
+ if self.year == 1995 or self.year == 2003: + col_names.remove("DiscoveryYear") + + return col_names def _data_types(self) -> dict: """Set the data type depending on the year""" - match self.year: - case 1995 | 2003: - return { - "Symbol": "string", - "A": "Int64", - "Z": "Int64", - "N": "Int64", - "Experimental": "boolean", - # "State": "Int64", - "NUBASEMassExcess": "float64", - "NUBASEMassExcessError": "float64", - # "IsomerEnergy": "float64", - # "IsomerEnergyError": "float64", - "HalfLifeValue": "float64", - "HalfLifeUnit": "string", - "HalfLifeError": "float64", - "HalfLifeSeconds": "float64", - "HalfLifeErrorSeconds": "float64", - "Spin": "string", - "DecayModes": "string", - } - case _: - return { - "Symbol": "string", - "A": "Int64", - "Z": "Int64", - "N": "Int64", - "Experimental": "boolean", - # "State": "Int64", - "NUBASEMassExcess": "float64", - "NUBASEMassExcessError": "float64", - # "IsomerEnergy": "float64", - # "IsomerEnergyError": "float64", - "HalfLifeValue": "float64", - "HalfLifeUnit": "string", - "HalfLifeError": "float64", - "HalfLifeSeconds": "float64", - "HalfLifeErrorSeconds": "float64", - "Spin": "string", - "DiscoveryYear": "Int64", - "DecayModes": "string", - } + data_types = { + "Symbol": "string", + "A": "Int64", + "Z": "Int64", + "N": "Int64", + "Experimental": "boolean", + # "State": "Int64", + "NUBASEMassExcess": "float64", + "NUBASEMassExcessError": "float64", + # "IsomerEnergy": "float64", + # "IsomerEnergyError": "float64", + "HalfLifeValue": "float64", + "HalfLifeUnit": "string", + "HalfLifeError": "float64", + "HalfLifeSeconds": "float64", + "HalfLifeErrorSeconds": "float64", + "Spin": "string", + "DiscoveryYear": "Int64", + "DecayModes": "string", + } + + # The discovery year was added after 2003, and I assume it will be there in the future, so we will set up + # as if it is always present and delete for the first two tables. 
+ if self.year == 1995 or self.year == 2003: + data_types.pop("DiscoveryYear") + + return data_types def _na_values(self) -> dict: """Set the columns that have placeholder values""" match self.year: case 1995: return { + "State": [""], "NUBASEMassExcess": [""], "NUBASEMassExcessError": [""], - "State": [""], "HalfLifeValue": [""], "HalfLifeUnit": [""], "HalfLifeError": [""], @@ -137,8 +112,6 @@ def parse_half_life(self, raw_df) -> pd.DataFrame: any type of sorting or algorithm. Convert to the SI unit of seconds, but don't overwrite original columns. """ # Convert stable isotopes into ones with enormous lifetimes with zero error so we can cast - # pandas v3 became much stricter with type conversions so convert to object (from string) so - # we can assign a float without breaking other parts of the code raw_df["HalfLifeValue"] = raw_df["HalfLifeValue"].astype("object") raw_df["HalfLifeError"] = raw_df["HalfLifeError"].astype("object") @@ -151,6 +124,7 @@ def parse_half_life(self, raw_df) -> pd.DataFrame: # Use the 3 half-life columns to create 2 new columns with units of seconds raw_df["HalfLifeUnit"] = raw_df["HalfLifeUnit"].astype("string") + # Bookkeeping: Tidy up known unusual units, i.e. y for years and m for minutes for pattern, replacement in self.unit_replacements.items(): raw_df["HalfLifeUnit"] = raw_df["HalfLifeUnit"].str.replace(pattern, replacement, regex=True) @@ -177,6 +151,18 @@ def parse_state(self, raw_df) -> pd.DataFrame: return raw_df + def calculate_relative_error(self, raw_df) -> pd.DataFrame: + """Calculate the relative error of the mass excess + + 12C has a 0.0 +/- 0.0 mass excess by definition so ensure that is still true.
+ """ + raw_df["NUBASERelativeError"] = abs( + raw_df["NUBASEMassExcessError"].astype(float) / raw_df["NUBASEMassExcess"].astype(float) + ) + raw_df.loc[(raw_df.Z == 6) & (raw_df.A == 12), "NUBASERelativeError"] = 0.0 + + return raw_df + def read_file(self) -> pd.DataFrame: """Read the file using it's known format @@ -184,7 +170,7 @@ def read_file(self) -> pd.DataFrame: column names, data types and locations of the date so we can now make the generic call to parse the file. """ - df = pd.read_fwf( + df = Converter.read_fwf( self.filename, colspecs=typing.cast(typing.Sequence[tuple[int, int]], self.column_limits), # appease mypy names=self._column_names(), @@ -203,9 +189,14 @@ def read_file(self) -> pd.DataFrame: df.replace("#", "", regex=True, inplace=True) df = self.parse_half_life(df) + df = self.calculate_relative_error(df) + + if self.year == 2012: + # 198Au has a typo in its decay mode in the 2012 table. It is recorded as '-' + df.loc[(df.A == 198) & (df.Z == 79), "DecayModes"] = "B-" df["TableYear"] = self.year df["N"] = pd.to_numeric(df["A"]) - pd.to_numeric(df["Z"]) - df["Symbol"] = pd.to_numeric(df["Z"]).map(self.z_to_symbol) + df["Symbol"] = pd.to_numeric(df["Z"]).map(self.get_symbol) return df.astype(self._data_types()) diff --git a/src/nuclearmasses/mass_table.py b/src/nuclearmasses/mass_table.py index 4b410e9..9d74028 100644 --- a/src/nuclearmasses/mass_table.py +++ b/src/nuclearmasses/mass_table.py @@ -1,12 +1,9 @@ import importlib.resources -import pathlib import pandas as pd -from nuclearmasses.io.ame_mass_parse import AMEMassParser -from nuclearmasses.io.ame_reaction_1_parse import AMEReactionParserOne -from nuclearmasses.io.ame_reaction_2_parse import AMEReactionParserTwo -from nuclearmasses.io.nubase_parse import NUBASEParser +from nuclearmasses.io.ame import AME +from nuclearmasses.io.nubase import NUBASE class MassTable: @@ -15,113 +12,17 @@ class MassTable: Internally there are separate dataframes for the NUBASE and AME data as well as a
combined one for all data """ - def __init__(self): - """Do all of the work at construction.""" - self.data_path = importlib.resources.files("nuclearmasses.data") - self.nubase_years = [1995, 2003, 2012, 2016, 2020] - self.nubase = pd.concat([self._parse_nubase_data(y) for y in self.nubase_years], ignore_index=True) - self.ame_years = [1983, 1993, 1995, 2003, 2012, 2016, 2020] - self.ame = pd.concat([self._parse_ame_data(y) for y in self.ame_years], ignore_index=True) - self.full_data = self._combine_all_data() - self._do_indexing() + def __init__(self) -> None: + self._complete_df: pd.DataFrame = self._parse_files() - def _get_nubase_datafile(self, year: int) -> pathlib.Path: - """Use the given year to locate the NUBASE mass table file and return the absolute path.""" - nubase_mass = self.data_path / pathlib.Path(str(year)) - nubase_mass = nubase_mass.resolve() + def _parse_files(self) -> pd.DataFrame: + data_path = importlib.resources.files("nuclearmasses").joinpath("data") - match year: - case 1995: - nubase_mass = nubase_mass / "nubtab97.asc" - case 2003: - nubase_mass = nubase_mass / "nubtab03.asc" - case 2012: - nubase_mass = nubase_mass / "nubtab12.asc" - case 2016: - nubase_mass = nubase_mass / "nubase2016.txt" - case 2020: - nubase_mass = nubase_mass / "nubase_1.mas20" - - return nubase_mass - - def _get_ame_datafiles(self, year: int) -> tuple[pathlib.Path, pathlib.Path, pathlib.Path]: - """Use the given year to locate the 3 AME data file and return the absolute paths.""" - data_dir = self.data_path / pathlib.Path(str(year)) - data_dir = data_dir.resolve() - - match year: - case 1983: - ame_mass = data_dir / "mass.mas83" - ame_reaction_1 = data_dir / "rct1.mas83" - ame_reaction_2 = data_dir / "rct2.mas83" - case 1993: - ame_mass = data_dir / "mass_exp.mas93" - ame_reaction_1 = data_dir / "rct1_exp.mas93" - ame_reaction_2 = data_dir / "rct2_exp.mas93" - case 1995: - ame_mass = data_dir / "mass_exp.mas95" - ame_reaction_1 = data_dir / "rct1_exp.mas95" - 
ame_reaction_2 = data_dir / "rct2_exp.mas95" - case 2003: - ame_mass = data_dir / "mass.mas03" - ame_reaction_1 = data_dir / "rct1.mas03" - ame_reaction_2 = data_dir / "rct2.mas03" - case 2012: - ame_mass = data_dir / "mass.mas12" - ame_reaction_1 = data_dir / "rct1.mas12" - ame_reaction_2 = data_dir / "rct2.mas12" - case 2016: - ame_mass = data_dir / "mass16.txt" - ame_reaction_1 = data_dir / "rct1-16.txt" - ame_reaction_2 = data_dir / "rct2-16.txt" - case 2020: - ame_mass = data_dir / "mass.mas20" - ame_reaction_1 = data_dir / "rct1.mas20" - ame_reaction_2 = data_dir / "rct2.mas20" - - return ame_mass, ame_reaction_1, ame_reaction_2 - - def _parse_nubase_data(self, year: int) -> pd.DataFrame: - """Get the NUBASE for the given year as a pandas.DataFrame.""" - return NUBASEParser(self._get_nubase_datafile(year), year).read_file() - - def _parse_ame_data(self, year: int) -> pd.DataFrame: - """Combine all the AME files from the given year into a pandas.DataFrame.""" - ame_mass, ame_reaction_1, ame_reaction_2 = self._get_ame_datafiles(year) - - ame_mass_df = AMEMassParser(ame_mass, year).read_file() - - # Merge all 3 of the AME files/data frames into one common_columns = ["A", "Z", "N", "TableYear", "Symbol"] - temp_df = ame_mass_df.merge(AMEReactionParserOne(ame_reaction_1, year).read_file(), on=common_columns) - return temp_df.merge(AMEReactionParserTwo(ame_reaction_2, year).read_file(), on=common_columns) - - def _combine_all_data(self) -> pd.DataFrame: - """Combine all NUBASE and AME data into a single pandas DataFrame.""" - common_columns = ["A", "Z", "N", "TableYear", "Symbol"] - df = pd.merge(self.ame, self.nubase, on=common_columns, how="outer") - - df["NUBASERelativeError"] = abs(df["NUBASEMassExcessError"] / df["NUBASEMassExcess"]) - df["AMERelativeError"] = abs(df["AMEMassExcessError"] / df["AMEMassExcess"]) - - # 12C has a 0.0 +/ 0.0 mass excess by definition so calculating relative error -> NaN - # Set the value to 0.0 as that's what it is - 
df.loc[(df.Symbol == "C") & (df.A == 12), "AMERelativeError"] = 0.0 - - # 198Au has a typo in it's decay mode in the 2012 table. It is recorded as '-' - df.loc[(df.A == 198) & (df.Z == 79) & (df.TableYear == 2012), "DecayModes"] = "B-" - - return df - - def _do_indexing(self) -> None: - """ - Set the index of the dataframe to the table year. This is done in place so nothing is returned. - param: Nothing + return pd.merge(AME(data_path).ame_df, NUBASE(data_path).nubase_df, on=common_columns, how="outer") - :return: Nothing - """ - self.nubase.set_index("TableYear", inplace=True) - self.ame.set_index("TableYear", inplace=True) - self.full_data.set_index("TableYear", inplace=True) + @property + def data(self) -> pd.DataFrame: + """Access the complete mass table dataframe""" + return self._complete_df diff --git a/src/nuclearmasses/utils/converter.py b/src/nuclearmasses/utils/converter.py index 527f3da..effd9f4 100644 --- a/src/nuclearmasses/utils/converter.py +++ b/src/nuclearmasses/utils/converter.py @@ -1,7 +1,15 @@ +import importlib +from importlib.resources.abc import Traversable +import os +import typing + import astropy # type: ignore[import-untyped] import numpy as np import pandas as pd +# Type alias: union of the different ways a file or data can be represented +DataInput = Traversable | os.PathLike[str] | str | typing.TextIO + class Converter: """A utility class for converting between symbol and Z value @@ -10,8 +18,10 @@ class Converter: and the other symbol to Z.
""" - def __init__(self) -> None: + def __init__(self, **kwargs) -> None: """Construct the symbol -> Z and Z -> symbol dictionaries.""" + # We are using multiple inheritance, so need this for MRO + super().__init__(**kwargs) # fmt: off # Formatter wants to put each item on it's own line, I don't self.z_to_symbol: dict[int, str] = { @@ -92,3 +102,23 @@ def unit_to_seconds(self, unit_str: str) -> float: return np.nan return float(unit.to(astropy.units.s)) + + @staticmethod + def read_fwf(base: DataInput, **kwargs): + """Overloaded version of pandas.read_fwf() that accepts more types + + Our use of importlib.resources means we have types that the pandas version of read_fwf does not accept. + It can still be used but some work is required. This function does that work, as well as some other checking + to make sure we can pass the necessary types into our parser classes. + """ + # A file-like object + if hasattr(base, "read"): + return pd.read_fwf(base, **kwargs) # type: ignore[arg-type] + + # importlib.resources Traversable + if isinstance(base, Traversable): + with importlib.resources.as_file(base) as the_file: + return pd.read_fwf(the_file, **kwargs) + + # Filesystem path + return pd.read_fwf(base, **kwargs) diff --git a/tests/test_ame.py b/tests/test_ame.py new file mode 100644 index 0000000..1f111c8 --- /dev/null +++ b/tests/test_ame.py @@ -0,0 +1,55 @@ +import importlib.resources + +import pytest + +from nuclearmasses.io.ame import AME + + +@pytest.fixture +def ame(): + data_path = importlib.resources.files("nuclearmasses.data") + return AME(data_path=data_path) + + +def test_get_ame_datafiles(ame): + year = 1983 + data_path = ame.data_path / str(year) + mass, reaction01, reaction02 = ame.get_datafiles(year) + assert mass == data_path / "mass.mas83" + assert reaction01 == data_path / "rct1.mas83" + assert reaction02 == data_path / "rct2.mas83" + + year = 1993 + data_path = ame.data_path / str(year) + mass, reaction01, reaction02 = ame.get_datafiles(year) + assert
mass == data_path / "mass_exp.mas93" + assert reaction01 == data_path / "rct1_exp.mas93" + assert reaction02 == data_path / "rct2_exp.mas93" + + year = 1995 + data_path = ame.data_path / str(year) + mass, reaction01, reaction02 = ame.get_datafiles(year) + assert mass == data_path / "mass_exp.mas95" + assert reaction01 == data_path / "rct1_exp.mas95" + assert reaction02 == data_path / "rct2_exp.mas95" + + year = 2012 + data_path = ame.data_path / str(year) + mass, reaction01, reaction02 = ame.get_datafiles(year) + assert mass == data_path / "mass.mas12" + assert reaction01 == data_path / "rct1.mas12" + assert reaction02 == data_path / "rct2.mas12" + + year = 2016 + data_path = ame.data_path / str(year) + mass, reaction01, reaction02 = ame.get_datafiles(year) + assert mass == data_path / "mass16.txt" + assert reaction01 == data_path / "rct1-16.txt" + assert reaction02 == data_path / "rct2-16.txt" + + year = 2020 + data_path = ame.data_path / str(year) + mass, reaction01, reaction02 = ame.get_datafiles(year) + assert mass == data_path / "mass.mas20" + assert reaction01 == data_path / "rct1.mas20" + assert reaction02 == data_path / "rct2.mas20" diff --git a/tests/test_ame_mass_parse.py b/tests/test_ame_mass_parse.py index 5ebba62..f2078e5 100644 --- a/tests/test_ame_mass_parse.py +++ b/tests/test_ame_mass_parse.py @@ -25,6 +25,7 @@ def test_1983_mass(): "N": [39], "AMEMassExcess": [-63742.471], "AMEMassExcessError": [19.056], + "AMERelativeError": [19.056 / 63742.471], "BindingEnergyPerA": [582618.683 / 67], "BindingEnergyPerAError": [19.066 / 67], "BetaDecayEnergy": [3560.871], @@ -56,6 +57,7 @@ def test_1993_mass(): "N": [41], "AMEMassExcess": [-46574.693], "AMEMassExcessError": [465.747], + "AMERelativeError": [465.747 / 46574.693], "BindingEnergyPerA": [567012.133 / 67], "BindingEnergyPerAError": [465.747 / 67], "BetaDecayEnergy": [8746.727], @@ -88,6 +90,7 @@ def test_1995_mass(): "N": [41], "AMEMassExcess": [-46574.693], "AMEMassExcessError": [465.747], + 
"AMERelativeError": [465.747 / 46574.693], "BindingEnergyPerA": [567012.133 / 67], "BindingEnergyPerAError": [465.747 / 67], "BetaDecayEnergy": [8746.727], @@ -119,6 +122,7 @@ def test_2003_mass(): "N": [41], "AMEMassExcess": [-45692.348], "AMEMassExcessError": [415.570], + "AMERelativeError": [415.570 / 45692.348], "BindingEnergyPerA": [8449.695], "BindingEnergyPerAError": [6.203], "BetaDecayEnergy": [9368.702], @@ -150,6 +154,7 @@ def test_2012_mass(): "N": [41], "AMEMassExcess": [-46068.530], "AMEMassExcessError": [217.972], + "AMERelativeError": [217.972 / 46068.530], "BindingEnergyPerA": [8455.310], "BindingEnergyPerAError": [3.253], "BetaDecayEnergy": [9253.245], @@ -181,6 +186,7 @@ def test_2016_mass(): "N": [41], "AMEMassExcess": [-45610.155], "AMEMassExcessError": [270.285], + "AMERelativeError": [270.285 / 45610.155], "BindingEnergyPerA": [8448.469], "BindingEnergyPerAError": [4.034], "BetaDecayEnergy": [9711.620], @@ -212,6 +218,7 @@ def test_2020_mass(): "N": [41], "AMEMassExcess": [-45708.416], "AMEMassExcessError": [3.819], + "AMERelativeError": [3.819 / 45708.416], "BindingEnergyPerA": [8449.9359], "BindingEnergyPerAError": [0.0570], "BetaDecayEnergy": [9613.3678], diff --git a/tests/test_parse.py b/tests/test_converter.py similarity index 85% rename from tests/test_parse.py rename to tests/test_converter.py index 4dc596e..cbe2aa8 100644 --- a/tests/test_parse.py +++ b/tests/test_converter.py @@ -40,8 +40,8 @@ def test_units_to_seconds(converter): assert converter.unit_to_seconds("d") == 86400.0 assert converter.unit_to_seconds("year") == 31557600.0 + +@pytest.mark.parametrize("unit", [5, "m", "Hz", "", " "]) +def test_nontime_unit_return_nan(converter, unit): # Don't use == on np.nan. Floating point numbers are complicated! 
- assert np.isnan(converter.unit_to_seconds(5)) - assert np.isnan(converter.unit_to_seconds("keV")) - assert np.isnan(converter.unit_to_seconds("")) - assert np.isnan(converter.unit_to_seconds(" ")) + assert np.isnan(converter.unit_to_seconds(unit)) diff --git a/tests/test_mass_table.py b/tests/test_mass_table.py index 931fb46..2331d5f 100644 --- a/tests/test_mass_table.py +++ b/tests/test_mass_table.py @@ -1,65 +1,8 @@ -import pytest - from nuclearmasses.mass_table import MassTable -@pytest.fixture -def mt(): - return MassTable() - - -def test_get_nubase_datafile(mt): - year = 1995 - assert mt._get_nubase_datafile(year) == mt.data_path / str(year) / "nubtab97.asc" - year = 2003 - assert mt._get_nubase_datafile(year) == mt.data_path / str(year) / "nubtab03.asc" - year = 2012 - assert mt._get_nubase_datafile(year) == mt.data_path / str(year) / "nubtab12.asc" - year = 2016 - assert mt._get_nubase_datafile(year) == mt.data_path / str(year) / "nubase2016.txt" - year = 2020 - assert mt._get_nubase_datafile(year) == mt.data_path / str(year) / "nubase_1.mas20" - - -def test_get_ame_datafiles(mt): - year = 1983 - data_path = mt.data_path / str(year) - mass, reaction01, reaction02 = mt._get_ame_datafiles(year) - assert mass == data_path / "mass.mas83" - assert reaction01 == data_path / "rct1.mas83" - assert reaction02 == data_path / "rct2.mas83" - - year = 1993 - data_path = mt.data_path / str(year) - mass, reaction01, reaction02 = mt._get_ame_datafiles(year) - assert mass == data_path / "mass_exp.mas93" - assert reaction01 == data_path / "rct1_exp.mas93" - assert reaction02 == data_path / "rct2_exp.mas93" - - year = 1995 - data_path = mt.data_path / str(year) - mass, reaction01, reaction02 = mt._get_ame_datafiles(year) - assert mass == data_path / "mass_exp.mas95" - assert reaction01 == data_path / "rct1_exp.mas95" - assert reaction02 == data_path / "rct2_exp.mas95" - - year = 2012 - data_path = mt.data_path / str(year) - mass, reaction01, reaction02 = 
mt._get_ame_datafiles(year) - assert mass == data_path / "mass.mas12" - assert reaction01 == data_path / "rct1.mas12" - assert reaction02 == data_path / "rct2.mas12" - - year = 2016 - data_path = mt.data_path / str(year) - mass, reaction01, reaction02 = mt._get_ame_datafiles(year) - assert mass == data_path / "mass16.txt" - assert reaction01 == data_path / "rct1-16.txt" - assert reaction02 == data_path / "rct2-16.txt" +def test_initial_complete_parse(): + data = MassTable().data + expected_shape = (21421, 50) - year = 2020 - data_path = mt.data_path / str(year) - mass, reaction01, reaction02 = mt._get_ame_datafiles(year) - assert mass == data_path / "mass.mas20" - assert reaction01 == data_path / "rct1.mas20" - assert reaction02 == data_path / "rct2.mas20" + assert expected_shape == data.shape diff --git a/tests/test_nubase.py b/tests/test_nubase.py new file mode 100644 index 0000000..2efaf3a --- /dev/null +++ b/tests/test_nubase.py @@ -0,0 +1,24 @@ +import importlib.resources + +import pytest + +from nuclearmasses.io.nubase import NUBASE + + +@pytest.fixture +def nubase(): + data_path = importlib.resources.files("nuclearmasses.data") + return NUBASE(data_path=data_path) + + +def test_get_nubase_datafile(nubase): + year = 1995 + assert nubase.get_datafile(year) == nubase.data_path / str(year) / "nubtab97.asc" + year = 2003 + assert nubase.get_datafile(year) == nubase.data_path / str(year) / "nubtab03.asc" + year = 2012 + assert nubase.get_datafile(year) == nubase.data_path / str(year) / "nubtab12.asc" + year = 2016 + assert nubase.get_datafile(year) == nubase.data_path / str(year) / "nubase2016.txt" + year = 2020 + assert nubase.get_datafile(year) == nubase.data_path / str(year) / "nubase_1.mas20" diff --git a/tests/test_nubase_parse.py b/tests/test_nubase_parse.py index 1989a66..08740f7 100644 --- a/tests/test_nubase_parse.py +++ b/tests/test_nubase_parse.py @@ -23,6 +23,7 @@ def test_1995_nubase(): "N": [101], "NUBASEMassExcess": [-60085], 
"NUBASEMassExcessError": [29], + "NUBASERelativeError": [29 / 60085], "HalfLifeValue": [2.99], "HalfLifeUnit": ["min"], "HalfLifeError": [0.07], @@ -55,6 +56,7 @@ def test_2003_nubase(): "N": [101], "NUBASEMassExcess": [-60070], "NUBASEMassExcessError": [30], + "NUBASERelativeError": [30 / 60070], "HalfLifeValue": [2.99], "HalfLifeUnit": ["min"], "HalfLifeError": [0.07], @@ -87,6 +89,7 @@ def test_2012_nubase(): "N": [101], "NUBASEMassExcess": [-60060], "NUBASEMassExcessError": [30], + "NUBASERelativeError": [30 / 60060], "HalfLifeValue": [2.99], "HalfLifeUnit": ["min"], "HalfLifeError": [0.07], @@ -120,6 +123,7 @@ def test_2016_nubase(): "N": [101], "NUBASEMassExcess": [-60060], "NUBASEMassExcessError": [30], + "NUBASERelativeError": [30 / 60060], "HalfLifeValue": [2.99], "HalfLifeUnit": ["min"], "HalfLifeError": [0.07], @@ -154,6 +158,7 @@ def test_2020_nubase(): "N": [101], "NUBASEMassExcess": [-60060], "NUBASEMassExcessError": [30], + "NUBASERelativeError": [30 / 60060], "HalfLifeValue": [2.99], "HalfLifeUnit": ["min"], "HalfLifeError": [0.07],