diff --git a/README.md b/README.md index f84e40f..bc32814 100644 --- a/README.md +++ b/README.md @@ -42,10 +42,13 @@ The package is available on the Python Package Index so can be installed via pip pip install nuclearmasses ``` -Or you can clone the latest version from github. -All work is done on a feature branch so cloning and using `main` should be the same as using the latest installed version from pip. +Or you can clone the latest version from GitHub and install it locally. +All work is done on a feature branch so cloning and using `main` should be similar to using the latest installed version from pip. +There may be some additional functionality, but nothing should have been removed. ```bash git clone https://github.com/php1ic/nuclearmasses +cd nuclearmasses +pip install -e . ``` ## Usage @@ -60,67 +63,117 @@ The combination of AME and NUBASE values from all years is available as a single >>> df = MassTable().data ``` You can then interrogate, or extract, whatever information you want. 
-For example, how has the mass excess and it's accuracy changed overtime for 190Re according to the AME +For example, how has the mass excess and its accuracy changed over time for 190Re according to the AME ```python ->>> df[(df['A'] == 190) & (df['Symbol'] == 'Re')][['AMEMassExcess', 'AMEMassExcessError']] - AMEMassExcess AMEMassExcessError -16054 -35536.605 200.029 -16055 -35557.789 145.549 -16056 -35568.032 212.151 -16057 -35566.326 149.248 -16058 -35634.992 70.542 -16059 -35635.830 70.852 -16060 -35583.015 4.870 +>>> df[(df['A'] == 190) & (df['Symbol'] == 'Re')][['TableYear', 'AMEMassExcess', 'AMEMassExcessError']] + TableYear AMEMassExcess AMEMassExcessError +16054 1983 -35536.605 200.029 +16055 1993 -35557.789 145.549 +16056 1995 -35568.032 212.151 +16057 2003 -35566.326 149.248 +16058 2012 -35634.992 70.542 +16059 2016 -35635.830 70.852 +16060 2020 -35583.015 4.870 ``` -Or how does the mass excess of gold vary across the isotopic chain according to NUBASE in the most recent table for both experimentally measured and theoretical values +Or how does the mass excess of lithium vary across the isotopic chain according to NUBASE in the most recent table for both experimentally measured and theoretical values ```python ->>> df.query("TableYear == 2020 and Symbol == 'Au'")[['A', 'NUBASEMassExcess', 'NUBASEMassExcessError', 'Experimental']] - A NUBASEMassExcess NUBASEMassExcessError Experimental -14084 168 2530.0 400.0 False -14189 169 -1790.0 300.0 False -14291 170 -3700.0 200.0 False -14391 171 -7562.0 21.0 True -14492 172 -9320.0 60.0 True -14591 173 -12832.0 23.0 True -14687 174 -14060.0 100.0 False -14781 175 -17400.0 40.0 True -14874 176 -18520.0 30.0 True -14968 177 -21546.0 10.0 True -15060 178 -22303.0 10.0 True -15153 179 -24989.0 12.0 True -15244 180 -25626.0 5.0 True -15334 181 -27871.0 20.0 True -15419 182 -28304.0 19.0 True -15503 183 -30191.0 9.0 True -15588 184 -30319.0 22.0 True -15673 185 -31858.1 2.6 True -15757 186 -31715.0 21.0 True -15842 187 
-33029.0 22.0 True -15926 188 -32371.3 2.7 True -16007 189 -33582.0 20.0 True -16088 190 -32834.0 3.0 True -16164 191 -33798.0 5.0 True -16243 192 -32772.0 16.0 True -16320 193 -33405.0 9.0 True -16401 194 -32211.9 2.1 True -16480 195 -32567.1 1.1 True -16560 196 -31138.7 3.0 True -16637 197 -31139.8 0.5 True -16713 198 -29580.8 0.5 True -16788 199 -29093.8 0.5 True -16861 200 -27240.0 27.0 True -16935 201 -26401.0 3.0 True -17012 202 -24353.0 23.0 True -17089 203 -23143.0 3.0 True -17163 204 -20390.0 200.0 False -17237 205 -18570.0 200.0 False -17308 206 -14190.0 300.0 False -17382 207 -10640.0 300.0 False -17456 208 -5910.0 300.0 False -17528 209 -2230.0 400.0 False -17603 210 2680.0 400.0 False +>>> df.query("TableYear == 2020 and Symbol == 'Li'")[['A', 'NUBASEMassExcess', 'NUBASEMassExcessError', 'Experimental']] + A NUBASEMassExcess NUBASEMassExcessError Experimental +38 3 28670.0000 2000.0000 False +59 4 25320.0000 210.0000 True +79 5 11680.0000 50.0000 True +104 6 14086.8804 0.0014 True +133 7 14907.1050 0.0040 True +161 8 20945.8000 0.0500 True +196 9 24954.9100 0.1900 True +229 10 33053.0000 13.0000 True +264 11 40728.3000 0.6000 True +298 12 49010.0000 30.0000 True +336 13 56980.0000 70.0000 True ``` +### Adding User Data + +Functionality exists for a user to add their own data into the combined table. +The function `add_user_data()` is a method of the `MassTable` class and takes data in [json](https://www.json.org) format and adds it to the table. + +An identifier column `DataSource` already exists for all published data and is set to 0. +Along with providing the new data, a user can specify a value for `DataSource`, otherwise a value of 1 is automatically assigned. +The ability to specify a value means multiple data sources can be added whilst maintaining the ability to distinguish between them. +If no value is given, the value of 1 will always be used. + +To ensure uniqueness, values for `A` and `Z` must be part of the data. 
+With the assumption that you would like to compare and contrast this new data with the published values, the names associated with your data must also match existing column names. +I know this doesn't quite make sense, as you might not necessarily want to assign your new mass to either AME or NUBASE, but in general, the columns are generic so I'm sure you'll work it out. + +Let's imagine I have a new measurement for the mass excess of 100Ag at -78136.4 +/- 0.6 and I want to add it to the table +```python +>>> # Addition is done on the class level so create an instance of the MassTable +>>> from nuclearmasses.mass_table import MassTable +>>> table = MassTable() +>>> # This step doesn't need to be done, but, for demonstration purposes, +>>> # extract the table data and check the current details for 100Ag +>>> df = table.data +>>> df.query("Symbol == 'Ag' and A == 100")[['A', 'Z', 'NUBASEMassExcess', 'NUBASEMassExcessError', 'DataSource']] + A NUBASEMassExcess NUBASEMassExcessError DataSource +6976 100 NaN NaN 0 +6977 100 NaN NaN 0 +6978 100 -78180.0 80.0 0 +6979 100 -78150.0 80.0 0 +6980 100 -78138.0 5.0 0 +6981 100 -78138.0 5.0 0 +6982 100 -78138.0 5.0 0 +>>> # Add our new value to the NUBASE columns +>>> table.add_user_data('[{"A": 100, "Z": 47, "NUBASEMassExcess": -78136.4, "NUBASEMassExcessError": 0.6}]') +>>> # The underlying table has been modified, so we need to get the latest version +>>> df = table.data +>>> # Re-run the query to see the new value, notice the value for DataSource has been set to 1 +>>> df.query("Symbol == 'Ag' and A == 100")[['A', 'Z', 'NUBASEMassExcess', 'NUBASEMassExcessError', 'DataSource']] + A NUBASEMassExcess NUBASEMassExcessError DataSource +6976 100 NaN NaN 0 +6977 100 NaN NaN 0 +6978 100 -78180.0 80.0 0 +6979 100 -78150.0 80.0 0 +6980 100 -78138.0 5.0 0 +6981 100 -78138.0 5.0 0 +6982 100 -78138.0 5.0 0 +21421 100 -78136.4 0.6 1 +>>> # We can add the same data but this time assign to a different source +>>> table.add_user_data('[{"A": 
100, "Z": 47, "NUBASEMassExcess": -78136.4, "NUBASEMassExcessError": 0.6}]', source=5) +>>> # Again, this modifies the underlying dataframe so we need to fetch the updated version +>>> df = table.data +>>> # Run the query and see that our new data is there twice against two different sources +>>> df.query("Symbol == 'Ag' and A == 100")[['A', 'Z', 'NUBASEMassExcess', 'NUBASEMassExcessError', 'DataSource']] + A NUBASEMassExcess NUBASEMassExcessError DataSource +6976 100 NaN NaN 0 +6977 100 NaN NaN 0 +6978 100 -78180.0 80.0 0 +6979 100 -78150.0 80.0 0 +6980 100 -78138.0 5.0 0 +6981 100 -78138.0 5.0 0 +6982 100 -78138.0 5.0 0 +21421 100 -78136.4 0.6 1 +21422 100 -78136.4 0.6 5 +``` + +Now let's imagine that we have a large new data set in a json file, but have forgotten to add the `Experimental` attribute and for reasons, it is not possible to edit or update the file with this additional information. +When reading in the file, we can pass a dictionary as a parameter, and the keys will be used as the column names, with the values used as the value for all new isotopes. + +In the following example, we will assume the data is in a file `new_data.json` and is passed as a [pathlib](https://docs.python.org/3/library/pathlib.html) `Path`. +```python +>>> import pathlib +>>> from nuclearmasses.mass_table import MassTable +>>> table = MassTable() +>>> missed_values = {'Experimental': True} +>>> new_data = pathlib.Path('new_data.json') +>>> table.add_user_data(new_data, common_values=missed_values) +>>> # All isotopes from new_data.json will have their Experimental column assigned to True +``` + +If an existing column is not populated with new data, it is assigned the value pd.NA. +We do not try to infer what a value could, should or might be. + + ## Contributing If you have ideas for additional functionality or find bugs please create an [issue](https://github.com/php1ic/nuclearmasses/issues) or better yet a [pull request](https://github.com/php1ic/nuclearmasses/pulls). 
diff --git a/src/nuclearmasses/io/ame.py b/src/nuclearmasses/io/ame.py index 089cbc0..30afdf0 100644 --- a/src/nuclearmasses/io/ame.py +++ b/src/nuclearmasses/io/ame.py @@ -41,7 +41,7 @@ def parse_year(self, year: int) -> pd.DataFrame: rct2_df = AMEReactionParserTwo(filename=ame_reaction_2, year=year).read_file() # Merge all 3 of the AME dataframes into one - common_columns = ["A", "Z", "N", "TableYear", "Symbol"] + common_columns = ["A", "Z", "N", "TableYear", "Symbol", "DataSource"] return mass_df.merge(rct1_df, on=common_columns, how="outer").merge(rct2_df, on=common_columns, how="outer") def parse_all_years(self) -> pd.DataFrame: diff --git a/src/nuclearmasses/io/ame_mass_parse.py b/src/nuclearmasses/io/ame_mass_parse.py index 1ce5f43..30da1c5 100644 --- a/src/nuclearmasses/io/ame_mass_parse.py +++ b/src/nuclearmasses/io/ame_mass_parse.py @@ -53,6 +53,7 @@ def _data_types(self) -> dict: "BetaDecayEnergyError": "float64", "AtomicMass": "float64", "AtomicMassError": "float64", + "DataSource": "Int64", } def _na_values(self) -> dict: @@ -130,5 +131,6 @@ def read_file(self) -> pd.DataFrame: df["TableYear"] = self.year df["N"] = pd.to_numeric(df["A"]) - pd.to_numeric(df["Z"]) df["Symbol"] = pd.to_numeric(df["Z"]).map(self.get_symbol) + df["DataSource"] = 0 return df.astype(self._data_types()) diff --git a/src/nuclearmasses/io/ame_reaction_1_parse.py b/src/nuclearmasses/io/ame_reaction_1_parse.py index c4be262..0d38e47 100644 --- a/src/nuclearmasses/io/ame_reaction_1_parse.py +++ b/src/nuclearmasses/io/ame_reaction_1_parse.py @@ -58,6 +58,7 @@ def _data_types(self) -> dict: "QEpsilonError": "float64", "QBetaNeutron": "float64", "QBetaNeutronError": "float64", + "DataSource": "Int64", } def _na_values(self) -> dict: @@ -108,5 +109,6 @@ def read_file(self) -> pd.DataFrame: df["TableYear"] = self.year df["N"] = pd.to_numeric(df["A"]) - pd.to_numeric(df["Z"]) df["Symbol"] = pd.to_numeric(df["Z"]).map(self.get_symbol) + df["DataSource"] = 0 return 
df.astype(self._data_types()) diff --git a/src/nuclearmasses/io/ame_reaction_2_parse.py b/src/nuclearmasses/io/ame_reaction_2_parse.py index d6990d8..965be9c 100644 --- a/src/nuclearmasses/io/ame_reaction_2_parse.py +++ b/src/nuclearmasses/io/ame_reaction_2_parse.py @@ -58,6 +58,7 @@ def _data_types(self) -> dict: "QProtonAlphaError": "float64", "QNeutronAlpha": "float64", "QNeutronAlphaError": "float64", + "DataSource": "Int64", } def _na_values(self) -> dict: @@ -112,5 +113,6 @@ def read_file(self) -> pd.DataFrame: df["TableYear"] = self.year df["N"] = pd.to_numeric(df["A"]) - pd.to_numeric(df["Z"]) df["Symbol"] = pd.to_numeric(df["Z"]).map(self.get_symbol) + df["DataSource"] = 0 return df.astype(self._data_types()) diff --git a/src/nuclearmasses/io/nubase_parse.py b/src/nuclearmasses/io/nubase_parse.py index 3fbe04b..d350b90 100644 --- a/src/nuclearmasses/io/nubase_parse.py +++ b/src/nuclearmasses/io/nubase_parse.py @@ -70,6 +70,7 @@ def _data_types(self) -> dict: "Spin": "string", "DiscoveryYear": "Int64", "DecayModes": "string", + "DataSource": "Int64", } # The discovery year was added after 2003, and I assume it will be there in the future, so we will set up @@ -198,5 +199,6 @@ def read_file(self) -> pd.DataFrame: df["TableYear"] = self.year df["N"] = pd.to_numeric(df["A"]) - pd.to_numeric(df["Z"]) df["Symbol"] = pd.to_numeric(df["Z"]).map(self.get_symbol) + df["DataSource"] = 0 return df.astype(self._data_types()) diff --git a/src/nuclearmasses/mass_table.py b/src/nuclearmasses/mass_table.py index 9d74028..745c13a 100644 --- a/src/nuclearmasses/mass_table.py +++ b/src/nuclearmasses/mass_table.py @@ -1,9 +1,14 @@ +from difflib import get_close_matches import importlib.resources +import io +import pathlib +import typing import pandas as pd from nuclearmasses.io.ame import AME from nuclearmasses.io.nubase import NUBASE +from nuclearmasses.utils.converter import Converter class MassTable: @@ -18,10 +23,81 @@ def __init__(self) -> None: def _parse_files(self) -> 
pd.DataFrame: data_path = importlib.resources.files("nuclearmasses").joinpath("data") - common_columns = ["A", "Z", "N", "TableYear", "Symbol"] + common_columns = ["A", "Z", "N", "TableYear", "Symbol", "DataSource"] return pd.merge(AME(data_path).ame_df, NUBASE(data_path).nubase_df, on=common_columns, how="outer") + def add_user_data( + self, + data: str | pathlib.Path | typing.IO, + source: int = 1, + common_values: dict[str, typing.Any] | None = None, + ) -> None: + """Merge user data into the mass table""" + # We are going to force at least 3 columns in the user data + # Two in the input file: A and Z to uniquely identify the isotope + # One via code: DataSource to differentiate from the original table data + required_columns = {"A", "Z", "DataSource"} + + # Is the string a json string or filename + if isinstance(data, str): + path = pathlib.Path(data) + + if path.is_file(): + data = path + else: + data = io.StringIO(data) + + # Read the file, should be valid json so nice and simple + user_df: pd.DataFrame = pd.read_json(data, dtype={"A": int, "Z": int}) + + # Add any additional data that is constant for the user data, e.g. 
TableYear + if common_values is not None: + for k, v in common_values.items(): + user_df[k] = v + + # We need to validate the columns so let's get a unique list + user_columns = set(user_df.columns) + # The symbol is commonly used so if it wasn't in the file, create it as a column + if "Symbol" not in user_columns: + user_df["Symbol"] = pd.to_numeric(user_df["Z"]).map(Converter().get_symbol) + + # Set the source value using the function parameter if it hasn't already been set + if "DataSource" not in user_columns: + user_df["DataSource"] = source + + # Refresh user column list + user_columns = set(user_df.columns) + + # Check we have the necessary columns + if missing := required_columns - user_columns: + raise ValueError(f"ERROR: Missing required columns: {missing}") + + # Check any columns, in addition to those required, match the existing ones + mt_columns = self._complete_df.columns + if unexpected := user_columns - set(mt_columns): + for col in unexpected: + msg = f"ERROR: Column '{col}' not recognised." + if suggestion := get_close_matches(col, mt_columns, n=1): + msg += f" Did you mean '{suggestion[0]}'?" + + raise ValueError(msg) + + # Confirm the provided columns are not empty or null + if user_df[list(required_columns)].isna().any().any(): + raise ValueError("Required columns have missing values.") + + # Check the user hasn't duplicated rows + if user_df.duplicated(subset=required_columns).any(): + raise ValueError("Duplicate rows, will not guess which should be used.") + + # Expand the user dataframe to have all the columns present in the mass table, setting those that aren't + # present to NaN. 
This isn't strictly necessary as concat aligns automatically, but it should hopefully avoid + # any issues later on + user_df = user_df.reindex(columns=mt_columns) + + self._complete_df = pd.concat([self._complete_df, user_df], ignore_index=True) + @property def data(self) -> pd.DataFrame: """Access the complete mass table dataframe""" diff --git a/tests/test_ame_mass_parse.py b/tests/test_ame_mass_parse.py index f2078e5..7ce15e4 100644 --- a/tests/test_ame_mass_parse.py +++ b/tests/test_ame_mass_parse.py @@ -32,6 +32,7 @@ def test_1983_mass(): "BetaDecayEnergyError": [20.646], "AtomicMass": [66.931579167], "AtomicMassError": [20.457 / 1.0e6], + "DataSource": [0], } ) expected = expected.astype(parser._data_types()) @@ -64,6 +65,7 @@ def test_1993_mass(): "BetaDecayEnergyError": [543.150], "AtomicMass": [66.950000000], "AtomicMassError": [500.0 / 1.0e6], + "DataSource": [0], } ) expected = expected.astype(parser._data_types()) @@ -97,6 +99,7 @@ def test_1995_mass(): "BetaDecayEnergyError": [543.150], "AtomicMass": [66.950000000], "AtomicMassError": [500.0 / 1.0e6], + "DataSource": [0], } ) expected = expected.astype(parser._data_types()) @@ -129,6 +132,7 @@ def test_2003_mass(): "BetaDecayEnergyError": [523.438], "AtomicMass": [66.950947244], "AtomicMassError": [446.132 / 1.0e6], + "DataSource": [0], } ) expected = expected.astype(parser._data_types()) @@ -161,6 +165,7 @@ def test_2012_mass(): "BetaDecayEnergyError": [218.067], "AtomicMass": [66.950543395], "AtomicMassError": [234.002 / 1.0e6], + "DataSource": [0], } ) expected = expected.astype(parser._data_types()) @@ -193,6 +198,7 @@ def test_2016_mass(): "BetaDecayEnergyError": [270.362], "AtomicMass": [66.951035482], "AtomicMassError": [290.163 / 1.0e6], + "DataSource": [0], } ) expected = expected.astype(parser._data_types()) @@ -225,6 +231,7 @@ def test_2020_mass(): "BetaDecayEnergyError": [7.4900], "AtomicMass": [66.950930], "AtomicMassError": [4.100 / 1.0e6], + "DataSource": [0], } ) expected = 
expected.astype(parser._data_types()) diff --git a/tests/test_ame_reaction_1_parse.py b/tests/test_ame_reaction_1_parse.py index a382556..d7825e4 100644 --- a/tests/test_ame_reaction_1_parse.py +++ b/tests/test_ame_reaction_1_parse.py @@ -36,6 +36,7 @@ def test_1983_rct1(): "QEpsilonError": [20], "QBetaNeutron": [-10640], "QBetaNeutronError": [200], + "DataSource": [0], } ) expected = expected.astype(parser._data_types()) @@ -71,6 +72,7 @@ def test_1993_rct1(): "QEpsilonError": [20.03], "QBetaNeutron": [-10622], "QBetaNeutronError": [230], + "DataSource": [0], } ) expected = expected.astype(parser._data_types()) @@ -106,6 +108,7 @@ def test_1995_rct1(): "QEpsilonError": [20.03], "QBetaNeutron": [-10682.00], "QBetaNeutronError": [207.60], + "DataSource": [0], } ) expected = expected.astype(parser._data_types()) @@ -141,6 +144,7 @@ def test_2003_rct1(): "QEpsilonError": [16.57], "QBetaNeutron": [-10561.10], "QBetaNeutronError": [44.19], + "DataSource": [0], } ) expected = expected.astype(parser._data_types()) @@ -176,6 +180,7 @@ def test_2012_rct1(): "QEpsilonError": [16.57], "QBetaNeutron": [-10557.95], "QBetaNeutronError": [30.67], + "DataSource": [0], } ) expected = expected.astype(parser._data_types()) @@ -211,6 +216,7 @@ def test_2016_rct1(): "QEpsilonError": [16.55], "QBetaNeutron": [-10555.52], "QBetaNeutronError": [30.67], + "DataSource": [0], } ) expected = expected.astype(parser._data_types()) @@ -246,6 +252,7 @@ def test_2020_rct1(): "QEpsilonError": [16.5459], "QBetaNeutron": [-10555.5245], "QBetaNeutronError": [30.6658], + "DataSource": [0], } ) expected = expected.astype(parser._data_types()) diff --git a/tests/test_ame_reaction_2_parse.py b/tests/test_ame_reaction_2_parse.py index f58ae7c..a104b57 100644 --- a/tests/test_ame_reaction_2_parse.py +++ b/tests/test_ame_reaction_2_parse.py @@ -34,6 +34,7 @@ def test_1983_rct2(): "QProtonAlphaError": [1.8], "QNeutronAlpha": [7180], "QNeutronAlphaError": [50], + "DataSource": [0], } ) @@ -70,6 +71,7 @@ def 
test_1993_rct2(): "QProtonAlphaError": [1.25], "QNeutronAlpha": [7690.59], "QNeutronAlphaError": [15.05], + "DataSource": [0], } ) @@ -106,6 +108,7 @@ def test_1995_rct2(): "QProtonAlphaError": [1.24], "QNeutronAlpha": [7702.97], "QNeutronAlphaError": [3.35], + "DataSource": [0], } ) @@ -142,6 +145,7 @@ def test_2003_rct2(): "QProtonAlphaError": [1.16], "QNeutronAlpha": [7701.54], "QNeutronAlphaError": [3.34], + "DataSource": [0], } ) @@ -178,6 +182,7 @@ def test_2012_rct2(): "QProtonAlphaError": [1.15], "QNeutronAlpha": [7701.67], "QNeutronAlphaError": [3.33], + "DataSource": [0], } ) @@ -214,6 +219,7 @@ def test_2016_rct2(): "QProtonAlphaError": [1.07], "QNeutronAlpha": [7700.97], "QNeutronAlphaError": [3.31], + "DataSource": [0], } ) @@ -250,6 +256,7 @@ def test_2020_rct2(): "QProtonAlphaError": [1.0721], "QNeutronAlpha": [7701.0380], "QNeutronAlphaError": [3.3084], + "DataSource": [0], } ) diff --git a/tests/test_mass_table.py b/tests/test_mass_table.py index 2331d5f..8d36866 100644 --- a/tests/test_mass_table.py +++ b/tests/test_mass_table.py @@ -1,8 +1,60 @@ +import pytest + from nuclearmasses.mass_table import MassTable -def test_initial_complete_parse(): - data = MassTable().data - expected_shape = (21421, 50) +@pytest.fixture +def the_table(): + return MassTable() + + +def test_initial_complete_parse(the_table): + expected_shape = (21421, 51) + assert expected_shape == the_table.data.shape + + +def test_valid_user_data(the_table): + data = '[{"A": 5, "Z": 2, "AMEMassExcess": 123456.7}]' + the_table.add_user_data(data) + df = the_table.data + assert (21422, 51) == df.shape + assert df[(df["A"] == 5) & (df["Z"] == 2) & (df["DataSource"] == 1)]["AMEMassExcess"].iloc[0] == 123456.7 + + +def test_common_value_user_data(the_table): + data = '[{"A": 5, "Z": 2, "AMEMassExcess": 123456.7}]' + common_val = {"TableYear": 2099} + the_table.add_user_data(data, common_values=common_val) + df = the_table.data + assert (21422, 51) == df.shape + assert df[(df["A"] == 5) & 
(df["Z"] == 2) & (df["DataSource"] == 1)]["TableYear"].iloc[0] == 2099 + + +def test_missing_column_user_data(the_table): + data = '[{"Z": 2, "AMEMassExcess": 123456.7}]' + with pytest.raises(ValueError, match="ERROR: Missing required columns:.*A.*"): + the_table.add_user_data(data) + + +def test_typo_column_user_data(the_table): + data = '[{"A": 5, "Z": 2, "AMEMassexcess": 123456.7}]' + with pytest.raises(ValueError, match="ERROR: Column.*not recognised. Did you mean.*?"): + the_table.add_user_data(data) + + +def test_invalid_column_user_data(the_table): + data = '[{"A": 5, "Z": 2, "MadeUpColumn": 123456.7}]' + with pytest.raises(ValueError, match="ERROR: Column.*not recognised."): + the_table.add_user_data(data) + + +def test_na_column_user_data(the_table): + data = '[{"A": 5, "Z": NaN, "AMEMassExcess": 123456.7}]' + with pytest.raises(ValueError, match="Required columns have missing values."): + the_table.add_user_data(data) + - assert expected_shape == data.shape +def test_duplicate_row_user_data(the_table): + data = '[{"A": 5, "Z": 2, "AMEMassExcess": 123456.7},{"A": 5, "Z": 2, "AMEMassExcess": 123456.7}]' + with pytest.raises(ValueError, match="Duplicate rows, will not guess which should be used."): + the_table.add_user_data(data) diff --git a/tests/test_nubase_parse.py b/tests/test_nubase_parse.py index 08740f7..20a7d0b 100644 --- a/tests/test_nubase_parse.py +++ b/tests/test_nubase_parse.py @@ -31,6 +31,7 @@ def test_1995_nubase(): "HalfLifeErrorSeconds": [4.2], "Spin": ["3+"], "DecayModes": ["B-=100"], + "DataSource": [0], } ) @@ -64,6 +65,7 @@ def test_2003_nubase(): "HalfLifeErrorSeconds": [4.2], "Spin": ["3+"], "DecayModes": ["B-=100"], + "DataSource": [0], } ) @@ -98,6 +100,7 @@ def test_2012_nubase(): "Spin": ["3+"], "DiscoveryYear": [1960], "DecayModes": ["B-=100"], + "DataSource": [0], } ) @@ -132,6 +135,7 @@ def test_2016_nubase(): "Spin": ["3+"], "DiscoveryYear": [1960], "DecayModes": ["B-=100"], + "DataSource": [0], } ) @@ -167,6 +171,7 @@ def 
test_2020_nubase(): "Spin": ["3+"], "DiscoveryYear": [1960], "DecayModes": ["B-=100"], + "DataSource": [0], } )