Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 63 additions & 4 deletions src/nuclearmasses/io/ame.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
"""
The ame module defines the ``AME`` class to work on and store details related to the AME data.
"""

from importlib.resources.abc import Traversable

import pandas as pd
Expand All @@ -8,7 +12,31 @@


class AME:
"""Top level storage and functionality for AME data"""
"""
Container class to store details related to the years in which an AME table was published.

This is a high level class that tracks details, but delegates the parsing of the files.

Parameters
----------
data_path : Traversable
Absolute path to the data files location. Not an actual file, but rather the top-level directory from which we
access the year directory and then the individual files.

Attributes
----------
data_path : Traversable
Absolute path to the data files location. Not an actual file, but rather the top-level directory from which we
access the year directory and then the individual files.
years : list[int]
An ordered list of the years in which an AME table was published.
ame_files : list[tuple[str, str, str]]
The filenames of each AME data file in year order.
files : dict[int, tuple[str, str, str]]
A dictionary mapping each published year to the filenames of its three AME data files.
ame_df : pandas.DataFrame
A dataframe containing the AME data from all published years.
"""

def __init__(self, data_path: Traversable):
self.data_path = data_path
Expand All @@ -26,14 +54,38 @@ def __init__(self, data_path: Traversable):
self.ame_df: pd.DataFrame = self.parse_all_years()

def get_datafiles(self, year: int) -> tuple[Traversable, Traversable, Traversable]:
"""Use the given year to locate the 3 AME data file and return the absolute paths."""
"""
Construct the absolute paths to the files for the given ``year``.

Parameters
----------
year : int
The published year to get the file for.

Returns
-------
tuple[Traversable,Traversable,Traversable]
The absolute paths to the three AME data files.
"""
root = self.data_path / str(year)
mass, rct1, rct2 = self.files[year]

return root / mass, root / rct1, root / rct2

def parse_year(self, year: int) -> pd.DataFrame:
"""Combine all the AME files from the given ``year``"""
"""
Parse the data from the given ``year``.

Parameters
----------
year : int
The published year to get the data for.

Returns
-------
pandas.DataFrame
The data from ``year`` as a dataframe
"""
ame_mass, ame_reaction_1, ame_reaction_2 = self.get_datafiles(year)

mass_df = AMEMassParser(filename=ame_mass, year=year).read_file()
Expand All @@ -45,5 +97,12 @@ def parse_year(self, year: int) -> pd.DataFrame:
return mass_df.merge(rct1_df, on=common_columns, how="outer").merge(rct2_df, on=common_columns, how="outer")

def parse_all_years(self) -> pd.DataFrame:
"""Parse the files for all available years"""
"""
Parse the files for all available years.

Returns
-------
pandas.DataFrame
The data from all published years as a single dataframe.
"""
return pd.concat((self.parse_year(y) for y in self.years), ignore_index=True)
34 changes: 33 additions & 1 deletion src/nuclearmasses/io/ame_mass_file.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,37 @@
"""
The ame_mass_file module defines the ``AMEMassFile`` class. This class stores the start and end column positions
of the different parameters recorded in the AME mass data file. The positions have changed between
years so the year of the table is given as a parameter at construction.
"""


class AMEMassFile:
"""Easy access to the variables in the AME mass file."""
"""
Storage class for the data in the AME mass data file.

The AME mass data file is a fixed-width file format, so we store the format details in this class.

Note we have not listed all parameters in the attributes section as there are so many. The naming convention is
however shown, along with a description.

Parameters
----------
year : int
The year the file being parsed was published

Attributes
----------
HEADER : int
The number of lines in the file to be interpreted as the header.
FOOTER : int
The number of lines in the file to be interpreted as the footer.
START_X : int
The first column of parameter X.
END_X : int or None
The last column of parameter X or None to represent the end of the line.
column_limits : list[tuple[int, int]]
The start and end positions of all parameters as a list of tuples that can be passed to :func:`pandas.read_fwf`.
"""

def __init__(self, year: int, **kwargs):
super().__init__(**kwargs)
Expand Down
87 changes: 72 additions & 15 deletions src/nuclearmasses/io/ame_mass_parse.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,51 @@
"""
The ame_mass_parse module defines the ``AMEMassParser`` class. This class contains the logic required to sort and
organise the inputs to :func:`pandas.read_fwf` dependent on the year of the file. Once parsed, known typos and
inconsistencies are cleaned from the resultant dataframe.
"""

import pandas as pd

from nuclearmasses.io.ame_mass_file import AMEMassFile
from nuclearmasses.utils.converter import Converter, DataInput


class AMEMassParser(AMEMassFile, Converter):
"""Parse the AME mass file.

The format is known but the provided string does not match all lines.
We will therefore use START and END markers, which are inherited, and
read the columns are interested in.
"""
Parse the AME mass file, doing the necessary preparation and clean ups of data.

There are some quirks to the format used in the file. It's based on fixed-width format, but deviates in various
places so additional work is required once the file is parsed.

Parameters
----------
filename : DataInput
The file-like object to parse.
year : int
The published year of the data file.

Attributes
----------
filename : DataInput
The file-like object to parse.
year : int
The published year of the data file.
"""

def __init__(self, filename: DataInput, year: int):
"""Set the file to read and table year"""
super().__init__(year=year)
self.filename: DataInput = filename
self.year: int = year

def _column_names(self) -> list[str]:
"""Set the column name depending on the year"""
"""
Set the column name depending on the year.

Returns
-------
list[str]
An ordered list of the columns that exist in the file.
"""
return [
"Z",
"A",
Expand All @@ -35,7 +61,14 @@ def _column_names(self) -> list[str]:
]

def _data_types(self) -> dict:
"""Set the data type depending on the year"""
"""
Set the column data types depending on the year.

Returns
-------
dict[str, str]
A dictionary of the columns that exist and their data type
"""
return {
"TableYear": "Int64",
"Symbol": "string",
Expand All @@ -54,7 +87,14 @@ def _data_types(self) -> dict:
}

def _na_values(self) -> dict:
"""Set the columns that have placeholder values"""
"""
Set the columns that have empty fields that should be NaN'd depending on the year.

Returns
-------
dict[str, list[str]]
A dictionary of the columns that will have values that should be interpreted as NaN.
"""
na_vals = {
"A": [""],
"BetaDecayEnergy": ["", "*"],
Expand All @@ -67,9 +107,21 @@ def _na_values(self) -> dict:
return na_vals

def calculate_relative_error(self, raw_df) -> pd.DataFrame:
"""Calculate the relative error of the mass excess
"""
Calculate the relative error of the mass excess.

12C has a 0.0 +/- 0.0 mass excess definition by definition so ensure that is still true.
12C has a 0.0 +/- 0.0 mass excess by definition, so relative error is 0.0. The division by zero will put a NaN
value in the column for 12C so we will manually correct and set to 0.0.

Parameters
----------
raw_df : pandas.DataFrame
The raw dataframe upon which we will act.

Returns
-------
pandas.DataFrame
The updated dataframe with a new ``AMERelativeError`` column holding the relative error of the mass excess.
"""
raw_df["AMERelativeError"] = abs(
raw_df["AMEMassExcessError"].astype(float) / raw_df["AMEMassExcess"].astype(float)
Expand All @@ -79,11 +131,16 @@ def calculate_relative_error(self, raw_df) -> pd.DataFrame:
return raw_df

def read_file(self) -> pd.DataFrame:
"""Read the file using it's known format
"""
Read the file-like object ``self.filename`` into a dataframe

The ``AMEMassFile`` and other functions in this class have hopefully sanitized the column names, data types and
locations of the data so we can now make the generic call to parse the file.

The AMEMassFile and other functions in this class have hopefully sanitized the
column names, data types and locations of the date so we can now make the generic
call to parse the file.
Returns
-------
pandas.DataFrame
A dataframe containing the parsed and organised contents of the AME mass data file
"""
df = Converter.read_fwf(
self.filename,
Expand Down
35 changes: 33 additions & 2 deletions src/nuclearmasses/io/ame_reaction_1_file.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,39 @@
"""
The ame_reaction_1_file module defines the ``AMEReactionFileOne`` class. This class stores the start and end column
positions of the different parameters recorded in the AME reaction 1 data file. The positions have
changed between years so the year of the table is given as a parameter at construction.
"""


class AMEReactionFileOne:
"""Easy access to the variables in the first AME reaction file."""
"""
Storage class for the data in the AME reaction 1 data file.

The AME reaction 1 data file is a fixed-width file format, so we store the format details in this class.

Note we have not listed all parameters in the attributes section as there are so many. The naming convention is
however shown, along with a description.

Parameters
----------
year : int
The year the file being parsed was published

Attributes
----------
HEADER : int
The number of lines in the file to be interpreted as the header.
FOOTER : int
The number of lines in the file to be interpreted as the footer.
START_X : int
The first column of parameter X.
END_X : int or None
The last column of parameter X or None to represent the end of the line.
column_limits : list[tuple[int, int]]
The start and end positions of all parameters as a list of tuples that can be passed to :func:`pandas.read_fwf`.
"""

def __init__(self, year: int, **kwargs):
"""Setup the values that locate the variable."""
super().__init__(**kwargs)
match year:
case 1983:
Expand Down
Loading
Loading