Skip to content
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
# Changelog

## [0.4.0] - 2026-05-??
- [#27](https://github.com/php1ic/nuclearmasses/pull/27)
* Refactor Converter class in utils into separate modules
* Update how the column locations of the parameters are set and accessed

## [0.3.0] - 2026-04-28
- [#26](https://github.com/php1ic/nuclearmasses/pull/26)
* Optimise initial parsing of the files.
Expand Down
8 changes: 4 additions & 4 deletions src/nuclearmasses/io/ame.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
import pandas as pd

from nuclearmasses.io.ame_mass_parse import AMEMassParser
from nuclearmasses.io.ame_reaction_1_parse import AMEReactionParserOne
from nuclearmasses.io.ame_reaction_2_parse import AMEReactionParserTwo
from nuclearmasses.io.ame_reaction_1_parse import AMEReactionOneParser
from nuclearmasses.io.ame_reaction_2_parse import AMEReactionTwoParser


class AME:
Expand Down Expand Up @@ -89,8 +89,8 @@ def parse_year(self, year: int) -> pd.DataFrame:
ame_mass, ame_reaction_1, ame_reaction_2 = self.get_datafiles(year)

mass_df = AMEMassParser(filename=ame_mass, year=year).read_file()
rct1_df = AMEReactionParserOne(filename=ame_reaction_1, year=year).read_file()
rct2_df = AMEReactionParserTwo(filename=ame_reaction_2, year=year).read_file()
rct1_df = AMEReactionOneParser(filename=ame_reaction_1, year=year).read_file()
rct2_df = AMEReactionTwoParser(filename=ame_reaction_2, year=year).read_file()

# Merge all 3 of the AME dataframes into one
common_columns = ["A", "Z", "N", "TableYear", "Symbol", "DataSource"]
Expand Down
317 changes: 165 additions & 152 deletions src/nuclearmasses/io/ame_mass_file.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,27 @@
"""
The ame_mass_file module defines the ``AMEMassFile`` class. This class stores the column positions of the start and
finish location of the different parameters recorded in the AME mass data file. The positions have changed between
years so the year of the table is given as a parameter at construction.
The ame_mass_file module defines the ``AMEMassLayout`` and ``AMEMassFile`` classes. The ``AMEMassLayout`` class acts
like a base class, storing the common column names and the start and end positions of the values within the AME data
file. The positions change as time progresses so the ``AMEMassFile`` class uses the year, passed as a parameter, to
update the values as required.

The years 2003, 2012 and 2016 have identical formatting so are used as the base, not the 1983 format.
"""

import dataclasses

class AMEMassFile:

@dataclasses.dataclass(kw_only=True)
class AMEMassLayout:
"""
Storage class for the data in the AME mass data file.
Storage class for the most common data in the AME mass data file.

The AME mass data file is fixed-width file format so we will store the format details in this class.

Note we have not listed all parameters in the attributes section as there are so many. The naming convention is
however shown, along with a description.

Parameters
----------
year : int
The year the file being parsed was published
Each attribute name embeds the name of its column as a string, which allows other parts of the code to create
variables and attributes dynamically.

Attributes
----------
Expand All @@ -29,149 +33,158 @@ class AMEMassFile:
The first column of parameter X.
END_X : int or None
The last column of parameter X or None to represent the end of the line.
column_limits : list[tuple[int, int]]
The start and end positions of all parameters as a list of tuples that can be passed to :meth:`pandas.read_fwf`.
columns : list[str]
The list of columns that appear in the file.
positions : list[tuple[str, str, str]]
A list of tuples giving each column name alongside the names of the attributes that hold its start and end position
in the line.
"""

def __init__(self, year: int, **kwargs):
super().__init__(**kwargs)
match year:
case 1983:
self.HEADER = 35
self.FOOTER = 0
self.START_Z = 11
self.END_Z = 14
self.START_A = 16
self.END_A = 19
self.START_ME = 29
self.END_ME = 39
self.START_DME = 41
self.END_DME = 48
self.START_BE_PER_A = 49
self.END_BE_PER_A = 59
self.START_DBE_PER_A = 61
self.END_DBE_PER_A = 68
self.START_BETA_DECAY_ENERGY = 76
self.END_BETA_DECAY_ENERGY = 85
self.START_DBETA_DECAY_ENERGY = 87
self.END_DBETA_DECAY_ENERGY = 94
self.START_AM = 97
self.END_AM = 99
self.START_MICRO_U = 100
self.END_MICRO_U = 110
self.START_MICRO_DU = 113
self.END_MICRO_DU = 120
case 1993:
self.HEADER = 40
self.FOOTER = 0
self.START_Z = 11
self.END_Z = 14
self.START_A = 16
self.END_A = 19
self.START_ME = 29
self.END_ME = 39
self.START_DME = 41
self.END_DME = 48
self.START_BE_PER_A = 49
self.END_BE_PER_A = 59
self.START_DBE_PER_A = 61
self.END_DBE_PER_A = 68
self.START_BETA_DECAY_ENERGY = 76
self.END_BETA_DECAY_ENERGY = 85
self.START_DBETA_DECAY_ENERGY = 87
self.END_DBETA_DECAY_ENERGY = 94
self.START_AM = 97
self.END_AM = 99
self.START_MICRO_U = 100
self.END_MICRO_U = 110
self.START_MICRO_DU = 112
self.END_MICRO_DU = 120
case 1995:
self.HEADER = 39
self.FOOTER = 0
self.START_Z = 11
self.END_Z = 14
self.START_A = 16
self.END_A = 19
self.START_ME = 29
self.END_ME = 39
self.START_DME = 41
self.END_DME = 48
self.START_BE_PER_A = 49
self.END_BE_PER_A = 59
self.START_DBE_PER_A = 61
self.END_DBE_PER_A = 68
self.START_BETA_DECAY_ENERGY = 76
self.END_BETA_DECAY_ENERGY = 85
self.START_DBETA_DECAY_ENERGY = 87
self.END_DBETA_DECAY_ENERGY = 94
self.START_AM = 97
self.END_AM = 99
self.START_MICRO_U = 100
self.END_MICRO_U = 110
self.START_MICRO_DU = 112
self.END_MICRO_DU = 120
case 2020:
self.HEADER = 36
self.FOOTER = 0
self.START_Z = 11
self.END_Z = 14
self.START_A = 16
self.END_A = 19
self.START_ME = 29
self.END_ME = 42
self.START_DME = 43
self.END_DME = 53
self.START_BE_PER_A = 56
self.END_BE_PER_A = 66
self.START_DBE_PER_A = 69
self.END_DBE_PER_A = 77
self.START_BETA_DECAY_ENERGY = 82
self.END_BETA_DECAY_ENERGY = 93
self.START_DBETA_DECAY_ENERGY = 95
self.END_DBETA_DECAY_ENERGY = 104
self.START_AM = 106
self.END_AM = 109
self.START_MICRO_U = 110
self.END_MICRO_U = 120
self.START_MICRO_DU = 124
self.END_MICRO_DU = 135
case _:
self.HEADER = 39
self.FOOTER = 0
self.START_Z = 11
self.END_Z = 14
self.START_A = 16
self.END_A = 19
self.START_ME = 29
self.END_ME = 41
self.START_DME = 42
self.END_DME = 53
self.START_BE_PER_A = 54
self.END_BE_PER_A = 64
self.START_DBE_PER_A = 65
self.END_DBE_PER_A = 72
self.START_BETA_DECAY_ENERGY = 76
self.END_BETA_DECAY_ENERGY = 86
self.START_DBETA_DECAY_ENERGY = 87
self.END_DBETA_DECAY_ENERGY = 95
self.START_AM = 96
self.END_AM = 99
self.START_MICRO_U = 100
self.END_MICRO_U = 112
self.START_MICRO_DU = 113
self.END_MICRO_DU = 120

self.column_limits = [
(self.START_Z, self.END_Z),
(self.START_A, self.END_A),
(self.START_ME, self.END_ME),
(self.START_DME, self.END_DME),
(self.START_BE_PER_A, self.END_BE_PER_A),
(self.START_DBE_PER_A, self.END_DBE_PER_A),
(self.START_BETA_DECAY_ENERGY, self.END_BETA_DECAY_ENERGY),
(self.START_DBETA_DECAY_ENERGY, self.END_DBETA_DECAY_ENERGY),
(self.START_AM, self.END_AM),
(self.START_MICRO_U, self.END_MICRO_U),
(self.START_MICRO_DU, self.END_MICRO_DU),
HEADER: int = 39
FOOTER: int = 0
START_Z: int = 11
END_Z: int = 14
START_A: int = 16
END_A: int = 19
START_AMEMassExcess: int = 29
END_AMEMassExcess: int = 41
START_AMEMassExcessError: int = 42
END_AMEMassExcessError: int = 53
START_BindingEnergyPerA: int = 54
END_BindingEnergyPerA: int = 64
START_BindingEnergyPerAError: int = 65
END_BindingEnergyPerAError: int = 72
START_BetaDecayEnergy: int = 76
END_BetaDecayEnergy: int = 86
START_BetaDecayEnergyError: int = 87
END_BetaDecayEnergyError: int = 95
START_AtomicNumber: int = 96
END_AtomicNumber: int = 99
START_AtomicMass: int = 100
END_AtomicMass: int = 112
START_AtomicMassError: int = 113
END_AtomicMassError: int = 120

def __post_init__(self) -> None:
    """
    Record the column names and the attribute names that hold their limits.

    Sets ``columns`` to the ordered list of column names and ``positions`` to one
    ``(column, "START_<column>", "END_<column>")`` tuple per column, in the same order.
    """
    self.columns: list[str] = [
        "Z",
        "A",
        "AMEMassExcess",
        "AMEMassExcessError",
        "BindingEnergyPerA",
        "BindingEnergyPerAError",
        "BetaDecayEnergy",
        "BetaDecayEnergyError",
        "AtomicNumber",
        "AtomicMass",
        "AtomicMassError",
    ]

    # The tuples carry attribute *names*, not the integer limits themselves.
    self.positions: list[tuple[str, str, str]] = []
    for column in self.columns:
        self.positions.append((column, f"START_{column}", f"END_{column}"))


class AMEMassFile:
"""
Storage class for the year specific data in the AME mass data file.

The base ``AMEMassLayout`` class is constructed and updated as required for the given ``year``.

Parameters
----------
year : int
The year the file being parsed was published

Attributes
----------
AME_MASS_YEAR_OVERRIDES : dict[int | str, dict[str, int]]
Year specific updates and changes required to ``AMEMassLayout``.
layout : AMEMassLayout
A storage class containing details of parameters and their locations in the line.
"""

AME_MASS_YEAR_OVERRIDES: dict[int | str, dict[str, int]] = {
"default": {},
1983: {
"HEADER": 35,
"END_AMEMassExcess": 39,
"START_AMEMassExcessError": 41,
"END_AMEMassExcessError": 48,
"START_BindingEnergyPerA": 49,
"END_BindingEnergyPerA": 59,
"START_BindingEnergyPerAError": 61,
"END_BindingEnergyPerAError": 68,
"START_BetaDecayEnergy": 76,
"END_BetaDecayEnergy": 85,
"START_BetaDecayEnergyError": 87,
"END_BetaDecayEnergyError": 94,
"START_AtomicNumber": 97,
"END_AtomicNumber": 99,
"START_AtomicMass": 100,
"END_AtomicMass": 110,
},
1993: {
"HEADER": 40,
"END_AMEMassExcess": 39,
"START_AMEMassExcessError": 41,
"END_AMEMassExcessError": 48,
"START_BindingEnergyPerA": 49,
"END_BindingEnergyPerA": 59,
"START_BindingEnergyPerAError": 61,
"END_BindingEnergyPerAError": 68,
"START_BetaDecayEnergy": 76,
"END_BetaDecayEnergy": 85,
"START_BetaDecayEnergyError": 87,
"END_BetaDecayEnergyError": 94,
"START_AtomicNumber": 97,
"END_AtomicNumber": 99,
"START_AtomicMass": 100,
"END_AtomicMass": 110,
"START_AtomicMassError": 112,
},
1995: {
"END_AMEMassExcess": 39,
"START_AMEMassExcessError": 41,
"END_AMEMassExcessError": 48,
"START_BindingEnergyPerA": 49,
"END_BindingEnergyPerA": 59,
"START_BindingEnergyPerAError": 61,
"END_BindingEnergyPerAError": 68,
"START_BetaDecayEnergy": 76,
"END_BetaDecayEnergy": 85,
"START_BetaDecayEnergyError": 87,
"END_BetaDecayEnergyError": 94,
"START_AtomicNumber": 97,
"END_AtomicNumber": 99,
"START_AtomicMass": 100,
"END_AtomicMass": 110,
"START_AtomicMassError": 112,
},
# The years 2003, 2012 and 2016 have identical formatting so are used as the base
2003: {},
2012: {},
2016: {},
2020: {
"HEADER": 36,
"END_AMEMassExcess": 42,
"START_AMEMassExcessError": 43,
"END_AMEMassExcessError": 53,
"START_BindingEnergyPerA": 56,
"END_BindingEnergyPerA": 66,
"START_BindingEnergyPerAError": 69,
"END_BindingEnergyPerAError": 77,
"START_BetaDecayEnergy": 82,
"END_BetaDecayEnergy": 93,
"START_BetaDecayEnergyError": 95,
"END_BetaDecayEnergyError": 104,
"START_AtomicNumber": 106,
"END_AtomicNumber": 109,
"START_AtomicMass": 110,
"END_AtomicMass": 120,
"START_AtomicMassError": 124,
"END_AtomicMassError": 135,
},
}

def __init__(self, year: int) -> None:
self.layout = AMEMassLayout(
**AMEMassFile.AME_MASS_YEAR_OVERRIDES.get(year, AMEMassFile.AME_MASS_YEAR_OVERRIDES["default"])
)
Loading
Loading