Refactor replace into its own function

php1ic · php1ic · commit 6524f56a638f · 2026-04-24T19:37:21.000+01:00
diff --git a/src/nuclearmasses/io/ame_mass_parse.py b/src/nuclearmasses/io/ame_mass_parse.py
@@ -97,8 +97,7 @@ def read_file(self) -> pd.DataFrame:
         )
         # We use the NUBASE data to define whether or not an isotope is experimentally measured,
         # so for this data we'll just drop any and all '#' characters
-        str_cols = df.select_dtypes(include=["object", "string"]).columns
-        df[str_cols] = df[str_cols].astype(str).apply(lambda s: s.str.replace("#", "", regex=False))
+        df = self.strip_char_from_string_columns(df, "#")
 
         if self.year == 1983:
             # The column headers and units are repeated in the 1983 table
diff --git a/src/nuclearmasses/io/ame_reaction_1_parse.py b/src/nuclearmasses/io/ame_reaction_1_parse.py
@@ -95,8 +95,7 @@ def read_file(self) -> pd.DataFrame:
         )
         # We use the NUBASE data to define whether or not an isotope is experimentally measured,
         # so for this data we'll just drop any and all '#' characters
-        str_cols = df.select_dtypes(include=["object", "string"]).columns
-        df[str_cols] = df[str_cols].astype(str).apply(lambda s: s.str.replace("#", "", regex=False))
+        df = self.strip_char_from_string_columns(df, "#")
 
         if self.year == 1983:
             # The column headers and units are repeated in the 1983 table
diff --git a/src/nuclearmasses/io/ame_reaction_2_parse.py b/src/nuclearmasses/io/ame_reaction_2_parse.py
@@ -95,8 +95,7 @@ def read_file(self) -> pd.DataFrame:
         )
         # We use the NUBASE data to define whether or not an isotope is experimentally measured,
         # so for this data we'll just drop any and all '#' characters
-        str_cols = df.select_dtypes(include=["object", "string"]).columns
-        df[str_cols] = df[str_cols].astype(str).apply(lambda s: s.str.replace("#", "", regex=False))
+        df = self.strip_char_from_string_columns(df, "#")
 
         if self.year == 1983:
             # The column headers and units are repeated in the 1983 table
diff --git a/src/nuclearmasses/io/nubase_parse.py b/src/nuclearmasses/io/nubase_parse.py
@@ -185,8 +185,7 @@ def read_file(self) -> pd.DataFrame:
         # We use the NUBASE data to define whether or not an isotope is experimentally measured,
         df["Experimental"] = ~df["NUBASEMassExcess"].astype("string").str.contains("#", na=False)
         # Once we have used the '#' to determine if it's experimental or not, we can remove all instances of it
-        str_cols = df.select_dtypes(include=["object", "string"]).columns
-        df[str_cols] = df[str_cols].astype(str).apply(lambda s: s.str.replace("#", "", regex=False))
+        df = self.strip_char_from_string_columns(df, "#")
 
         df = self.parse_half_life(df)
         df = self.calculate_relative_error(df)
diff --git a/src/nuclearmasses/utils/converter.py b/src/nuclearmasses/utils/converter.py
@@ -134,3 +134,9 @@ def read_fwf(base: DataInput, **kwargs):
 
         # Filesystem path
         return pd.read_fwf(base, **kwargs)
+
+    @staticmethod  # Strip every literal `char` from the object/string columns of `df`.
+    def strip_char_from_string_columns(df: pd.DataFrame, char: str) -> pd.DataFrame:
+        cols = df.select_dtypes(include=["object", "string"]).columns  # only these dtypes are touched
+        df[cols] = df[cols].apply(lambda s: s.str.replace(char, "", regex=False))  # plain text, not regex
+        return df  # the frame that was passed in, with the cleaned columns assigned back into it

Original file line number	Diff line number	Diff line change
`@@ -97,8 +97,7 @@ def read_file(self) -> pd.DataFrame:`
`97`	`97`	`)`
`98`	`98`	`# We use the NUBASE data to define whether or not an isotope is experimentally measured,`
`99`	`99`	`# so for this data we'll just drop any and all '#' characters`
`100`		`- str_cols = df.select_dtypes(include=["object", "string"]).columns`
`101`		`- df[str_cols] = df[str_cols].astype(str).apply(lambda s: s.str.replace("#", "", regex=False))`
	`100`	`+ df = self.strip_char_from_string_columns(df, "#")`
`102`	`101`
`103`	`102`	`if self.year == 1983:`
`104`	`103`	`# The column headers and units are repeated in the 1983 table`
Original file line number	Diff line number	Diff line change
`@@ -95,8 +95,7 @@ def read_file(self) -> pd.DataFrame:`
`95`	`95`	`)`
`96`	`96`	`# We use the NUBASE data to define whether or not an isotope is experimentally measured,`
`97`	`97`	`# so for this data we'll just drop any and all '#' characters`
`98`		`- str_cols = df.select_dtypes(include=["object", "string"]).columns`
`99`		`- df[str_cols] = df[str_cols].astype(str).apply(lambda s: s.str.replace("#", "", regex=False))`
	`98`	`+ df = self.strip_char_from_string_columns(df, "#")`
`100`	`99`
`101`	`100`	`if self.year == 1983:`
`102`	`101`	`# The column headers and units are repeated in the 1983 table`