Skip to content

Commit 58cb52a

Browse files
committed
Allow Excel file overwrites on S3 - see HEA-807
1 parent e32f170 commit 58cb52a

1 file changed

Lines changed: 23 additions & 11 deletions

File tree

pipelines/resources.py

Lines changed: 23 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -127,24 +127,36 @@ def dump_to_path(
127127
obj = {f"Sheet{i+1}": df for i, df in enumerate(obj)}
128128

129129
# Set the ExcelWriter parameters
130-
mode = "w"
130+
mode = "wb"
131131
if self.path_exists(path):
132-
mode = "a"
132+
mode = "ab"
133133
if self.if_sheet_exists is None:
134134
self.if_sheet_exists = "replace"
135135
if "keep_vba" not in self.engine_kwargs:
136136
self.engine_kwargs["keep_vba"] = True
137137

138138
# Write the DataFrames to the Excel file
139-
with pd.ExcelWriter(
140-
path,
141-
engine="openpyxl",
142-
mode=mode,
143-
if_sheet_exists=self.if_sheet_exists,
144-
engine_kwargs=self.engine_kwargs,
145-
) as writer:
146-
for sheet_name, df in obj.items():
147-
df.to_excel(writer, sheet_name=sheet_name, **self.to_excel_kwargs)
139+
try:
140+
with pd.ExcelWriter(
141+
path,
142+
engine="openpyxl",
143+
mode=mode,
144+
if_sheet_exists=self.if_sheet_exists,
145+
engine_kwargs=self.engine_kwargs,
146+
) as writer:
147+
for sheet_name, df in obj.items():
148+
df.to_excel(writer, sheet_name=sheet_name, **self.to_excel_kwargs)
149+
except NotImplementedError:
150+
context.log.warning(
151+
f"Storage layer does not support appending to existing files. Overwriting file: {path}"
152+
)
153+
with pd.ExcelWriter(
154+
path,
155+
engine="openpyxl",
156+
mode="w",
157+
) as writer:
158+
for sheet_name, df in obj.items():
159+
df.to_excel(writer, sheet_name=sheet_name, **self.to_excel_kwargs)
148160

149161
def load_from_path(self, context: InputContext, path: UPath) -> dict[str, pd.DataFrame]:
150162
with pd.ExcelFile(path, engine="openpyxl") as xls:

0 commit comments

Comments
 (0)