Skip to content

Commit 740ae98

Browse files
committed
feat: ensure xlsx_errors column is added after data fields in add_xlsx_worksheet function
Signed-off-by: Gaurav Sethi <gauravsethi4627@gmail.com>
1 parent 771981e commit 740ae98

File tree

2 files changed

+46
-1
lines changed

2 files changed

+46
-1
lines changed

scanpipe/pipes/output.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -382,7 +382,7 @@ def add_xlsx_worksheet(workbook, worksheet_name, rows, fields):
382382
worksheet.write_row(row=0, col=0, data=header, cell_format=cell_format)
383383

384384
errors_count = 0
385-
errors_col_index = len(fields) - 1 # rows and cols are zero-indexed
385+
errors_col_index = len(fields) # xlsx_errors column comes after all data fields
386386

387387
for row_index, record in enumerate(rows, start=1):
388388
row_errors = []

scanpipe/tests/pipes/test_output.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -810,6 +810,51 @@ def test_add_xlsx_worksheet_does_truncates_long_strings_over_max_len(self):
810810
if r != x:
811811
self.assertEqual(r[-50:], x)
812812

813+
def test_add_xlsx_worksheet_writes_xlsx_errors_after_all_data_columns(self):
    """
    Check that truncation errors land in the trailing ``xlsx_errors`` column.

    With multiple data columns, the truncation message must be written to the
    column that follows the last data field, not overwrite that field's value.
    """
    # 32770 characters; the assertions below expect the stored cell to be
    # cut down to 32767 characters.
    long_name = "f" * 32760 + "0123456789"
    path_value = "/expected/path/value"

    rows = [{"name": long_name, "path": path_value}]
    fields = ["name", "path"]

    # TemporaryDirectory removes the directory (and the workbook inside it)
    # when the block exits, so the test leaves nothing behind on disk.
    with tempfile.TemporaryDirectory(prefix="scancode-io-test") as test_dir:
        output_file = Path(test_dir) / "multi-field-long-cell.xlsx"

        with xlsxwriter.Workbook(str(output_file)) as workbook:
            output.add_xlsx_worksheet(
                workbook=workbook,
                worksheet_name="packages",
                rows=rows,
                fields=fields,
            )

        wb = openpyxl.load_workbook(output_file, read_only=True, data_only=True)
        try:
            ws = wb["packages"]

            # Header row: the two data fields followed by the errors column.
            self.assertEqual(ws.cell(1, 1).value, "name")
            self.assertEqual(ws.cell(1, 2).value, "path")
            self.assertEqual(ws.cell(1, 3).value, "xlsx_errors")

            # Capture the values before closing the workbook.
            name_cell = ws.cell(2, 1).value
            path_cell = ws.cell(2, 2).value
            errors_cell = ws.cell(2, 3).value
        finally:
            # Close the workbook so the file handle is released before the
            # temporary directory is deleted.
            wb.close()

    self.assertEqual(len(name_cell), 32767)
    self.assertTrue(name_cell.startswith("f"))
    # The last data field must keep its own value.
    self.assertEqual(path_value, path_cell)
    self.assertIsNotNone(errors_cell)
    self.assertIn("name", errors_cell)
    self.assertIn("truncated", errors_cell.lower())
    self.assertIn("32767", errors_cell)
857+
813858
def test_add_xlsx_worksheet_does_not_munge_long_strings_of_over_1024_lines(self):
814859
# This test verifies that we do not truncate long text silently
815860

0 commit comments

Comments
 (0)