diff --git a/.changes/unreleased/Features-20260104-231744.yaml b/.changes/unreleased/Features-20260104-231744.yaml
new file mode 100644
index 00000000..80c29e5e
--- /dev/null
+++ b/.changes/unreleased/Features-20260104-231744.yaml
@@ -0,0 +1,6 @@
+kind: Features
+body: Add seed header option support
+time: 2026-01-04T23:17:44.313671+01:00
+custom:
+  Author: aammett
+  Issue: "341"
diff --git a/dbt_common/clients/agate_helper.py b/dbt_common/clients/agate_helper.py
--- a/dbt_common/clients/agate_helper.py
+++ b/dbt_common/clients/agate_helper.py
@@ -149,12 +149,37 @@ def as_matrix(table):
     return [r.values() for r in table.rows.values()]
 
 
-def from_csv(abspath, text_columns, delimiter=",") -> agate.Table:
+def from_csv(
+    abspath: str,
+    text_columns: Iterable[str],
+    delimiter: str = ",",
+    column_names: Optional[List[str]] = None,
+    header: bool = True,
+) -> agate.Table:
+    """Load the CSV file at ``abspath`` into an agate table.
+
+    A byte-order mark at the start of the file is consumed before parsing.
+
+    :param abspath: Path of the UTF-8 encoded CSV file to read.
+    :param text_columns: Names of the columns handed to ``build_type_tester``.
+    :param delimiter: Field separator forwarded to ``agate.Table.from_csv``.
+    :param column_names: Explicit column names forwarded to agate. When omitted,
+        names come from the header row, or are generated by agate if ``header``
+        is false.
+    :param header: Whether the first row holds column names instead of data.
+    :return: The parsed table.
+    """
     type_tester = build_type_tester(text_columns=text_columns)
     with open(abspath, encoding="utf-8") as fp:
         if fp.read(1) != BOM:
             fp.seek(0)
-        return agate.Table.from_csv(fp, column_types=type_tester, delimiter=delimiter)
+        return agate.Table.from_csv(
+            fp,
+            column_types=type_tester,
+            delimiter=delimiter,
+            column_names=column_names,
+            header=header,
+        )
 
 
 class _NullMarker:
diff --git a/tests/unit/test_agate_helper.py b/tests/unit/test_agate_helper.py
--- a/tests/unit/test_agate_helper.py
+++ b/tests/unit/test_agate_helper.py
@@ -10,12 +10,16 @@
 from tempfile import mkdtemp
 from dbt_common.clients import agate_helper
 
-SAMPLE_CSV_DATA = """a,b,c,d,e,f,g
+SAMPLE_CSV_DATA = """col1,col2,col3,col4,col5,col6,col7
 1,n,test,3.2,20180806T11:33:29.320Z,True,NULL
 2,y,asdf,900,20180806T11:35:29.320Z,False,a string"""
 
 SAMPLE_CSV_BOM_DATA = "\ufeff" + SAMPLE_CSV_DATA
 
+SAMPLE_CSV_NO_HEADER_DATA = """1,n,test,3.2,20180806T11:33:29.320Z,True,NULL
+2,y,asdf,900,20180806T11:35:29.320Z,False,a string"""
+
+EXPECTED_COLUMN_NAMES = ("col1", "col2", "col3", "col4", "col5", "col6", "col7")
 
 EXPECTED = [
     [
@@ -74,7 +78,44 @@ def test_from_csv_all_reserved(self) -> None:
         path = os.path.join(self.tempdir, "input.csv")
         with open(path, "wb") as fp:
             fp.write(SAMPLE_CSV_DATA.encode("utf-8"))
-        tbl = agate_helper.from_csv(path, tuple("abcdefg"))
+        tbl = agate_helper.from_csv(path, EXPECTED_COLUMN_NAMES)
+        self.assertEqual(tbl.column_names, EXPECTED_COLUMN_NAMES)
+        self.assertEqual(len(tbl), len(EXPECTED_STRINGS))
+        for expected, row in zip(EXPECTED_STRINGS, tbl):
+            self.assertEqual(list(row), expected)
+
+    def test_from_csv_no_header(self) -> None:
+        path = os.path.join(self.tempdir, "input.csv")
+        with open(path, "wb") as fp:
+            fp.write(SAMPLE_CSV_NO_HEADER_DATA.encode("utf-8"))
+        tbl = agate_helper.from_csv(path, (), header=False)
+        self.assertEqual(len(tbl), len(EXPECTED))
+        # Check column names are auto-generated (a, b, c, d, e, f, g)
+        expected_column_names = ("a", "b", "c", "d", "e", "f", "g")
+        self.assertEqual(tbl.column_names, expected_column_names)
+        for idx, row in enumerate(tbl):
+            self.assertEqual(list(row), EXPECTED[idx])
+
+    def test_from_csv_no_header_with_column_names(self) -> None:
+        path = os.path.join(self.tempdir, "input.csv")
+        with open(path, "wb") as fp:
+            fp.write(SAMPLE_CSV_NO_HEADER_DATA.encode("utf-8"))
+        tbl = agate_helper.from_csv(
+            path, (), column_names=list(EXPECTED_COLUMN_NAMES), header=False
+        )
+        self.assertEqual(tbl.column_names, EXPECTED_COLUMN_NAMES)
+        self.assertEqual(len(tbl), len(EXPECTED))
+        for expected, row in zip(EXPECTED, tbl):
+            self.assertEqual(list(row), expected)
+
+    def test_from_csv_no_header_all_reserved(self) -> None:
+        path = os.path.join(self.tempdir, "input.csv")
+        with open(path, "wb") as fp:
+            fp.write(SAMPLE_CSV_NO_HEADER_DATA.encode("utf-8"))
+        tbl = agate_helper.from_csv(path, tuple("abcdefg"), header=False)
         self.assertEqual(len(tbl), len(EXPECTED_STRINGS))
+        # Check column names are auto-generated (a, b, c, d, e, f, g)
+        expected_column_names = ("a", "b", "c", "d", "e", "f", "g")
+        self.assertEqual(tbl.column_names, expected_column_names)
         for expected, row in zip(EXPECTED_STRINGS, tbl):
             self.assertEqual(list(row), expected)