From bebdfe9fe340b44e24c8bc62b930c9f2ed63024d Mon Sep 17 00:00:00 2001
From: Pascal Bourgault <bourgault.pascal@ouranos.ca>
Date: Fri, 17 Oct 2025 16:06:14 -0400
Subject: [PATCH 1/4] iterable_columns in spec

---
 CHANGELOG.md                              |  6 ++++++
 docs/source/reference/esm-catalog-spec.md |  1 +
 intake_esm/cat.py                         | 11 +++++++++++
 intake_esm/core.py                        |  7 +++++++
 4 files changed, 25 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index a887dd72..48857b15 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,12 @@
 
 [Full Changelog](https://github.com/intake/intake-esm/compare/v2025.2.3...v2025.7.9)
 
+## vUNRELEASED
+
+### New features added
+
+- New ``iterable_columns`` field in the ESM catalog spec to specify which columns should be read as iterables (tuples). Deprecates argument ``columns_with_iterable`` of the ``esm_datastore`` by @aulemahal in https://github.com/intake/intake-esm/pull/752
+
 ## v2025.7.9
 
 ### New features added
diff --git a/docs/source/reference/esm-catalog-spec.md b/docs/source/reference/esm-catalog-spec.md
index 44c44d3b..7ba69665 100644
--- a/docs/source/reference/esm-catalog-spec.md
+++ b/docs/source/reference/esm-catalog-spec.md
@@ -67,6 +67,7 @@ They should be either [URIs](https://en.wikipedia.org/wiki/Uniform_Resource_Iden
 | description         | string                                                    | **REQUIRED.** Detailed multi-line description to fully explain the catalog. [CommonMark 0.28](http://commonmark.org/) syntax MAY be used for rich text representation. |
 | catalog_file        | string                                                    | **REQUIRED.** Path to a the CSV file with the catalog contents.                                                                                                        |
 | catalog_dict        | array                                                     | If specified, it is mutually exclusive with `catalog_file`. An array of dictionaries that represents the data that would otherwise be in the csv.                      |
+| iterable_columns    | array                                                     | A list of column names that contain iterable values instead of scalar ones.                                                                                            |
 | attributes          | [[Attribute Object](#attribute-object)]                   | **REQUIRED.** A list of attribute columns in the data set.                                                                                                             |
 | assets              | [Assets Object](#assets-object)                           | **REQUIRED.** Description of how the assets (data files) are referenced in the CSV catalog file.                                                                       |
 | aggregation_control | [Aggregation Control Object](#aggregation-control-object) | **OPTIONAL.** Description of how to support aggregation of multiple assets into a single xarray data set.                                                              |
diff --git a/intake_esm/cat.py b/intake_esm/cat.py
index 662fd43e..f461423e 100644
--- a/intake_esm/cat.py
+++ b/intake_esm/cat.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import ast
 import builtins
 import datetime
 import enum
@@ -115,6 +116,7 @@ class ESMCatalogModel(pydantic.BaseModel):
     id: str = ''
     catalog_dict: list[dict] | None = None
     catalog_file: pydantic.StrictStr | None = None
+    iterable_columns: set[pydantic.StrictStr] | None = None
     description: pydantic.StrictStr | None = None
     title: pydantic.StrictStr | None = None
     last_updated: datetime.datetime | datetime.date | None = None
@@ -320,6 +322,15 @@ def _df_from_file(
             csv_path = f'{os.path.dirname(_mapper.root)}/{cat.catalog_file}'
         cat.catalog_file = csv_path
 
+        if self.iterable_columns:
+            converter = ast.literal_eval
+            read_kwargs.setdefault('converters', {})
+            for col in self.iterable_columns:
+                if read_kwargs['converters'].setdefault(col, converter) != converter:
+                    raise ValueError(
+                        f"Cannot provide converter for '{col}' via `read_kwargs` when '{col}' is also specified in `iterable_columns`"
+                    )
+
         reader = CatalogFileDataReader(cat.catalog_file, storage_options, **read_kwargs)
         self._iterable_dtype_map = reader.dtype_map
         return reader.frames
diff --git a/intake_esm/core.py b/intake_esm/core.py
index 26b72d5b..31818822 100644
--- a/intake_esm/core.py
+++ b/intake_esm/core.py
@@ -120,6 +120,13 @@ def __init__(
 
         read_kwargs = read_kwargs or {}
         if columns_with_iterables:
+            warnings.warn(
+                "columns_with_iterables is deprecated as an argument to esm_datastore "
+                "and will be removed in a future version.  Please set 'iterable_columns' "
+                r"in the catalog's json definition or pass read_kwargs={'converters': {COL: ast.literal_eval}}.",
+                DeprecationWarning,
+                stacklevel=2
+            )
             converter = ast.literal_eval
             read_kwargs.setdefault('converters', {})
             for col in columns_with_iterables:

From e083d380645efeaf65141186eee474ab1779a28d Mon Sep 17 00:00:00 2001
From: Pascal Bourgault <bourgault.pascal@ouranos.ca>
Date: Fri, 17 Oct 2025 16:22:01 -0400
Subject: [PATCH 2/4] Add a test

---
 CHANGELOG.md       | 2 +-
 intake_esm/core.py | 4 ++--
 tests/test_core.py | 2 ++
 tests/utils.py     | 3 +++
 4 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 48857b15..ccbb577d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,7 +6,7 @@
 
 ### New features added
 
-- New ``iterable_columns`` field in the ESM catalog spec to specify which columns should be read as iterables (tuples). Deprecates argument ``columns_with_iterable`` of the ``esm_datastore`` by @aulemahal in https://github.com/intake/intake-esm/pull/752
+- New `iterable_columns` field in the ESM catalog spec to specify which columns should be read as iterables (tuples). This is a catalog-level alternative to the `columns_with_iterables` argument of `esm_datastore`, which remains supported. By @aulemahal in https://github.com/intake/intake-esm/pull/752
 
 ## v2025.7.9
 
diff --git a/intake_esm/core.py b/intake_esm/core.py
index 31818822..79e7ea18 100644
--- a/intake_esm/core.py
+++ b/intake_esm/core.py
@@ -121,11 +121,11 @@ def __init__(
         read_kwargs = read_kwargs or {}
         if columns_with_iterables:
             warnings.warn(
-                "columns_with_iterables is deprecated as an argument to esm_datastore "
+                'columns_with_iterables is deprecated as an argument to esm_datastore '
                 "and will be removed in a future version.  Please set 'iterable_columns' "
                 r"in the catalog's json definition or pass read_kwargs={'converters': {COL: ast.literal_eval}}.",
                 DeprecationWarning,
-                stacklevel=2
+                stacklevel=2,
             )
             converter = ast.literal_eval
             read_kwargs.setdefault('converters', {})
diff --git a/tests/test_core.py b/tests/test_core.py
index 6d4f5bc5..cb24442f 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -28,6 +28,7 @@
     cdf_cat_sample_cmip6_noagg,
     mixed_cat_sample_cmip6,
     multi_variable_cat,
+    multi_variable_hard_cat,
     opendap_cat_sample_noaa,
     sample_df,
     sample_esmcat_data,
@@ -158,6 +159,7 @@ def test_catalog_init_back_compat(capsys, obj, sep, read_kwargs, read_csv_kwargs
     [
         (multi_variable_cat, {'converters': {'variable': ast.literal_eval}}, None),
         (multi_variable_cat, None, ['variable']),
+        (multi_variable_hard_cat, None, None),
     ],
 )
 def test_columns_with_iterables(capsys, obj, read_kwargs, columns_with_iterables):
diff --git a/tests/utils.py b/tests/utils.py
index 7096f1b9..e8e78ffd 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -7,6 +7,9 @@
 zarr_cat_pangeo_cmip6 = 'https://storage.googleapis.com/cmip6/pangeo-cmip6.json'
 cdf_cat_sample_cmip6 = os.path.join(here, 'sample-catalogs/cmip6-netcdf.json')
 multi_variable_cat = os.path.join(here, 'sample-catalogs/multi-variable-catalog.json')
+multi_variable_hard_cat = os.path.join(
+    here, 'sample-catalogs/multi-variable-hardcoded-catalog.json'
+)
 cdf_cat_sample_cmip5 = os.path.join(here, 'sample-catalogs/cmip5-netcdf.json')
 cdf_cat_sample_cmip5_pq = os.path.join(here, 'sample-catalogs/cmip5-netcdf-parquet.json')
 cdf_cat_sample_cesmle = os.path.join(here, 'sample-catalogs/cesm1-lens-netcdf.json')

From c83923d839cd79aaf0e54e467c79c2320810ef12 Mon Sep 17 00:00:00 2001
From: Pascal Bourgault <bourgault.pascal@ouranos.ca>
Date: Fri, 17 Oct 2025 16:29:58 -0400
Subject: [PATCH 3/4] Remove deprecation add note to docstring

---
 intake_esm/core.py | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/intake_esm/core.py b/intake_esm/core.py
index 79e7ea18..167b90e7 100644
--- a/intake_esm/core.py
+++ b/intake_esm/core.py
@@ -59,6 +59,7 @@ class esm_datastore(Catalog):
         A list of columns in the csv file containing iterables. Values in columns specified here will be
         converted with `ast.literal_eval` when :py:func:`~pandas.read_csv` is called (i.e., this is a
         shortcut to passing converters to `read_kwargs`).
+        Catalogs may also declare such columns themselves through the ``iterable_columns`` field of their JSON definition.
     storage_options : dict, optional
         Parameters passed to the backend file-system such as Google Cloud Storage,
         Amazon Web Service S3.
@@ -120,13 +121,6 @@ def __init__(
 
         read_kwargs = read_kwargs or {}
         if columns_with_iterables:
-            warnings.warn(
-                'columns_with_iterables is deprecated as an argument to esm_datastore '
-                "and will be removed in a future version.  Please set 'iterable_columns' "
-                r"in the catalog's json definition or pass read_kwargs={'converters': {COL: ast.literal_eval}}.",
-                DeprecationWarning,
-                stacklevel=2,
-            )
             converter = ast.literal_eval
             read_kwargs.setdefault('converters', {})
             for col in columns_with_iterables:

From 06997681108f867fa1188cc194470e6330e8bd36 Mon Sep 17 00:00:00 2001
From: Pascal Bourgault <bourgault.pascal@ouranos.ca>
Date: Fri, 17 Oct 2025 17:09:09 -0400
Subject: [PATCH 4/4] add new test json

---
 .../multi-variable-hardcoded-catalog.json     | 55 +++++++++++++++++++
 1 file changed, 55 insertions(+)
 create mode 100644 tests/sample-catalogs/multi-variable-hardcoded-catalog.json

diff --git a/tests/sample-catalogs/multi-variable-hardcoded-catalog.json b/tests/sample-catalogs/multi-variable-hardcoded-catalog.json
new file mode 100644
index 00000000..1b9fa393
--- /dev/null
+++ b/tests/sample-catalogs/multi-variable-hardcoded-catalog.json
@@ -0,0 +1,55 @@
+{
+  "esmcat_version": "0.1.0",
+  "id": "sample-multi-variable-cesm1-lens",
+  "description": "This is a sample ESM catalog emulating multi variable/history files for CESM1-LENS",
+  "catalog_file": "multi-variable-catalog.csv",
+  "iterable_columns": ["variable"],
+  "attributes": [
+    {
+      "column_name": "experiment",
+      "vocabulary": ""
+    },
+    {
+      "column_name": "case",
+      "vocabulary": ""
+    },
+    {
+      "column_name": "component",
+      "vocabulary": ""
+    },
+    {
+      "column_name": "stream",
+      "vocabulary": ""
+    },
+    { "column_name": "variable", "vocabulary": "" },
+    {
+      "column_name": "member_id",
+      "vocabulary": ""
+    }
+  ],
+  "assets": {
+    "column_name": "path",
+    "format": "netcdf"
+  },
+
+  "aggregation_control": {
+    "variable_column_name": "variable",
+    "groupby_attrs": ["component", "experiment", "stream"],
+    "aggregations": [
+      {
+        "type": "join_new",
+        "attribute_name": "member_id",
+        "options": { "coords": "minimal", "compat": "override" }
+      },
+      {
+        "type": "join_existing",
+        "attribute_name": "time_range",
+        "options": { "dim": "time" }
+      },
+      {
+        "type": "union",
+        "attribute_name": "variable"
+      }
+    ]
+  }
+}