Add option to include timestamps without values when fetching data via get_samples_aggregate() (#147)

wojciech-bochniarz-4ss · web-flow · commit 97ff9ee8ccfb · 2026-03-02T13:38:27.000+01:00
diff --git a/datareservoirio/client.py b/datareservoirio/client.py
@@ -431,6 +431,7 @@ def get(
             df = pd.DataFrame(columns=("index", "values")).astype({"index": "int64"})
 
         try:
+            # TODO: under pandas 3 this .loc[start:end] slice breaks when start and end are None; the root cause has not been investigated yet.
             series = (
                 df.set_index("index").squeeze("columns").loc[start:end].copy(deep=True)
             )
@@ -466,6 +467,7 @@ def get_samples_aggregate(
         aggregation_period=None,
         aggregation_function=None,
         max_page_size=_DEFAULT_MAX_PAGE_SIZE,
+        include_empty_aggregations=False,
     ):
         """
         Retrieve a series from DataReservoir.io using the samples/aggregate endpoint.
@@ -489,6 +491,8 @@ def get_samples_aggregate(
         max_page_size : optional
             Maximum number of samples to return per page. The method automatically follows links
             to next pages and returns the entire series. For advanced usage.
+        include_empty_aggregations : optional
+            Whether to include aggregation periods that contain no data in the returned series (as NaN values). Default is False.
         Returns
         -------
         pandas.Series
@@ -550,6 +554,7 @@ def get_samples_aggregate(
         params["aggregationFunction"] = aggregation_function
         params["start"] = start.isoformat()
         params["end"] = end.isoformat()
+        params["includeEmptyAggregations"] = include_empty_aggregations
 
         next_page_link = f"{environment.api_base_url}reservoir/timeseries/{series_id}/samples/aggregate?{urlencode(params)}"
 
diff --git a/docs/user_guide/access_data.rst b/docs/user_guide/access_data.rst
@@ -29,12 +29,19 @@ is *"tick"* (100 nanoseconds).
                             aggregation_period='15m',
                             aggregation_function='mean')
 
-    # Get all data for selected time period
+    # Get all available data for selected time period
     timeseries = client.get_samples_aggregate(series_id, 
                             start='2024-01-01', end='2024-01-02', 
                             aggregation_period='tick',
                             aggregation_function='mean')
 
+    # Get data aggregated to 1-minute intervals, including intervals that contain no data. Values for empty intervals are NaN.
+    timeseries = client.get_samples_aggregate(series_id, 
+                            start='2024-01-01', end='2024-01-02', 
+                            aggregation_period='1m',
+                            aggregation_function='mean',
+                            include_empty_aggregations=True)                        
+
 .. note::
 
     :py:meth:`Client.get_samples_aggregate` returns a :py:class:`pandas.Series`. The :py:mod:`start`, :py:mod:`end`, :py:mod:`aggregation_period` and :py:mod:`aggregation_function` parameters are required.   
diff --git a/docs/user_guide/advanced_config.rst b/docs/user_guide/advanced_config.rst
@@ -151,4 +151,84 @@ Using the :py:mod:`max_page_size` parameter in :py:mod:`get_samples_aggregate` m
 
 The :py:meth:`Client.get_samples_aggregate` method uses an endpoint that has support for paging of responses. This means that instead of making one big request, it might make a series of smaller requests traversing links to next pages returned in each partial response.
 
-Normally this is something you don't have to think about. In case you do want to change the maximum number of results returned in one page, you can use the parameter called ``max_page_size`` to alter this number. 
+Normally this is something you don't have to think about. In case you do want to change the maximum number of results returned in one page, you can use the parameter called ``max_page_size`` to alter this number.
+
+Using the :py:mod:`include_empty_aggregations` parameter in :py:mod:`get_samples_aggregate` method
+---------------------------------------------------------------------------------------------------
+
+The :py:meth:`Client.get_samples_aggregate` method aggregates data into fixed intervals based on the ``aggregation_period`` parameter. By default, the method only returns aggregations that contain data.
+
+The ``include_empty_aggregations`` parameter controls whether to include aggregation intervals that have no data points. This is useful when you need a complete time series with regular intervals, even for periods where no measurements were recorded.
+
+**Default behavior (include_empty_aggregations=False):**
+
+When ``include_empty_aggregations`` is ``False`` (default), only aggregations with data are returned. This results in a sparse series that may have gaps.
+
+.. code-block:: python
+
+    import datareservoirio as drio
+
+    auth = drio.Authenticator()
+    client = drio.Client(auth)
+
+    # Returns only aggregations with data
+    timeseries = client.get_samples_aggregate(
+        'your-series-id',
+        start='2026-02-23',
+        end='2026-02-24',
+        aggregation_period='1m',
+        aggregation_function='mean',
+        include_empty_aggregations=False  # Default
+    )
+
+    print(timeseries)
+
+    # Result will only include time intervals that have data.
+    # 2026-02-23 00:03:00+00:00   2.2
+    # 2026-02-23 23:56:00+00:00   1.0
+
+**With empty aggregations (include_empty_aggregations=True):**
+
+When ``include_empty_aggregations`` is ``True``, all aggregation intervals within the specified time range are returned, with ``NaN`` (Not a Number) values for intervals that contain no data.
+
+.. code-block:: python
+
+    import datareservoirio as drio
+
+    auth = drio.Authenticator()
+    client = drio.Client(auth)
+
+    # Returns all aggregations, including those with no data
+    timeseries = client.get_samples_aggregate(
+        'your-series-id',
+        start='2026-02-23',
+        end='2026-02-24',
+        aggregation_period='1m',
+        aggregation_function='mean',
+        include_empty_aggregations=True
+    )
+    
+    print(timeseries)
+
+    # Result has a complete time series with NaN values where data is missing
+    # 2026-02-23 00:00:00+00:00   NaN
+    # 2026-02-23 00:01:00+00:00   NaN
+    # 2026-02-23 00:02:00+00:00   NaN
+    # 2026-02-23 00:03:00+00:00   2.2
+    # 2026-02-23 00:04:00+00:00   NaN
+    #                             ..
+    # 2026-02-23 23:55:00+00:00   NaN
+    # 2026-02-23 23:56:00+00:00   1.0
+    # 2026-02-23 23:57:00+00:00   NaN
+    # 2026-02-23 23:58:00+00:00   NaN
+    # 2026-02-23 23:59:00+00:00   NaN
+
+**Use Cases:**
+
+* **Analysis requiring regular intervals:** Set ``include_empty_aggregations=True`` when your analysis requires evenly-spaced data points (e.g., time-series forecasting models that expect regular intervals).
+
+* **Detecting data gaps:** Set ``include_empty_aggregations=True`` if you need to identify periods with missing measurements.
+
+* **Visualization:** Set ``include_empty_aggregations=True`` when creating time-series plots that should display the full time range uniformly.
+
+* **Memory efficiency:** Use ``include_empty_aggregations=False`` (default) if storage or memory is a concern and you only need data-bearing intervals. 
diff --git a/pyproject.toml b/pyproject.toml
@@ -21,7 +21,7 @@ classifiers = [
 dependencies = [
   "numpy",
   "oauthlib",
-  "pandas",
+  "pandas < 3",
   "pyarrow",
   "requests",
   "requests-oauthlib",