diff --git a/python/docs/source/tutorial/sql/python_data_source.rst b/python/docs/source/tutorial/sql/python_data_source.rst
index 07f35722e73ff..259006266c5ac 100644
--- a/python/docs/source/tutorial/sql/python_data_source.rst
+++ b/python/docs/source/tutorial/sql/python_data_source.rst
@@ -238,7 +238,7 @@ This is a dummy streaming data reader that generates 2 rows in every microbatch.
         """
         return {"offset": 0}
 
-    def latestOffset(self) -> dict:
+    def latestOffset(self, start: dict, limit) -> dict:
         """
         Return the current latest offset that the next microbatch will read to.
         """
diff --git a/python/pyspark/sql/datasource.py b/python/pyspark/sql/datasource.py
index 062dcec5c1003..1985415a6ec93 100644
--- a/python/pyspark/sql/datasource.py
+++ b/python/pyspark/sql/datasource.py
@@ -756,7 +756,7 @@ def latestOffset(self, start: dict, limit: ReadLimit) -> dict:
         ...         if isinstance(limit, ReadAllAvailable):
         ...             return {"index": start["index"] + 10}
         ...         else:  # e.g., limit is ReadMaxRows(5)
-        ...             return {"index": start["index"] + min(10, limit.maxRows)}
+        ...             return {"index": start["index"] + min(10, limit.max_rows)}
         """
         # NOTE: Previous Spark versions didn't have start offset and read limit parameters for this
         # method. While Spark will ensure the backward compatibility for existing data sources, the