From 47a1378cac157362b1648cfb6cccfff9bb184623 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 6 Apr 2026 21:28:26 +0000 Subject: [PATCH 01/11] [pre-commit.ci] pre-commit suggestions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/codespell-project/codespell: v2.4.1 → v2.4.2](https://github.com/codespell-project/codespell/compare/v2.4.1...v2.4.2) - [github.com/astral-sh/ruff-pre-commit: v0.14.10 → v0.15.9](https://github.com/astral-sh/ruff-pre-commit/compare/v0.14.10...v0.15.9) - [github.com/executablebooks/mdformat: 0.7.22 → 1.0.0](https://github.com/executablebooks/mdformat/compare/0.7.22...1.0.0) - [github.com/pre-commit/mirrors-prettier: v3.1.0 → v4.0.0-alpha.8](https://github.com/pre-commit/mirrors-prettier/compare/v3.1.0...v4.0.0-alpha.8) - [github.com/tox-dev/pyproject-fmt: v2.8.0 → v2.21.0](https://github.com/tox-dev/pyproject-fmt/compare/v2.8.0...v2.21.0) - [github.com/abravalheri/validate-pyproject: v0.24.1 → v0.25](https://github.com/abravalheri/validate-pyproject/compare/v0.24.1...v0.25) --- .pre-commit-config.yaml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 725015af..80821f1c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -44,7 +44,7 @@ repos: - id: detect-private-key - repo: https://github.com/codespell-project/codespell - rev: v2.4.1 + rev: v2.4.2 hooks: - id: codespell additional_dependencies: [tomli] @@ -55,14 +55,14 @@ repos: #args: ["--write-changes"] # uncomment if you want to get automatic fixing - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.14.10 + rev: v0.15.9 hooks: - id: ruff args: ["--fix"] - id: ruff-format - repo: https://github.com/executablebooks/mdformat - rev: 0.7.22 + rev: 1.0.0 hooks: - id: mdformat additional_dependencies: @@ -77,7 +77,7 @@ repos: )$ - repo: https://github.com/pre-commit/mirrors-prettier - rev: v3.1.0 + rev: v4.0.0-alpha.8 hooks: - id: prettier files: \.(json|yml|yaml|toml) @@ -85,11 +85,11 @@ repos: args: ["--print-width=120"] - repo: https://github.com/tox-dev/pyproject-fmt - rev: v2.8.0 + rev: v2.21.0 hooks: - id: pyproject-fmt additional_dependencies: [tox] - repo: https://github.com/abravalheri/validate-pyproject - rev: v0.24.1 + rev: v0.25 hooks: - id: validate-pyproject From ea2a314c8bba859bea133b8bcb6a1deb29b9dba2 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 13 Apr 2026 13:24:28 +0000 Subject: [PATCH 02/11] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- pyproject.toml | 64 ++++++++++++++++++++++++-------------------------- 1 file changed, 31 insertions(+), 33 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index add0a0da..f3ec1725 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - [build-system] requires = [ "setuptools", "wheel" ] @@ -98,7 +97,7 @@ lint.pydocstyle.convention = "google" [tool.codespell] # Todo: enable also python files in a next step -#skip = '*.py' +# skip = '*.py' quiet-level = 3 # comma separated list of words; waiting for: # https://github.com/codespell-project/codespell/issues/2839#issuecomment-1731601603 @@ -113,30 +112,6 @@ wrap-summaries = 119 wrap-descriptions = 120 blank = true -[tool.pytest.ini_options] -testpaths = [ "tests" ] -norecursedirs = [ ".git", ".github", "dist", "build", "docs" ] -addopts = [ - "--strict-markers", - "--doctest-modules", - "--color=yes", - "--disable-pytest-warnings", - "--ignore=legacy/checkpoints", -] -markers = [ "cloud: Run the cloud tests for example" ] -filterwarnings = [ "error::FutureWarning" ] -xfail_strict = true -junit_duration_report = "call" - -[tool.coverage.report] -exclude_lines = [ - "pragma: no cover", - "warnings", - "pass", - "rank_zero_warn", - "raise NotImplementedError", -] - [tool.mypy] files = [ "src" ] # This section is for folders with "-" as they are not valid python modules @@ -165,11 +140,34 @@ allow_redefinition = "True" disable_error_code = "attr-defined" # style choices warn_no_return = "False" +overrides = [ + # Ignore mypy errors for these files + # TODO: the goal is for this to be empty + # the list can be generated with: + # mypy --no-error-summary 2>&1 | tr ':' ' ' | awk '{print $1}' | sort | uniq | sed 's/\.py//g; s|src/||g; s|\/|\.|g' | xargs -I {} echo '"{}",' + { module = [], ignore_errors = "True" }, +] + +[tool.pytest] +ini_options.testpaths = [ "tests" ] +ini_options.norecursedirs = [ ".git", ".github", "dist", "build", "docs" ] +ini_options.addopts = [ + "--strict-markers", + "--doctest-modules", + "--color=yes", + "--disable-pytest-warnings", + "--ignore=legacy/checkpoints", +] +ini_options.markers = [ "cloud: Run the cloud tests for example" ] +ini_options.filterwarnings = [ "error::FutureWarning" ] +ini_options.xfail_strict = true +ini_options.junit_duration_report = "call" -# Ignore mypy errors for these files -# TODO: the goal is for this to be empty -[[tool.mypy.overrides]] -# the list can be generated with: -# mypy --no-error-summary 2>&1 | tr ':' ' ' | awk '{print $1}' | sort | uniq | sed 's/\.py//g; s|src/||g; s|\/|\.|g' | xargs -I {} echo '"{}",' -module = [ ] -ignore_errors = "True" +[tool.coverage] +report.exclude_lines = [ + "pass", + "pragma: no cover", + "raise NotImplementedError", + "rank_zero_warn", + "warnings", +] From cedbc2275af2651a5b833740a10857ad4a236db8 Mon Sep 17 00:00:00 2001 From: deependujha Date: Thu, 16 Apr 2026 17:24:30 +0530 Subject: [PATCH 03/11] update --- tests/streaming/test_dataset.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/tests/streaming/test_dataset.py b/tests/streaming/test_dataset.py index 15a5babd..c1ba4858 100644 --- a/tests/streaming/test_dataset.py +++ b/tests/streaming/test_dataset.py @@ -1535,16 +1535,15 @@ def test_dataset_with_mosaic_mds_data(tmpdir): dataset = StreamingDataset(input_dir=str(tmpdir)) dataloader = DataLoader(dataset, batch_size=4, drop_last=True) - i = 0 - for batch in dataloader: + + for i, batch in enumerate(dataloader): assert len(batch["class"]) == 4 assert len(batch["image"]) == 4 assert list(batch["class"]) == [4 * i, 4 * i + 1, 4 * i + 2, 4 * i + 3] - i += 1 dataloader = DataLoader(dataset, batch_size=4, drop_last=False) - i = 0 - for batch in dataloader: + + for i, batch in enumerate(dataloader): if i == 2: # last batch is smaller than batch_size assert len(batch["class"]) == 2 @@ -1554,7 +1553,6 @@ def test_dataset_with_mosaic_mds_data(tmpdir): assert len(batch["class"]) == 4 assert len(batch["image"]) == 4 assert list(batch["class"]) == [4 * i, 4 * i + 1, 4 * i + 2, 4 * i + 3] - i += 1 @pytest.mark.parametrize("shuffle", [True, False]) From 5f13ac1fcaf51a4811dda83e1d7529c457dd7c44 Mon Sep 17 00:00:00 2001 From: Jirka Borovec <6035284+Borda@users.noreply.github.com> Date: Fri, 24 Apr 2026 11:33:01 +0200 Subject: [PATCH 04/11] JoC0de/pre-commit-prettier --- .pre-commit-config.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 80821f1c..2eddb0c5 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -76,8 +76,8 @@ repos: README.md )$ - - repo: https://github.com/pre-commit/mirrors-prettier - rev: v4.0.0-alpha.8 + - repo: https://github.com/JoC0de/pre-commit-prettier + rev: v3.8.1 hooks: - id: prettier files: \.(json|yml|yaml|toml) From db32df1bc38fff5408fa3874f99154bd2a149a59 Mon Sep 17 00:00:00 2001 From: deependujha Date: Sat, 25 Apr 2026 09:56:51 +0530 Subject: [PATCH 05/11] update --- .github/workflows/ci-testing.yml | 2 ++ tests/streaming/test_dataset.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci-testing.yml b/.github/workflows/ci-testing.yml index 4931d31e..302e0d5f 100644 --- a/.github/workflows/ci-testing.yml +++ b/.github/workflows/ci-testing.yml @@ -62,6 +62,7 @@ jobs: pytest tests \ --ignore=tests/processing \ --ignore=tests/raw \ + --ignore=tests/streaming/test_parquet.py::test_stream_hf_parquet_dataset -n 2 --cov=litdata --durations=0 --timeout=120 --capture=no --verbose - name: Run processing tests sequentially @@ -69,6 +70,7 @@ jobs: # note that the listed test should match ignored in the previous step pytest \ tests/processing tests/raw \ + tests/streaming/test_parquet.py::test_stream_hf_parquet_dataset[True-False] \ --cov=litdata --cov-append --durations=0 --timeout=120 --capture=no --verbose - name: Statistics diff --git a/tests/streaming/test_dataset.py b/tests/streaming/test_dataset.py index c1ba4858..953c934b 100644 --- a/tests/streaming/test_dataset.py +++ b/tests/streaming/test_dataset.py @@ -544,7 +544,7 @@ def test_dataset_cache_recreation(tmpdir): assert dataset.shuffler is shuffler # shuffler gets reused -@pytest.mark.timeout(30) +@pytest.mark.timeout(45) def test_len_called_before_dataloader_drop_last(tmpdir): cache = Cache(str(tmpdir), chunk_size=10) for i in range(100): From e18f542ad07cb3a80863a846476ece158817f136 Mon Sep 17 00:00:00 2001 From: deependujha Date: Sat, 25 Apr 2026 09:57:04 +0530 Subject: [PATCH 06/11] update --- .github/workflows/ci-testing.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci-testing.yml b/.github/workflows/ci-testing.yml index 302e0d5f..90a49b3c 100644 --- a/.github/workflows/ci-testing.yml +++ b/.github/workflows/ci-testing.yml @@ -70,7 +70,7 @@ jobs: # note that the listed test should match ignored in the previous step pytest \ tests/processing tests/raw \ - tests/streaming/test_parquet.py::test_stream_hf_parquet_dataset[True-False] \ + tests/streaming/test_parquet.py::test_stream_hf_parquet_dataset \ --cov=litdata --cov-append --durations=0 --timeout=120 --capture=no --verbose - name: Statistics From 4c8fe61f17a7e99764509a928384aaee816cd1fd Mon Sep 17 00:00:00 2001 From: deependujha Date: Sat, 25 Apr 2026 10:34:17 +0530 Subject: [PATCH 07/11] update --- .github/workflows/ci-testing.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci-testing.yml b/.github/workflows/ci-testing.yml index 90a49b3c..34c96094 100644 --- a/.github/workflows/ci-testing.yml +++ b/.github/workflows/ci-testing.yml @@ -62,7 +62,7 @@ jobs: pytest tests \ --ignore=tests/processing \ --ignore=tests/raw \ - --ignore=tests/streaming/test_parquet.py::test_stream_hf_parquet_dataset + --ignore=tests/streaming/test_parquet.py::test_stream_hf_parquet_dataset \ -n 2 --cov=litdata --durations=0 --timeout=120 --capture=no --verbose - name: Run processing tests sequentially From b57f920c3346af437e0fc7be67bd97742eeff25e Mon Sep 17 00:00:00 2001 From: deependujha Date: Sat, 25 Apr 2026 10:51:59 +0530 Subject: [PATCH 08/11] update --- .github/workflows/ci-testing.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci-testing.yml b/.github/workflows/ci-testing.yml index 34c96094..e3d4d654 100644 --- a/.github/workflows/ci-testing.yml +++ b/.github/workflows/ci-testing.yml @@ -62,7 +62,7 @@ jobs: pytest tests \ --ignore=tests/processing \ --ignore=tests/raw \ - --ignore=tests/streaming/test_parquet.py::test_stream_hf_parquet_dataset \ + --ignore=tests/streaming/test_parquet.py \ -n 2 --cov=litdata --durations=0 --timeout=120 --capture=no --verbose - name: Run processing tests sequentially @@ -70,7 +70,7 @@ jobs: # note that the listed test should match ignored in the previous step pytest \ tests/processing tests/raw \ - tests/streaming/test_parquet.py::test_stream_hf_parquet_dataset \ + tests/streaming/test_parquet.py \ --cov=litdata --cov-append --durations=0 --timeout=120 --capture=no --verbose - name: Statistics From 9b8fe0ad1c827858baec293f61af566291b9a16e Mon Sep 17 00:00:00 2001 From: deependujha Date: Sat, 25 Apr 2026 12:26:18 +0530 Subject: [PATCH 09/11] update --- .github/workflows/ci-testing.yml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci-testing.yml b/.github/workflows/ci-testing.yml index e3d4d654..31e3a076 100644 --- a/.github/workflows/ci-testing.yml +++ b/.github/workflows/ci-testing.yml @@ -62,16 +62,14 @@ jobs: pytest tests \ --ignore=tests/processing \ --ignore=tests/raw \ - --ignore=tests/streaming/test_parquet.py \ - -n 2 --cov=litdata --durations=0 --timeout=120 --capture=no --verbose + -n 2 --cov=litdata --durations=0 --timeout=150 --capture=no --verbose - name: Run processing tests sequentially run: | # note that the listed test should match ignored in the previous step pytest \ tests/processing tests/raw \ - tests/streaming/test_parquet.py \ - --cov=litdata --cov-append --durations=0 --timeout=120 --capture=no --verbose + --cov=litdata --cov-append --durations=0 --timeout=150 --capture=no --verbose - name: Statistics continue-on-error: true From 9c39874cd88765a3971a301bae5ba504156cc29e Mon Sep 17 00:00:00 2001 From: deependujha Date: Tue, 28 Apr 2026 11:54:16 +0530 Subject: [PATCH 10/11] pin pyproject-fmt --- .pre-commit-config.yaml | 2 +- pyproject.toml | 55 ++++++++++++++++++++++------------------- 2 files changed, 31 insertions(+), 26 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 2eddb0c5..8cdecce8 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -85,7 +85,7 @@ repos: args: ["--print-width=120"] - repo: https://github.com/tox-dev/pyproject-fmt - rev: v2.21.0 + rev: v2.8.0 # pinned to avoid timeout issues in CI hooks: - id: pyproject-fmt additional_dependencies: [tox] diff --git a/pyproject.toml b/pyproject.toml index f3ec1725..d9058e88 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + [build-system] requires = [ "setuptools", "wheel" ] @@ -112,6 +113,33 @@ wrap-summaries = 119 wrap-descriptions = 120 blank = true +[tool.pyproject-fmt] +keep_full_version = true + +[tool.pytest] +ini_options.testpaths = [ "tests" ] +ini_options.norecursedirs = [ ".git", ".github", "dist", "build", "docs" ] +ini_options.addopts = [ + "--strict-markers", + "--doctest-modules", + "--color=yes", + "--disable-pytest-warnings", + "--ignore=legacy/checkpoints", +] +ini_options.markers = [ "cloud: Run the cloud tests for example" ] +ini_options.filterwarnings = [ "error::FutureWarning" ] +ini_options.xfail_strict = true +ini_options.junit_duration_report = "call" + +[tool.coverage] +report.exclude_lines = [ + "pass", + "pragma: no cover", + "raise NotImplementedError", + "rank_zero_warn", + "warnings", +] + [tool.mypy] files = [ "src" ] # This section is for folders with "-" as they are not valid python modules @@ -145,29 +173,6 @@ overrides = [ # TODO: the goal is for this to be empty # the list can be generated with: # mypy --no-error-summary 2>&1 | tr ':' ' ' | awk '{print $1}' | sort | uniq | sed 's/\.py//g; s|src/||g; s|\/|\.|g' | xargs -I {} echo '"{}",' - { module = [], ignore_errors = "True" }, -] - -[tool.pytest] -ini_options.testpaths = [ "tests" ] -ini_options.norecursedirs = [ ".git", ".github", "dist", "build", "docs" ] -ini_options.addopts = [ - "--strict-markers", - "--doctest-modules", - "--color=yes", - "--disable-pytest-warnings", - "--ignore=legacy/checkpoints", -] -ini_options.markers = [ "cloud: Run the cloud tests for example" ] -ini_options.filterwarnings = [ "error::FutureWarning" ] -ini_options.xfail_strict = true -ini_options.junit_duration_report = "call" - -[tool.coverage] -report.exclude_lines = [ - "pass", - "pragma: no cover", - "raise NotImplementedError", - "rank_zero_warn", - "warnings", + { module = [ + ], ignore_errors = "True" }, ] From 7c47f2353ba1650d4d20519421ddc66640b33eec Mon Sep 17 00:00:00 2001 From: deependujha Date: Tue, 28 Apr 2026 11:55:40 +0530 Subject: [PATCH 11/11] update --- pyproject.toml | 47 ++++++++++++++++++++++------------------------- 1 file changed, 22 insertions(+), 25 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index d9058e88..add0a0da 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -98,7 +98,7 @@ lint.pydocstyle.convention = "google" [tool.codespell] # Todo: enable also python files in a next step -# skip = '*.py' +#skip = '*.py' quiet-level = 3 # comma separated list of words; waiting for: # https://github.com/codespell-project/codespell/issues/2839#issuecomment-1731601603 @@ -113,31 +113,28 @@ wrap-summaries = 119 wrap-descriptions = 120 blank = true -[tool.pyproject-fmt] -keep_full_version = true - -[tool.pytest] -ini_options.testpaths = [ "tests" ] -ini_options.norecursedirs = [ ".git", ".github", "dist", "build", "docs" ] -ini_options.addopts = [ +[tool.pytest.ini_options] +testpaths = [ "tests" ] +norecursedirs = [ ".git", ".github", "dist", "build", "docs" ] +addopts = [ "--strict-markers", "--doctest-modules", "--color=yes", "--disable-pytest-warnings", "--ignore=legacy/checkpoints", ] -ini_options.markers = [ "cloud: Run the cloud tests for example" ] -ini_options.filterwarnings = [ "error::FutureWarning" ] -ini_options.xfail_strict = true -ini_options.junit_duration_report = "call" +markers = [ "cloud: Run the cloud tests for example" ] +filterwarnings = [ "error::FutureWarning" ] +xfail_strict = true +junit_duration_report = "call" -[tool.coverage] -report.exclude_lines = [ - "pass", +[tool.coverage.report] +exclude_lines = [ "pragma: no cover", - "raise NotImplementedError", - "rank_zero_warn", "warnings", + "pass", + "rank_zero_warn", + "raise NotImplementedError", ] [tool.mypy] @@ -168,11 +165,11 @@ allow_redefinition = "True" disable_error_code = "attr-defined" # style choices warn_no_return = "False" -overrides = [ - # Ignore mypy errors for these files - # TODO: the goal is for this to be empty - # the list can be generated with: - # mypy --no-error-summary 2>&1 | tr ':' ' ' | awk '{print $1}' | sort | uniq | sed 's/\.py//g; s|src/||g; s|\/|\.|g' | xargs -I {} echo '"{}",' - { module = [ - ], ignore_errors = "True" }, -] + +# Ignore mypy errors for these files +# TODO: the goal is for this to be empty +[[tool.mypy.overrides]] +# the list can be generated with: +# mypy --no-error-summary 2>&1 | tr ':' ' ' | awk '{print $1}' | sort | uniq | sed 's/\.py//g; s|src/||g; s|\/|\.|g' | xargs -I {} echo '"{}",' +module = [ ] +ignore_errors = "True"