Skip to content
Open
4 changes: 2 additions & 2 deletions .github/workflows/ci-testing.yml
Original file line number Diff line number Diff line change
Expand Up @@ -62,14 +62,14 @@ jobs:
pytest tests \
--ignore=tests/processing \
--ignore=tests/raw \
-n 2 --cov=litdata --durations=0 --timeout=120 --capture=no --verbose
-n 2 --cov=litdata --durations=0 --timeout=150 --capture=no --verbose

- name: Run processing tests sequentially
run: |
# note that the tests listed here should match those ignored in the previous step
pytest \
tests/processing tests/raw \
--cov=litdata --cov-append --durations=0 --timeout=120 --capture=no --verbose
--cov=litdata --cov-append --durations=0 --timeout=150 --capture=no --verbose

- name: Statistics
continue-on-error: true
Expand Down
14 changes: 7 additions & 7 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ repos:
- id: detect-private-key

- repo: https://github.com/codespell-project/codespell
rev: v2.4.1
rev: v2.4.2
hooks:
- id: codespell
additional_dependencies: [tomli]
Expand All @@ -55,14 +55,14 @@ repos:
#args: ["--write-changes"] # uncomment if you want to get automatic fixing

- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.14.10
rev: v0.15.9
hooks:
- id: ruff
args: ["--fix"]
- id: ruff-format

- repo: https://github.com/executablebooks/mdformat
rev: 0.7.22
rev: 1.0.0
hooks:
- id: mdformat
additional_dependencies:
Expand All @@ -76,20 +76,20 @@ repos:
README.md
)$

- repo: https://github.com/pre-commit/mirrors-prettier
rev: v3.1.0
- repo: https://github.com/JoC0de/pre-commit-prettier
rev: v3.8.1
hooks:
- id: prettier
files: \.(json|yml|yaml|toml)
# https://prettier.io/docs/en/options.html#print-width
args: ["--print-width=120"]

- repo: https://github.com/tox-dev/pyproject-fmt
rev: v2.8.0
rev: v2.8.0 # pinned to avoid timeout issues in CI
hooks:
- id: pyproject-fmt
additional_dependencies: [tox]
- repo: https://github.com/abravalheri/validate-pyproject
rev: v0.24.1
rev: v0.25
hooks:
- id: validate-pyproject
12 changes: 5 additions & 7 deletions tests/streaming/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -544,7 +544,7 @@ def test_dataset_cache_recreation(tmpdir):
assert dataset.shuffler is shuffler # shuffler gets reused


@pytest.mark.timeout(30)
@pytest.mark.timeout(45)
def test_len_called_before_dataloader_drop_last(tmpdir):
cache = Cache(str(tmpdir), chunk_size=10)
for i in range(100):
Expand Down Expand Up @@ -1535,16 +1535,15 @@ def test_dataset_with_mosaic_mds_data(tmpdir):

dataset = StreamingDataset(input_dir=str(tmpdir))
dataloader = DataLoader(dataset, batch_size=4, drop_last=True)
i = 0
for batch in dataloader:

for i, batch in enumerate(dataloader):
assert len(batch["class"]) == 4
assert len(batch["image"]) == 4
assert list(batch["class"]) == [4 * i, 4 * i + 1, 4 * i + 2, 4 * i + 3]
i += 1

dataloader = DataLoader(dataset, batch_size=4, drop_last=False)
i = 0
for batch in dataloader:

for i, batch in enumerate(dataloader):
if i == 2:
# last batch is smaller than batch_size
assert len(batch["class"]) == 2
Expand All @@ -1554,7 +1553,6 @@ def test_dataset_with_mosaic_mds_data(tmpdir):
assert len(batch["class"]) == 4
assert len(batch["image"]) == 4
assert list(batch["class"]) == [4 * i, 4 * i + 1, 4 * i + 2, 4 * i + 3]
i += 1


@pytest.mark.parametrize("shuffle", [True, False])
Expand Down
Loading