Skip to content

Commit 11ec3d6

Browse files
committed
precommit
1 parent a9ad840 commit 11ec3d6

4 files changed

Lines changed: 34 additions & 19 deletions

File tree

docs/dataset_manager/DESIGN.md

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -103,16 +103,16 @@ Transforms are composed in order; each receives the output of the previous.
103103
Registered in `dataset.py` under `Dataset.PREDEFINED`. Referenced by name in rulesets and YAML
104104
configs. Each predefined dataset ships with default transforms for supported model families.
105105

106-
| Name | Source | Notes |
107-
| --------------------------- | ------------- | ------------------------------------------ |
108-
| `aime25` | AIME 2025 | Math reasoning |
109-
| `gpqa` | GPQA Diamond | Science QA |
110-
| `cnndailymail` | CNN/DailyMail | Summarization |
111-
| `open_orca` | OpenOrca | General instruction |
112-
| `livecodebench` | LiveCodeBench | Code generation; requires additional setup |
113-
| `shopify_product_catalogue` | Shopify | E-commerce Q&A (q3vl) |
106+
| Name | Source | Notes |
107+
| ------------------------------ | ------------- | ----------------------------------------------------- |
108+
| `aime25` | AIME 2025 | Math reasoning |
109+
| `gpqa` | GPQA Diamond | Science QA |
110+
| `cnndailymail` | CNN/DailyMail | Summarization |
111+
| `open_orca` | OpenOrca | General instruction |
112+
| `livecodebench` | LiveCodeBench | Code generation; requires additional setup |
113+
| `shopify_product_catalogue` | Shopify | E-commerce Q&A (q3vl) |
114114
| `shopify_product_catalogue_8k` | Shopify | 8k sample variant of Shopify product catalogue (q3vl) |
115-
| `random` | Synthetic | Generated prompts for throughput testing |
115+
| `random` | Synthetic | Generated prompts for throughput testing |
116116

117117
## Preset System
118118

src/inference_endpoint/dataset_manager/__init__.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -27,7 +27,10 @@
2727
from .predefined.livecodebench import LiveCodeBench
2828
from .predefined.open_orca import OpenOrca
2929
from .predefined.random import RandomDataset
30-
from .predefined.shopify_product_catalogue import ShopifyProductCatalogue, ShopifyProductCatalogue8k
30+
from .predefined.shopify_product_catalogue import (
31+
ShopifyProductCatalogue,
32+
ShopifyProductCatalogue8k,
33+
)
3134
from .transforms import (
3235
AddStaticColumns,
3336
ColumnFilter,

src/inference_endpoint/dataset_manager/predefined/shopify_product_catalogue/__init__.py

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -24,9 +24,10 @@
2424
from typing import Any, ClassVar
2525

2626
import pandas as pd
27-
from datasets import load_dataset
2827
from tqdm import tqdm
2928

29+
from datasets import load_dataset
30+
3031
from ...dataset import Dataset
3132
from . import presets
3233
from .metadata import ProductMetadata
@@ -139,9 +140,7 @@ def generate(
139140
load_options["revision"] = revision
140141

141142
ds = load_dataset(cls.REPO_ID, split=split_key, **load_options)
142-
logger.info(
143-
f"Loaded {len(ds)} samples from {cls.REPO_ID} ({split_key})"
144-
)
143+
logger.info(f"Loaded {len(ds)} samples from {cls.REPO_ID} ({split_key})")
145144

146145
all_rows: list[dict[str, Any]] = []
147146
for i in tqdm(

tests/unit/dataset_manager/test_shopify_product_catalogue.py

Lines changed: 18 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -27,12 +27,12 @@
2727
from unittest.mock import patch
2828

2929
import pandas as pd
30+
from inference_endpoint.dataset_manager.dataset import Dataset
3031
from inference_endpoint.dataset_manager.predefined.shopify_product_catalogue import (
3132
BaseShopifyProductCatalogue,
3233
ShopifyProductCatalogue,
3334
ShopifyProductCatalogue8k,
3435
)
35-
from inference_endpoint.dataset_manager.dataset import Dataset
3636
from inference_endpoint.dataset_manager.predefined.shopify_product_catalogue.presets import (
3737
ShopifyMultimodalFormatter,
3838
q3vl,
@@ -372,7 +372,9 @@ def test_class_inherits_from_base(self) -> None:
372372

373373
def test_has_correct_repo_id(self) -> None:
374374
"""REPO_ID points to nvidia/Shopify-product-catalogue-8k."""
375-
assert ShopifyProductCatalogue8k.REPO_ID == "nvidia/Shopify-product-catalogue-8k"
375+
assert (
376+
ShopifyProductCatalogue8k.REPO_ID == "nvidia/Shopify-product-catalogue-8k"
377+
)
376378

377379
def test_has_correct_dataset_id(self) -> None:
378380
"""DATASET_ID is shopify_product_catalogue_8k."""
@@ -381,11 +383,17 @@ def test_has_correct_dataset_id(self) -> None:
381383
def test_registered_in_dataset_predefined(self) -> None:
382384
"""Class is auto-registered in Dataset.PREDEFINED."""
383385
assert "shopify_product_catalogue_8k" in Dataset.PREDEFINED
384-
assert Dataset.PREDEFINED["shopify_product_catalogue_8k"] is ShopifyProductCatalogue8k
386+
assert (
387+
Dataset.PREDEFINED["shopify_product_catalogue_8k"]
388+
is ShopifyProductCatalogue8k
389+
)
385390

386391
def test_shares_column_names_with_base(self) -> None:
387392
"""Column names are identical to ShopifyProductCatalogue."""
388-
assert ShopifyProductCatalogue8k.COLUMN_NAMES == ShopifyProductCatalogue.COLUMN_NAMES
393+
assert (
394+
ShopifyProductCatalogue8k.COLUMN_NAMES
395+
== ShopifyProductCatalogue.COLUMN_NAMES
396+
)
389397

390398
def test_shares_presets_with_base(self) -> None:
391399
"""Presets are shared with base class (q3vl works)."""
@@ -430,7 +438,12 @@ def test_generate_uses_correct_dataset_id_for_paths(
430438
force=True,
431439
)
432440
# Verify cache path uses shopify_product_catalogue_8k
433-
expected_path = tmp_path / "shopify_product_catalogue_8k" / "train" / "shopify_product_catalogue_8k_train.parquet"
441+
expected_path = (
442+
tmp_path
443+
/ "shopify_product_catalogue_8k"
444+
/ "train"
445+
/ "shopify_product_catalogue_8k_train.parquet"
446+
)
434447
assert expected_path.exists()
435448

436449
def test_get_dataloader_with_q3vl_preset(self, tmp_path: Path) -> None:

0 commit comments

Comments
 (0)