Simplify API: remove .filter(), use callable datasets

karaposu · karaposu · commit 3e3fe104a8b8 · 2026-02-16T12:36:34.000+03:00
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -10,7 +10,7 @@ Access Bright Data's pre-collected datasets with filtering and export capabiliti
 ```python
 async with BrightDataClient() as client:
     # Filter dataset records
-    snapshot_id = await client.datasets.amazon_products.filter(
+    snapshot_id = await client.datasets.amazon_products(
         filter={"name": "rating", "operator": ">=", "value": 4.5},
         records_limit=100
     )
diff --git a/MANIFEST.in b/MANIFEST.in
@@ -4,4 +4,3 @@ include CHANGELOG.md
 include pyproject.toml
 recursive-include src *.py
 recursive-include src *.typed
-
diff --git a/notebooks/datasets/amazon/amazon.ipynb b/notebooks/datasets/amazon/amazon.ipynb
@@ -217,7 +217,7 @@
     "print(f\"Records limit: {LIMIT}\\n\")\n",
     "\n",
     "async with client:\n",
-    "    snapshot_id = await client.datasets.amazon_products.filter(\n",
+    "    snapshot_id = await client.datasets.amazon_products(\n",
     "        filter=FILTER,\n",
     "        records_limit=LIMIT\n",
     "    )\n",
@@ -304,7 +304,7 @@
     "print(f\"Records limit: 5\\n\")\n",
     "\n",
     "async with client:\n",
-    "    snapshot_id = await client.datasets.amazon_products.filter(\n",
+    "    snapshot_id = await client.datasets.amazon_products(\n",
     "        filter=PRICE_FILTER,\n",
     "        records_limit=5\n",
     "    )\n",
@@ -358,7 +358,7 @@
     "print(f\"Records limit: 5\\n\")\n",
     "\n",
     "async with client:\n",
-    "    snapshot_id = await client.datasets.amazon_products.filter(\n",
+    "    snapshot_id = await client.datasets.amazon_products(\n",
     "        filter=PRIME_FILTER,\n",
     "        records_limit=5\n",
     "    )\n",
@@ -413,7 +413,7 @@
     "print(f\"Records limit: 5\\n\")\n",
     "\n",
     "async with client:\n",
-    "    snapshot_id = await client.datasets.amazon_products.filter(\n",
+    "    snapshot_id = await client.datasets.amazon_products(\n",
     "        filter=BRAND_FILTER,\n",
     "        records_limit=5\n",
     "    )\n",
diff --git a/notebooks/datasets/crunchbase/crunchbase.ipynb b/notebooks/datasets/crunchbase/crunchbase.ipynb
@@ -188,7 +188,7 @@
     "print(f\"Records limit: {LIMIT}\\n\")\n",
     "\n",
     "async with client:\n",
-    "    snapshot_id = await client.datasets.crunchbase_companies.filter(\n",
+    "    snapshot_id = await client.datasets.crunchbase_companies(\n",
     "        filter=FILTER,\n",
     "        records_limit=LIMIT\n",
     "    )\n",
@@ -232,7 +232,7 @@
     "print(f\"Records limit: 5\\n\")\n",
     "\n",
     "async with client:\n",
-    "    snapshot_id = await client.datasets.crunchbase_companies.filter(\n",
+    "    snapshot_id = await client.datasets.crunchbase_companies(\n",
     "        filter=EMPLOYEE_FILTER,\n",
     "        records_limit=5\n",
     "    )\n",
@@ -276,7 +276,7 @@
     "print(f\"Records limit: 5\\n\")\n",
     "\n",
     "async with client:\n",
-    "    snapshot_id = await client.datasets.crunchbase_companies.filter(\n",
+    "    snapshot_id = await client.datasets.crunchbase_companies(\n",
     "        filter=COUNTRY_FILTER,\n",
     "        records_limit=5\n",
     "    )\n",
@@ -319,7 +319,7 @@
     "print(f\"Records limit: 5\\n\")\n",
     "\n",
     "async with client:\n",
-    "    snapshot_id = await client.datasets.crunchbase_companies.filter(\n",
+    "    snapshot_id = await client.datasets.crunchbase_companies(\n",
     "        filter=FUNDED_FILTER,\n",
     "        records_limit=5\n",
     "    )\n",
@@ -365,7 +365,7 @@
     "print(f\"Records limit: 5\\n\")\n",
     "\n",
     "async with client:\n",
-    "    snapshot_id = await client.datasets.crunchbase_companies.filter(\n",
+    "    snapshot_id = await client.datasets.crunchbase_companies(\n",
     "        filter=IPO_FILTER,\n",
     "        records_limit=5\n",
     "    )\n",
@@ -416,7 +416,7 @@
     "print(f\"Records limit: 5\\n\")\n",
     "\n",
     "async with client:\n",
-    "    snapshot_id = await client.datasets.crunchbase_companies.filter(\n",
+    "    snapshot_id = await client.datasets.crunchbase_companies(\n",
     "        filter=FUNDED_FILTER,\n",
     "        records_limit=5\n",
     "    )\n",
@@ -458,7 +458,7 @@
     "print(f\"Records limit: 5\\n\")\n",
     "\n",
     "async with client:\n",
-    "    snapshot_id = await client.datasets.crunchbase_companies.filter(\n",
+    "    snapshot_id = await client.datasets.crunchbase_companies(\n",
     "        filter=IPO_FILTER,\n",
     "        records_limit=5\n",
     "    )\n",
diff --git a/notebooks/datasets/linkedin/linkedin.ipynb b/notebooks/datasets/linkedin/linkedin.ipynb
@@ -431,7 +431,7 @@
     "print(f\"Records limit: {LIMIT}\\n\")\n",
     "\n",
     "async with client:\n",
-    "    snapshot_id = await client.datasets.linkedin_profiles.filter(\n",
+    "    snapshot_id = await client.datasets.linkedin_profiles(\n",
     "        filter=FILTER,\n",
     "        records_limit=LIMIT\n",
     "    )\n",
@@ -586,7 +586,7 @@
    "execution_count": null,
    "metadata": {},
    "outputs": [],
-   "source": "# Step 1: Create filter\nCOMBINED_FILTER = {\n    \"operator\": \"and\",\n    \"filters\": [\n        {\"name\": \"country_code\", \"operator\": \"=\", \"value\": \"US\"},\n        {\"name\": \"followers\", \"operator\": \">\", \"value\": 5000}\n    ]\n}\n\nprint(\"Filter: US-based profiles with 5000+ followers\")\nprint(f\"Records limit: 5\\n\")\n\nasync with client:\n    snapshot_id = await client.datasets.linkedin_profiles.filter(\n        filter=COMBINED_FILTER,\n        records_limit=5\n    )\n\nprint(f\"Snapshot created: {snapshot_id}\")"
+   "source": "# Step 1: Create filter\nCOMBINED_FILTER = {\n    \"operator\": \"and\",\n    \"filters\": [\n        {\"name\": \"country_code\", \"operator\": \"=\", \"value\": \"US\"},\n        {\"name\": \"followers\", \"operator\": \">\", \"value\": 5000}\n    ]\n}\n\nprint(\"Filter: US-based profiles with 5000+ followers\")\nprint(f\"Records limit: 5\\n\")\n\nasync with client:\n    snapshot_id = await client.datasets.linkedin_profiles(\n        filter=COMBINED_FILTER,\n        records_limit=5\n    )\n\nprint(f\"Snapshot created: {snapshot_id}\")"
   },
   {
    "cell_type": "code",
@@ -610,7 +610,7 @@
    "execution_count": null,
    "metadata": {},
    "outputs": [],
-   "source": "# Step 1: Create filter\nCOMPANY_FILTER = {\n    \"name\": \"company_size\",\n    \"operator\": \"=\",\n    \"value\": \"1001-5000 employees\"\n}\n\nprint(f\"Filter: {COMPANY_FILTER}\")\nprint(f\"Records limit: 5\\n\")\n\nasync with client:\n    snapshot_id = await client.datasets.linkedin_companies.filter(\n        filter=COMPANY_FILTER,\n        records_limit=5\n    )\n\nprint(f\"Snapshot created: {snapshot_id}\")"
+   "source": "# Step 1: Create filter\nCOMPANY_FILTER = {\n    \"name\": \"company_size\",\n    \"operator\": \"=\",\n    \"value\": \"1001-5000 employees\"\n}\n\nprint(f\"Filter: {COMPANY_FILTER}\")\nprint(f\"Records limit: 5\\n\")\n\nasync with client:\n    snapshot_id = await client.datasets.linkedin_companies(\n        filter=COMPANY_FILTER,\n        records_limit=5\n    )\n\nprint(f\"Snapshot created: {snapshot_id}\")"
   },
   {
    "cell_type": "code",
diff --git a/requirements-dev.txt b/requirements-dev.txt
@@ -7,4 +7,3 @@ black>=23.0.0
 ruff>=0.1.0
 mypy>=1.5.0
 pre-commit>=3.4.0
-
diff --git a/src/brightdata/datasets/base.py b/src/brightdata/datasets/base.py
@@ -22,7 +22,8 @@ class BaseDataset:
     """
     Base class for all dataset types.
 
-    Provides common methods: get_metadata(), filter(), get_status(), download().
+    Provides common methods: get_metadata(), get_status(), download().
+    Call the dataset directly to filter: await dataset(filter=..., records_limit=...)
     Subclasses set their own DATASET_ID and can add dataset-specific helpers.
     """
 
@@ -60,7 +61,7 @@ async def get_metadata(self) -> DatasetMetadata:
             self._metadata = DatasetMetadata.from_dict(data)
         return self._metadata
 
-    async def filter(
+    async def __call__(
         self,
         filter: Dict[str, Any],
         records_limit: Optional[int] = None,
@@ -106,7 +107,7 @@ async def get_status(self, snapshot_id: str) -> SnapshotStatus:
         Check snapshot status.
 
         Args:
-            snapshot_id: Snapshot ID from filter()
+            snapshot_id: Snapshot ID returned by calling the dataset
 
         Returns:
             SnapshotStatus with status field: "scheduled", "building", "ready", or "failed"
@@ -130,7 +131,7 @@ async def download(
         Polls until snapshot is ready, then downloads and returns data.
 
         Args:
-            snapshot_id: Snapshot ID from filter()
+            snapshot_id: Snapshot ID returned by calling the dataset
             format: Response format (json, jsonl, csv)
             timeout: Max seconds to wait for snapshot to be ready
             poll_interval: Seconds between status checks

Original file line number	Diff line number	Diff line change
`@@ -10,7 +10,7 @@ Access Bright Data's pre-collected datasets with filtering and export capabiliti`
`10`	`10`	```python
`11`	`11`	`async with BrightDataClient() as client:`
`12`	`12`	`# Filter dataset records`
`13`		`- snapshot_id = await client.datasets.amazon_products.filter(`
	`13`	`+ snapshot_id = await client.datasets.amazon_products(`
`14`	`14`	`filter={"name": "rating", "operator": ">=", "value": 4.5},`
`15`	`15`	`records_limit=100`
`16`	`16`	`)`