Skip to content

Commit 3e3fe10

Browse files
committed
Simplify API: remove .filter(), use callable datasets
1 parent 6d27ad5 commit 3e3fe10

7 files changed

Lines changed: 20 additions & 21 deletions

File tree

CHANGELOG.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ Access Bright Data's pre-collected datasets with filtering and export capabilities
1010
```python
1111
async with BrightDataClient() as client:
1212
# Filter dataset records
13-
snapshot_id = await client.datasets.amazon_products.filter(
13+
snapshot_id = await client.datasets.amazon_products(
1414
filter={"name": "rating", "operator": ">=", "value": 4.5},
1515
records_limit=100
1616
)

MANIFEST.in

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,3 @@ include CHANGELOG.md
44
include pyproject.toml
55
recursive-include src *.py
66
recursive-include src *.typed
7-

notebooks/datasets/amazon/amazon.ipynb

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -217,7 +217,7 @@
217217
"print(f\"Records limit: {LIMIT}\\n\")\n",
218218
"\n",
219219
"async with client:\n",
220-
" snapshot_id = await client.datasets.amazon_products.filter(\n",
220+
" snapshot_id = await client.datasets.amazon_products(\n",
221221
" filter=FILTER,\n",
222222
" records_limit=LIMIT\n",
223223
" )\n",
@@ -304,7 +304,7 @@
304304
"print(f\"Records limit: 5\\n\")\n",
305305
"\n",
306306
"async with client:\n",
307-
" snapshot_id = await client.datasets.amazon_products.filter(\n",
307+
" snapshot_id = await client.datasets.amazon_products(\n",
308308
" filter=PRICE_FILTER,\n",
309309
" records_limit=5\n",
310310
" )\n",
@@ -358,7 +358,7 @@
358358
"print(f\"Records limit: 5\\n\")\n",
359359
"\n",
360360
"async with client:\n",
361-
" snapshot_id = await client.datasets.amazon_products.filter(\n",
361+
" snapshot_id = await client.datasets.amazon_products(\n",
362362
" filter=PRIME_FILTER,\n",
363363
" records_limit=5\n",
364364
" )\n",
@@ -413,7 +413,7 @@
413413
"print(f\"Records limit: 5\\n\")\n",
414414
"\n",
415415
"async with client:\n",
416-
" snapshot_id = await client.datasets.amazon_products.filter(\n",
416+
" snapshot_id = await client.datasets.amazon_products(\n",
417417
" filter=BRAND_FILTER,\n",
418418
" records_limit=5\n",
419419
" )\n",

notebooks/datasets/crunchbase/crunchbase.ipynb

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,7 @@
188188
"print(f\"Records limit: {LIMIT}\\n\")\n",
189189
"\n",
190190
"async with client:\n",
191-
" snapshot_id = await client.datasets.crunchbase_companies.filter(\n",
191+
" snapshot_id = await client.datasets.crunchbase_companies(\n",
192192
" filter=FILTER,\n",
193193
" records_limit=LIMIT\n",
194194
" )\n",
@@ -232,7 +232,7 @@
232232
"print(f\"Records limit: 5\\n\")\n",
233233
"\n",
234234
"async with client:\n",
235-
" snapshot_id = await client.datasets.crunchbase_companies.filter(\n",
235+
" snapshot_id = await client.datasets.crunchbase_companies(\n",
236236
" filter=EMPLOYEE_FILTER,\n",
237237
" records_limit=5\n",
238238
" )\n",
@@ -276,7 +276,7 @@
276276
"print(f\"Records limit: 5\\n\")\n",
277277
"\n",
278278
"async with client:\n",
279-
" snapshot_id = await client.datasets.crunchbase_companies.filter(\n",
279+
" snapshot_id = await client.datasets.crunchbase_companies(\n",
280280
" filter=COUNTRY_FILTER,\n",
281281
" records_limit=5\n",
282282
" )\n",
@@ -319,7 +319,7 @@
319319
"print(f\"Records limit: 5\\n\")\n",
320320
"\n",
321321
"async with client:\n",
322-
" snapshot_id = await client.datasets.crunchbase_companies.filter(\n",
322+
" snapshot_id = await client.datasets.crunchbase_companies(\n",
323323
" filter=FUNDED_FILTER,\n",
324324
" records_limit=5\n",
325325
" )\n",
@@ -365,7 +365,7 @@
365365
"print(f\"Records limit: 5\\n\")\n",
366366
"\n",
367367
"async with client:\n",
368-
" snapshot_id = await client.datasets.crunchbase_companies.filter(\n",
368+
" snapshot_id = await client.datasets.crunchbase_companies(\n",
369369
" filter=IPO_FILTER,\n",
370370
" records_limit=5\n",
371371
" )\n",
@@ -416,7 +416,7 @@
416416
"print(f\"Records limit: 5\\n\")\n",
417417
"\n",
418418
"async with client:\n",
419-
" snapshot_id = await client.datasets.crunchbase_companies.filter(\n",
419+
" snapshot_id = await client.datasets.crunchbase_companies(\n",
420420
" filter=FUNDED_FILTER,\n",
421421
" records_limit=5\n",
422422
" )\n",
@@ -458,7 +458,7 @@
458458
"print(f\"Records limit: 5\\n\")\n",
459459
"\n",
460460
"async with client:\n",
461-
" snapshot_id = await client.datasets.crunchbase_companies.filter(\n",
461+
" snapshot_id = await client.datasets.crunchbase_companies(\n",
462462
" filter=IPO_FILTER,\n",
463463
" records_limit=5\n",
464464
" )\n",

notebooks/datasets/linkedin/linkedin.ipynb

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -431,7 +431,7 @@
431431
"print(f\"Records limit: {LIMIT}\\n\")\n",
432432
"\n",
433433
"async with client:\n",
434-
" snapshot_id = await client.datasets.linkedin_profiles.filter(\n",
434+
" snapshot_id = await client.datasets.linkedin_profiles(\n",
435435
" filter=FILTER,\n",
436436
" records_limit=LIMIT\n",
437437
" )\n",
@@ -586,7 +586,7 @@
586586
"execution_count": null,
587587
"metadata": {},
588588
"outputs": [],
589-
"source": "# Step 1: Create filter\nCOMBINED_FILTER = {\n \"operator\": \"and\",\n \"filters\": [\n {\"name\": \"country_code\", \"operator\": \"=\", \"value\": \"US\"},\n {\"name\": \"followers\", \"operator\": \">\", \"value\": 5000}\n ]\n}\n\nprint(\"Filter: US-based profiles with 5000+ followers\")\nprint(f\"Records limit: 5\\n\")\n\nasync with client:\n snapshot_id = await client.datasets.linkedin_profiles.filter(\n filter=COMBINED_FILTER,\n records_limit=5\n )\n\nprint(f\"Snapshot created: {snapshot_id}\")"
589+
"source": "# Step 1: Create filter\nCOMBINED_FILTER = {\n \"operator\": \"and\",\n \"filters\": [\n {\"name\": \"country_code\", \"operator\": \"=\", \"value\": \"US\"},\n {\"name\": \"followers\", \"operator\": \">\", \"value\": 5000}\n ]\n}\n\nprint(\"Filter: US-based profiles with 5000+ followers\")\nprint(f\"Records limit: 5\\n\")\n\nasync with client:\n snapshot_id = await client.datasets.linkedin_profiles(\n filter=COMBINED_FILTER,\n records_limit=5\n )\n\nprint(f\"Snapshot created: {snapshot_id}\")"
590590
},
591591
{
592592
"cell_type": "code",
@@ -610,7 +610,7 @@
610610
"execution_count": null,
611611
"metadata": {},
612612
"outputs": [],
613-
"source": "# Step 1: Create filter\nCOMPANY_FILTER = {\n \"name\": \"company_size\",\n \"operator\": \"=\",\n \"value\": \"1001-5000 employees\"\n}\n\nprint(f\"Filter: {COMPANY_FILTER}\")\nprint(f\"Records limit: 5\\n\")\n\nasync with client:\n snapshot_id = await client.datasets.linkedin_companies.filter(\n filter=COMPANY_FILTER,\n records_limit=5\n )\n\nprint(f\"Snapshot created: {snapshot_id}\")"
613+
"source": "# Step 1: Create filter\nCOMPANY_FILTER = {\n \"name\": \"company_size\",\n \"operator\": \"=\",\n \"value\": \"1001-5000 employees\"\n}\n\nprint(f\"Filter: {COMPANY_FILTER}\")\nprint(f\"Records limit: 5\\n\")\n\nasync with client:\n snapshot_id = await client.datasets.linkedin_companies(\n filter=COMPANY_FILTER,\n records_limit=5\n )\n\nprint(f\"Snapshot created: {snapshot_id}\")"
614614
},
615615
{
616616
"cell_type": "code",

requirements-dev.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,4 +7,3 @@ black>=23.0.0
77
ruff>=0.1.0
88
mypy>=1.5.0
99
pre-commit>=3.4.0
10-

src/brightdata/datasets/base.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,8 @@ class BaseDataset:
2222
"""
2323
Base class for all dataset types.
2424
25-
Provides common methods: get_metadata(), filter(), get_status(), download().
25+
Provides common methods: get_metadata(), get_status(), download().
26+
Call the dataset directly to filter: await dataset(filter=..., records_limit=...)
2627
Subclasses set their own DATASET_ID and can add dataset-specific helpers.
2728
"""
2829

@@ -60,7 +61,7 @@ async def get_metadata(self) -> DatasetMetadata:
6061
self._metadata = DatasetMetadata.from_dict(data)
6162
return self._metadata
6263

63-
async def filter(
64+
async def __call__(
6465
self,
6566
filter: Dict[str, Any],
6667
records_limit: Optional[int] = None,
@@ -106,7 +107,7 @@ async def get_status(self, snapshot_id: str) -> SnapshotStatus:
106107
Check snapshot status.
107108
108109
Args:
109-
snapshot_id: Snapshot ID from filter()
110+
snapshot_id: Snapshot ID from calling the dataset
110111
111112
Returns:
112113
SnapshotStatus with status field: "scheduled", "building", "ready", or "failed"
@@ -130,7 +131,7 @@ async def download(
130131
Polls until snapshot is ready, then downloads and returns data.
131132
132133
Args:
133-
snapshot_id: Snapshot ID from filter()
134+
snapshot_id: Snapshot ID from calling the dataset
134135
format: Response format (json, jsonl, csv)
135136
timeout: Max seconds to wait for snapshot to be ready
136137
poll_interval: Seconds between status checks

0 commit comments

Comments
 (0)