-
Notifications
You must be signed in to change notification settings - Fork 16
Expand file tree
/
Copy pathdataset_collection.py
More file actions
131 lines (105 loc) · 4.47 KB
/
dataset_collection.py
File metadata and controls
131 lines (105 loc) · 4.47 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
from __future__ import annotations
from typing import Any
from apify_client._docs import docs_group
from apify_client._models import Dataset, DatasetResponse, ListOfDatasets, ListOfDatasetsResponse
from apify_client._resource_clients._resource_client import ResourceClient, ResourceClientAsync
from apify_client._utils import filter_none_values
@docs_group('Resource clients')
class DatasetCollectionClient(ResourceClient):
    """Synchronous sub-client for the dataset collection.

    Offers collection-level operations such as listing datasets or getting/creating one. Obtain an instance
    via an appropriate method on the `ApifyClient` class.
    """

    def __init__(self, *, resource_path: str = 'datasets', **kwargs: Any) -> None:
        """Initialize the dataset collection client.

        Args:
            resource_path: Resource path handed to the base client; defaults to `'datasets'`.
            **kwargs: Remaining keyword arguments, forwarded unchanged to the base class initializer.
        """
        super().__init__(resource_path=resource_path, **kwargs)

    def list(
        self,
        *,
        unnamed: bool | None = None,
        limit: int | None = None,
        offset: int | None = None,
        desc: bool | None = None,
    ) -> ListOfDatasets:
        """Return a listing of the available datasets.

        https://docs.apify.com/api/v2#/reference/datasets/dataset-collection/get-list-of-datasets

        Args:
            unnamed: Whether unnamed datasets should be part of the listing.
            limit: Number of datasets to retrieve.
            offset: Which dataset the listing should start from.
            desc: Whether to order the datasets descending by their modification date.

        Returns:
            The `data` part of the response after validating it as a `ListOfDatasetsResponse`.
        """
        raw_listing = self._list(unnamed=unnamed, limit=limit, offset=offset, desc=desc)
        validated = ListOfDatasetsResponse.model_validate(raw_listing)
        return validated.data

    def get_or_create(
        self,
        *,
        name: str | None = None,
        schema: dict | None = None,
    ) -> Dataset:
        """Fetch a named dataset, creating it first when it does not exist yet.

        https://docs.apify.com/api/v2#/reference/datasets/dataset-collection/create-dataset

        Args:
            name: Name of the dataset to retrieve or create.
            schema: Schema of the dataset; run through `filter_none_values` before being passed on as a
                resource field.

        Returns:
            The `data` part of the response after validating it as a `DatasetResponse`.
        """
        extra_fields = filter_none_values({'schema': schema})
        raw_dataset = self._get_or_create(name=name, resource_fields=extra_fields)
        validated = DatasetResponse.model_validate(raw_dataset)
        return validated.data
@docs_group('Resource clients')
class DatasetCollectionClientAsync(ResourceClientAsync):
    """Asynchronous sub-client for the dataset collection.

    Offers collection-level operations such as listing datasets or getting/creating one. Obtain an instance
    via an appropriate method on the `ApifyClientAsync` class.
    """

    def __init__(self, *, resource_path: str = 'datasets', **kwargs: Any) -> None:
        """Initialize the asynchronous dataset collection client.

        Args:
            resource_path: Resource path handed to the base client; defaults to `'datasets'`.
            **kwargs: Remaining keyword arguments, forwarded unchanged to the base class initializer.
        """
        super().__init__(resource_path=resource_path, **kwargs)

    async def list(
        self,
        *,
        unnamed: bool | None = None,
        limit: int | None = None,
        offset: int | None = None,
        desc: bool | None = None,
    ) -> ListOfDatasets:
        """Return a listing of the available datasets.

        https://docs.apify.com/api/v2#/reference/datasets/dataset-collection/get-list-of-datasets

        Args:
            unnamed: Whether unnamed datasets should be part of the listing.
            limit: Number of datasets to retrieve.
            offset: Which dataset the listing should start from.
            desc: Whether to order the datasets descending by their modification date.

        Returns:
            The `data` part of the response after validating it as a `ListOfDatasetsResponse`.
        """
        raw_listing = await self._list(unnamed=unnamed, limit=limit, offset=offset, desc=desc)
        validated = ListOfDatasetsResponse.model_validate(raw_listing)
        return validated.data

    async def get_or_create(self, *, name: str | None = None, schema: dict | None = None) -> Dataset:
        """Fetch a named dataset, creating it first when it does not exist yet.

        https://docs.apify.com/api/v2#/reference/datasets/dataset-collection/create-dataset

        Args:
            name: Name of the dataset to retrieve or create.
            schema: Schema of the dataset; run through `filter_none_values` before being passed on as a
                resource field.

        Returns:
            The `data` part of the response after validating it as a `DatasetResponse`.
        """
        extra_fields = filter_none_values({'schema': schema})
        raw_dataset = await self._get_or_create(name=name, resource_fields=extra_fields)
        validated = DatasetResponse.model_validate(raw_dataset)
        return validated.data