diff --git a/apify-api/openapi/components/objects/datasets/dataset-items.yaml b/apify-api/openapi/components/objects/datasets/dataset-items.yaml new file mode 100644 index 0000000000..6f4fdda223 --- /dev/null +++ b/apify-api/openapi/components/objects/datasets/dataset-items.yaml @@ -0,0 +1,453 @@ +commonErrors: &commonErrors + "400": + $ref: ../../responses/BadRequest.yaml + "401": + $ref: ../../responses/Unauthorized.yaml + "403": + $ref: ../../responses/Forbidden.yaml + "404": + $ref: ../../responses/NotFound.yaml + "405": + $ref: ../../responses/MethodNotAllowed.yaml + "429": + $ref: ../../responses/TooManyRequests.yaml + +sharedGet: &sharedGet + responses: + <<: *commonErrors + "200": + description: "" + headers: + $ref: ../../headers/ApifyPaginationHeaders.yaml + content: + application/json: + schema: + type: array + items: + type: object + example: [foo: bar, foo2: bar2] + application/jsonl: + schema: + type: string + example: '{"foo":"bar"}\n{"foo2":"bar2"}\n' + text/csv: + schema: + type: string + example: 'foo,bar\nfoo2,bar2\n' + text/html: + schema: + type: string + example:
foobar
foobar
foo2bar2
+ application/vnd.openxmlformats-officedocument.spreadsheetml.sheet: + schema: + type: string + application/rss+xml: + schema: + type: string + example: barbar2 + application/xml: + schema: + type: string + example: barbar2 + deprecated: false + +getById: + <<: *sharedGet + tags: + - Storage/Datasets + summary: Get dataset items + description: | + Returns data stored in the dataset in a desired format. + + ### Response format + + The format of the response depends on format query parameter. + + The format parameter can have one of the following values: + json, jsonl, xml, html, + csv, xlsx and rss. + + The following table describes how each format is treated. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
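+ <table> + <tr> + <th>Format</th> + <th>Items</th> + </tr>
+ <tr> + <td><code>json</code></td> + <td rowspan="3">The response is a JSON, JSONL or XML array of raw item objects.</td> + </tr>
+ <tr> + <td><code>jsonl</code></td> + </tr>
+ <tr> + <td><code>xml</code></td> + </tr>
+ <tr> + <td><code>html</code></td> + <td rowspan="3">The response is an HTML, CSV or XLSX table, where columns correspond to the + properties of the item and rows correspond to each dataset item.</td> + </tr>
+ <tr> + <td><code>csv</code></td> + </tr>
+ <tr> + <td><code>xlsx</code></td> + </tr>
+ <tr> + <td><code>rss</code></td> + <td>The response is an RSS file. Each item is displayed as child elements of one + <code>&lt;item&gt;</code>.</td> + </tr> + </table>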
+ + Note that CSV, XLSX and HTML tables are limited to 2000 columns and the column names cannot be longer than 200 characters. + JSON, XML and RSS formats do not have such restrictions. + + ### Hidden fields + + The top-level fields starting with the `#` character are considered hidden. + These are useful to store debugging information and can be omitted from the output by providing the `skipHidden=1` or `clean=1` query parameters. + For example, if you store the following object to the dataset: + + ``` + { + productName: "iPhone Xs", + description: "Welcome to the big screens.", + #debug: { + url: "https://www.apple.com/lae/iphone-xs/", + crawledAt: "2019-01-21T16:06:03.683Z" + } + } + ``` + + The `#debug` field will be considered as hidden and can be omitted from the + results. This is useful to + provide nice cleaned data to end users, while keeping debugging info + available if needed. The Dataset object + returned by the API contains the number of such clean items in the `dataset.cleanItemCount` property. + + ### XML format extension + + When exporting results to XML or RSS formats, the names of object properties become XML tags and the corresponding values become the tag's children. For example, the following JavaScript object: + + ``` + { + name: "Paul Newman", + address: [ + { type: "home", street: "21st", city: "Chicago" }, + { type: "office", street: null, city: null } + ] + } + ``` + + will be transformed to the following XML snippet: + + ``` + <name>Paul Newman</name> + <address>
+ <type>home</type> + <street>21st</street> + <city>Chicago</city> + </address>
+ <address>
+ <type>office</type> + <street/> + <city/> + </address>
+ ``` + + If the JavaScript object contains a property named `@` then its sub-properties are exported as attributes of the parent XML + element. + If the parent XML element does not have any child elements then its value is taken from a JavaScript object property named `#`. + + For example, the following JavaScript object: + + ``` + { + "address": [{ + "@": { + "type": "home" + }, + "street": "21st", + "city": "Chicago" + }, + { + "@": { + "type": "office" + }, + "#": 'unknown' + }] + } + ``` + + will be transformed to the following XML snippet: + + ``` + <address type="home">
+ <street>21st</street> + <city>Chicago</city> + </address>
+ <address type="office">unknown</address>
+ ``` + + This feature is also useful to customize your RSS feeds generated for various websites. + + By default the whole result is wrapped in an `<items>` element and each page object is wrapped in an `<item>` element. + You can change this using the `xmlRoot` and `xmlRow` URL parameters. + + ### Pagination + + The generated response supports [pagination](#/introduction/pagination). + The pagination is always performed with the granularity of a single item, regardless of whether the `unwind` parameter was provided. + By default, the **Items** in the response are sorted by the time they were stored to the database, therefore you can use pagination to incrementally fetch the items as they are being added. + No limit exists to how many items can be returned in one response. + + If you specify the `desc=1` query parameter, the results are returned in the reverse order of how they were stored (i.e. from newest to oldest items). + Note that only the order of **Items** is reversed, but not the order of the `unwind` array elements. 
+ operationId: dataset_items_get + parameters: + - $ref: "../../parameters/storageParameters.yaml#/datasetId" + - $ref: "../../parameters/datasetItemsParameters.yaml#/format" + - $ref: "../../parameters/datasetItemsParameters.yaml#/clean" + - $ref: "../../parameters/paginationParameters.yaml#/offset" + - $ref: "../../parameters/datasetItemsParameters.yaml#/limit" + - $ref: "../../parameters/datasetItemsParameters.yaml#/fields" + - $ref: "../../parameters/datasetItemsParameters.yaml#/omit" + - $ref: "../../parameters/datasetItemsParameters.yaml#/unwind" + - $ref: "../../parameters/datasetItemsParameters.yaml#/flatten" + - $ref: "../../parameters/datasetItemsParameters.yaml#/descDataset" + - $ref: "../../parameters/datasetItemsParameters.yaml#/attachment" + - $ref: "../../parameters/datasetItemsParameters.yaml#/delimiter" + - $ref: "../../parameters/datasetItemsParameters.yaml#/bom" + - $ref: "../../parameters/datasetItemsParameters.yaml#/xmlRoot" + - $ref: "../../parameters/datasetItemsParameters.yaml#/xmlRow" + - $ref: "../../parameters/datasetItemsParameters.yaml#/skipHeaderRow" + - $ref: "../../parameters/datasetItemsParameters.yaml#/skipHidden" + - $ref: "../../parameters/datasetItemsParameters.yaml#/skipEmpty" + - $ref: "../../parameters/datasetItemsParameters.yaml#/simplified" + - $ref: "../../parameters/datasetItemsParameters.yaml#/view" + - $ref: "../../parameters/datasetItemsParameters.yaml#/skipFailedPages" + - $ref: "../../parameters/storageParameters.yaml#/signature" + x-legacy-doc-urls: + - https://docs.apify.com/api/v2#/reference/datasets/item-collection/get-items + - https://docs.apify.com/api/v2#/reference/datasets/get-items + - https://docs.apify.com/api/v2#tag/DatasetsItem-collection/operation/dataset_items_get + x-js-parent: DatasetClient + x-js-name: listItems + x-js-doc-url: https://docs.apify.com/api/client/js/reference/class/DatasetClient#listItems + x-py-parent: DatasetClientAsync + x-py-name: stream_items + x-py-doc-url: 
https://docs.apify.com/api/client/python/reference/class/DatasetClientAsync#stream_items + +getDefault: + <<: *sharedGet + tags: + - Default storages + summary: Get default dataset items + description: | + Returns data stored in the default dataset of the Actor run in the desired format. + + This endpoint is a shortcut that resolves the run's `defaultDatasetId` and proxies to the + [Get dataset items](/api/v2/dataset-items-get) endpoint. + operationId: actorRun_dataset_items_get + parameters: + - $ref: "../../parameters/runAndBuildParameters.yaml#/runId" + - $ref: "../../parameters/datasetItemsParameters.yaml#/format" + - $ref: "../../parameters/datasetItemsParameters.yaml#/clean" + - $ref: "../../parameters/paginationParameters.yaml#/offset" + - $ref: "../../parameters/datasetItemsParameters.yaml#/limit" + - $ref: "../../parameters/datasetItemsParameters.yaml#/fields" + - $ref: "../../parameters/datasetItemsParameters.yaml#/omit" + - $ref: "../../parameters/datasetItemsParameters.yaml#/unwind" + - $ref: "../../parameters/datasetItemsParameters.yaml#/flatten" + - $ref: "../../parameters/datasetItemsParameters.yaml#/descDataset" + - $ref: "../../parameters/datasetItemsParameters.yaml#/attachment" + - $ref: "../../parameters/datasetItemsParameters.yaml#/delimiter" + - $ref: "../../parameters/datasetItemsParameters.yaml#/bom" + - $ref: "../../parameters/datasetItemsParameters.yaml#/xmlRoot" + - $ref: "../../parameters/datasetItemsParameters.yaml#/xmlRow" + - $ref: "../../parameters/datasetItemsParameters.yaml#/skipHeaderRow" + - $ref: "../../parameters/datasetItemsParameters.yaml#/skipHidden" + - $ref: "../../parameters/datasetItemsParameters.yaml#/skipEmpty" + - $ref: "../../parameters/datasetItemsParameters.yaml#/simplified" + - $ref: "../../parameters/datasetItemsParameters.yaml#/view" + - $ref: "../../parameters/datasetItemsParameters.yaml#/skipFailedPages" + - $ref: "../../parameters/storageParameters.yaml#/signature" + +getLastRun: + <<: *sharedGet + tags: + - 
Last Actor run + summary: Get last run's dataset items + description: | + Returns data stored in the default dataset of the last Actor run in the desired format. + + This endpoint is a shortcut that resolves the last run's `defaultDatasetId` and proxies to the + [Get dataset items](/api/v2/dataset-items-get) endpoint. + operationId: act_runs_last_dataset_items_get + parameters: + - $ref: "../../parameters/runAndBuildParameters.yaml#/actorId" + - $ref: "../../parameters/runAndBuildParameters.yaml#/statusLastRun" + - $ref: "../../parameters/datasetItemsParameters.yaml#/format" + - $ref: "../../parameters/datasetItemsParameters.yaml#/clean" + - $ref: "../../parameters/paginationParameters.yaml#/offset" + - $ref: "../../parameters/datasetItemsParameters.yaml#/limit" + - $ref: "../../parameters/datasetItemsParameters.yaml#/fields" + - $ref: "../../parameters/datasetItemsParameters.yaml#/omit" + - $ref: "../../parameters/datasetItemsParameters.yaml#/unwind" + - $ref: "../../parameters/datasetItemsParameters.yaml#/flatten" + - $ref: "../../parameters/datasetItemsParameters.yaml#/descDataset" + - $ref: "../../parameters/datasetItemsParameters.yaml#/attachment" + - $ref: "../../parameters/datasetItemsParameters.yaml#/delimiter" + - $ref: "../../parameters/datasetItemsParameters.yaml#/bom" + - $ref: "../../parameters/datasetItemsParameters.yaml#/xmlRoot" + - $ref: "../../parameters/datasetItemsParameters.yaml#/xmlRow" + - $ref: "../../parameters/datasetItemsParameters.yaml#/skipHeaderRow" + - $ref: "../../parameters/datasetItemsParameters.yaml#/skipHidden" + - $ref: "../../parameters/datasetItemsParameters.yaml#/skipEmpty" + - $ref: "../../parameters/datasetItemsParameters.yaml#/simplified" + - $ref: "../../parameters/datasetItemsParameters.yaml#/view" + - $ref: "../../parameters/datasetItemsParameters.yaml#/skipFailedPages" + - $ref: "../../parameters/storageParameters.yaml#/signature" + +headById: + responses: + "200": + description: "" + headers: + $ref: 
../../headers/ApifyPaginationHeaders.yaml + content: {} + "400": + $ref: ../../responses/BadRequest.yaml + deprecated: false + tags: + - Storage/Datasets + summary: Get dataset items headers + description: | + Returns only the HTTP headers for the dataset items endpoint, without the response body. + This is useful to check pagination metadata or verify access without downloading the full dataset. + operationId: dataset_items_head + parameters: + - $ref: "../../parameters/storageParameters.yaml#/datasetId" + - $ref: "../../parameters/datasetItemsParameters.yaml#/format" + - $ref: "../../parameters/datasetItemsParameters.yaml#/clean" + - $ref: "../../parameters/paginationParameters.yaml#/offset" + - $ref: "../../parameters/datasetItemsParameters.yaml#/limit" + - $ref: "../../parameters/datasetItemsParameters.yaml#/fields" + - $ref: "../../parameters/datasetItemsParameters.yaml#/omit" + - $ref: "../../parameters/datasetItemsParameters.yaml#/unwind" + - $ref: "../../parameters/datasetItemsParameters.yaml#/flatten" + - $ref: "../../parameters/datasetItemsParameters.yaml#/descDataset" + - $ref: "../../parameters/datasetItemsParameters.yaml#/attachment" + - $ref: "../../parameters/datasetItemsParameters.yaml#/delimiter" + - $ref: "../../parameters/datasetItemsParameters.yaml#/bom" + - $ref: "../../parameters/datasetItemsParameters.yaml#/xmlRoot" + - $ref: "../../parameters/datasetItemsParameters.yaml#/xmlRow" + - $ref: "../../parameters/datasetItemsParameters.yaml#/skipHeaderRow" + - $ref: "../../parameters/datasetItemsParameters.yaml#/skipHidden" + - $ref: "../../parameters/datasetItemsParameters.yaml#/skipEmpty" + - $ref: "../../parameters/datasetItemsParameters.yaml#/simplified" + - $ref: "../../parameters/datasetItemsParameters.yaml#/view" + - $ref: "../../parameters/datasetItemsParameters.yaml#/skipFailedPages" + - $ref: "../../parameters/storageParameters.yaml#/signature" + +sharedPost: &sharedPost + requestBody: + description: "" + content: + application/json: + 
schema: + oneOf: + - $ref: ../../schemas/datasets/PutItemsRequest.yaml + - type: array + items: + $ref: ../../schemas/datasets/PutItemsRequest.yaml + description: "" + required: true + responses: + "201": + description: "" + headers: + Location: + content: + text/plain: + schema: + type: string + example: https://api.apify.com/v2/datasets/WkzbQMuFYuamGv3YF/items + content: + application/json: + schema: + type: object + example: {} + "400": + description: "" + headers: {} + content: + application/json: + schema: + $ref: ../../schemas/datasets/PutItemResponseError.yaml + "403": + $ref: ../../responses/Forbidden.yaml + "404": + $ref: ../../responses/NotFound.yaml + deprecated: false + +postById: + <<: *sharedPost + tags: + - Storage/Datasets + summary: Store items + description: | + Appends an item or an array of items to the end of the dataset. + The POST payload is a JSON object or a JSON array of objects to save into the dataset. + + If the data you attempt to store in the dataset is invalid (meaning any of the items received by the API fails the validation), the whole request is discarded and the API will return a response with status code 400. + For more information about dataset schema validation, see [Dataset schema](https://docs.apify.com/platform/actors/development/actor-definition/dataset-schema/validation). + + **IMPORTANT:** The limit of request payload size for the dataset is 5 MB. If the array exceeds the size, you'll need to split it into a number of smaller arrays. 
+ operationId: dataset_items_post + parameters: + - $ref: "../../parameters/storageParameters.yaml#/datasetId" + x-legacy-doc-urls: + - https://docs.apify.com/api/v2#/reference/datasets/item-collection/put-items + - https://docs.apify.com/api/v2#/reference/datasets/put-items + - https://docs.apify.com/api/v2#tag/DatasetsItem-collection/operation/dataset_items_post + x-js-parent: DatasetClient + x-js-name: pushItems + x-js-doc-url: https://docs.apify.com/api/client/js/reference/class/DatasetClient#pushItems + x-py-parent: DatasetClientAsync + x-py-name: push_items + x-py-doc-url: https://docs.apify.com/api/client/python/reference/class/DatasetClientAsync#push_items + +postDefault: + <<: *sharedPost + tags: + - Default storages + summary: Store items + description: | + Appends an item or an array of items to the end of the Actor run's default dataset. + + This endpoint is a shortcut that resolves the run's `defaultDatasetId` and proxies to the + [Store items](/api/v2/dataset-items-post) endpoint. + + operationId: actorRun_dataset_items_post + parameters: + - $ref: "../../parameters/runAndBuildParameters.yaml#/runId" + +postLastRun: + <<: *sharedPost + tags: + - Last Actor run + summary: Store items in last run's dataset + description: | + Appends an item or an array of items to the end of the last Actor run's default dataset. + + This endpoint is a shortcut that resolves the last run's `defaultDatasetId` and proxies to the + [Store items](/api/v2/dataset-items-post) endpoint. 
+ + operationId: act_runs_last_dataset_items_post + parameters: + - $ref: "../../parameters/runAndBuildParameters.yaml#/actorId" + - $ref: "../../parameters/runAndBuildParameters.yaml#/statusLastRun" diff --git a/apify-api/openapi/components/objects/datasets/dataset-statistics.yaml b/apify-api/openapi/components/objects/datasets/dataset-statistics.yaml new file mode 100644 index 0000000000..e808ae2c35 --- /dev/null +++ b/apify-api/openapi/components/objects/datasets/dataset-statistics.yaml @@ -0,0 +1,63 @@ +sharedGet: &sharedGet + responses: + "200": + description: "" + content: + application/json: + schema: + $ref: ../../schemas/datasets/DatasetStatisticsResponse.yaml + "400": + $ref: ../../responses/BadRequest.yaml + "401": + $ref: ../../responses/Unauthorized.yaml + "403": + $ref: ../../responses/Forbidden.yaml + "404": + $ref: ../../responses/NotFound.yaml + "405": + $ref: ../../responses/MethodNotAllowed.yaml + "429": + $ref: ../../responses/TooManyRequests.yaml + +getById: + <<: *sharedGet + tags: + - Storage/Datasets + summary: Get dataset statistics + description: | + Returns statistics for given dataset. + + Provides only [field statistics](https://docs.apify.com/platform/actors/development/actor-definition/dataset-schema/validation#dataset-field-statistics). + + operationId: dataset_statistics_get + parameters: + - $ref: "../../parameters/storageParameters.yaml#/datasetId" + +getDefault: + <<: *sharedGet + tags: + - Default storages + summary: Get default dataset statistics + description: | + Returns statistics for the Actor run's default dataset. + + This endpoint is a shortcut that resolves the run's `defaultDatasetId` and proxies to the + [Get dataset statistics](/api/v2/dataset-statistics-get) endpoint. 
+ operationId: actorRun_dataset_statistics_get + parameters: + - $ref: "../../parameters/runAndBuildParameters.yaml#/runId" + +getLastRun: + <<: *sharedGet + tags: + - Last Actor run + summary: Get last run's dataset statistics + description: | + Returns statistics for the last Actor run's default dataset. + + This endpoint is a shortcut that resolves the last run's `defaultDatasetId` and proxies to the + [Get dataset statistics](/api/v2/dataset-statistics-get) endpoint. + operationId: act_runs_last_dataset_statistics_get + parameters: + - $ref: "../../parameters/runAndBuildParameters.yaml#/actorId" + - $ref: "../../parameters/runAndBuildParameters.yaml#/statusLastRun" diff --git a/apify-api/openapi/components/objects/datasets/dataset.yaml b/apify-api/openapi/components/objects/datasets/dataset.yaml new file mode 100644 index 0000000000..a678ddd46c --- /dev/null +++ b/apify-api/openapi/components/objects/datasets/dataset.yaml @@ -0,0 +1,225 @@ +commonErrors: &commonErrors + "400": + $ref: ../../responses/BadRequest.yaml + "401": + $ref: ../../responses/Unauthorized.yaml + "403": + $ref: ../../responses/Forbidden.yaml + "404": + $ref: ../../responses/NotFound.yaml + "405": + $ref: ../../responses/MethodNotAllowed.yaml + "429": + $ref: ../../responses/TooManyRequests.yaml + +sharedGet: &sharedGet + responses: + <<: *commonErrors + "200": + description: "" + headers: {} + content: + application/json: + schema: + $ref: ../../schemas/datasets/DatasetResponse.yaml + deprecated: false + +getById: + <<: *sharedGet + tags: + - Storage/Datasets + summary: Get dataset + description: | + Returns dataset object for given dataset ID. + + This does not return dataset items, only information about the storage itself. + To retrieve dataset items, use the [List dataset items](/api/v2/dataset-items-get) endpoint. + + :::note + + Keep in mind that attributes `itemCount` and `cleanItemCount` are not propagated right away after data are pushed into a dataset. 
+ + ::: + + There is a short period (up to 5 seconds) during which these counters may not match with exact counts in dataset items. + operationId: dataset_get + parameters: + - $ref: "../../parameters/storageParameters.yaml#/datasetId" + - name: token + in: query + description: | + API authentication token. It is required only when using the `username~dataset-name` format for `datasetId`. + style: form + explode: true + schema: + type: string + example: soSkq9ekdmfOslopH + x-legacy-doc-urls: + - https://docs.apify.com/api/v2#/reference/datasets/dataset/get-dataset + - https://docs.apify.com/api/v2#/reference/datasets/get-dataset + - https://docs.apify.com/api/v2#tag/DatasetsDataset/operation/dataset_get + x-js-parent: DatasetClient + x-js-name: get + x-js-doc-url: https://docs.apify.com/api/client/js/reference/class/DatasetClient#get + x-py-parent: DatasetClientAsync + x-py-name: get + x-py-doc-url: https://docs.apify.com/api/client/python/reference/class/DatasetClientAsync#get + +getDefault: + <<: *sharedGet + tags: + - Default storages + summary: Get default dataset + description: | + Returns the default dataset associated with an Actor run. + + This endpoint is a shortcut for getting the run's `defaultDatasetId` and then using the + [Get dataset](/api/v2/dataset-get) endpoint. + + operationId: actorRun_dataset_get + parameters: + - $ref: "../../parameters/runAndBuildParameters.yaml#/runId" + +getLastRun: + <<: *sharedGet + tags: + - Last Actor run + summary: Get last run's default dataset + description: | + Returns the default dataset associated with the last Actor run. + + This endpoint is a shortcut for getting the last run's `defaultDatasetId` and then using the + [Get dataset](/api/v2/dataset-get) endpoint. 
+ operationId: act_runs_last_dataset_get + parameters: + - $ref: "../../parameters/runAndBuildParameters.yaml#/actorId" + - $ref: "../../parameters/runAndBuildParameters.yaml#/statusLastRun" + +sharedPut: &sharedPut + requestBody: + description: "" + content: + application/json: + schema: + $ref: ../../schemas/datasets/UpdateDatasetRequest.yaml + required: true + responses: + <<: *commonErrors + "200": + description: "" + headers: {} + content: + application/json: + schema: + $ref: ../../schemas/datasets/DatasetResponse.yaml + "413": + $ref: ../../responses/PayloadTooLarge.yaml + "415": + $ref: ../../responses/UnsupportedMediaType.yaml + deprecated: false + +putById: + <<: *sharedPut + tags: + - Storage/Datasets + summary: Update dataset + description: | + Updates a dataset's name and general resource access level using a value specified by a JSON object passed in the PUT payload. + The response is the updated dataset object, as returned by the [Get dataset](/api/v2/dataset-get) API endpoint. + operationId: + dataset_put + parameters: + - $ref: "../../parameters/storageParameters.yaml#/datasetId" + x-legacy-doc-urls: + - https://docs.apify.com/api/v2#/reference/datasets/dataset/update-dataset + - https://docs.apify.com/api/v2#/reference/datasets/update-dataset + - https://docs.apify.com/api/v2#tag/DatasetsDataset/operation/dataset_put + x-js-parent: DatasetClient + x-js-name: update + x-js-doc-url: https://docs.apify.com/api/client/js/reference/class/DatasetClient#update + x-py-parent: DatasetClientAsync + x-py-name: update + x-py-doc-url: https://docs.apify.com/api/client/python/reference/class/DatasetClientAsync#update + +putDefault: + <<: *sharedPut + tags: + - Default storages + summary: Update default dataset + description: | + Updates the default dataset associated with an Actor run. + + This endpoint is a shortcut for getting the run's `defaultDatasetId` and then using the + [Update dataset](/api/v2/dataset-put) endpoint. 
+ + operationId: actorRun_dataset_put + parameters: + - $ref: "../../parameters/runAndBuildParameters.yaml#/runId" + +putLastRun: + <<: *sharedPut + tags: + - Last Actor run + summary: Update last run's default dataset + description: | + Updates the default dataset associated with the last Actor run. + + This endpoint is a shortcut for getting the last run's `defaultDatasetId` and then using the + [Update dataset](/api/v2/dataset-put) endpoint. + operationId: act_runs_last_dataset_put + parameters: + - $ref: "../../parameters/runAndBuildParameters.yaml#/actorId" + - $ref: "../../parameters/runAndBuildParameters.yaml#/statusLastRun" + +sharedDelete: &sharedDelete + responses: + <<: *commonErrors + "204": + $ref: ../../responses/NoContent.yaml + deprecated: false + +deleteById: + <<: *sharedDelete + tags: + - Storage/Datasets + summary: Delete dataset + description: Deletes a specific dataset. + operationId: dataset_delete + parameters: + - $ref: "../../parameters/storageParameters.yaml#/datasetId" + x-legacy-doc-urls: + - https://docs.apify.com/api/v2#/reference/datasets/dataset/delete-dataset + - https://docs.apify.com/api/v2#/reference/datasets/delete-dataset + - https://docs.apify.com/api/v2#tag/DatasetsDataset/operation/dataset_delete + x-js-parent: DatasetClient + x-js-name: delete + x-js-doc-url: https://docs.apify.com/api/client/js/reference/class/DatasetClient#delete + +deleteDefault: + <<: *sharedDelete + tags: + - Default storages + summary: Delete default dataset + description: | + Deletes the default dataset associated with an Actor run. + + This endpoint is a shortcut for getting the run's `defaultDatasetId` and then using the + [Delete dataset](/api/v2/dataset-delete) endpoint. 
+ operationId: actorRun_dataset_delete + parameters: + - $ref: "../../parameters/runAndBuildParameters.yaml#/runId" + +deleteLastRun: + <<: *sharedDelete + tags: + - Last Actor run + summary: Delete last run's default dataset + description: | + Deletes the default dataset associated with the last Actor run. + + This endpoint is a shortcut for getting the last run's `defaultDatasetId` and then using the + [Delete dataset](/api/v2/dataset-delete) endpoint. + operationId: act_runs_last_dataset_delete + parameters: + - $ref: "../../parameters/runAndBuildParameters.yaml#/actorId" + - $ref: "../../parameters/runAndBuildParameters.yaml#/statusLastRun" diff --git a/apify-api/openapi/components/tags.yaml b/apify-api/openapi/components/tags.yaml index e79a87a270..694a433e84 100644 --- a/apify-api/openapi/components/tags.yaml +++ b/apify-api/openapi/components/tags.yaml @@ -406,20 +406,6 @@ ::: x-trait: true -- name: Tools - x-displayName: Tools - Introduction - description: | - The API endpoints described in this section provide utility tools for encoding, - signing, and verifying data, as well as inspecting HTTP request details. - - - **Browser info** (`/v2/browser-info`) - Returns details about the incoming HTTP request, - including the client IP address, country code, and headers. Accepts any HTTP method - (GET, POST, PUT, DELETE) so you can use it to test proxy behavior and verify that - client IP addresses are anonymized correctly. - - **Encode and sign** (`/v2/tools/encode-and-sign`) - Encodes and signs a JSON object, - tying it to the authenticated user's identity. - - **Decode and verify** (`/v2/tools/decode-and-verify`) - Decodes and verifies a value - previously created by the encode-and-sign endpoint. - name: Users x-displayName: Users - Introduction x-legacy-doc-urls: @@ -436,3 +422,16 @@ description: The API endpoints described in this section return information about user accounts. 
x-trait: true +- name: Tools + x-displayName: Tools - Introduction + description: | + The API endpoints described in this section provide utility tools for encoding, + signing, and verifying data, as well as inspecting HTTP request details. +- name: Default storages + x-displayName: Default storages - Introduction + description: | + The API endpoints described in this section are convenience endpoints that provide access to an Actor run's default storages (dataset, key-value store, and request queue) without the need to resolve the storage ID first. +- name: Last Actor run + x-displayName: Last Actor run - Introduction + description: | + The API endpoints described in this section are convenience endpoints that provide access to an Actor's last run and all its sub-resources without the need to resolve the run ID first. diff --git a/apify-api/openapi/components/x-tag-groups.yaml b/apify-api/openapi/components/x-tag-groups.yaml index 5c1cf2fdf2..e454dbb44b 100644 --- a/apify-api/openapi/components/x-tag-groups.yaml +++ b/apify-api/openapi/components/x-tag-groups.yaml @@ -41,3 +41,7 @@ - name: Tools tags: - Tools +- name: Convenience endpoints + tags: + - Default storages + - Last Actor run diff --git a/apify-api/openapi/openapi.yaml b/apify-api/openapi/openapi.yaml index af3cc58791..8aac76d3ec 100644 --- a/apify-api/openapi/openapi.yaml +++ b/apify-api/openapi/openapi.yaml @@ -514,6 +514,12 @@ paths: $ref: "paths/actors/acts@{actorId}@runs@{runId}@resurrect.yaml" "/v2/acts/{actorId}/runs/last": $ref: "paths/actors/acts@{actorId}@runs@last.yaml" + "/v2/acts/{actorId}/runs/last/dataset": + $ref: "paths/actors/acts@{actorId}@runs@last@dataset.yaml" + "/v2/acts/{actorId}/runs/last/dataset/items": + $ref: "paths/actors/acts@{actorId}@runs@last@dataset@items.yaml" + "/v2/acts/{actorId}/runs/last/dataset/statistics": + $ref: "paths/actors/acts@{actorId}@runs@last@dataset@statistics.yaml" "/v2/acts/{actorId}/runs/{runId}": $ref: "paths/actors/acts@{actorId}@runs@{runId}.yaml" 
"/v2/acts/{actorId}/runs/{runId}/abort": @@ -550,6 +556,12 @@ paths: $ref: paths/actor-runs/actor-runs@{runId}@resurrect.yaml "/v2/actor-runs/{runId}/charge": $ref: paths/actor-runs/actor-runs@{runId}@charge.yaml + "/v2/actor-runs/{runId}/dataset": + $ref: paths/actor-runs/actor-runs@{runId}@dataset.yaml + "/v2/actor-runs/{runId}/dataset/items": + $ref: "paths/actor-runs/actor-runs@{runId}@dataset@items.yaml" + "/v2/actor-runs/{runId}/dataset/statistics": + $ref: "paths/actor-runs/actor-runs@{runId}@dataset@statistics.yaml" /v2/actor-builds: $ref: paths/actor-builds/actor-builds.yaml "/v2/actor-builds/{buildId}": diff --git a/apify-api/openapi/paths/actor-runs/actor-runs@{runId}@dataset.yaml b/apify-api/openapi/paths/actor-runs/actor-runs@{runId}@dataset.yaml new file mode 100644 index 0000000000..ee2b1539be --- /dev/null +++ b/apify-api/openapi/paths/actor-runs/actor-runs@{runId}@dataset.yaml @@ -0,0 +1,6 @@ +get: + $ref: "../../components/objects/datasets/dataset.yaml#/getDefault" +put: + $ref: "../../components/objects/datasets/dataset.yaml#/putDefault" +delete: + $ref: "../../components/objects/datasets/dataset.yaml#/deleteDefault" diff --git a/apify-api/openapi/paths/actor-runs/actor-runs@{runId}@dataset@items.yaml b/apify-api/openapi/paths/actor-runs/actor-runs@{runId}@dataset@items.yaml new file mode 100644 index 0000000000..bee4fbf290 --- /dev/null +++ b/apify-api/openapi/paths/actor-runs/actor-runs@{runId}@dataset@items.yaml @@ -0,0 +1,4 @@ +get: + $ref: "../../components/objects/datasets/dataset-items.yaml#/getDefault" +post: + $ref: "../../components/objects/datasets/dataset-items.yaml#/postDefault" diff --git a/apify-api/openapi/paths/actor-runs/actor-runs@{runId}@dataset@statistics.yaml b/apify-api/openapi/paths/actor-runs/actor-runs@{runId}@dataset@statistics.yaml new file mode 100644 index 0000000000..d6c0cc556d --- /dev/null +++ b/apify-api/openapi/paths/actor-runs/actor-runs@{runId}@dataset@statistics.yaml @@ -0,0 +1,2 @@ +get: + $ref: 
"../../components/objects/datasets/dataset-statistics.yaml#/getDefault" diff --git a/apify-api/openapi/paths/actors/acts@{actorId}@runs@last@dataset.yaml b/apify-api/openapi/paths/actors/acts@{actorId}@runs@last@dataset.yaml new file mode 100644 index 0000000000..7cb247f576 --- /dev/null +++ b/apify-api/openapi/paths/actors/acts@{actorId}@runs@last@dataset.yaml @@ -0,0 +1,6 @@ +get: + $ref: "../../components/objects/datasets/dataset.yaml#/getLastRun" +put: + $ref: "../../components/objects/datasets/dataset.yaml#/putLastRun" +delete: + $ref: "../../components/objects/datasets/dataset.yaml#/deleteLastRun" diff --git a/apify-api/openapi/paths/actors/acts@{actorId}@runs@last@dataset@items.yaml b/apify-api/openapi/paths/actors/acts@{actorId}@runs@last@dataset@items.yaml new file mode 100644 index 0000000000..b3731bb35a --- /dev/null +++ b/apify-api/openapi/paths/actors/acts@{actorId}@runs@last@dataset@items.yaml @@ -0,0 +1,4 @@ +get: + $ref: "../../components/objects/datasets/dataset-items.yaml#/getLastRun" +post: + $ref: "../../components/objects/datasets/dataset-items.yaml#/postLastRun" diff --git a/apify-api/openapi/paths/actors/acts@{actorId}@runs@last@dataset@statistics.yaml b/apify-api/openapi/paths/actors/acts@{actorId}@runs@last@dataset@statistics.yaml new file mode 100644 index 0000000000..c585c7cd25 --- /dev/null +++ b/apify-api/openapi/paths/actors/acts@{actorId}@runs@last@dataset@statistics.yaml @@ -0,0 +1,2 @@ +get: + $ref: "../../components/objects/datasets/dataset-statistics.yaml#/getLastRun" diff --git a/apify-api/openapi/paths/datasets/datasets@{datasetId}.yaml b/apify-api/openapi/paths/datasets/datasets@{datasetId}.yaml index d5dabe15d7..522ca28746 100644 --- a/apify-api/openapi/paths/datasets/datasets@{datasetId}.yaml +++ b/apify-api/openapi/paths/datasets/datasets@{datasetId}.yaml @@ -1,143 +1,6 @@ get: - tags: - - Storage/Datasets - summary: Get dataset - description: | - Returns dataset object for given dataset ID. 
- - This does not return dataset items, only information about the storage itself. - To retrieve dataset items, use the [List dataset items](/api/v2/dataset-items-get) endpoint. - - :::note - - Keep in mind that attributes `itemCount` and `cleanItemCount` are not propagated right away after data are pushed into a dataset. - - ::: - - There is a short period (up to 5 seconds) during which these counters may not match with exact counts in dataset items. - operationId: dataset_get - parameters: - - $ref: "../../components/parameters/storageParameters.yaml#/datasetId" - - name: token - in: query - description: | - API authentication token. It is required only when using the `username~dataset-name` format for `datasetId`. - style: form - explode: true - schema: - type: string - example: soSkq9ekdmfOslopH - responses: - "200": - description: "" - headers: {} - content: - application/json: - schema: - $ref: ../../components/schemas/datasets/DatasetResponse.yaml - "400": - $ref: ../../components/responses/BadRequest.yaml - "401": - $ref: ../../components/responses/Unauthorized.yaml - "403": - $ref: ../../components/responses/Forbidden.yaml - "404": - $ref: ../../components/responses/NotFound.yaml - "405": - $ref: ../../components/responses/MethodNotAllowed.yaml - "429": - $ref: ../../components/responses/TooManyRequests.yaml - deprecated: false - x-legacy-doc-urls: - - https://docs.apify.com/api/v2#/reference/datasets/dataset/get-dataset - - https://docs.apify.com/api/v2#/reference/datasets/get-dataset - - https://docs.apify.com/api/v2#tag/DatasetsDataset/operation/dataset_get - x-js-parent: DatasetClient - x-js-name: get - x-js-doc-url: https://docs.apify.com/api/client/js/reference/class/DatasetClient#get - x-py-parent: DatasetClientAsync - x-py-name: get - x-py-doc-url: https://docs.apify.com/api/client/python/reference/class/DatasetClientAsync#get + $ref: "../../components/objects/datasets/dataset.yaml#/getById" put: - tags: - - Storage/Datasets - summary: Update 
dataset - description: | - Updates a dataset's name and general resource access level using a value specified by a JSON object passed in the PUT payload. - The response is the updated dataset object, as returned by the [Get dataset](#/reference/datasets/dataset-collection/get-dataset) API endpoint. - operationId: dataset_put - parameters: - - $ref: "../../components/parameters/storageParameters.yaml#/datasetId" - requestBody: - description: "" - content: - application/json: - schema: - $ref: ../../components/schemas/datasets/UpdateDatasetRequest.yaml - required: true - responses: - "200": - description: "" - headers: {} - content: - application/json: - schema: - $ref: ../../components/schemas/datasets/DatasetResponse.yaml - "400": - $ref: ../../components/responses/BadRequest.yaml - "401": - $ref: ../../components/responses/Unauthorized.yaml - "403": - $ref: ../../components/responses/Forbidden.yaml - "404": - $ref: ../../components/responses/NotFound.yaml - "405": - $ref: ../../components/responses/MethodNotAllowed.yaml - "413": - $ref: ../../components/responses/PayloadTooLarge.yaml - "415": - $ref: ../../components/responses/UnsupportedMediaType.yaml - "429": - $ref: ../../components/responses/TooManyRequests.yaml - deprecated: false - x-legacy-doc-urls: - - https://docs.apify.com/api/v2#/reference/datasets/dataset/update-dataset - - https://docs.apify.com/api/v2#/reference/datasets/update-dataset - - https://docs.apify.com/api/v2#tag/DatasetsDataset/operation/dataset_put - x-js-parent: DatasetClient - x-js-name: update - x-js-doc-url: https://docs.apify.com/api/client/js/reference/class/DatasetClient#update - x-py-parent: DatasetClientAsync - x-py-name: update - x-py-doc-url: https://docs.apify.com/api/client/python/reference/class/DatasetClientAsync#update + $ref: "../../components/objects/datasets/dataset.yaml#/putById" delete: - tags: - - Storage/Datasets - summary: Delete dataset - description: Deletes a specific dataset. 
- operationId: dataset_delete - parameters: - - $ref: "../../components/parameters/storageParameters.yaml#/datasetId" - responses: - "204": - $ref: ../../components/responses/NoContent.yaml - "400": - $ref: ../../components/responses/BadRequest.yaml - "401": - $ref: ../../components/responses/Unauthorized.yaml - "403": - $ref: ../../components/responses/Forbidden.yaml - "404": - $ref: ../../components/responses/NotFound.yaml - "405": - $ref: ../../components/responses/MethodNotAllowed.yaml - "429": - $ref: ../../components/responses/TooManyRequests.yaml - deprecated: false - x-legacy-doc-urls: - - https://docs.apify.com/api/v2#/reference/datasets/dataset/delete-dataset - - https://docs.apify.com/api/v2#/reference/datasets/delete-dataset - - https://docs.apify.com/api/v2#tag/DatasetsDataset/operation/dataset_delete - x-js-parent: DatasetClient - x-js-name: delete - x-js-doc-url: https://docs.apify.com/api/client/js/reference/class/DatasetClient#delete + $ref: "../../components/objects/datasets/dataset.yaml#/deleteById" diff --git a/apify-api/openapi/paths/datasets/datasets@{datasetId}@items.yaml b/apify-api/openapi/paths/datasets/datasets@{datasetId}@items.yaml index 7eae2a03ae..204a1a6f65 100644 --- a/apify-api/openapi/paths/datasets/datasets@{datasetId}@items.yaml +++ b/apify-api/openapi/paths/datasets/datasets@{datasetId}@items.yaml @@ -1,340 +1,6 @@ get: - tags: - - Storage/Datasets - summary: Get dataset items - description: | - Returns data stored in the dataset in a desired format. - - ### Response format - - The format of the response depends on format query parameter. - - The format parameter can have one of the following values: - json, jsonl, xml, html, - csv, xlsx and rss. - - The following table describes how each format is treated. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
FormatItems
jsonThe response is a JSON, JSONL or XML array of raw item objects.
jsonl
xml
htmlThe response is a HTML, CSV or XLSX table, where columns correspond to the - properties of the item and rows correspond to each dataset item.
csv
xlsx
rssThe response is a RSS file. Each item is displayed as child elements of one - <item>.
- - Note that CSV, XLSX and HTML tables are limited to 2000 columns and the column names cannot be longer than 200 characters. - JSON, XML and RSS formats do not have such restrictions. - - ### Hidden fields - - The top-level fields starting with the `#` character are considered hidden. - These are useful to store debugging information and can be omitted from the output by providing the `skipHidden=1` or `clean=1` query parameters. - For example, if you store the following object to the dataset: - - ``` - { - productName: "iPhone Xs", - description: "Welcome to the big screens." - #debug: { - url: "https://www.apple.com/lae/iphone-xs/", - crawledAt: "2019-01-21T16:06:03.683Z" - } - } - ``` - - The `#debug` field will be considered as hidden and can be omitted from the - results. This is useful to - provide nice cleaned data to end users, while keeping debugging info - available if needed. The Dataset object - returned by the API contains the number of such clean items in the`dataset.cleanItemCount` property. - - ### XML format extension - - When exporting results to XML or RSS formats, the names of object properties become XML tags and the corresponding values become tag's children. For example, the following JavaScript object: - - ``` - { - name: "Paul Newman", - address: [ - { type: "home", street: "21st", city: "Chicago" }, - { type: "office", street: null, city: null } - ] - } - ``` - - will be transformed to the following XML snippet: - - ``` - Paul Newman -
- home - 21st - Chicago -
-
- office - - -
- ``` - - If the JavaScript object contains a property named `@` then its sub-properties are exported as attributes of the parent XML - element. - If the parent XML element does not have any child elements then its value is taken from a JavaScript object property named `#`. - - For example, the following JavaScript object: - - ``` - { - "address": [{ - "@": { - "type": "home" - }, - "street": "21st", - "city": "Chicago" - }, - { - "@": { - "type": "office" - }, - "#": 'unknown' - }] - } - ``` - - will be transformed to the following XML snippet: - - ``` -
- 21st - Chicago -
-
unknown
- ``` - - This feature is also useful to customize your RSS feeds generated for various websites. - - By default the whole result is wrapped in a `` element and each page object is wrapped in a `` element. - You can change this using xmlRoot and xmlRow url parameters. - - ### Pagination - - The generated response supports [pagination](#/introduction/pagination). - The pagination is always performed with the granularity of a single item, regardless whether unwind parameter was provided. - By default, the **Items** in the response are sorted by the time they were stored to the database, therefore you can use pagination to incrementally fetch the items as they are being added. - No limit exists to how many items can be returned in one response. - - If you specify `desc=1` query parameter, the results are returned in the reverse order than they were stored (i.e. from newest to oldest items). - Note that only the order of **Items** is reversed, but not the order of the `unwind` array elements. 
- operationId: dataset_items_get - parameters: - - $ref: "../../components/parameters/storageParameters.yaml#/datasetId" - - $ref: "../../components/parameters/datasetItemsParameters.yaml#/format" - - $ref: "../../components/parameters/datasetItemsParameters.yaml#/clean" - - $ref: "../../components/parameters/paginationParameters.yaml#/offset" - - $ref: "../../components/parameters/datasetItemsParameters.yaml#/limit" - - $ref: "../../components/parameters/datasetItemsParameters.yaml#/fields" - - $ref: "../../components/parameters/datasetItemsParameters.yaml#/omit" - - $ref: "../../components/parameters/datasetItemsParameters.yaml#/unwind" - - $ref: "../../components/parameters/datasetItemsParameters.yaml#/flatten" - - $ref: "../../components/parameters/datasetItemsParameters.yaml#/descDataset" - - $ref: "../../components/parameters/datasetItemsParameters.yaml#/attachment" - - $ref: "../../components/parameters/datasetItemsParameters.yaml#/delimiter" - - $ref: "../../components/parameters/datasetItemsParameters.yaml#/bom" - - $ref: "../../components/parameters/datasetItemsParameters.yaml#/xmlRoot" - - $ref: "../../components/parameters/datasetItemsParameters.yaml#/xmlRow" - - $ref: "../../components/parameters/datasetItemsParameters.yaml#/skipHeaderRow" - - $ref: "../../components/parameters/datasetItemsParameters.yaml#/skipHidden" - - $ref: "../../components/parameters/datasetItemsParameters.yaml#/skipEmpty" - - $ref: "../../components/parameters/datasetItemsParameters.yaml#/simplified" - - $ref: "../../components/parameters/datasetItemsParameters.yaml#/view" - - $ref: "../../components/parameters/datasetItemsParameters.yaml#/skipFailedPages" - - $ref: "../../components/parameters/storageParameters.yaml#/signature" - responses: - "200": - description: "" - headers: - $ref: ../../components/headers/ApifyPaginationHeaders.yaml - content: - application/json: - schema: - type: array - items: - type: object - example: [foo: bar, foo2: bar2] - application/jsonl: - 
schema: - type: string - example: '{"foo":"bar"}\n{"foo2":"bar2"}\n' - text/csv: - schema: - type: string - example: 'foo,bar\nfoo2,bar2\n' - text/html: - schema: - type: string - example:
foobar
foobar
foo2bar2
- application/vnd.openxmlformats-officedocument.spreadsheetml.sheet: - schema: - type: string - application/rss+xml: - schema: - type: string - example: barbar2 - application/xml: - schema: - type: string - example: barbar2 - "400": - $ref: ../../components/responses/BadRequest.yaml - "401": - $ref: ../../components/responses/Unauthorized.yaml - "403": - $ref: ../../components/responses/Forbidden.yaml - "404": - $ref: ../../components/responses/NotFound.yaml - "405": - $ref: ../../components/responses/MethodNotAllowed.yaml - "429": - $ref: ../../components/responses/TooManyRequests.yaml - deprecated: false - x-legacy-doc-urls: - - https://docs.apify.com/api/v2#/reference/datasets/item-collection/get-items - - https://docs.apify.com/api/v2#/reference/datasets/get-items - - https://docs.apify.com/api/v2#tag/DatasetsItem-collection/operation/dataset_items_get - x-js-parent: DatasetClient - x-js-name: listItems - x-js-doc-url: https://docs.apify.com/api/client/js/reference/class/DatasetClient#listItems - x-py-parent: DatasetClientAsync - x-py-name: stream_items - x-py-doc-url: https://docs.apify.com/api/client/python/reference/class/DatasetClientAsync#stream_items + $ref: "../../components/objects/datasets/dataset-items.yaml#/getById" head: - tags: - - Storage/Datasets - summary: Get dataset items headers - description: | - Returns only the HTTP headers for the dataset items endpoint, without the response body. - This is useful to check pagination metadata or verify access without downloading the full dataset. 
- operationId: dataset_items_head - parameters: - - $ref: "../../components/parameters/storageParameters.yaml#/datasetId" - - $ref: "../../components/parameters/datasetItemsParameters.yaml#/format" - - $ref: "../../components/parameters/datasetItemsParameters.yaml#/clean" - - $ref: "../../components/parameters/paginationParameters.yaml#/offset" - - $ref: "../../components/parameters/datasetItemsParameters.yaml#/limit" - - $ref: "../../components/parameters/datasetItemsParameters.yaml#/fields" - - $ref: "../../components/parameters/datasetItemsParameters.yaml#/omit" - - $ref: "../../components/parameters/datasetItemsParameters.yaml#/unwind" - - $ref: "../../components/parameters/datasetItemsParameters.yaml#/flatten" - - $ref: "../../components/parameters/datasetItemsParameters.yaml#/descDataset" - - $ref: "../../components/parameters/datasetItemsParameters.yaml#/attachment" - - $ref: "../../components/parameters/datasetItemsParameters.yaml#/delimiter" - - $ref: "../../components/parameters/datasetItemsParameters.yaml#/bom" - - $ref: "../../components/parameters/datasetItemsParameters.yaml#/xmlRoot" - - $ref: "../../components/parameters/datasetItemsParameters.yaml#/xmlRow" - - $ref: "../../components/parameters/datasetItemsParameters.yaml#/skipHeaderRow" - - $ref: "../../components/parameters/datasetItemsParameters.yaml#/skipHidden" - - $ref: "../../components/parameters/datasetItemsParameters.yaml#/skipEmpty" - - $ref: "../../components/parameters/datasetItemsParameters.yaml#/simplified" - - $ref: "../../components/parameters/datasetItemsParameters.yaml#/view" - - $ref: "../../components/parameters/datasetItemsParameters.yaml#/skipFailedPages" - - $ref: "../../components/parameters/storageParameters.yaml#/signature" - responses: - "200": - description: "" - headers: - $ref: ../../components/headers/ApifyPaginationHeaders.yaml - content: {} - "400": - $ref: ../../components/responses/BadRequest.yaml - deprecated: false + $ref: 
"../../components/objects/datasets/dataset-items.yaml#/headById" post: - tags: - - Storage/Datasets - summary: Store items - description: | - Appends an item or an array of items to the end of the dataset. - The POST payload is a JSON object or a JSON array of objects to save into the dataset. - - If the data you attempt to store in the dataset is invalid (meaning any of the items received by the API fails the validation), the whole request is discarded and the API will return a response with status code 400. - For more information about dataset schema validation, see [Dataset schema](https://docs.apify.com/platform/actors/development/actor-definition/dataset-schema/validation). - - **IMPORTANT:** The limit of request payload size for the dataset is 5 MB. If the array exceeds the size, you'll need to split it into a number of smaller arrays. - operationId: dataset_items_post - parameters: - - $ref: "../../components/parameters/storageParameters.yaml#/datasetId" - requestBody: - description: "" - content: - application/json: - schema: - oneOf: - - $ref: ../../components/schemas/datasets/PutItemsRequest.yaml - - type: array - items: - $ref: ../../components/schemas/datasets/PutItemsRequest.yaml - description: "" - required: true - responses: - "201": - description: "" - headers: - Location: - content: - text/plain: - schema: - type: string - example: https://api.apify.com/v2/datasets/WkzbQMuFYuamGv3YF/items - content: - application/json: - schema: - type: object - example: {} - "400": - description: "" - headers: {} - content: - application/json: - schema: - $ref: ../../components/schemas/datasets/PutItemResponseError.yaml - "403": - $ref: ../../components/responses/Forbidden.yaml - "404": - $ref: ../../components/responses/NotFound.yaml - deprecated: false - x-legacy-doc-urls: - - https://docs.apify.com/api/v2#/reference/datasets/item-collection/put-items - - https://docs.apify.com/api/v2#/reference/datasets/put-items - - 
https://docs.apify.com/api/v2#tag/DatasetsItem-collection/operation/dataset_items_post - x-js-parent: DatasetClient - x-js-name: pushItems - x-js-doc-url: https://docs.apify.com/api/client/js/reference/class/DatasetClient#pushItems - x-py-parent: DatasetClientAsync - x-py-name: push_items - x-py-doc-url: https://docs.apify.com/api/client/python/reference/class/DatasetClientAsync#push_items + $ref: "../../components/objects/datasets/dataset-items.yaml#/postById" diff --git a/apify-api/openapi/paths/datasets/datasets@{datasetId}@statistics.yaml b/apify-api/openapi/paths/datasets/datasets@{datasetId}@statistics.yaml index 6dbc8d0028..af6c3dd054 100644 --- a/apify-api/openapi/paths/datasets/datasets@{datasetId}@statistics.yaml +++ b/apify-api/openapi/paths/datasets/datasets@{datasetId}@statistics.yaml @@ -1,38 +1,2 @@ get: - tags: - - Storage/Datasets - summary: Get dataset statistics - description: | - Returns statistics for given dataset. - - Provides only [field statistics](https://docs.apify.com/platform/actors/development/actor-definition/dataset-schema/validation#dataset-field-statistics). 
- - operationId: dataset_statistics_get - parameters: - - $ref: "../../components/parameters/storageParameters.yaml#/datasetId" - responses: - "200": - description: "" - content: - application/json: - schema: - $ref: ../../components/schemas/datasets/DatasetStatisticsResponse.yaml - # TODO: add clients methods - # x-js-parent: DatasetClient - # x-js-name: statistics - # x-js-doc-url: https://docs.apify.com/api/client/js/reference/class/DatasetClient#statistics - # x-py-parent: DatasetClientAsync - # x-py-name: statistics - # x-py-doc-url: https://docs.apify.com/api/client/python/reference/class/DatasetClientAsync#statistics - "400": - $ref: ../../components/responses/BadRequest.yaml - "401": - $ref: ../../components/responses/Unauthorized.yaml - "403": - $ref: ../../components/responses/Forbidden.yaml - "404": - $ref: ../../components/responses/NotFound.yaml - "405": - $ref: ../../components/responses/MethodNotAllowed.yaml - "429": - $ref: ../../components/responses/TooManyRequests.yaml + $ref: "../../components/objects/datasets/dataset-statistics.yaml#/getById"