From 2f4cf266f05e1bfde0aa7f2a58dba9f4f2741d8f Mon Sep 17 00:00:00 2001 From: Hector Castejon Diaz Date: Mon, 8 Jun 2026 14:28:01 +0000 Subject: [PATCH] =?UTF-8?q?[DRY-RUN]=20Sample=20sync=20from=20Universe=20v?= =?UTF-8?q?0.115.0=20=E2=80=94=20do=20not=20merge?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sample OSS-side PR produced by walking through release.md step 6 (`genkit sync-sdk`). Captures everything the mirror writes from the Universe subtree on top of the most recent real OSS release. Reference: see Universe PR #2047219 (the companion dry-run release PR) and `deco/oss/docs/databricks-sdk-py/dryrun.md` in Universe for the walk-through, findings, and outstanding fixes. Notable issues this diff exposes: - mirror deletes OSS-only files (tagging.py, .codegen/_openapi_sha, .claude/, .vscode/) — they're in skip_from_source but not preserve_in_target. Do not merge — sample for review only. --- .claude/commands/write-pr-description.md | 77 - .codegen.json | 15 +- .codegen/_openapi_sha | 1 - .gitattributes | 3 +- .release_metadata.json | 2 +- .vscode/settings.json | 11 - CHANGELOG.md | 36 +- NEXT_CHANGELOG.md | 4 +- databricks/sdk/__init__.py | 18 +- databricks/sdk/errors/parser.py | 2 +- databricks/sdk/service/aisearch.py | 1818 +++++++++++++++++ databricks/sdk/service/apps.py | 13 +- databricks/sdk/service/bundledeployments.py | 1272 ++++++++++++ databricks/sdk/service/catalog.py | 99 +- databricks/sdk/service/disasterrecovery.py | 6 +- databricks/sdk/service/jobs.py | 5 + databricks/sdk/service/ml.py | 49 +- databricks/sdk/service/pipelines.py | 76 +- databricks/sdk/service/postgres.py | 56 + databricks/sdk/service/settings.py | 36 + databricks/sdk/service/sql.py | 2 - databricks/sdk/service/vectorsearch.py | 82 +- databricks/sdk/service/workspace.py | 14 + docs/dbdataclasses/aisearch.rst | 228 +++ docs/dbdataclasses/bundledeployments.rst | 225 ++ docs/dbdataclasses/catalog.rst | 6 +- docs/dbdataclasses/index.rst | 
3 +- docs/dbdataclasses/ml.rst | 4 + docs/dbdataclasses/settings.rst | 4 + docs/dbdataclasses/vectorsearch.rst | 4 + docs/packages.py | 3 +- docs/workspace/aisearch/ai_search.rst | 229 +++ docs/workspace/aisearch/index.rst | 10 + .../bundledeployments/bundle_deployments.rst | 219 ++ docs/workspace/bundledeployments/index.rst | 10 + docs/workspace/catalog/catalogs.rst | 12 +- docs/workspace/catalog/connections.rst | 8 +- docs/workspace/catalog/external_locations.rst | 20 +- docs/workspace/catalog/schemas.rst | 16 +- .../workspace/catalog/storage_credentials.rst | 14 +- docs/workspace/iam/permissions.rst | 2 +- docs/workspace/index.rst | 3 +- docs/workspace/ml/model_registry.rst | 15 +- .../vectorsearch/vector_search_indexes.rst | 12 +- docs/workspace/workspace/repos.rst | 13 +- docs/workspace/workspace/workspace.rst | 29 +- tagging.py | 1057 ---------- tests/test_errors.py | 6 +- 48 files changed, 4594 insertions(+), 1255 deletions(-) delete mode 100644 .claude/commands/write-pr-description.md delete mode 100755 .codegen/_openapi_sha delete mode 100644 .vscode/settings.json create mode 100755 databricks/sdk/service/aisearch.py create mode 100755 databricks/sdk/service/bundledeployments.py create mode 100644 docs/dbdataclasses/aisearch.rst create mode 100644 docs/dbdataclasses/bundledeployments.rst create mode 100644 docs/workspace/aisearch/ai_search.rst create mode 100644 docs/workspace/aisearch/index.rst create mode 100644 docs/workspace/bundledeployments/bundle_deployments.rst create mode 100644 docs/workspace/bundledeployments/index.rst delete mode 100755 tagging.py diff --git a/.claude/commands/write-pr-description.md b/.claude/commands/write-pr-description.md deleted file mode 100644 index 6889c809c..000000000 --- a/.claude/commands/write-pr-description.md +++ /dev/null @@ -1,77 +0,0 @@ ---- -description: > - Write or improve a GitHub pull request description. 
Use when the user asks - to "write a PR description", "improve the PR description", "update the PR - body", or provides a PR URL and asks for a better description. - Keywords: PR description, pull request summary, PR body, PR writeup. -allowed-tools: [Read, Glob, Grep, Bash, ToolSearch] ---- - -# Write PR Description - -Generate a structured PR description that explains **why** the change exists, -not just what files were touched. - -## When to use - -- The user provides a PR URL and asks to write or improve its description. -- The user asks to draft a PR description for the current branch. -- An agent needs to open a PR and wants a high-quality description. - -## Workflow - -### Phase 1: Gather context - -Collect all the information needed to understand the change: - -1. **Read the PR metadata** — title, current description, author, branch name. -2. **Read the full diff** — understand every file changed, every function added - or removed, every signature change. Do not skip files. -3. **Read surrounding code when needed** — if the diff modifies an interface or - a struct, read the full file to understand how the change fits into the - existing architecture. -4. **Check for linked issues or docs** — the PR or commit messages may - reference issues, design docs, or RFCs that explain motivation. - -### Phase 2: Analyze the change - -Before writing, answer these questions internally: - -- **What was the status quo before this PR?** What limitation, bug, or missing - capability existed? -- **Why is this change needed now?** What concrete problem does it solve? Who - benefits? -- **What are the key design decisions?** Why was this approach chosen over - alternatives? -- **What is the new API surface?** Any new public types, functions, or - configuration options? -- **What are the architectural changes?** How does the internal structure - change? What moves where? What gets refactored? 
-- **Are there behavioral changes?** If not, say so explicitly — this reassures - reviewers. - -### Phase 3: Write the description - -Use the structure defined in `.github/PULL_REQUEST_TEMPLATE.md` as the -template. The tone should be direct and technical. Write for a reviewer who is -familiar with the codebase but has not seen this change before. - -**Key principles:** - -- **Lead with why, not what.** The diff already shows the what. The description - should explain the reasoning that is not visible in the code. -- **Be specific.** Instead of "improves extensibility", say "allows internal - tools to compose their own auth chain from individual credential strategies". -- **Name things.** Reference actual types, functions, files, and config fields. - Use backticks for code references. -- **State non-changes explicitly.** If the PR is a refactor with no behavioral - change, say "No behavioral changes. Existing users are unaffected." This is - valuable information for reviewers. -- **Keep the summary to one or two sentences.** It should be scannable. -- **Use the motivation section to tell a story.** What was the problem? Why - couldn't it be solved before? What does this PR unlock? - -### Phase 4: Update the PR - -Use the GitHub MCP tools or `gh` CLI to update the PR body with the new -description. Confirm with the user before pushing if unsure. 
diff --git a/.codegen.json b/.codegen.json index 2ef68a1a1..7e4a82609 100644 --- a/.codegen.json +++ b/.codegen.json @@ -1,9 +1,22 @@ { "mode": "py_v0", - "api_changelog": true, + "api_changelog": false, "version": { "databricks/sdk/version.py": "__version__ = \"$VERSION\"" }, + "sync": { + "skip_from_source": [ + "BUILD.bazel", + "CLAUDE.md", + ".claude", + ".codegen", + ".cursor", + ".cursorrules", + ".nextchanges", + "changelog_validate", + "tagging.py" + ] + }, "toolchain": { "required": ["python3.12", "uv"], "pre_setup": [ diff --git a/.codegen/_openapi_sha b/.codegen/_openapi_sha deleted file mode 100755 index c30ba6afa..000000000 --- a/.codegen/_openapi_sha +++ /dev/null @@ -1 +0,0 @@ -c68a27fa9c9d838e839c584be8018eb3b68377d5 \ No newline at end of file diff --git a/.gitattributes b/.gitattributes index 5066a4de2..727d36594 100755 --- a/.gitattributes +++ b/.gitattributes @@ -2,9 +2,10 @@ databricks/sdk/__init__.py linguist-generated=true databricks/sdk/errors/overrides.py linguist-generated=true databricks/sdk/errors/platform.py linguist-generated=true databricks/sdk/service/agentbricks.py linguist-generated=true +databricks/sdk/service/aisearch.py linguist-generated=true databricks/sdk/service/apps.py linguist-generated=true databricks/sdk/service/billing.py linguist-generated=true -databricks/sdk/service/bundle.py linguist-generated=true +databricks/sdk/service/bundledeployments.py linguist-generated=true databricks/sdk/service/catalog.py linguist-generated=true databricks/sdk/service/cleanrooms.py linguist-generated=true databricks/sdk/service/compute.py linguist-generated=true diff --git a/.release_metadata.json b/.release_metadata.json index 70854112f..b57b7ca6f 100644 --- a/.release_metadata.json +++ b/.release_metadata.json @@ -1,3 +1,3 @@ { - "timestamp": "2026-06-08 09:40:21+0000" + "timestamp": "2026-06-02 09:31:06+0000" } \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json deleted file mode 100644 index 
c2c2020ec..000000000 --- a/.vscode/settings.json +++ /dev/null @@ -1,11 +0,0 @@ -{ - "python.testing.pytestArgs": [ - "tests" - ], - "python.testing.unittestEnabled": false, - "python.testing.pytestEnabled": true, - "python.envFile": "${workspaceRoot}/.env", - "databricks.python.envFile": "${workspaceFolder}/.env", - "jupyter.interactiveWindow.cellMarker.codeRegex": "^# COMMAND ----------|^# Databricks notebook source|^(#\\s*%%|#\\s*\\<codecell\\>|#\\s*In\\[\\d*?\\]|#\\s*In\\[ \\])", - "jupyter.interactiveWindow.cellMarker.default": "# COMMAND ----------" -} diff --git a/CHANGELOG.md b/CHANGELOG.md index 7ddc3e43b..1026fbb70 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,9 +3,41 @@ ## Release v0.115.0 (2026-06-08) ### New Features and Improvements +* Dry-run cutover entry — sample changelog to verify the + `.nextchanges/` → `prepare-release` → ledger flow end-to-end. + Remove before opening the real release PR. -* Honor the Vercel `AI_AGENT=` env var as a secondary fallback for AI agent detection in the User-Agent header (after the agents.md `AGENT=` standard). Unrecognized fallback values now pass through the User-Agent sanitized and length-capped at 64 chars instead of being coerced to `agent/unknown`, so versioned variants such as `claude-code_2-1-141_agent` surface as-is. - +### API Changes +* Add `databricks.sdk.service.aisearch` and `databricks.sdk.service.bundledeployments` packages. +* Add [w.ai_search](https://databricks-sdk-py.readthedocs.io/en/latest/workspace/aisearch/ai_search.html) workspace-level service. +* Add [w.bundle_deployments](https://databricks-sdk-py.readthedocs.io/en/latest/workspace/bundledeployments/bundle_deployments.html) workspace-level service. +* Add `running_instances` field for `databricks.sdk.service.apps.ApplicationStatus`. +* Add `custom_max_retention_hours` field for `databricks.sdk.service.catalog.CatalogInfo`. +* Add `environment_settings` field for `databricks.sdk.service.catalog.ConnectionInfo`. 
+* Add `custom_max_retention_hours` field for `databricks.sdk.service.catalog.CreateCatalog`. +* Add `environment_settings` field for `databricks.sdk.service.catalog.CreateConnection`. +* Add `custom_max_retention_hours` field for `databricks.sdk.service.catalog.CreateSchema`. +* Add `custom_max_retention_hours` field for `databricks.sdk.service.catalog.SchemaInfo`. +* Add `custom_max_retention_hours` field for `databricks.sdk.service.catalog.UpdateCatalog`. +* Add `environment_settings` field for `databricks.sdk.service.catalog.UpdateConnection`. +* Add `custom_max_retention_hours` field for `databricks.sdk.service.catalog.UpdateSchema`. +* Add `stream_source` field for `databricks.sdk.service.ml.DataSource`. +* Add `ingestion_config` field for `databricks.sdk.service.ml.KafkaConfig`. +* Add `clustering_columns`, `enable_auto_clustering` and `table_properties` fields for `databricks.sdk.service.pipelines.TableSpecificConfig`. +* Add `branch_id` field for `databricks.sdk.service.postgres.Branch`. +* Add `catalog_id` field for `databricks.sdk.service.postgres.Catalog`. +* Add `database_id` field for `databricks.sdk.service.postgres.Database`. +* Add `endpoint_id` field for `databricks.sdk.service.postgres.Endpoint`. +* Add `project_id` field for `databricks.sdk.service.postgres.Project`. +* Add `role_id` field for `databricks.sdk.service.postgres.Role`. +* Add `synced_table_id` field for `databricks.sdk.service.postgres.SyncedTable`. +* Add `allowed_databricks_destinations` field for `databricks.sdk.service.settings.EgressNetworkPolicyNetworkAccessPolicy`. +* Add `facets`, `query_columns` and `sort_columns` fields for `databricks.sdk.service.vectorsearch.QueryVectorIndexRequest`. +* Add `facet_result` field for `databricks.sdk.service.vectorsearch.QueryVectorIndexResponse`. +* Add `facet_column_count` and `facet_columns` fields for `databricks.sdk.service.vectorsearch.ResultManifest`. 
+* Add `dangerously_force_discard_all` field for `databricks.sdk.service.workspace.UpdateRepoRequest`. +* [Breaking] Remove `databricks.sdk.service.bundle` package. +* [Breaking] Remove [w.bundle](https://databricks-sdk-py.readthedocs.io/en/latest/workspace/bundle/bundle.html) workspace-level service. ## Release v0.114.0 (2026-06-02) diff --git a/NEXT_CHANGELOG.md b/NEXT_CHANGELOG.md index 332edd0bb..32901b0cd 100644 --- a/NEXT_CHANGELOG.md +++ b/NEXT_CHANGELOG.md @@ -1,9 +1,11 @@ # NEXT CHANGELOG -## Release v0.116.0 +## Release v0.115.0 ### New Features and Improvements +* Honor the Vercel `AI_AGENT=` env var as a secondary fallback for AI agent detection in the User-Agent header (after the agents.md `AGENT=` standard). Unrecognized fallback values now pass through the User-Agent sanitized and length-capped at 64 chars instead of being coerced to `agent/unknown`, so versioned variants such as `claude-code_2-1-141_agent` surface as-is. + ### Security ### Bug Fixes diff --git a/databricks/sdk/__init__.py b/databricks/sdk/__init__.py index f5cb7cb4c..4c911e26f 100755 --- a/databricks/sdk/__init__.py +++ b/databricks/sdk/__init__.py @@ -17,9 +17,10 @@ from databricks.sdk.mixins.workspace import WorkspaceExt from databricks.sdk.oauth import AuthorizationDetail from databricks.sdk.service import agentbricks as pkg_agentbricks +from databricks.sdk.service import aisearch as pkg_aisearch from databricks.sdk.service import apps as pkg_apps from databricks.sdk.service import billing as pkg_billing -from databricks.sdk.service import bundle as pkg_bundle +from databricks.sdk.service import bundledeployments as pkg_bundledeployments from databricks.sdk.service import catalog as pkg_catalog from databricks.sdk.service import cleanrooms as pkg_cleanrooms from databricks.sdk.service import compute as pkg_compute @@ -52,6 +53,7 @@ from databricks.sdk.service import vectorsearch as pkg_vectorsearch from databricks.sdk.service import workspace as pkg_workspace from 
databricks.sdk.service.agentbricks import AgentBricksAPI +from databricks.sdk.service.aisearch import AiSearchAPI from databricks.sdk.service.apps import AppsAPI, AppsSettingsAPI from databricks.sdk.service.billing import ( BillableUsageAPI, @@ -60,7 +62,7 @@ LogDeliveryAPI, UsageDashboardsAPI, ) -from databricks.sdk.service.bundle import BundleAPI +from databricks.sdk.service.bundledeployments import BundleDeploymentsAPI from databricks.sdk.service.catalog import ( AccountMetastoreAssignmentsAPI, AccountMetastoresAPI, @@ -363,13 +365,14 @@ def __init__( self._access_control = pkg_iam.AccessControlAPI(self._api_client) self._account_access_control_proxy = pkg_iam.AccountAccessControlProxyAPI(self._api_client) self._agent_bricks = pkg_agentbricks.AgentBricksAPI(self._api_client) + self._ai_search = pkg_aisearch.AiSearchAPI(self._api_client) self._alerts = pkg_sql.AlertsAPI(self._api_client) self._alerts_legacy = pkg_sql.AlertsLegacyAPI(self._api_client) self._alerts_v2 = pkg_sql.AlertsV2API(self._api_client) self._apps = pkg_apps.AppsAPI(self._api_client) self._apps_settings = pkg_apps.AppsSettingsAPI(self._api_client) self._artifact_allowlists = pkg_catalog.ArtifactAllowlistsAPI(self._api_client) - self._bundle = pkg_bundle.BundleAPI(self._api_client) + self._bundle_deployments = pkg_bundledeployments.BundleDeploymentsAPI(self._api_client) self._catalogs = pkg_catalog.CatalogsAPI(self._api_client) self._clean_room_asset_revisions = pkg_cleanrooms.CleanRoomAssetRevisionsAPI(self._api_client) self._clean_room_assets = pkg_cleanrooms.CleanRoomAssetsAPI(self._api_client) @@ -527,6 +530,11 @@ def agent_bricks(self) -> pkg_agentbricks.AgentBricksAPI: """The Custom LLMs service manages state and powers the UI for the Custom LLM product.""" return self._agent_bricks + @property + def ai_search(self) -> pkg_aisearch.AiSearchAPI: + """**AI Search Endpoint**: Represents the compute resources to host AI Search indexes.""" + return self._ai_search + @property def alerts(self) 
-> pkg_sql.AlertsAPI: """The alerts API can be used to perform CRUD operations on alerts.""" @@ -558,9 +566,9 @@ def artifact_allowlists(self) -> pkg_catalog.ArtifactAllowlistsAPI: return self._artifact_allowlists @property - def bundle(self) -> pkg_bundle.BundleAPI: + def bundle_deployments(self) -> pkg_bundledeployments.BundleDeploymentsAPI: """Service for managing bundle deployment metadata.""" - return self._bundle + return self._bundle_deployments @property def catalogs(self) -> pkg_catalog.CatalogsAPI: diff --git a/databricks/sdk/errors/parser.py b/databricks/sdk/errors/parser.py index 2f6bfe25e..e88dd7870 100644 --- a/databricks/sdk/errors/parser.py +++ b/databricks/sdk/errors/parser.py @@ -46,7 +46,7 @@ def _unknown_error(response: requests.Response, debug_headers: bool = False) -> return ( "This is likely a bug in the Databricks SDK for Python or the underlying " "API. Please report this issue with the following debugging information to the SDK issue tracker at " - f"https://github.com/databricks/databricks-sdk-py/issues. Request log:```{request_log}```" + f"https://github.com/databricks/databricks-sdk-go/issues. Request log:```{request_log}```" ) diff --git a/databricks/sdk/service/aisearch.py b/databricks/sdk/service/aisearch.py new file mode 100755 index 000000000..bbb329756 --- /dev/null +++ b/databricks/sdk/service/aisearch.py @@ -0,0 +1,1818 @@ +# Code generated from OpenAPI specs by Databricks SDK Generator. DO NOT EDIT. +# ruff: noqa: F811, F841 +# F401 is intentionally NOT covered: `make fmt` uses `ruff check --fix-only` +# to strip the fat-import header below; ignoring F401 would defeat that. 
+ +from __future__ import annotations + +import logging +from dataclasses import dataclass +from enum import Enum +from typing import Any, Dict, Iterator, List, Optional + +from google.protobuf.timestamp_pb2 import Timestamp + +from databricks.sdk.common.types.fieldmask import FieldMask +from databricks.sdk.service._internal import ( + _enum, + _from_dict, + _repeated_dict, + _timestamp, +) + +_LOG = logging.getLogger("databricks.sdk") + + +# all definitions in this file are in alphabetical order + + +@dataclass +class ColumnInfo: + """Column information (name and data type) for an index column. Surfaced on `Index.column_info`.""" + + name: Optional[str] = None + """Name of the column.""" + + type_text: Optional[str] = None + """Data type of the column (e.g., "string", "int", "array<float>").""" + + def as_dict(self) -> dict: + """Serializes the ColumnInfo into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.name is not None: + body["name"] = self.name + if self.type_text is not None: + body["type_text"] = self.type_text + return body + + def as_shallow_dict(self) -> dict: + """Serializes the ColumnInfo into a shallow dictionary of its immediate attributes.""" + body = {} + if self.name is not None: + body["name"] = self.name + if self.type_text is not None: + body["type_text"] = self.type_text + return body + + @classmethod + def from_dict(cls, d: Dict[str, Any]) -> ColumnInfo: + """Deserializes the ColumnInfo from a dictionary.""" + return cls(name=d.get("name", None), type_text=d.get("type_text", None)) + + +@dataclass +class CustomTag: + """User-defined key/value tag attached to an AI Search endpoint for cost attribution and access + control.""" + + key: str + """Key field for an AI Search endpoint tag.""" + + value: Optional[str] = None + """[Optional] Value field for an AI Search endpoint tag.""" + + def as_dict(self) -> dict: + """Serializes the CustomTag into a dictionary suitable for use as a JSON request body.""" + body = {} + if 
self.key is not None: + body["key"] = self.key + if self.value is not None: + body["value"] = self.value + return body + + def as_shallow_dict(self) -> dict: + """Serializes the CustomTag into a shallow dictionary of its immediate attributes.""" + body = {} + if self.key is not None: + body["key"] = self.key + if self.value is not None: + body["value"] = self.value + return body + + @classmethod + def from_dict(cls, d: Dict[str, Any]) -> CustomTag: + """Deserializes the CustomTag from a dictionary.""" + return cls(key=d.get("key", None), value=d.get("value", None)) + + +@dataclass +class DataModificationResult: + """Per-row outcome of a data-plane upsert or delete operation.""" + + failed_primary_keys: Optional[List[str]] = None + """Primary keys of rows that failed to process.""" + + success_row_count: Optional[int] = None + """Count of rows processed successfully.""" + + def as_dict(self) -> dict: + """Serializes the DataModificationResult into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.failed_primary_keys: + body["failed_primary_keys"] = [v for v in self.failed_primary_keys] + if self.success_row_count is not None: + body["success_row_count"] = self.success_row_count + return body + + def as_shallow_dict(self) -> dict: + """Serializes the DataModificationResult into a shallow dictionary of its immediate attributes.""" + body = {} + if self.failed_primary_keys: + body["failed_primary_keys"] = self.failed_primary_keys + if self.success_row_count is not None: + body["success_row_count"] = self.success_row_count + return body + + @classmethod + def from_dict(cls, d: Dict[str, Any]) -> DataModificationResult: + """Deserializes the DataModificationResult from a dictionary.""" + return cls( + failed_primary_keys=d.get("failed_primary_keys", None), success_row_count=d.get("success_row_count", None) + ) + + +class DataModificationStatus(Enum): + """Overall outcome of a data-plane upsert or delete. 
Mirrors the legacy + `databricks.brickindexscheduler.UpsertDeleteDataStatus` value-for-value.""" + + FAILURE = "FAILURE" + PARTIAL_SUCCESS = "PARTIAL_SUCCESS" + SUCCESS = "SUCCESS" + + +@dataclass +class DeltaSyncIndexSpec: + """Specification for a Delta Sync index — the index is kept in sync with a source Delta table.""" + + pipeline_type: PipelineType + """Pipeline execution mode. Required on create — the backend rejects an unset value. Storage + Optimized endpoints accept only `TRIGGERED`; Standard endpoints accept both. No explicit `stage` + — a REQUIRED field staged below its service would be dropped from combined specs while + remaining in `required`, tripping the OpenAPI required-vs-properties consistency check. The + field inherits the service's launch stage.""" + + columns_to_sync: Optional[List[str]] = None + """[Optional] Select the columns to sync with the index. If left blank, all columns from the source + table are synced. The primary key column and embedding source or vector column are always + synced.""" + + embedding_source_columns: Optional[List[EmbeddingSourceColumn]] = None + """The columns that contain the embedding source.""" + + embedding_vector_columns: Optional[List[EmbeddingVectorColumn]] = None + """The columns that contain the embedding vectors.""" + + embedding_writeback_table: Optional[str] = None + """[Optional] Name of the Delta table to sync the index contents and computed embeddings to.""" + + pipeline_id: Optional[str] = None + """The ID of the pipeline that is used to sync the index.""" + + source_table: Optional[str] = None + """The full name of the source Delta table.""" + + def as_dict(self) -> dict: + """Serializes the DeltaSyncIndexSpec into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.columns_to_sync: + body["columns_to_sync"] = [v for v in self.columns_to_sync] + if self.embedding_source_columns: + body["embedding_source_columns"] = [v.as_dict() for v in self.embedding_source_columns] + if 
self.embedding_vector_columns: + body["embedding_vector_columns"] = [v.as_dict() for v in self.embedding_vector_columns] + if self.embedding_writeback_table is not None: + body["embedding_writeback_table"] = self.embedding_writeback_table + if self.pipeline_id is not None: + body["pipeline_id"] = self.pipeline_id + if self.pipeline_type is not None: + body["pipeline_type"] = self.pipeline_type.value + if self.source_table is not None: + body["source_table"] = self.source_table + return body + + def as_shallow_dict(self) -> dict: + """Serializes the DeltaSyncIndexSpec into a shallow dictionary of its immediate attributes.""" + body = {} + if self.columns_to_sync: + body["columns_to_sync"] = self.columns_to_sync + if self.embedding_source_columns: + body["embedding_source_columns"] = self.embedding_source_columns + if self.embedding_vector_columns: + body["embedding_vector_columns"] = self.embedding_vector_columns + if self.embedding_writeback_table is not None: + body["embedding_writeback_table"] = self.embedding_writeback_table + if self.pipeline_id is not None: + body["pipeline_id"] = self.pipeline_id + if self.pipeline_type is not None: + body["pipeline_type"] = self.pipeline_type + if self.source_table is not None: + body["source_table"] = self.source_table + return body + + @classmethod + def from_dict(cls, d: Dict[str, Any]) -> DeltaSyncIndexSpec: + """Deserializes the DeltaSyncIndexSpec from a dictionary.""" + return cls( + columns_to_sync=d.get("columns_to_sync", None), + embedding_source_columns=_repeated_dict(d, "embedding_source_columns", EmbeddingSourceColumn), + embedding_vector_columns=_repeated_dict(d, "embedding_vector_columns", EmbeddingVectorColumn), + embedding_writeback_table=d.get("embedding_writeback_table", None), + pipeline_id=d.get("pipeline_id", None), + pipeline_type=_enum(d, "pipeline_type", PipelineType), + source_table=d.get("source_table", None), + ) + + +@dataclass +class DirectAccessIndexSpec: + """Specification for a Direct Access 
index — the customer manages vectors and metadata directly.""" + + embedding_source_columns: Optional[List[EmbeddingSourceColumn]] = None + """The columns that contain the embedding source.""" + + embedding_vector_columns: Optional[List[EmbeddingVectorColumn]] = None + """The columns that contain the embedding vectors.""" + + schema_json: Optional[str] = None + """The schema of the index in JSON format. Supported types are `integer`, `long`, `float`, + `double`, `boolean`, `string`, `date`, `timestamp`. Supported types for vector columns: + `array<float>`, `array<double>`.""" + + def as_dict(self) -> dict: + """Serializes the DirectAccessIndexSpec into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.embedding_source_columns: + body["embedding_source_columns"] = [v.as_dict() for v in self.embedding_source_columns] + if self.embedding_vector_columns: + body["embedding_vector_columns"] = [v.as_dict() for v in self.embedding_vector_columns] + if self.schema_json is not None: + body["schema_json"] = self.schema_json + return body + + def as_shallow_dict(self) -> dict: + """Serializes the DirectAccessIndexSpec into a shallow dictionary of its immediate attributes.""" + body = {} + if self.embedding_source_columns: + body["embedding_source_columns"] = self.embedding_source_columns + if self.embedding_vector_columns: + body["embedding_vector_columns"] = self.embedding_vector_columns + if self.schema_json is not None: + body["schema_json"] = self.schema_json + return body + + @classmethod + def from_dict(cls, d: Dict[str, Any]) -> DirectAccessIndexSpec: + """Deserializes the DirectAccessIndexSpec from a dictionary.""" + return cls( + embedding_source_columns=_repeated_dict(d, "embedding_source_columns", EmbeddingSourceColumn), + embedding_vector_columns=_repeated_dict(d, "embedding_vector_columns", EmbeddingVectorColumn), + schema_json=d.get("schema_json", None), + ) + + +@dataclass +class EmbeddingSourceColumn: + """Name of an embedding source column and 
its associated embedding model endpoint.""" + + embedding_model_endpoint: Optional[str] = None + """Name of the embedding model endpoint, used by default for both ingestion and querying.""" + + model_endpoint_name_for_query: Optional[str] = None + """Name of the embedding model endpoint which, if specified, is used for querying (not ingestion).""" + + name: Optional[str] = None + """Name of the source column.""" + + def as_dict(self) -> dict: + """Serializes the EmbeddingSourceColumn into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.embedding_model_endpoint is not None: + body["embedding_model_endpoint"] = self.embedding_model_endpoint + if self.model_endpoint_name_for_query is not None: + body["model_endpoint_name_for_query"] = self.model_endpoint_name_for_query + if self.name is not None: + body["name"] = self.name + return body + + def as_shallow_dict(self) -> dict: + """Serializes the EmbeddingSourceColumn into a shallow dictionary of its immediate attributes.""" + body = {} + if self.embedding_model_endpoint is not None: + body["embedding_model_endpoint"] = self.embedding_model_endpoint + if self.model_endpoint_name_for_query is not None: + body["model_endpoint_name_for_query"] = self.model_endpoint_name_for_query + if self.name is not None: + body["name"] = self.name + return body + + @classmethod + def from_dict(cls, d: Dict[str, Any]) -> EmbeddingSourceColumn: + """Deserializes the EmbeddingSourceColumn from a dictionary.""" + return cls( + embedding_model_endpoint=d.get("embedding_model_endpoint", None), + model_endpoint_name_for_query=d.get("model_endpoint_name_for_query", None), + name=d.get("name", None), + ) + + +@dataclass +class EmbeddingVectorColumn: + """Name and dimension of an embedding vector column.""" + + embedding_dimension: Optional[int] = None + """Dimension of the embedding vector.""" + + name: Optional[str] = None + """Name of the column.""" + + def as_dict(self) -> dict: + """Serializes the 
@dataclass
class Endpoint:
    """An AI Search endpoint: the compute infrastructure that hosts AI Search indexes and serves
    queries against them. Customers create, query, and delete endpoints, while provisioning,
    scaling, and health status are handled by the system."""

    endpoint_type: EndpointType
    """Type of endpoint. Required on create and immutable thereafter."""

    budget_policy_id: Optional[str] = None
    """The user-selected budget policy id for the endpoint."""

    create_time: Optional[Timestamp] = None
    """Time the endpoint was created."""

    creator: Optional[str] = None
    """Creator of the endpoint"""

    custom_tags: Optional[List[CustomTag]] = None
    """The custom tags assigned to the endpoint"""

    effective_budget_policy_id: Optional[str] = None
    """The budget policy id applied to the endpoint"""

    endpoint_status: Optional[EndpointStatus] = None
    """Current status of the endpoint"""

    id: Optional[str] = None
    """Unique identifier of the endpoint"""

    index_count: Optional[int] = None
    """Number of indexes on the endpoint"""

    last_updated_user: Optional[str] = None
    """User who last updated the endpoint"""

    name: Optional[str] = None
    """Name of the AI Search endpoint. Server-assigned full resource path
    (`workspaces/{workspace}/endpoints/{endpoint}`) on output. On create, the user-supplied short
    name is conveyed via `CreateEndpointRequest.endpoint_id`; the server composes the full `name`
    and returns it on the response."""

    replica_count: Optional[int] = None
    """The client-supplied desired number of replicas for the endpoint, applied at create/update time.
    Mutually exclusive with `target_qps`."""

    scaling_info: Optional[EndpointScalingInfo] = None
    """Scaling information for the endpoint"""

    target_qps: Optional[int] = None
    """Target QPS for the endpoint. Mutually exclusive with `replica_count`. Best-effort; the system
    does not guarantee this QPS will be achieved."""

    throughput_info: Optional[EndpointThroughputInfo] = None
    """Throughput information for the endpoint"""

    update_time: Optional[Timestamp] = None
    """Time the endpoint was last updated."""

    usage_policy_id: Optional[str] = None
    """The usage policy id applied to the endpoint."""

    def _present_fields(self):
        """Yields `(key, value)` for every populated attribute, in serialization (alphabetical) order."""
        # Nested messages and lists are emitted only when truthy; scalars whenever they are not None.
        truthy_only = ("custom_tags", "endpoint_status", "scaling_info", "throughput_info")
        ordered_keys = (
            "budget_policy_id",
            "create_time",
            "creator",
            "custom_tags",
            "effective_budget_policy_id",
            "endpoint_status",
            "endpoint_type",
            "id",
            "index_count",
            "last_updated_user",
            "name",
            "replica_count",
            "scaling_info",
            "target_qps",
            "throughput_info",
            "update_time",
            "usage_policy_id",
        )
        for key in ordered_keys:
            value = getattr(self, key)
            if value is None:
                continue
            if key in truthy_only and not value:
                continue
            yield key, value

    def as_dict(self) -> dict:
        """Serializes the Endpoint into a dictionary suitable for use as a JSON request body."""
        # Only these attributes need converting; every other value is emitted unchanged.
        to_json = {
            "create_time": lambda stamp: stamp.ToJsonString(),
            "custom_tags": lambda tags: [tag.as_dict() for tag in tags],
            "endpoint_status": lambda status: status.as_dict(),
            "endpoint_type": lambda kind: kind.value,
            "scaling_info": lambda info: info.as_dict(),
            "throughput_info": lambda info: info.as_dict(),
            "update_time": lambda stamp: stamp.ToJsonString(),
        }
        body = {}
        for key, value in self._present_fields():
            convert = to_json.get(key)
            body[key] = value if convert is None else convert(value)
        return body

    def as_shallow_dict(self) -> dict:
        """Serializes the Endpoint into a shallow dictionary of its immediate attributes."""
        return dict(self._present_fields())

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> Endpoint:
        """Deserializes the Endpoint from a dictionary."""
        plain_keys = (
            "budget_policy_id",
            "creator",
            "effective_budget_policy_id",
            "id",
            "index_count",
            "last_updated_user",
            "name",
            "replica_count",
            "target_qps",
            "usage_policy_id",
        )
        kwargs = {key: d.get(key, None) for key in plain_keys}
        # Timestamps, enums and nested messages go through the module's decoding helpers.
        kwargs.update(
            create_time=_timestamp(d, "create_time"),
            custom_tags=_repeated_dict(d, "custom_tags", CustomTag),
            endpoint_status=_from_dict(d, "endpoint_status", EndpointStatus),
            endpoint_type=_enum(d, "endpoint_type", EndpointType),
            scaling_info=_from_dict(d, "scaling_info", EndpointScalingInfo),
            throughput_info=_from_dict(d, "throughput_info", EndpointThroughputInfo),
            update_time=_timestamp(d, "update_time"),
        )
        return cls(**kwargs)
class EndpointStatusState(Enum):
    """Lifecycle state of an AI Search endpoint, used by both Standard and Storage Optimized SKUs."""

    # Each member's value is the same string as its name.
    # NOTE(review): the precise meaning of RED_STATE / YELLOW_STATE is not defined in this file;
    # confirm against the service API documentation before relying on them.
    DELETED = "DELETED"
    OFFLINE = "OFFLINE"
    ONLINE = "ONLINE"
    PROVISIONING = "PROVISIONING"
    RED_STATE = "RED_STATE"
    YELLOW_STATE = "YELLOW_STATE"
current_num_replicas: Optional[int] = None + """The current number of replicas allocated to the endpoint""" + + maximum_concurrency_allowed: Optional[float] = None + """The maximum concurrency allowed for this endpoint""" + + minimal_concurrency_allowed: Optional[float] = None + """The minimum concurrency allowed for this endpoint""" + + requested_concurrency: Optional[float] = None + """The requested concurrency (total CPU) for the endpoint""" + + requested_num_replicas: Optional[int] = None + """The requested number of replicas for the endpoint""" + + def as_dict(self) -> dict: + """Serializes the EndpointThroughputInfo into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.change_request_message is not None: + body["change_request_message"] = self.change_request_message + if self.change_request_state is not None: + body["change_request_state"] = self.change_request_state.value + if self.current_concurrency is not None: + body["current_concurrency"] = self.current_concurrency + if self.current_concurrency_utilization_percentage is not None: + body["current_concurrency_utilization_percentage"] = self.current_concurrency_utilization_percentage + if self.current_num_replicas is not None: + body["current_num_replicas"] = self.current_num_replicas + if self.maximum_concurrency_allowed is not None: + body["maximum_concurrency_allowed"] = self.maximum_concurrency_allowed + if self.minimal_concurrency_allowed is not None: + body["minimal_concurrency_allowed"] = self.minimal_concurrency_allowed + if self.requested_concurrency is not None: + body["requested_concurrency"] = self.requested_concurrency + if self.requested_num_replicas is not None: + body["requested_num_replicas"] = self.requested_num_replicas + return body + + def as_shallow_dict(self) -> dict: + """Serializes the EndpointThroughputInfo into a shallow dictionary of its immediate attributes.""" + body = {} + if self.change_request_message is not None: + 
@dataclass
class FacetResultData:
    """Facet aggregation rows returned by a query."""

    # Annotated with `typing.Any`; the lowercase builtin `any` is a function, not a type.
    facet_array: Optional[List[List[Any]]] = None
    """Facet rows; each row is `[facet_column_name, value_or_range, count]`."""

    facet_row_count: Optional[int] = None
    """Number of facet rows returned."""

    def as_dict(self) -> dict:
        """Serializes the FacetResultData into a dictionary suitable for use as a JSON request body.

        An empty or unset `facet_array` is omitted; `facet_row_count` is emitted whenever it is
        not None (including 0).
        """
        body = {}
        if self.facet_array:
            # Shallow-copy the outer list so the returned body does not alias this object's list.
            body["facet_array"] = list(self.facet_array)
        if self.facet_row_count is not None:
            body["facet_row_count"] = self.facet_row_count
        return body

    def as_shallow_dict(self) -> dict:
        """Serializes the FacetResultData into a shallow dictionary of its immediate attributes.

        Unlike `as_dict`, the `facet_array` list is returned as-is (not copied).
        """
        body = {}
        if self.facet_array:
            body["facet_array"] = self.facet_array
        if self.facet_row_count is not None:
            body["facet_row_count"] = self.facet_row_count
        return body

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> FacetResultData:
        """Deserializes the FacetResultData from a dictionary; missing keys become None."""
        return cls(facet_array=d.get("facet_array", None), facet_row_count=d.get("facet_row_count", None))
Set when `index_type` is `DIRECT_ACCESS`.""" + + endpoint: Optional[str] = None + """Name of the endpoint associated with the index. Ignored on create — the endpoint is taken from + `CreateIndexRequest.parent`; populated only on output.""" + + index_subtype: Optional[IndexSubtype] = None + """The subtype of the index. Set on create and immutable thereafter.""" + + name: Optional[str] = None + """Name of the AI Search index. Server-assigned full resource path + (`workspaces/{workspace}/endpoints/{endpoint}/indexes/{index}`) on output, where `{index}` is + the index's Unity Catalog table name. On create, the user-supplied UC table name is conveyed via + `CreateIndexRequest.index_id`; the server composes the full `name` and returns it on the + response.""" + + status: Optional[IndexStatus] = None + """Current status of the index.""" + + def as_dict(self) -> dict: + """Serializes the Index into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.creator is not None: + body["creator"] = self.creator + if self.delta_sync_index_spec: + body["delta_sync_index_spec"] = self.delta_sync_index_spec.as_dict() + if self.direct_access_index_spec: + body["direct_access_index_spec"] = self.direct_access_index_spec.as_dict() + if self.endpoint is not None: + body["endpoint"] = self.endpoint + if self.index_subtype is not None: + body["index_subtype"] = self.index_subtype.value + if self.index_type is not None: + body["index_type"] = self.index_type.value + if self.name is not None: + body["name"] = self.name + if self.primary_key is not None: + body["primary_key"] = self.primary_key + if self.status: + body["status"] = self.status.as_dict() + return body + + def as_shallow_dict(self) -> dict: + """Serializes the Index into a shallow dictionary of its immediate attributes.""" + body = {} + if self.creator is not None: + body["creator"] = self.creator + if self.delta_sync_index_spec: + body["delta_sync_index_spec"] = self.delta_sync_index_spec + if 
@dataclass
class IndexStatus:
    """Lifecycle and health state of an AI Search index, together with human-readable detail about
    that state and basic indexing progress."""

    index_url: Optional[str] = None
    """Index API URL used to perform operations on the index."""

    indexed_row_count: Optional[int] = None
    """Number of rows indexed."""

    message: Optional[str] = None
    """Human-readable detail about the index's current state."""

    ready: Optional[bool] = None
    """Whether the index is ready for search."""

    def as_dict(self) -> dict:
        """Serializes the IndexStatus into a dictionary suitable for use as a JSON request body."""
        candidates = (
            ("index_url", self.index_url),
            ("indexed_row_count", self.indexed_row_count),
            ("message", self.message),
            ("ready", self.ready),
        )
        # Falsy-but-set values (0, False, "") are kept; only None is dropped.
        return {key: value for key, value in candidates if value is not None}

    def as_shallow_dict(self) -> dict:
        """Serializes the IndexStatus into a shallow dictionary of its immediate attributes."""
        body = {}
        for key in ("index_url", "indexed_row_count", "message", "ready"):
            value = getattr(self, key)
            if value is None:
                continue
            body[key] = value
        return body

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> IndexStatus:
        """Deserializes the IndexStatus from a dictionary."""
        keys = ("index_url", "indexed_row_count", "message", "ready")
        return cls(**{key: d.get(key, None) for key in keys})
@dataclass
class ListIndexesResponse:
    """Response for ListIndexes: one page of indexes plus an optional continuation token."""

    indexes: Optional[List[Index]] = None
    """The indexes on the endpoint. The field is named `indexes` rather than the irregular plural
    `indices`, matching the ListIndexes method name."""

    next_page_token: Optional[str] = None
    """A token that can be used to get the next page of results. Empty when there are no more results."""

    def as_dict(self) -> dict:
        """Serializes the ListIndexesResponse into a dictionary suitable for use as a JSON request body."""
        # An empty or unset page of indexes is left out of the body entirely.
        serialized = {"indexes": [index.as_dict() for index in self.indexes]} if self.indexes else {}
        if self.next_page_token is not None:
            serialized["next_page_token"] = self.next_page_token
        return serialized

    def as_shallow_dict(self) -> dict:
        """Serializes the ListIndexesResponse into a shallow dictionary of its immediate attributes."""
        shallow = {"indexes": self.indexes} if self.indexes else {}
        if self.next_page_token is None:
            return shallow
        shallow["next_page_token"] = self.next_page_token
        return shallow

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> ListIndexesResponse:
        """Deserializes the ListIndexesResponse from a dictionary."""
        page = _repeated_dict(d, "indexes", Index)
        token = d.get("next_page_token", None)
        return cls(indexes=page, next_page_token=token)
@dataclass
class RemoveDataResponse:
    """Response for RemoveData."""

    result: Optional[DataModificationResult] = None
    """Per-row outcome of the delete."""

    status: Optional[DataModificationStatus] = None
    """Overall status of the delete."""

    def as_dict(self) -> dict:
        """Serializes the RemoveDataResponse into a dictionary suitable for use as a JSON request body."""
        # The nested result is serialized recursively; the enum status is emitted as its string value.
        body = {"result": self.result.as_dict()} if self.result else {}
        if self.status is None:
            return body
        body["status"] = self.status.value
        return body

    def as_shallow_dict(self) -> dict:
        """Serializes the RemoveDataResponse into a shallow dictionary of its immediate attributes."""
        body = {"result": self.result} if self.result else {}
        if self.status is None:
            return body
        body["status"] = self.status
        return body

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> RemoveDataResponse:
        """Deserializes the RemoveDataResponse from a dictionary."""
        outcome = _from_dict(d, "result", DataModificationResult)
        overall = _enum(d, "status", DataModificationStatus)
        return cls(result=outcome, status=overall)
@dataclass
class ResultData:
    """The rows of a query result set."""

    # Annotated with `typing.Any`; the lowercase builtin `any` is a function, not a type.
    data_array: Optional[List[List[Any]]] = None
    """Result rows; each row is a list of column values aligned with the manifest columns."""

    row_count: Optional[int] = None
    """Number of rows in the result set."""

    def as_dict(self) -> dict:
        """Serializes the ResultData into a dictionary suitable for use as a JSON request body.

        An empty or unset `data_array` is omitted; `row_count` is emitted whenever it is not None
        (including 0).
        """
        body = {}
        if self.data_array:
            # Shallow-copy the outer list so the returned body does not alias this object's list.
            body["data_array"] = list(self.data_array)
        if self.row_count is not None:
            body["row_count"] = self.row_count
        return body

    def as_shallow_dict(self) -> dict:
        """Serializes the ResultData into a shallow dictionary of its immediate attributes.

        Unlike `as_dict`, the `data_array` list is returned as-is (not copied).
        """
        body = {}
        if self.data_array:
            body["data_array"] = self.data_array
        if self.row_count is not None:
            body["row_count"] = self.row_count
        return body

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> ResultData:
        """Deserializes the ResultData from a dictionary; missing keys become None."""
        return cls(data_array=d.get("data_array", None), row_count=d.get("row_count", None))
class ScalingChangeState(Enum):
    """State of the most recent scaling change request for a Storage Optimized endpoint."""

    SCALING_CHANGE_APPLIED = "SCALING_CHANGE_APPLIED"
    SCALING_CHANGE_IN_PROGRESS = "SCALING_CHANGE_IN_PROGRESS"
    SCALING_CHANGE_UNSPECIFIED = "SCALING_CHANGE_UNSPECIFIED"


@dataclass
class ScanIndexResponse:
    """Response for ScanIndex carrying a page of rows and an optional continuation token."""

    # `Any` (typing), not the builtin function `any`: column values are untyped JSON values.
    data: Optional[List[Dict[str, Any]]] = None
    """The rows in this page, each a struct of column name to value."""

    next_page_token: Optional[str] = None
    """Token for the next page; empty when the scan is exhausted."""

    def as_dict(self) -> dict:
        """Serializes the ScanIndexResponse into a dictionary suitable for use as a JSON request body.

        The row list is copied (the row dictionaries themselves are shared); an empty or unset
        `data` is omitted.
        """
        body = {}
        if self.data:
            body["data"] = list(self.data)
        if self.next_page_token is not None:
            body["next_page_token"] = self.next_page_token
        return body

    def as_shallow_dict(self) -> dict:
        """Serializes the ScanIndexResponse into a shallow dictionary of its immediate attributes."""
        body = {}
        if self.data:
            body["data"] = self.data
        if self.next_page_token is not None:
            body["next_page_token"] = self.next_page_token
        return body

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> ScanIndexResponse:
        """Deserializes the ScanIndexResponse from a dictionary."""
        return cls(data=d.get("data", None), next_page_token=d.get("next_page_token", None))


@dataclass
class SyncIndexResponse:
    """Response for SyncIndex. Empty today; reserved so future sync metadata (e.g. an operation handle)
    can be added without breaking the wire contract."""

    def as_dict(self) -> dict:
        """Serializes the SyncIndexResponse into a dictionary suitable for use as a JSON request body."""
        body = {}
        return body

    def as_shallow_dict(self) -> dict:
        """Serializes the SyncIndexResponse into a shallow dictionary of its immediate attributes."""
        body = {}
        return body

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> SyncIndexResponse:
        """Deserializes the SyncIndexResponse from a dictionary.

        The message has no fields, so the contents of `d` are ignored.
        """
        return cls()


class ThroughputChangeRequestState(Enum):
    """State of the most recent throughput change request issued against a Storage Optimized endpoint.
    Surfaced on `EndpointThroughputInfo.change_request_state`."""

    CHANGE_ADJUSTED = "CHANGE_ADJUSTED"
    CHANGE_FAILED = "CHANGE_FAILED"
    CHANGE_IN_PROGRESS = "CHANGE_IN_PROGRESS"
    CHANGE_REACHED_MAXIMUM = "CHANGE_REACHED_MAXIMUM"
    CHANGE_REACHED_MINIMUM = "CHANGE_REACHED_MINIMUM"
    CHANGE_SUCCESS = "CHANGE_SUCCESS"
class AiSearchAPI:
    """**AI Search Endpoint**: Represents the compute resources to host AI Search indexes. AIP-conformant
    replacement for the legacy VectorSearchEndpoints API; functionally equivalent."""

    def __init__(self, api_client):
        self._api = api_client

    def _request_headers(self, *, content_type_json: bool = False) -> Dict[str, str]:
        """Builds the headers shared by every call of this service.

        `Accept` is always JSON; `Content-Type` is added when requested; the workspace id
        configured on the client, when set, is forwarded as `X-Databricks-Workspace-Id`.
        """
        headers = {"Accept": "application/json"}
        if content_type_json:
            headers["Content-Type"] = "application/json"
        cfg = self._api._cfg
        if cfg.workspace_id:
            headers["X-Databricks-Workspace-Id"] = cfg.workspace_id
        return headers

    def create_endpoint(self, parent: str, endpoint: Endpoint, *, endpoint_id: Optional[str] = None) -> Endpoint:
        """Create a new AI Search endpoint.

        :param parent: str
          Workspace in which the Endpoint is created. Format: `workspaces/{workspace_id}`
        :param endpoint: :class:`Endpoint`
          Desired configuration of the new Endpoint. `endpoint.name` is not taken from here; the server
          assigns it from `parent` and `endpoint_id`.
        :param endpoint_id: str (optional)
          User-supplied short name (AIP-133). The server composes `Endpoint.name` as
          `{parent}/endpoints/{endpoint_id}`. Optional on the wire, but the server rejects empty values
          with INVALID_PARAMETER_VALUE.

        :returns: :class:`Endpoint`
        """
        payload = endpoint.as_dict()
        params = {} if endpoint_id is None else {"endpoint_id": endpoint_id}
        response = self._api.do(
            "POST",
            f"/api/2.0/ai-search/{parent}/endpoints",
            query=params,
            body=payload,
            headers=self._request_headers(content_type_json=True),
        )
        return Endpoint.from_dict(response)

    def create_index(self, parent: str, index: Index, *, index_id: Optional[str] = None) -> Index:
        """Create a new AI Search index.

        :param parent: str
          Endpoint on which the Index is created. Format:
          `workspaces/{workspace_id}/endpoints/{endpoint_id}`
        :param index: :class:`Index`
          Desired configuration of the new Index. `index.name` is not taken from here; the server assigns
          it from `parent` and `index_id`.
        :param index_id: str (optional)
          User-supplied Unity Catalog table name for the Index (AIP-133). The server composes
          `Index.name` as `{parent}/indexes/{index_id}`. Optional on the wire, but the server rejects
          empty values with INVALID_PARAMETER_VALUE.

        :returns: :class:`Index`
        """
        payload = index.as_dict()
        params = {} if index_id is None else {"index_id": index_id}
        response = self._api.do(
            "POST",
            f"/api/2.0/ai-search/{parent}/indexes",
            query=params,
            body=payload,
            headers=self._request_headers(content_type_json=True),
        )
        return Index.from_dict(response)

    def delete_endpoint(self, name: str):
        """Delete an AI Search endpoint.

        :param name: str
          Full resource name of the endpoint to delete. Format:
          `workspaces/{workspace_id}/endpoints/{endpoint_id}`


        """
        self._api.do("DELETE", f"/api/2.0/ai-search/{name}", headers=self._request_headers())

    def delete_index(self, name: str):
        """Delete an AI Search index.

        :param name: str
          Full resource name of the index to delete. Format:
          `workspaces/{workspace_id}/endpoints/{endpoint_id}/indexes/{index_id}`


        """
        self._api.do("DELETE", f"/api/2.0/ai-search/{name}", headers=self._request_headers())

    def get_endpoint(self, name: str) -> Endpoint:
        """Get details for a single AI Search endpoint.

        :param name: str
          Full resource name of the endpoint. Format: `workspaces/{workspace_id}/endpoints/{endpoint_id}`

        :returns: :class:`Endpoint`
        """
        response = self._api.do("GET", f"/api/2.0/ai-search/{name}", headers=self._request_headers())
        return Endpoint.from_dict(response)

    def get_index(self, name: str) -> Index:
        """Get details for a single AI Search index.

        :param name: str
          Full resource name of the index. Format:
          `workspaces/{workspace_id}/endpoints/{endpoint_id}/indexes/{index_id}`

        :returns: :class:`Index`
        """
        response = self._api.do("GET", f"/api/2.0/ai-search/{name}", headers=self._request_headers())
        return Index.from_dict(response)

    def list_endpoints(
        self, parent: str, *, page_size: Optional[int] = None, page_token: Optional[str] = None
    ) -> Iterator[Endpoint]:
        """List AI Search endpoints in a workspace.

        Pages are fetched lazily while the returned iterator is consumed.

        :param parent: str
          Workspace that owns the endpoints. Format: `workspaces/{workspace_id}`
        :param page_size: int (optional)
          Best-effort upper bound on results per page. It only narrows the legacy backend's response,
          never widens it, so the practical cap is `min(page_size, legacy_fixed_page_size)`.
        :param page_token: str (optional)
          Page token from a previous response. If not provided, returns the first page.

        :returns: Iterator over :class:`Endpoint`
        """
        params = {
            key: value for key, value in (("page_size", page_size), ("page_token", page_token)) if value is not None
        }
        headers = self._request_headers()
        path = f"/api/2.0/ai-search/{parent}/endpoints"

        while True:
            page = self._api.do("GET", path, query=params, headers=headers)
            for raw in page["endpoints"] if "endpoints" in page else ():
                yield Endpoint.from_dict(raw)
            following = page["next_page_token"] if "next_page_token" in page else None
            if not following:
                return
            params["page_token"] = following

    def list_indexes(
        self, parent: str, *, page_size: Optional[int] = None, page_token: Optional[str] = None
    ) -> Iterator[Index]:
        """List AI Search indexes on an endpoint.

        Pages are fetched lazily while the returned iterator is consumed.

        :param parent: str
          Endpoint that owns the indexes. Format: `workspaces/{workspace_id}/endpoints/{endpoint_id}`
        :param page_size: int (optional)
          Best-effort upper bound on results per page. It only narrows the legacy backend's response,
          never widens it, so the practical cap is `min(page_size, legacy_fixed_page_size)`.
        :param page_token: str (optional)
          Page token from a previous response. If not provided, returns the first page.

        :returns: Iterator over :class:`Index`
        """
        params = {
            key: value for key, value in (("page_size", page_size), ("page_token", page_token)) if value is not None
        }
        headers = self._request_headers()
        path = f"/api/2.0/ai-search/{parent}/indexes"

        while True:
            page = self._api.do("GET", path, query=params, headers=headers)
            for raw in page["indexes"] if "indexes" in page else ():
                yield Index.from_dict(raw)
            following = page["next_page_token"] if "next_page_token" in page else None
            if not following:
                return
            params["page_token"] = following

    def query_index(
        self,
        name: str,
        columns: List[str],
        *,
        columns_to_rerank: Optional[List[str]] = None,
        facets: Optional[List[str]] = None,
        filters_json: Optional[str] = None,
        max_results: Optional[int] = None,
        query_columns: Optional[List[str]] = None,
        query_text: Optional[str] = None,
        query_type: Optional[str] = None,
        query_vector: Optional[List[float]] = None,
        reranker: Optional[RerankerConfig] = None,
        score_threshold: Optional[float] = None,
        sort_columns: Optional[List[str]] = None,
    ) -> QueryIndexResponse:
        """Query (search) an AI Search index. Read-only, so a read-scoped token may invoke it.

        :param name: str
          Full resource name of the index to query. Format:
          `workspaces/{workspace_id}/endpoints/{endpoint_id}/indexes/{index_id}`
        :param columns: List[str]
          Column names to include in each result row.
        :param columns_to_rerank: List[str] (optional)
          Columns whose values are sent to the reranker.
        :param facets: List[str] (optional)
          Facets to compute over the matched results (e.g. `"category TOP 5"`).
        :param filters_json: str (optional)
          JSON string describing query filters (e.g. `{"id >": 5}`).
        :param max_results: int (optional)
          Maximum number of results to return (the legacy `num_results`). Defaults to 10.
        :param query_columns: List[str] (optional)
          Text columns to search for `query_text`. When empty, all text columns are searched.
        :param query_text: str (optional)
          Query text. Required for Delta Sync indexes that compute embeddings from a model endpoint.
        :param query_type: str (optional)
          Query type: `ANN`, `HYBRID`, or `FULL_TEXT`. Defaults to `ANN`.
        :param query_vector: List[float] (optional)
          Query vector. Required for Direct Access indexes and Delta Sync indexes with self-managed vectors.
        :param reranker: :class:`RerankerConfig` (optional)
          If set, results are reranked before being returned.
        :param score_threshold: float (optional)
          Score threshold for the approximate nearest-neighbor search. Defaults to 0.0.
        :param sort_columns: List[str] (optional)
          Sort clauses, e.g. `["rating DESC", "price ASC"]`. Overrides relevance ordering.

        :returns: :class:`QueryIndexResponse`
        """
        # (wire key, value, converter) in wire order; list arguments are copied, None values are skipped.
        parts = (
            ("columns", columns, list),
            ("columns_to_rerank", columns_to_rerank, list),
            ("facets", facets, list),
            ("filters_json", filters_json, None),
            ("max_results", max_results, None),
            ("query_columns", query_columns, list),
            ("query_text", query_text, None),
            ("query_type", query_type, None),
            ("query_vector", query_vector, list),
            ("reranker", reranker, lambda config: config.as_dict()),
            ("score_threshold", score_threshold, None),
            ("sort_columns", sort_columns, list),
        )
        payload = {
            key: value if convert is None else convert(value) for key, value, convert in parts if value is not None
        }
        response = self._api.do(
            "POST",
            f"/api/2.0/ai-search/{name}:query",
            body=payload,
            headers=self._request_headers(content_type_json=True),
        )
        return QueryIndexResponse.from_dict(response)

    def remove_data(self, name: str, primary_keys: List[str]) -> RemoveDataResponse:
        """Remove rows by primary key from a Direct Access AI Search index.

        :param name: str
          Full resource name of the index. Must be a Direct Access index. Format:
          `workspaces/{workspace_id}/endpoints/{endpoint_id}/indexes/{index_id}`
        :param primary_keys: List[str]
          Primary keys of the rows to remove.

        :returns: :class:`RemoveDataResponse`
        """
        payload = {} if primary_keys is None else {"primary_keys": list(primary_keys)}
        response = self._api.do(
            "POST",
            f"/api/2.0/ai-search/{name}:removeData",
            body=payload,
            headers=self._request_headers(content_type_json=True),
        )
        return RemoveDataResponse.from_dict(response)

    def scan_index(
        self, name: str, *, page_size: Optional[int] = None, page_token: Optional[str] = None
    ) -> ScanIndexResponse:
        """Scan (paginate over) the rows of an AI Search index.

        Returns a single page; the paging arguments travel in the request body.

        :param name: str
          Full resource name of the index to scan. Format:
          `workspaces/{workspace_id}/endpoints/{endpoint_id}/indexes/{index_id}`
        :param page_size: int (optional)
          Maximum number of rows to return in this page.
        :param page_token: str (optional)
          Page token from a previous response; if unset, scanning starts from the beginning.

        :returns: :class:`ScanIndexResponse`
        """
        payload = {
            key: value for key, value in (("page_size", page_size), ("page_token", page_token)) if value is not None
        }
        response = self._api.do(
            "POST",
            f"/api/2.0/ai-search/{name}:scan",
            body=payload,
            headers=self._request_headers(content_type_json=True),
        )
        return ScanIndexResponse.from_dict(response)

    def sync_index(self, name: str) -> SyncIndexResponse:
        """Synchronize a Delta Sync AI Search index with its source Delta table. Applies only to Delta Sync
        indexes; Direct Access indexes are written via the data-plane upsert path.

        :param name: str
          Full resource name of the index to synchronize. Must be a Delta Sync index. Format:
          `workspaces/{workspace_id}/endpoints/{endpoint_id}/indexes/{index_id}`

        :returns: :class:`SyncIndexResponse`
        """
        # No request body is sent, but the JSON content type header is still set.
        response = self._api.do(
            "POST", f"/api/2.0/ai-search/{name}:sync", headers=self._request_headers(content_type_json=True)
        )
        return SyncIndexResponse.from_dict(response)

    def update_endpoint(self, name: str, endpoint: Endpoint, update_mask: FieldMask) -> Endpoint:
        """Update an existing AI Search endpoint. Multi-bucket masks are supported and dispatched in
        deterministic bucket order: budget policy, custom tags, throughput, then scaling/replicas. Per-bucket
        dispatch is not atomic across buckets — if a later bucket fails, earlier buckets may already have
        been applied.

        :param name: str
          Full resource name of the endpoint to update
          (`workspaces/{workspace}/endpoints/{endpoint}`), as assigned by the server on create.
        :param endpoint: :class:`Endpoint`
          The Endpoint resource to update. `endpoint.name` carries the full resource path.
        :param update_mask: FieldMask
          The list of fields to update.

        :returns: :class:`Endpoint`
        """
        payload = endpoint.as_dict()
        params = {} if update_mask is None else {"update_mask": update_mask.ToJsonString()}
        response = self._api.do(
            "PATCH",
            f"/api/2.0/ai-search/{name}",
            query=params,
            body=payload,
            headers=self._request_headers(content_type_json=True),
        )
        return Endpoint.from_dict(response)

    def upsert_data(self, name: str, inputs_json: str) -> UpsertDataResponse:
        """Upsert rows into a Direct Access AI Search index.

        :param name: str
          Full resource name of the index. Must be a Direct Access index. Format:
          `workspaces/{workspace_id}/endpoints/{endpoint_id}/indexes/{index_id}`
        :param inputs_json: str
          JSON document describing the rows to upsert.

        :returns: :class:`UpsertDataResponse`
        """
        payload = {} if inputs_json is None else {"inputs_json": inputs_json}
        response = self._api.do(
            "POST",
            f"/api/2.0/ai-search/{name}:upsertData",
            body=payload,
            headers=self._request_headers(content_type_json=True),
        )
        return UpsertDataResponse.from_dict(response)
@dataclass
class Deployment:
    """A bundle deployment registered with the control plane."""

    create_time: Optional[Timestamp] = None
    """When the deployment was created."""

    created_by: Optional[str] = None
    """The user who created the deployment (email or principal name)."""

    deployment_mode: Optional[DeploymentMode] = None
    """Bundle target deployment mode (development or production), derived from the most recent
    version's mode."""

    destroy_time: Optional[Timestamp] = None
    """When the deployment was destroyed (i.e. `bundle destroy` completed). Unset if the deployment has
    not been destroyed. Named destroy_time (not delete_time) because this tracks the `databricks
    bundle destroy` command, not the API-level deletion."""

    destroyed_by: Optional[str] = None
    """The user who destroyed the deployment (email or principal name). Unset if the deployment has not
    been destroyed."""

    display_name: Optional[str] = None
    """Human-readable name for the deployment. Output only: it is denormalized from the latest version,
    not set directly on the deployment."""

    git_info: Optional[GitInfo] = None
    """Git provenance of the deployment's source, derived from the latest version."""

    last_version_id: Optional[str] = None
    """The version_id of the most recent deployment version."""

    name: Optional[str] = None
    """Resource name of the deployment. Format: deployments/{deployment_id}"""

    status: Optional[DeploymentStatus] = None
    """Current status of the deployment."""

    target_name: Optional[str] = None
    """The bundle target name associated with this deployment. Output only: it is denormalized from the
    latest version, not set directly on the deployment."""

    update_time: Optional[Timestamp] = None
    """When the deployment was last updated."""

    workspace_info: Optional[WorkspaceInfo] = None
    """Workspace location of the deployment, derived from the latest version."""

    # (field name, wire kind) in serialization order. Unannotated, so it is not a dataclass field.
    # "message" fields are emitted when truthy; every other kind is emitted when not None.
    _WIRE_LAYOUT = (
        ("create_time", "timestamp"),
        ("created_by", "scalar"),
        ("deployment_mode", "enum"),
        ("destroy_time", "timestamp"),
        ("destroyed_by", "scalar"),
        ("display_name", "scalar"),
        ("git_info", "message"),
        ("last_version_id", "scalar"),
        ("name", "scalar"),
        ("status", "enum"),
        ("target_name", "scalar"),
        ("update_time", "timestamp"),
        ("workspace_info", "message"),
    )

    def as_dict(self) -> dict:
        """Serializes the Deployment into a dictionary suitable for use as a JSON request body.

        Timestamps are written through `ToJsonString()`, enums as their `.value`, and nested
        messages through their own `as_dict()`; unset fields are omitted.
        """
        body = {}
        for key, kind in self._WIRE_LAYOUT:
            value = getattr(self, key)
            if kind == "message":
                if value:
                    body[key] = value.as_dict()
            elif value is None:
                continue
            elif kind == "timestamp":
                body[key] = value.ToJsonString()
            elif kind == "enum":
                body[key] = value.value
            else:
                body[key] = value
        return body

    def as_shallow_dict(self) -> dict:
        """Serializes the Deployment into a shallow dictionary of its immediate attributes.

        Uses the same presence rules as `as_dict`, but stores every value unconverted.
        """
        body = {}
        for key, kind in self._WIRE_LAYOUT:
            value = getattr(self, key)
            present = bool(value) if kind == "message" else value is not None
            if present:
                body[key] = value
        return body

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> Deployment:
        """Deserializes the Deployment from a dictionary."""
        return cls(
            create_time=_timestamp(d, "create_time"),
            created_by=d.get("created_by"),
            deployment_mode=_enum(d, "deployment_mode", DeploymentMode),
            destroy_time=_timestamp(d, "destroy_time"),
            destroyed_by=d.get("destroyed_by"),
            display_name=d.get("display_name"),
            git_info=_from_dict(d, "git_info", GitInfo),
            last_version_id=d.get("last_version_id"),
            name=d.get("name"),
            status=_enum(d, "status", DeploymentStatus),
            target_name=d.get("target_name"),
            update_time=_timestamp(d, "update_time"),
            workspace_info=_from_dict(d, "workspace_info", WorkspaceInfo),
        )
class DeploymentMode(Enum):
    """Bundle target deployment mode. Mirrors the `mode` field on a bundle target in `databricks.yml`
    (see https://docs.databricks.com/dev-tools/bundles/deployment-modes)."""

    DEPLOYMENT_MODE_DEVELOPMENT = "DEPLOYMENT_MODE_DEVELOPMENT"
    DEPLOYMENT_MODE_PRODUCTION = "DEPLOYMENT_MODE_PRODUCTION"


class DeploymentResourceType(Enum):
    """Type of a deployment resource."""

    DEPLOYMENT_RESOURCE_TYPE_ALERT = "DEPLOYMENT_RESOURCE_TYPE_ALERT"
    DEPLOYMENT_RESOURCE_TYPE_APP = "DEPLOYMENT_RESOURCE_TYPE_APP"
    DEPLOYMENT_RESOURCE_TYPE_CATALOG = "DEPLOYMENT_RESOURCE_TYPE_CATALOG"
    DEPLOYMENT_RESOURCE_TYPE_CLUSTER = "DEPLOYMENT_RESOURCE_TYPE_CLUSTER"
    DEPLOYMENT_RESOURCE_TYPE_DASHBOARD = "DEPLOYMENT_RESOURCE_TYPE_DASHBOARD"
    DEPLOYMENT_RESOURCE_TYPE_DATABASE_CATALOG = "DEPLOYMENT_RESOURCE_TYPE_DATABASE_CATALOG"
    DEPLOYMENT_RESOURCE_TYPE_DATABASE_INSTANCE = "DEPLOYMENT_RESOURCE_TYPE_DATABASE_INSTANCE"
    DEPLOYMENT_RESOURCE_TYPE_EXPERIMENT = "DEPLOYMENT_RESOURCE_TYPE_EXPERIMENT"
    DEPLOYMENT_RESOURCE_TYPE_EXTERNAL_LOCATION = "DEPLOYMENT_RESOURCE_TYPE_EXTERNAL_LOCATION"
    DEPLOYMENT_RESOURCE_TYPE_JOB = "DEPLOYMENT_RESOURCE_TYPE_JOB"
    DEPLOYMENT_RESOURCE_TYPE_MODEL = "DEPLOYMENT_RESOURCE_TYPE_MODEL"
    DEPLOYMENT_RESOURCE_TYPE_MODEL_SERVING_ENDPOINT = "DEPLOYMENT_RESOURCE_TYPE_MODEL_SERVING_ENDPOINT"
    DEPLOYMENT_RESOURCE_TYPE_PIPELINE = "DEPLOYMENT_RESOURCE_TYPE_PIPELINE"
    DEPLOYMENT_RESOURCE_TYPE_POSTGRES_BRANCH = "DEPLOYMENT_RESOURCE_TYPE_POSTGRES_BRANCH"
    DEPLOYMENT_RESOURCE_TYPE_POSTGRES_ENDPOINT = "DEPLOYMENT_RESOURCE_TYPE_POSTGRES_ENDPOINT"
    DEPLOYMENT_RESOURCE_TYPE_POSTGRES_PROJECT = "DEPLOYMENT_RESOURCE_TYPE_POSTGRES_PROJECT"
    DEPLOYMENT_RESOURCE_TYPE_QUALITY_MONITOR = "DEPLOYMENT_RESOURCE_TYPE_QUALITY_MONITOR"
    DEPLOYMENT_RESOURCE_TYPE_REGISTERED_MODEL = "DEPLOYMENT_RESOURCE_TYPE_REGISTERED_MODEL"
    DEPLOYMENT_RESOURCE_TYPE_SCHEMA = "DEPLOYMENT_RESOURCE_TYPE_SCHEMA"
    DEPLOYMENT_RESOURCE_TYPE_SECRET_SCOPE = "DEPLOYMENT_RESOURCE_TYPE_SECRET_SCOPE"
    DEPLOYMENT_RESOURCE_TYPE_SQL_WAREHOUSE = "DEPLOYMENT_RESOURCE_TYPE_SQL_WAREHOUSE"
    DEPLOYMENT_RESOURCE_TYPE_SYNCED_DATABASE_TABLE = "DEPLOYMENT_RESOURCE_TYPE_SYNCED_DATABASE_TABLE"
    DEPLOYMENT_RESOURCE_TYPE_VOLUME = "DEPLOYMENT_RESOURCE_TYPE_VOLUME"


class DeploymentStatus(Enum):
    """Status of a deployment."""

    DEPLOYMENT_STATUS_ACTIVE = "DEPLOYMENT_STATUS_ACTIVE"
    DEPLOYMENT_STATUS_DELETED = "DEPLOYMENT_STATUS_DELETED"
    DEPLOYMENT_STATUS_FAILED = "DEPLOYMENT_STATUS_FAILED"
    DEPLOYMENT_STATUS_IN_PROGRESS = "DEPLOYMENT_STATUS_IN_PROGRESS"


@dataclass
class GitInfo:
    """Git provenance of a bundle's source, captured at deploy time. Lets consumers link a deployed
    resource back to its source in version control."""

    branch: Optional[str] = None
    """Branch the source was deployed from."""

    commit: Optional[str] = None
    """Commit SHA of the deployed source."""

    origin_url: Optional[str] = None
    """URL of the git remote the source was deployed from."""

    def as_dict(self) -> dict:
        """Serializes the GitInfo into a dictionary suitable for use as a JSON request body."""
        pairs = (("branch", self.branch), ("commit", self.commit), ("origin_url", self.origin_url))
        return {key: value for key, value in pairs if value is not None}

    def as_shallow_dict(self) -> dict:
        """Serializes the GitInfo into a shallow dictionary of its immediate attributes."""
        shallow = {}
        for key in ("branch", "commit", "origin_url"):
            value = getattr(self, key)
            if value is not None:
                shallow[key] = value
        return shallow

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> GitInfo:
        """Deserializes the GitInfo from a dictionary."""
        return cls(**{key: d.get(key) for key in ("branch", "commit", "origin_url")})


@dataclass
class HeartbeatResponse:
    """Response for Heartbeat."""

    expire_time: Optional[Timestamp] = None
    """The new lock expiry time after renewal."""

    def as_dict(self) -> dict:
        """Serializes the HeartbeatResponse into a dictionary suitable for use as a JSON request body."""
        if self.expire_time is None:
            return {}
        return {"expire_time": self.expire_time.ToJsonString()}

    def as_shallow_dict(self) -> dict:
        """Serializes the HeartbeatResponse into a shallow dictionary of its immediate attributes."""
        if self.expire_time is None:
            return {}
        return {"expire_time": self.expire_time}

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> HeartbeatResponse:
        """Deserializes the HeartbeatResponse from a dictionary."""
        expiry = _timestamp(d, "expire_time")
        return cls(expire_time=expiry)


@dataclass
class ListDeploymentsResponse:
    """Response for ListDeployments."""

    deployments: Optional[List[Deployment]] = None
    """The deployments from the queried workspace."""

    next_page_token: Optional[str] = None
    """A token, which can be sent as `page_token` to retrieve the next page. If this field is omitted,
    there are no subsequent pages."""

    def as_dict(self) -> dict:
        """Serializes the ListDeploymentsResponse into a dictionary suitable for use as a JSON request body."""
        serialized = {}
        if self.deployments:
            serialized["deployments"] = [entry.as_dict() for entry in self.deployments]
        if self.next_page_token is not None:
            serialized["next_page_token"] = self.next_page_token
        return serialized

    def as_shallow_dict(self) -> dict:
        """Serializes the ListDeploymentsResponse into a shallow dictionary of its immediate attributes."""
        shallow = {}
        if self.deployments:
            shallow["deployments"] = self.deployments
        if self.next_page_token is not None:
            shallow["next_page_token"] = self.next_page_token
        return shallow

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> ListDeploymentsResponse:
        """Deserializes the ListDeploymentsResponse from a dictionary."""
        items = _repeated_dict(d, "deployments", Deployment)
        return cls(deployments=items, next_page_token=d.get("next_page_token"))
If this field is omitted, + there are no subsequent pages.""" + + operations: Optional[List[Operation]] = None + """The resource operations under the specified version.""" + + def as_dict(self) -> dict: + """Serializes the ListOperationsResponse into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.next_page_token is not None: + body["next_page_token"] = self.next_page_token + if self.operations: + body["operations"] = [v.as_dict() for v in self.operations] + return body + + def as_shallow_dict(self) -> dict: + """Serializes the ListOperationsResponse into a shallow dictionary of its immediate attributes.""" + body = {} + if self.next_page_token is not None: + body["next_page_token"] = self.next_page_token + if self.operations: + body["operations"] = self.operations + return body + + @classmethod + def from_dict(cls, d: Dict[str, Any]) -> ListOperationsResponse: + """Deserializes the ListOperationsResponse from a dictionary.""" + return cls( + next_page_token=d.get("next_page_token", None), operations=_repeated_dict(d, "operations", Operation) + ) + + +@dataclass +class ListResourcesResponse: + """Response for ListResources.""" + + next_page_token: Optional[str] = None + """A token, which can be sent as `page_token` to retrieve the next page. 
If this field is omitted, + there are no subsequent pages.""" + + resources: Optional[List[Resource]] = None + """The resources under the specified deployment.""" + + def as_dict(self) -> dict: + """Serializes the ListResourcesResponse into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.next_page_token is not None: + body["next_page_token"] = self.next_page_token + if self.resources: + body["resources"] = [v.as_dict() for v in self.resources] + return body + + def as_shallow_dict(self) -> dict: + """Serializes the ListResourcesResponse into a shallow dictionary of its immediate attributes.""" + body = {} + if self.next_page_token is not None: + body["next_page_token"] = self.next_page_token + if self.resources: + body["resources"] = self.resources + return body + + @classmethod + def from_dict(cls, d: Dict[str, Any]) -> ListResourcesResponse: + """Deserializes the ListResourcesResponse from a dictionary.""" + return cls(next_page_token=d.get("next_page_token", None), resources=_repeated_dict(d, "resources", Resource)) + + +@dataclass +class ListVersionsResponse: + """Response for ListVersions.""" + + next_page_token: Optional[str] = None + """A token, which can be sent as `page_token` to retrieve the next page. 
If this field is omitted, + there are no subsequent pages.""" + + versions: Optional[List[Version]] = None + """The versions under the specified deployment.""" + + def as_dict(self) -> dict: + """Serializes the ListVersionsResponse into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.next_page_token is not None: + body["next_page_token"] = self.next_page_token + if self.versions: + body["versions"] = [v.as_dict() for v in self.versions] + return body + + def as_shallow_dict(self) -> dict: + """Serializes the ListVersionsResponse into a shallow dictionary of its immediate attributes.""" + body = {} + if self.next_page_token is not None: + body["next_page_token"] = self.next_page_token + if self.versions: + body["versions"] = self.versions + return body + + @classmethod + def from_dict(cls, d: Dict[str, Any]) -> ListVersionsResponse: + """Deserializes the ListVersionsResponse from a dictionary.""" + return cls(next_page_token=d.get("next_page_token", None), versions=_repeated_dict(d, "versions", Version)) + + +@dataclass +class Operation: + """An operation on a single resource performed during a version. Operations are append-only and + record the result of applying a resource change to the workspace.""" + + action_type: OperationActionType + """The type of operation performed on this resource.""" + + resource_id: str + """ID reference for the actual resource in the workspace (e.g. the job ID, pipeline ID).""" + + status: OperationStatus + """Whether the operation succeeded or failed.""" + + create_time: Optional[Timestamp] = None + """When the operation was recorded.""" + + error_message: Optional[str] = None + """Error message if the operation failed. Set when status is OPERATION_STATUS_FAILED. Captures the + error encountered while applying the resource to the workspace.""" + + name: Optional[str] = None + """Resource name of the operation. 
Format: + deployments/{deployment_id}/versions/{version_id}/operations/{resource_key}""" + + resource_key: Optional[str] = None + """Resource identifier within the bundle (e.g. "jobs.foo", "pipelines.bar", "jobs.foo.permissions", + "files."). Can be an arbitrary UTF-8 encoded string key. This key links the operation + to the corresponding deployment-level Resource.""" + + resource_type: Optional[DeploymentResourceType] = None + """The type of the deployment resource this operation applies to. Derived from the `resource_key` + prefix (e.g. "jobs" → JOB); the caller does not set this field.""" + + state: Optional[Any] = None + """Serialized local config state after the operation. Should be unset for delete operations.""" + + def as_dict(self) -> dict: + """Serializes the Operation into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.action_type is not None: + body["action_type"] = self.action_type.value + if self.create_time is not None: + body["create_time"] = self.create_time.ToJsonString() + if self.error_message is not None: + body["error_message"] = self.error_message + if self.name is not None: + body["name"] = self.name + if self.resource_id is not None: + body["resource_id"] = self.resource_id + if self.resource_key is not None: + body["resource_key"] = self.resource_key + if self.resource_type is not None: + body["resource_type"] = self.resource_type.value + if self.state: + body["state"] = self.state + if self.status is not None: + body["status"] = self.status.value + return body + + def as_shallow_dict(self) -> dict: + """Serializes the Operation into a shallow dictionary of its immediate attributes.""" + body = {} + if self.action_type is not None: + body["action_type"] = self.action_type + if self.create_time is not None: + body["create_time"] = self.create_time + if self.error_message is not None: + body["error_message"] = self.error_message + if self.name is not None: + body["name"] = self.name + if self.resource_id is
not None: + body["resource_id"] = self.resource_id + if self.resource_key is not None: + body["resource_key"] = self.resource_key + if self.resource_type is not None: + body["resource_type"] = self.resource_type + if self.state: + body["state"] = self.state + if self.status is not None: + body["status"] = self.status + return body + + @classmethod + def from_dict(cls, d: Dict[str, Any]) -> Operation: + """Deserializes the Operation from a dictionary.""" + return cls( + action_type=_enum(d, "action_type", OperationActionType), + create_time=_timestamp(d, "create_time"), + error_message=d.get("error_message", None), + name=d.get("name", None), + resource_id=d.get("resource_id", None), + resource_key=d.get("resource_key", None), + resource_type=_enum(d, "resource_type", DeploymentResourceType), + state=d.get("state", None), + status=_enum(d, "status", OperationStatus), + ) + + +class OperationActionType(Enum): + """Type of action performed on a resource during a deployment.""" + + OPERATION_ACTION_TYPE_BIND = "OPERATION_ACTION_TYPE_BIND" + OPERATION_ACTION_TYPE_BIND_AND_UPDATE = "OPERATION_ACTION_TYPE_BIND_AND_UPDATE" + OPERATION_ACTION_TYPE_CREATE = "OPERATION_ACTION_TYPE_CREATE" + OPERATION_ACTION_TYPE_DELETE = "OPERATION_ACTION_TYPE_DELETE" + OPERATION_ACTION_TYPE_INITIAL_REGISTER = "OPERATION_ACTION_TYPE_INITIAL_REGISTER" + OPERATION_ACTION_TYPE_RECREATE = "OPERATION_ACTION_TYPE_RECREATE" + OPERATION_ACTION_TYPE_RESIZE = "OPERATION_ACTION_TYPE_RESIZE" + OPERATION_ACTION_TYPE_UPDATE = "OPERATION_ACTION_TYPE_UPDATE" + OPERATION_ACTION_TYPE_UPDATE_WITH_ID = "OPERATION_ACTION_TYPE_UPDATE_WITH_ID" + + +class OperationStatus(Enum): + """Status of a resource operation.""" + + OPERATION_STATUS_FAILED = "OPERATION_STATUS_FAILED" + OPERATION_STATUS_SUCCEEDED = "OPERATION_STATUS_SUCCEEDED" + + +@dataclass +class Resource: + """A resource managed by a deployment. 
Resources are implicitly created, updated, or deleted when + operations are recorded on a version.""" + + resource_type: DeploymentResourceType + """The type of the deployment resource.""" + + last_action_type: Optional[OperationActionType] = None + """The action performed on this resource during the last version.""" + + last_version_id: Optional[str] = None + """The version_id of the last version where this resource was updated.""" + + name: Optional[str] = None + """Resource name. Format: deployments/{deployment_id}/resources/{resource_key}""" + + resource_id: Optional[str] = None + """ID that references the actual resource in the workspace (e.g. the job ID, pipeline ID).""" + + resource_key: Optional[str] = None + """Resource identifier within the bundle (e.g. "jobs.foo", "pipelines.bar", + "jobs.foo.permissions").""" + + state: Optional[Any] = None + """Serialized local config state (what the CLI deployed).""" + + def as_dict(self) -> dict: + """Serializes the Resource into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.last_action_type is not None: + body["last_action_type"] = self.last_action_type.value + if self.last_version_id is not None: + body["last_version_id"] = self.last_version_id + if self.name is not None: + body["name"] = self.name + if self.resource_id is not None: + body["resource_id"] = self.resource_id + if self.resource_key is not None: + body["resource_key"] = self.resource_key + if self.resource_type is not None: + body["resource_type"] = self.resource_type.value + if self.state: + body["state"] = self.state + return body + + def as_shallow_dict(self) -> dict: + """Serializes the Resource into a shallow dictionary of its immediate attributes.""" + body = {} + if self.last_action_type is not None: + body["last_action_type"] = self.last_action_type + if self.last_version_id is not None: + body["last_version_id"] = self.last_version_id + if self.name is not None: + body["name"] = self.name + if self.resource_id
is not None: + body["resource_id"] = self.resource_id + if self.resource_key is not None: + body["resource_key"] = self.resource_key + if self.resource_type is not None: + body["resource_type"] = self.resource_type + if self.state: + body["state"] = self.state + return body + + @classmethod + def from_dict(cls, d: Dict[str, Any]) -> Resource: + """Deserializes the Resource from a dictionary.""" + return cls( + last_action_type=_enum(d, "last_action_type", OperationActionType), + last_version_id=d.get("last_version_id", None), + name=d.get("name", None), + resource_id=d.get("resource_id", None), + resource_key=d.get("resource_key", None), + resource_type=_enum(d, "resource_type", DeploymentResourceType), + state=d.get("state", None), + ) + + +@dataclass +class Version: + """A single invocation of a deploy or destroy command against a deployment. Creating a version + acquires an exclusive lock on the parent deployment.""" + + cli_version: str + """CLI version used to initiate the version.""" + + version_type: VersionType + """Type of version (deploy or destroy).""" + + complete_time: Optional[Timestamp] = None + """When the version completed. Unset while the version is in progress.""" + + completed_by: Optional[str] = None + """The user who completed the version (email or principal name). May differ from `created_by` when + another user force-completes the version.""" + + completion_reason: Optional[VersionComplete] = None + """Why the version was completed. Unset while in progress. 
Set when status transitions to + COMPLETED.""" + + create_time: Optional[Timestamp] = None + """When the version was created.""" + + created_by: Optional[str] = None + """The user who created the version (email or principal name).""" + + deployment_mode: Optional[DeploymentMode] = None + """Bundle target deployment mode (development or production), captured at the time of this version.""" + + display_name: Optional[str] = None + """Display name for the deployment, captured at the time of this version.""" + + git_info: Optional[GitInfo] = None + """Git provenance of the source, captured at the time of this version.""" + + name: Optional[str] = None + """Resource name of the version. Format: deployments/{deployment_id}/versions/{version_id}""" + + status: Optional[VersionStatus] = None + """Status of the version: IN_PROGRESS or COMPLETED.""" + + target_name: Optional[str] = None + """Target name of the deployment, captured at the time of this version.""" + + version_id: Optional[str] = None + """Monotonically increasing version identifier within the parent deployment. 
Assigned by the client + on creation.""" + + workspace_info: Optional[WorkspaceInfo] = None + """Workspace location of the deployment, captured at the time of this version.""" + + def as_dict(self) -> dict: + """Serializes the Version into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.cli_version is not None: + body["cli_version"] = self.cli_version + if self.complete_time is not None: + body["complete_time"] = self.complete_time.ToJsonString() + if self.completed_by is not None: + body["completed_by"] = self.completed_by + if self.completion_reason is not None: + body["completion_reason"] = self.completion_reason.value + if self.create_time is not None: + body["create_time"] = self.create_time.ToJsonString() + if self.created_by is not None: + body["created_by"] = self.created_by + if self.deployment_mode is not None: + body["deployment_mode"] = self.deployment_mode.value + if self.display_name is not None: + body["display_name"] = self.display_name + if self.git_info: + body["git_info"] = self.git_info.as_dict() + if self.name is not None: + body["name"] = self.name + if self.status is not None: + body["status"] = self.status.value + if self.target_name is not None: + body["target_name"] = self.target_name + if self.version_id is not None: + body["version_id"] = self.version_id + if self.version_type is not None: + body["version_type"] = self.version_type.value + if self.workspace_info: + body["workspace_info"] = self.workspace_info.as_dict() + return body + + def as_shallow_dict(self) -> dict: + """Serializes the Version into a shallow dictionary of its immediate attributes.""" + body = {} + if self.cli_version is not None: + body["cli_version"] = self.cli_version + if self.complete_time is not None: + body["complete_time"] = self.complete_time + if self.completed_by is not None: + body["completed_by"] = self.completed_by + if self.completion_reason is not None: + body["completion_reason"] = self.completion_reason + if 
self.create_time is not None: + body["create_time"] = self.create_time + if self.created_by is not None: + body["created_by"] = self.created_by + if self.deployment_mode is not None: + body["deployment_mode"] = self.deployment_mode + if self.display_name is not None: + body["display_name"] = self.display_name + if self.git_info: + body["git_info"] = self.git_info + if self.name is not None: + body["name"] = self.name + if self.status is not None: + body["status"] = self.status + if self.target_name is not None: + body["target_name"] = self.target_name + if self.version_id is not None: + body["version_id"] = self.version_id + if self.version_type is not None: + body["version_type"] = self.version_type + if self.workspace_info: + body["workspace_info"] = self.workspace_info + return body + + @classmethod + def from_dict(cls, d: Dict[str, Any]) -> Version: + """Deserializes the Version from a dictionary.""" + return cls( + cli_version=d.get("cli_version", None), + complete_time=_timestamp(d, "complete_time"), + completed_by=d.get("completed_by", None), + completion_reason=_enum(d, "completion_reason", VersionComplete), + create_time=_timestamp(d, "create_time"), + created_by=d.get("created_by", None), + deployment_mode=_enum(d, "deployment_mode", DeploymentMode), + display_name=d.get("display_name", None), + git_info=_from_dict(d, "git_info", GitInfo), + name=d.get("name", None), + status=_enum(d, "status", VersionStatus), + target_name=d.get("target_name", None), + version_id=d.get("version_id", None), + version_type=_enum(d, "version_type", VersionType), + workspace_info=_from_dict(d, "workspace_info", WorkspaceInfo), + ) + + +class VersionComplete(Enum): + """Reason why a version was completed.""" + + VERSION_COMPLETE_FAILURE = "VERSION_COMPLETE_FAILURE" + VERSION_COMPLETE_FORCE_ABORT = "VERSION_COMPLETE_FORCE_ABORT" + VERSION_COMPLETE_LEASE_EXPIRED = "VERSION_COMPLETE_LEASE_EXPIRED" + VERSION_COMPLETE_SUCCESS = "VERSION_COMPLETE_SUCCESS" + + +class 
VersionStatus(Enum): + """Status of a version.""" + + VERSION_STATUS_COMPLETED = "VERSION_STATUS_COMPLETED" + VERSION_STATUS_IN_PROGRESS = "VERSION_STATUS_IN_PROGRESS" + + +class VersionType(Enum): + """Type of version.""" + + VERSION_TYPE_DEPLOY = "VERSION_TYPE_DEPLOY" + VERSION_TYPE_DESTROY = "VERSION_TYPE_DESTROY" + + +@dataclass +class WorkspaceInfo: + """Workspace location of a bundle deployment, captured at deploy time.""" + + file_path: Optional[str] = None + """Absolute workspace path where the deployed bundle files live. Mirrors the workspace.file_path + field in DABs bundle config.""" + + git_folder_path: Optional[str] = None + """When deployed from a Databricks Git folder, the absolute workspace path of that folder; empty + for local deploys.""" + + root_path: Optional[str] = None + """Absolute workspace path of the deployment root — the base path the deployed files live under. + Mirrors workspace.root_path in the DABs bundle config; file_path is its files subdirectory.""" + + source_linked: Optional[bool] = None + """Whether files are served directly from the source sync root instead of being copied into + file_path.""" + + def as_dict(self) -> dict: + """Serializes the WorkspaceInfo into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.file_path is not None: + body["file_path"] = self.file_path + if self.git_folder_path is not None: + body["git_folder_path"] = self.git_folder_path + if self.root_path is not None: + body["root_path"] = self.root_path + if self.source_linked is not None: + body["source_linked"] = self.source_linked + return body + + def as_shallow_dict(self) -> dict: + """Serializes the WorkspaceInfo into a shallow dictionary of its immediate attributes.""" + body = {} + if self.file_path is not None: + body["file_path"] = self.file_path + if self.git_folder_path is not None: + body["git_folder_path"] = self.git_folder_path + if self.root_path is not None: + body["root_path"] = self.root_path + if 
self.source_linked is not None: + body["source_linked"] = self.source_linked + return body + + @classmethod + def from_dict(cls, d: Dict[str, Any]) -> WorkspaceInfo: + """Deserializes the WorkspaceInfo from a dictionary.""" + return cls( + file_path=d.get("file_path", None), + git_folder_path=d.get("git_folder_path", None), + root_path=d.get("root_path", None), + source_linked=d.get("source_linked", None), + ) + + +class BundleDeploymentsAPI: + """Service for managing bundle deployment metadata.""" + + def __init__(self, api_client): + self._api = api_client + + def complete_version( + self, name: str, completion_reason: VersionComplete, *, force: Optional[bool] = None + ) -> Version: + """Marks a version as complete and releases the deployment lock. + + The server atomically: 1. Sets the version status to the provided terminal status. 2. Sets + `complete_time` to the current server timestamp. 3. Releases the lock on the parent deployment. 4. + Updates the parent deployment's `status` and `last_version_id`. + + :param name: str + The name of the version to complete. Format: deployments/{deployment_id}/versions/{version_id} + :param completion_reason: :class:`VersionComplete` + The reason for completing the version. Must be a terminal reason: VERSION_COMPLETE_SUCCESS, + VERSION_COMPLETE_FAILURE, or VERSION_COMPLETE_FORCE_ABORT. + :param force: bool (optional) + If true, force-completes the version even if the caller is not the original creator. The + completion_reason must be VERSION_COMPLETE_FORCE_ABORT when force is true. 
+ + :returns: :class:`Version` + """ + + body = {} + if completion_reason is not None: + body["completion_reason"] = completion_reason.value + if force is not None: + body["force"] = force + headers = { + "Accept": "application/json", + "Content-Type": "application/json", + } + + cfg = self._api._cfg + if cfg.workspace_id: + headers["X-Databricks-Workspace-Id"] = cfg.workspace_id + + res = self._api.do("POST", f"/api/2.0/bundle/{name}/complete", body=body, headers=headers) + return Version.from_dict(res) + + def create_deployment(self, deployment: Deployment, deployment_id: str) -> Deployment: + """Creates a new deployment in the workspace. + + The caller must provide a `deployment_id` which becomes the final component of the deployment's + resource name. If a deployment with the same ID already exists, the server returns `ALREADY_EXISTS`. + + :param deployment: :class:`Deployment` + The deployment to create. + :param deployment_id: str + The ID to use for the deployment, which will become the final component of the deployment's resource + name (i.e. `deployments/{deployment_id}`). + + :returns: :class:`Deployment` + """ + + body = deployment.as_dict() + query = {} + if deployment_id is not None: + query["deployment_id"] = deployment_id + headers = { + "Accept": "application/json", + "Content-Type": "application/json", + } + + cfg = self._api._cfg + if cfg.workspace_id: + headers["X-Databricks-Workspace-Id"] = cfg.workspace_id + + res = self._api.do("POST", "/api/2.0/bundle/deployments", query=query, body=body, headers=headers) + return Deployment.from_dict(res) + + def create_operation(self, parent: str, operation: Operation, resource_key: str) -> Operation: + """Creates a resource operation under a version. + + The caller must provide a `resource_key` which becomes the final component of the operation's name. If + an operation with the same key already exists under the version, the server returns `ALREADY_EXISTS`. 
+ + On success the server also updates the corresponding deployment-level Resource (creating it if this is + the first operation for that resource_key, or removing it if action_type is DELETE). + + :param parent: str + The parent version where this operation will be recorded. Format: + deployments/{deployment_id}/versions/{version_id} + :param operation: :class:`Operation` + The resource operation to create. + :param resource_key: str + The key identifying the resource this operation applies to. Becomes the final component of the + operation's name. + + :returns: :class:`Operation` + """ + + body = operation.as_dict() + query = {} + if resource_key is not None: + query["resource_key"] = resource_key + headers = { + "Accept": "application/json", + "Content-Type": "application/json", + } + + cfg = self._api._cfg + if cfg.workspace_id: + headers["X-Databricks-Workspace-Id"] = cfg.workspace_id + + res = self._api.do("POST", f"/api/2.0/bundle/{parent}/operations", query=query, body=body, headers=headers) + return Operation.from_dict(res) + + def create_version(self, parent: str, version: Version, version_id: str) -> Version: + """Creates a new version under a deployment. + + Creating a version acquires an exclusive lock on the deployment, preventing concurrent deploys. The + caller provides a `version_id` which the server validates equals `last_version_id + 1` on the + deployment. + + :param parent: str + The parent deployment where this version will be created. Format: deployments/{deployment_id} + :param version: :class:`Version` + The version to create. + :param version_id: str + The version ID the caller expects to create. The server validates this equals `last_version_id + 1` + on the deployment. If it doesn't match, the server returns `ABORTED`. 
+ + :returns: :class:`Version` + """ + + body = version.as_dict() + query = {} + if version_id is not None: + query["version_id"] = version_id + headers = { + "Accept": "application/json", + "Content-Type": "application/json", + } + + cfg = self._api._cfg + if cfg.workspace_id: + headers["X-Databricks-Workspace-Id"] = cfg.workspace_id + + res = self._api.do("POST", f"/api/2.0/bundle/{parent}/versions", query=query, body=body, headers=headers) + return Version.from_dict(res) + + def delete_deployment(self, name: str): + """Deletes a deployment. + + The deployment is marked as deleted. It and all its children (versions and their operations) will be + permanently deleted after the retention policy expires. If the deployment has an in-progress version, + the server returns `RESOURCE_CONFLICT`. + + :param name: str + Resource name of the deployment to delete. Format: deployments/{deployment_id} + + + """ + + headers = { + "Accept": "application/json", + } + + cfg = self._api._cfg + if cfg.workspace_id: + headers["X-Databricks-Workspace-Id"] = cfg.workspace_id + + self._api.do("DELETE", f"/api/2.0/bundle/{name}", headers=headers) + + def get_deployment(self, name: str) -> Deployment: + """Retrieves a deployment by its resource name. + + :param name: str + Resource name of the deployment to retrieve. Format: deployments/{deployment_id} + + :returns: :class:`Deployment` + """ + + headers = { + "Accept": "application/json", + } + + cfg = self._api._cfg + if cfg.workspace_id: + headers["X-Databricks-Workspace-Id"] = cfg.workspace_id + + res = self._api.do("GET", f"/api/2.0/bundle/{name}", headers=headers) + return Deployment.from_dict(res) + + def get_operation(self, name: str) -> Operation: + """Retrieves a resource operation by its resource name. + + :param name: str + The name of the resource operation to retrieve. 
Format: + deployments/{deployment_id}/versions/{version_id}/operations/{resource_key} + + :returns: :class:`Operation` + """ + + headers = { + "Accept": "application/json", + } + + cfg = self._api._cfg + if cfg.workspace_id: + headers["X-Databricks-Workspace-Id"] = cfg.workspace_id + + res = self._api.do("GET", f"/api/2.0/bundle/{name}", headers=headers) + return Operation.from_dict(res) + + def get_resource(self, name: str) -> Resource: + """Retrieves a deployment resource by its resource name. + + :param name: str + The name of the resource to retrieve. Format: deployments/{deployment_id}/resources/{resource_key} + + :returns: :class:`Resource` + """ + + headers = { + "Accept": "application/json", + } + + cfg = self._api._cfg + if cfg.workspace_id: + headers["X-Databricks-Workspace-Id"] = cfg.workspace_id + + res = self._api.do("GET", f"/api/2.0/bundle/{name}", headers=headers) + return Resource.from_dict(res) + + def get_version(self, name: str) -> Version: + """Retrieves a version by its resource name. + + :param name: str + The name of the version to retrieve. Format: deployments/{deployment_id}/versions/{version_id} + + :returns: :class:`Version` + """ + + headers = { + "Accept": "application/json", + } + + cfg = self._api._cfg + if cfg.workspace_id: + headers["X-Databricks-Workspace-Id"] = cfg.workspace_id + + res = self._api.do("GET", f"/api/2.0/bundle/{name}", headers=headers) + return Version.from_dict(res) + + def heartbeat(self, name: str) -> HeartbeatResponse: + """Sends a heartbeat to renew the lock held by a version. + + The server validates that the version is the active (non-terminal) version on the parent deployment + and resets the lock expiry. If the lock has already expired or the version is no longer active, the + server returns `ABORTED`. + + :param name: str + The version whose lock to renew. 
Format: deployments/{deployment_id}/versions/{version_id} + + :returns: :class:`HeartbeatResponse` + """ + + headers = { + "Accept": "application/json", + "Content-Type": "application/json", + } + + cfg = self._api._cfg + if cfg.workspace_id: + headers["X-Databricks-Workspace-Id"] = cfg.workspace_id + + res = self._api.do("POST", f"/api/2.0/bundle/{name}/heartbeat", headers=headers) + return HeartbeatResponse.from_dict(res) + + def list_deployments( + self, *, page_size: Optional[int] = None, page_token: Optional[str] = None + ) -> Iterator[Deployment]: + """Lists deployments in the workspace. + + :param page_size: int (optional) + The maximum number of deployments to return. The service may return fewer than this value. If + unspecified, at most 50 deployments will be returned. The maximum value is 1000; values above 1000 + will be coerced to 1000. + :param page_token: str (optional) + A page token, received from a previous `ListDeployments` call. Provide this to retrieve the + subsequent page. + + :returns: Iterator over :class:`Deployment` + """ + + query = {} + if page_size is not None: + query["page_size"] = page_size + if page_token is not None: + query["page_token"] = page_token + headers = { + "Accept": "application/json", + } + + cfg = self._api._cfg + if cfg.workspace_id: + headers["X-Databricks-Workspace-Id"] = cfg.workspace_id + + while True: + json = self._api.do("GET", "/api/2.0/bundle/deployments", query=query, headers=headers) + if "deployments" in json: + for v in json["deployments"]: + yield Deployment.from_dict(v) + if "next_page_token" not in json or not json["next_page_token"]: + return + query["page_token"] = json["next_page_token"] + + def list_operations( + self, parent: str, *, page_size: Optional[int] = None, page_token: Optional[str] = None + ) -> Iterator[Operation]: + """Lists resource operations under a version. + + :param parent: str + The parent version. 
Format: deployments/{deployment_id}/versions/{version_id} + :param page_size: int (optional) + The maximum number of operations to return. The service may return fewer than this value. If + unspecified, at most 50 operations will be returned. The maximum value is 1000; values above 1000 + will be coerced to 1000. + :param page_token: str (optional) + A page token, received from a previous `ListOperations` call. Provide this to retrieve the + subsequent page. + + :returns: Iterator over :class:`Operation` + """ + + query = {} + if page_size is not None: + query["page_size"] = page_size + if page_token is not None: + query["page_token"] = page_token + headers = { + "Accept": "application/json", + } + + cfg = self._api._cfg + if cfg.workspace_id: + headers["X-Databricks-Workspace-Id"] = cfg.workspace_id + + while True: + json = self._api.do("GET", f"/api/2.0/bundle/{parent}/operations", query=query, headers=headers) + if "operations" in json: + for v in json["operations"]: + yield Operation.from_dict(v) + if "next_page_token" not in json or not json["next_page_token"]: + return + query["page_token"] = json["next_page_token"] + + def list_resources( + self, parent: str, *, page_size: Optional[int] = None, page_token: Optional[str] = None + ) -> Iterator[Resource]: + """Lists resources under a deployment. + + :param parent: str + The parent deployment. Format: deployments/{deployment_id} + :param page_size: int (optional) + The maximum number of resources to return. The service may return fewer than this value. If + unspecified, at most 50 resources will be returned. The maximum value is 1000; values above 1000 + will be coerced to 1000. + :param page_token: str (optional) + A page token, received from a previous `ListResources` call. Provide this to retrieve the subsequent + page. 
+ + :returns: Iterator over :class:`Resource` + """ + + query = {} + if page_size is not None: + query["page_size"] = page_size + if page_token is not None: + query["page_token"] = page_token + headers = { + "Accept": "application/json", + } + + cfg = self._api._cfg + if cfg.workspace_id: + headers["X-Databricks-Workspace-Id"] = cfg.workspace_id + + while True: + json = self._api.do("GET", f"/api/2.0/bundle/{parent}/resources", query=query, headers=headers) + if "resources" in json: + for v in json["resources"]: + yield Resource.from_dict(v) + if "next_page_token" not in json or not json["next_page_token"]: + return + query["page_token"] = json["next_page_token"] + + def list_versions( + self, parent: str, *, page_size: Optional[int] = None, page_token: Optional[str] = None + ) -> Iterator[Version]: + """Lists versions under a deployment, ordered by version_id descending (most recent first). + + :param parent: str + The parent deployment. Format: deployments/{deployment_id} + :param page_size: int (optional) + The maximum number of versions to return. The service may return fewer than this value. If + unspecified, at most 50 versions will be returned. The maximum value is 1000; values above 1000 will + be coerced to 1000. + :param page_token: str (optional) + A page token, received from a previous `ListVersions` call. Provide this to retrieve the subsequent + page. 
+ + :returns: Iterator over :class:`Version` + """ + + query = {} + if page_size is not None: + query["page_size"] = page_size + if page_token is not None: + query["page_token"] = page_token + headers = { + "Accept": "application/json", + } + + cfg = self._api._cfg + if cfg.workspace_id: + headers["X-Databricks-Workspace-Id"] = cfg.workspace_id + + while True: + json = self._api.do("GET", f"/api/2.0/bundle/{parent}/versions", query=query, headers=headers) + if "versions" in json: + for v in json["versions"]: + yield Version.from_dict(v) + if "next_page_token" not in json or not json["next_page_token"]: + return + query["page_token"] = json["next_page_token"] diff --git a/databricks/sdk/service/catalog.py b/databricks/sdk/service/catalog.py index 1799f2426..45b8a75ee 100755 --- a/databricks/sdk/service/catalog.py +++ b/databricks/sdk/service/catalog.py @@ -1128,6 +1128,9 @@ class CatalogInfo: created_by: Optional[str] = None """Username of catalog creator.""" + custom_max_retention_hours: Optional[int] = None + """Custom maximum retention period in hours for the catalog""" + effective_predictive_optimization_flag: Optional[EffectivePredictiveOptimizationFlag] = None enable_predictive_optimization: Optional[EnablePredictiveOptimization] = None @@ -1196,6 +1199,8 @@ def as_dict(self) -> dict: body["created_at"] = self.created_at if self.created_by is not None: body["created_by"] = self.created_by + if self.custom_max_retention_hours is not None: + body["custom_max_retention_hours"] = self.custom_max_retention_hours if self.effective_predictive_optimization_flag: body["effective_predictive_optimization_flag"] = self.effective_predictive_optimization_flag.as_dict() if self.enable_predictive_optimization is not None: @@ -1249,6 +1254,8 @@ def as_shallow_dict(self) -> dict: body["created_at"] = self.created_at if self.created_by is not None: body["created_by"] = self.created_by + if self.custom_max_retention_hours is not None: + body["custom_max_retention_hours"] = 
self.custom_max_retention_hours if self.effective_predictive_optimization_flag: body["effective_predictive_optimization_flag"] = self.effective_predictive_optimization_flag if self.enable_predictive_optimization is not None: @@ -1297,6 +1304,7 @@ def from_dict(cls, d: Dict[str, Any]) -> CatalogInfo: connection_name=d.get("connection_name", None), created_at=d.get("created_at", None), created_by=d.get("created_by", None), + custom_max_retention_hours=d.get("custom_max_retention_hours", None), effective_predictive_optimization_flag=_from_dict( d, "effective_predictive_optimization_flag", EffectivePredictiveOptimizationFlag ), @@ -1695,6 +1703,9 @@ class ConnectionInfo: credential_type: Optional[CredentialType] = None """The type of credential.""" + environment_settings: Optional[EnvironmentSettings] = None + """[Create,Update:OPT] Connection environment settings as EnvironmentSettings object.""" + full_name: Optional[str] = None """Full name of connection.""" @@ -1744,6 +1755,8 @@ def as_dict(self) -> dict: body["created_by"] = self.created_by if self.credential_type is not None: body["credential_type"] = self.credential_type.value + if self.environment_settings: + body["environment_settings"] = self.environment_settings.as_dict() if self.full_name is not None: body["full_name"] = self.full_name if self.metastore_id is not None: @@ -1785,6 +1798,8 @@ def as_shallow_dict(self) -> dict: body["created_by"] = self.created_by if self.credential_type is not None: body["credential_type"] = self.credential_type + if self.environment_settings: + body["environment_settings"] = self.environment_settings if self.full_name is not None: body["full_name"] = self.full_name if self.metastore_id is not None: @@ -1821,6 +1836,7 @@ def from_dict(cls, d: Dict[str, Any]) -> ConnectionInfo: created_at=d.get("created_at", None), created_by=d.get("created_by", None), credential_type=_enum(d, "credential_type", CredentialType), + environment_settings=_from_dict(d, "environment_settings", 
EnvironmentSettings), full_name=d.get("full_name", None), metastore_id=d.get("metastore_id", None), name=d.get("name", None), @@ -3444,6 +3460,38 @@ def from_dict(cls, d: Dict[str, Any]) -> EntityTagAssignment: ) +@dataclass +class EnvironmentSettings: + environment_version: Optional[str] = None + + java_dependencies: Optional[List[str]] = None + + def as_dict(self) -> dict: + """Serializes the EnvironmentSettings into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.environment_version is not None: + body["environment_version"] = self.environment_version + if self.java_dependencies: + body["java_dependencies"] = [v for v in self.java_dependencies] + return body + + def as_shallow_dict(self) -> dict: + """Serializes the EnvironmentSettings into a shallow dictionary of its immediate attributes.""" + body = {} + if self.environment_version is not None: + body["environment_version"] = self.environment_version + if self.java_dependencies: + body["java_dependencies"] = self.java_dependencies + return body + + @classmethod + def from_dict(cls, d: Dict[str, Any]) -> EnvironmentSettings: + """Deserializes the EnvironmentSettings from a dictionary.""" + return cls( + environment_version=d.get("environment_version", None), java_dependencies=d.get("java_dependencies", None) + ) + + @dataclass class ExternalLineageExternalMetadata: name: Optional[str] = None @@ -8874,6 +8922,9 @@ class SchemaInfo: created_by: Optional[str] = None """Username of schema creator.""" + custom_max_retention_hours: Optional[int] = None + """Custom maximum retention period in hours for the schema.""" + effective_predictive_optimization_flag: Optional[EffectivePredictiveOptimizationFlag] = None enable_predictive_optimization: Optional[EnablePredictiveOptimization] = None @@ -8924,6 +8975,8 @@ def as_dict(self) -> dict: body["created_at"] = self.created_at if self.created_by is not None: body["created_by"] = self.created_by + if self.custom_max_retention_hours is not None: 
+ body["custom_max_retention_hours"] = self.custom_max_retention_hours if self.effective_predictive_optimization_flag: body["effective_predictive_optimization_flag"] = self.effective_predictive_optimization_flag.as_dict() if self.enable_predictive_optimization is not None: @@ -8965,6 +9018,8 @@ def as_shallow_dict(self) -> dict: body["created_at"] = self.created_at if self.created_by is not None: body["created_by"] = self.created_by + if self.custom_max_retention_hours is not None: + body["custom_max_retention_hours"] = self.custom_max_retention_hours if self.effective_predictive_optimization_flag: body["effective_predictive_optimization_flag"] = self.effective_predictive_optimization_flag if self.enable_predictive_optimization is not None: @@ -9001,6 +9056,7 @@ def from_dict(cls, d: Dict[str, Any]) -> SchemaInfo: comment=d.get("comment", None), created_at=d.get("created_at", None), created_by=d.get("created_by", None), + custom_max_retention_hours=d.get("custom_max_retention_hours", None), effective_predictive_optimization_flag=_from_dict( d, "effective_predictive_optimization_flag", EffectivePredictiveOptimizationFlag ), @@ -9234,9 +9290,9 @@ def from_dict(cls, d: Dict[str, Any]) -> Securable: class SecurableKind(Enum): - """Latest kind: MEMORY_STORE_STANDARD = 342; Next id: 343. Reserved numbers: 316, 317, 327, 330, - 341 (former ENDPOINT_LLM_*, MODEL_SERVICE_STANDARD, MODEL_SERVICE_SYSTEM_DELTASHARING, - MCP_SERVICE_STANDARD).""" + """Latest kind: CONNECTION_ADOBE_CAMPAIGNS_OAUTH_M2M = 345; Next id: 346. 
Reserved numbers: 316, + 317, 327, 330, 341 (former ENDPOINT_LLM_*, MODEL_SERVICE_STANDARD, + MODEL_SERVICE_SYSTEM_DELTASHARING, MCP_SERVICE_STANDARD).""" TABLE_DB_STORAGE = "TABLE_DB_STORAGE" TABLE_DELTA = "TABLE_DELTA" @@ -11591,6 +11647,7 @@ def create( *, comment: Optional[str] = None, connection_name: Optional[str] = None, + custom_max_retention_hours: Optional[int] = None, managed_encryption_settings: Optional[EncryptionSettings] = None, options: Optional[Dict[str, str]] = None, properties: Optional[Dict[str, str]] = None, @@ -11607,6 +11664,8 @@ def create( User-provided free-form text description. :param connection_name: str (optional) The name of the connection to an external data source. + :param custom_max_retention_hours: int (optional) + Custom maximum retention period in hours for the catalog :param managed_encryption_settings: :class:`EncryptionSettings` (optional) Control CMK encryption for managed catalog data :param options: Dict[str,str] (optional) @@ -11630,6 +11689,8 @@ def create( body["comment"] = comment if connection_name is not None: body["connection_name"] = connection_name + if custom_max_retention_hours is not None: + body["custom_max_retention_hours"] = custom_max_retention_hours if managed_encryption_settings is not None: body["managed_encryption_settings"] = managed_encryption_settings.as_dict() if name is not None: @@ -11781,6 +11842,7 @@ def update( name: str, *, comment: Optional[str] = None, + custom_max_retention_hours: Optional[int] = None, enable_predictive_optimization: Optional[EnablePredictiveOptimization] = None, isolation_mode: Optional[CatalogIsolationMode] = None, managed_encryption_settings: Optional[EncryptionSettings] = None, @@ -11796,6 +11858,8 @@ def update( The name of the catalog. :param comment: str (optional) User-provided free-form text description. 
+ :param custom_max_retention_hours: int (optional) + Custom maximum retention period in hours for the catalog :param enable_predictive_optimization: :class:`EnablePredictiveOptimization` (optional) Whether predictive optimization should be enabled for this object and objects under it. :param isolation_mode: :class:`CatalogIsolationMode` (optional) @@ -11817,6 +11881,8 @@ def update( body = {} if comment is not None: body["comment"] = comment + if custom_max_retention_hours is not None: + body["custom_max_retention_hours"] = custom_max_retention_hours if enable_predictive_optimization is not None: body["enable_predictive_optimization"] = enable_predictive_optimization.value if isolation_mode is not None: @@ -11864,6 +11930,7 @@ def create( options: Dict[str, str], *, comment: Optional[str] = None, + environment_settings: Optional[EnvironmentSettings] = None, properties: Optional[Dict[str, str]] = None, read_only: Optional[bool] = None, ) -> ConnectionInfo: @@ -11880,6 +11947,8 @@ def create( A map of key-value properties attached to the securable. :param comment: str (optional) User-provided free-form text description. + :param environment_settings: :class:`EnvironmentSettings` (optional) + [Create,Update:OPT] Connection environment settings as EnvironmentSettings object. :param properties: Dict[str,str] (optional) A map of key-value properties attached to the securable. 
:param read_only: bool (optional) @@ -11893,6 +11962,8 @@ def create( body["comment"] = comment if connection_type is not None: body["connection_type"] = connection_type.value + if environment_settings is not None: + body["environment_settings"] = environment_settings.as_dict() if name is not None: body["name"] = name if options is not None: @@ -11998,7 +12069,13 @@ def list(self, *, max_results: Optional[int] = None, page_token: Optional[str] = query["page_token"] = json["next_page_token"] def update( - self, name: str, options: Dict[str, str], *, new_name: Optional[str] = None, owner: Optional[str] = None + self, + name: str, + options: Dict[str, str], + *, + environment_settings: Optional[EnvironmentSettings] = None, + new_name: Optional[str] = None, + owner: Optional[str] = None, ) -> ConnectionInfo: """Updates the connection that matches the supplied name. @@ -12006,6 +12083,8 @@ def update( Name of the connection. :param options: Dict[str,str] A map of key-value properties attached to the securable. + :param environment_settings: :class:`EnvironmentSettings` (optional) + [Create,Update:OPT] Connection environment settings as EnvironmentSettings object. :param new_name: str (optional) New name for the connection. :param owner: str (optional) @@ -12015,6 +12094,8 @@ def update( """ body = {} + if environment_settings is not None: + body["environment_settings"] = environment_settings.as_dict() if new_name is not None: body["new_name"] = new_name if options is not None: @@ -15723,6 +15804,7 @@ def create( catalog_name: str, *, comment: Optional[str] = None, + custom_max_retention_hours: Optional[int] = None, properties: Optional[Dict[str, str]] = None, storage_root: Optional[str] = None, ) -> SchemaInfo: @@ -15735,6 +15817,8 @@ def create( Name of parent catalog. :param comment: str (optional) User-provided free-form text description. + :param custom_max_retention_hours: int (optional) + Custom maximum retention period in hours for the schema. 
:param properties: Dict[str,str] (optional) A map of key-value properties attached to the securable. :param storage_root: str (optional) @@ -15748,6 +15832,8 @@ def create( body["catalog_name"] = catalog_name if comment is not None: body["comment"] = comment + if custom_max_retention_hours is not None: + body["custom_max_retention_hours"] = custom_max_retention_hours if name is not None: body["name"] = name if properties is not None: @@ -15887,6 +15973,7 @@ def update( full_name: str, *, comment: Optional[str] = None, + custom_max_retention_hours: Optional[int] = None, enable_predictive_optimization: Optional[EnablePredictiveOptimization] = None, new_name: Optional[str] = None, owner: Optional[str] = None, @@ -15901,6 +15988,8 @@ def update( Full name of the schema. :param comment: str (optional) User-provided free-form text description. + :param custom_max_retention_hours: int (optional) + Custom maximum retention period in hours for the schema. :param enable_predictive_optimization: :class:`EnablePredictiveOptimization` (optional) Whether predictive optimization should be enabled for this object and objects under it. :param new_name: str (optional) @@ -15916,6 +16005,8 @@ def update( body = {} if comment is not None: body["comment"] = comment + if custom_max_retention_hours is not None: + body["custom_max_retention_hours"] = custom_max_retention_hours if enable_predictive_optimization is not None: body["enable_predictive_optimization"] = enable_predictive_optimization.value if new_name is not None: diff --git a/databricks/sdk/service/disasterrecovery.py b/databricks/sdk/service/disasterrecovery.py index 32d51f9db..54ef9bf61 100755 --- a/databricks/sdk/service/disasterrecovery.py +++ b/databricks/sdk/service/disasterrecovery.py @@ -397,7 +397,11 @@ class UcReplicationConfig: """Unity Catalog replication configuration (top-level, not per-set).""" catalogs: List[UcCatalog] - """UC catalogs to replicate.""" + """UC catalogs to replicate. 
+ + Mutable: catalogs may be added or removed on an existing failover group via UpdateFailoverGroup + with `unity_catalog_assets.catalogs` in the update_mask (gated by the + `databricks.drmanager.enableCatalogMutationOnUpdate` flag).""" data_replication_workspace_set: str """The workspace set whose workspaces will be used for data replication of all UC catalogs' diff --git a/databricks/sdk/service/jobs.py b/databricks/sdk/service/jobs.py index 35eb93086..75e4e5782 100755 --- a/databricks/sdk/service/jobs.py +++ b/databricks/sdk/service/jobs.py @@ -2037,6 +2037,11 @@ class Format(Enum): @dataclass class GenAiComputeTask: + """DEPRECATED — use `AiRuntimeTask` for all new BYOT multi-node GPU workloads (see + ai_runtime_task.proto). `AiRuntimeTask` is the only supported BYOT task type for new workloads; + this proto is retained only for AIR CLI (fka SGCLI) pywheel backwards compatibility and will be + removed once the pywheel → databricks-cli migration completes (post- PuPr).""" + dl_runtime_image: str """Runtime image""" diff --git a/databricks/sdk/service/ml.py b/databricks/sdk/service/ml.py index e1a3ffd54..7a425501b 100755 --- a/databricks/sdk/service/ml.py +++ b/databricks/sdk/service/ml.py @@ -1037,6 +1037,9 @@ class DataSource: request_source: Optional[RequestSource] = None """A request-time data source.""" + stream_source: Optional[StreamSource] = None + """A Stream data source.""" + def as_dict(self) -> dict: """Serializes the DataSource into a dictionary suitable for use as a JSON request body.""" body = {} @@ -1046,6 +1049,8 @@ def as_dict(self) -> dict: body["kafka_source"] = self.kafka_source.as_dict() if self.request_source: body["request_source"] = self.request_source.as_dict() + if self.stream_source: + body["stream_source"] = self.stream_source.as_dict() return body def as_shallow_dict(self) -> dict: @@ -1057,6 +1062,8 @@ def as_shallow_dict(self) -> dict: body["kafka_source"] = self.kafka_source if self.request_source: body["request_source"] = 
self.request_source + if self.stream_source: + body["stream_source"] = self.stream_source return body @classmethod @@ -1066,6 +1073,7 @@ def from_dict(cls, d: Dict[str, Any]) -> DataSource: delta_table_source=_from_dict(d, "delta_table_source", DeltaTableSource), kafka_source=_from_dict(d, "kafka_source", KafkaSource), request_source=_from_dict(d, "request_source", RequestSource), + stream_source=_from_dict(d, "stream_source", StreamSource), ) @@ -3258,6 +3266,9 @@ class KafkaConfig: """Catch-all for miscellaneous options. Keys should be source options or Kafka consumer options (kafka.*)""" + ingestion_config: Optional[IngestionConfig] = None + """Configuration for ingesting Kafka data into a Databricks-managed Delta table.""" + key_schema: Optional[SchemaConfig] = None """Schema configuration for extracting message keys from topics. At least one of key_schema and value_schema must be provided.""" @@ -3277,6 +3288,8 @@ def as_dict(self) -> dict: body["bootstrap_servers"] = self.bootstrap_servers if self.extra_options: body["extra_options"] = self.extra_options + if self.ingestion_config: + body["ingestion_config"] = self.ingestion_config.as_dict() if self.key_schema: body["key_schema"] = self.key_schema.as_dict() if self.name is not None: @@ -3298,6 +3311,8 @@ def as_shallow_dict(self) -> dict: body["bootstrap_servers"] = self.bootstrap_servers if self.extra_options: body["extra_options"] = self.extra_options + if self.ingestion_config: + body["ingestion_config"] = self.ingestion_config if self.key_schema: body["key_schema"] = self.key_schema if self.name is not None: @@ -3316,6 +3331,7 @@ def from_dict(cls, d: Dict[str, Any]) -> KafkaConfig: backfill_source=_from_dict(d, "backfill_source", BackfillSource), bootstrap_servers=d.get("bootstrap_servers", None), extra_options=d.get("extra_options", None), + ingestion_config=_from_dict(d, "ingestion_config", IngestionConfig), key_schema=_from_dict(d, "key_schema", SchemaConfig), name=d.get("name", None), 
subscription_mode=_from_dict(d, "subscription_mode", SubscriptionMode), @@ -4348,7 +4364,8 @@ class MaterializedFeature: cron_schedule: Optional[str] = None """The quartz cron expression that defines the schedule of the materialization pipeline. The - schedule is evaluated in the UTC timezone.""" + schedule is evaluated in the UTC timezone. Hidden from GraphQL: superseded by the `trigger` + oneof (cron_schedule_trigger), so not exposed to Catalog Explorer.""" cron_schedule_trigger: Optional[CronSchedule] = None """A cron-based schedule trigger for the materialization pipeline.""" @@ -4370,7 +4387,8 @@ class MaterializedFeature: """Destination for writing feature values to an online Lakebase table.""" pipeline_schedule_state: Optional[MaterializedFeaturePipelineScheduleState] = None - """The schedule state of the materialization pipeline.""" + """The schedule state of the materialization pipeline. Hidden from GraphQL: being deprecated, so + not exposed to Catalog Explorer.""" streaming_mode: Optional[StreamingMode] = None """The Structured Streaming trigger mode used for materialization. 
Real-time mode (RTM) targets @@ -7149,6 +7167,33 @@ def from_dict(cls, d: Dict[str, Any]) -> StreamSchemaConfig: return cls(direct_schemas=_from_dict(d, "direct_schemas", DirectSchemas)) +@dataclass +class StreamSource: + """A Stream entity used as a data source for a feature.""" + + full_name: str + """Three-part full name of the Stream (catalog.schema.stream).""" + + def as_dict(self) -> dict: + """Serializes the StreamSource into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.full_name is not None: + body["full_name"] = self.full_name + return body + + def as_shallow_dict(self) -> dict: + """Serializes the StreamSource into a shallow dictionary of its immediate attributes.""" + body = {} + if self.full_name is not None: + body["full_name"] = self.full_name + return body + + @classmethod + def from_dict(cls, d: Dict[str, Any]) -> StreamSource: + """Deserializes the StreamSource from a dictionary.""" + return cls(full_name=d.get("full_name", None)) + + @dataclass class StreamSourceConfig: """Source-specific configuration. Determines the streaming platform source.""" diff --git a/databricks/sdk/service/pipelines.py b/databricks/sdk/service/pipelines.py index 33fb3eba4..16b192170 100755 --- a/databricks/sdk/service/pipelines.py +++ b/databricks/sdk/service/pipelines.py @@ -1836,31 +1836,34 @@ class MetaMarketingOptions: """Meta Marketing (Meta Ads) specific options for ingestion""" action_attribution_windows: Optional[List[str]] = None - """(Optional) Action attribution windows for insights reporting (e.g. "28d_click", "1d_view")""" + """(Optional, DEPRECATED — use custom_report_options.action_attribution_windows) Action + attribution windows for insights reporting (e.g. 
"28d_click", "1d_view")""" action_breakdowns: Optional[List[str]] = None - """(Optional) Action breakdowns to configure for data aggregation""" + """(Optional, DEPRECATED — use custom_report_options.action_breakdowns) Action breakdowns""" action_report_time: Optional[str] = None - """(Optional) Timing used to report action statistics (impression, conversion, mixed, or lifetime)""" + """(Optional, DEPRECATED — use custom_report_options.action_report_time) Timing used to report + action statistics (impression, conversion, mixed, or lifetime)""" breakdowns: Optional[List[str]] = None - """(Optional) Breakdowns to configure for data aggregation""" + """(Optional, DEPRECATED — use custom_report_options.breakdowns) Breakdowns to configure""" custom_insights_lookback_window: Optional[int] = None """(Optional) Window in days to revisit data during sync to capture updated conversion data from - the API.""" + the API, shared by prebuilt and custom reports.""" level: Optional[str] = None - """(Optional) Granularity of data to pull (account, ad, adset, campaign)""" + """(Optional, DEPRECATED — use custom_report_options.level) Granularity of data to pull (account, + ad, adset, campaign)""" start_date: Optional[str] = None """(Optional) Start date in yyyy-MM-dd format (e.g. 2025-01-15). 
Data added after this date will be - ingested""" + ingested, shared by prebuilt and custom reports.""" time_increment: Optional[str] = None - """(Optional) Value in string by which to aggregate statistics (can take all_days, monthly or - number of days)""" + """(Optional, DEPRECATED — use custom_report_options.time_increment) Value in string by which to + aggregate statistics (can take all_days, monthly or number of days)""" def as_dict(self) -> dict: """Serializes the MetaMarketingOptions into a dictionary suitable for use as a JSON request body.""" @@ -4385,6 +4388,20 @@ class TableSpecificConfig: "auto_full_refresh_policy": { "enabled": true, "min_interval_hours": 23, } } If unspecified, auto full refresh is disabled.""" + clustering_columns: Optional[List[str]] = None + """List of column names to use for clustering the destination table. When specified, the + destination Delta table will be clustered by these columns. This can improve query performance + when filtering on these columns. Note: clustering_columns in table specific configuration will + override the pipeline definition. Note: we can only provide enable_auto_clustering or + clustering_columns, added as separate fields as we cannot have repeated field in oneof.""" + + enable_auto_clustering: Optional[bool] = None + """Whether to enable auto clustering on the destination table. When enabled, Delta will + automatically optimize the data layout based on the clustering columns for improved query + performance. Note: enable_auto_clustering in table specific configuration will override the + pipeline definition. Note: we can only provide enable_auto_clustering or clustering_columns, + added as separate fields as we cannot have repeated field in oneof.""" + exclude_columns: Optional[List[str]] = None """A list of column names to be excluded for the ingestion. When not specified, include_columns fully controls what columns to be ingested. 
When specified, all other columns including future @@ -4418,6 +4435,13 @@ class TableSpecificConfig: """The column names specifying the logical order of events in the source data. Spark Declarative Pipelines uses this sequencing to handle change events that arrive out of order.""" + table_properties: Optional[Dict[str, str]] = None + """Table properties to set on the destination table. These are key-value pairs that configure + various Delta table behaviors or any user defined properties. Example: + {"delta.feature.variantType": "supported", "delta.enableTypeWidening": "true"} Note: + table_properties in table specific configuration will override the table_properties of the + pipeline definition.""" + workday_report_parameters: Optional[IngestionPipelineDefinitionWorkdayReportParameters] = None """(Optional) Additional custom parameters for Workday Report""" @@ -4426,6 +4450,10 @@ def as_dict(self) -> dict: body = {} if self.auto_full_refresh_policy: body["auto_full_refresh_policy"] = self.auto_full_refresh_policy.as_dict() + if self.clustering_columns: + body["clustering_columns"] = [v for v in self.clustering_columns] + if self.enable_auto_clustering is not None: + body["enable_auto_clustering"] = self.enable_auto_clustering if self.exclude_columns: body["exclude_columns"] = [v for v in self.exclude_columns] if self.include_columns: @@ -4442,6 +4470,8 @@ def as_dict(self) -> dict: body["scd_type"] = self.scd_type.value if self.sequence_by: body["sequence_by"] = [v for v in self.sequence_by] + if self.table_properties: + body["table_properties"] = self.table_properties if self.workday_report_parameters: body["workday_report_parameters"] = self.workday_report_parameters.as_dict() return body @@ -4451,6 +4481,10 @@ def as_shallow_dict(self) -> dict: body = {} if self.auto_full_refresh_policy: body["auto_full_refresh_policy"] = self.auto_full_refresh_policy + if self.clustering_columns: + body["clustering_columns"] = self.clustering_columns + if 
self.enable_auto_clustering is not None: + body["enable_auto_clustering"] = self.enable_auto_clustering if self.exclude_columns: body["exclude_columns"] = self.exclude_columns if self.include_columns: @@ -4467,6 +4501,8 @@ def as_shallow_dict(self) -> dict: body["scd_type"] = self.scd_type if self.sequence_by: body["sequence_by"] = self.sequence_by + if self.table_properties: + body["table_properties"] = self.table_properties if self.workday_report_parameters: body["workday_report_parameters"] = self.workday_report_parameters return body @@ -4476,6 +4512,8 @@ def from_dict(cls, d: Dict[str, Any]) -> TableSpecificConfig: """Deserializes the TableSpecificConfig from a dictionary.""" return cls( auto_full_refresh_policy=_from_dict(d, "auto_full_refresh_policy", AutoFullRefreshPolicy), + clustering_columns=d.get("clustering_columns", None), + enable_auto_clustering=d.get("enable_auto_clustering", None), exclude_columns=d.get("exclude_columns", None), include_columns=d.get("include_columns", None), primary_keys=d.get("primary_keys", None), @@ -4488,6 +4526,7 @@ def from_dict(cls, d: Dict[str, Any]) -> TableSpecificConfig: salesforce_include_formula_fields=d.get("salesforce_include_formula_fields", None), scd_type=_enum(d, "scd_type", TableSpecificConfigScdType), sequence_by=d.get("sequence_by", None), + table_properties=d.get("table_properties", None), workday_report_parameters=_from_dict( d, "workday_report_parameters", IngestionPipelineDefinitionWorkdayReportParameters ), @@ -4507,32 +4546,27 @@ class TikTokAdsOptions: """TikTok Ads specific options for ingestion""" data_level: Optional[TikTokAdsOptionsTikTokDataLevel] = None - """(Optional) Data level for the report. If not specified, defaults to AUCTION_CAMPAIGN.""" + """Deprecated. Use custom_report_options.data_level instead.""" dimensions: Optional[List[str]] = None - """(Optional) Dimensions to include in the report. 
Examples: "campaign_id", "adgroup_id", "ad_id", - "stat_time_day", "stat_time_hour" If not specified, defaults to campaign_id.""" + """Deprecated. Use custom_report_options.dimensions instead.""" lookback_window_days: Optional[int] = None """(Optional) Number of days to look back for report tables during incremental sync to capture - late-arriving conversions and attribution data. If not specified, defaults to 7 days.""" + late-arriving conversions and attribution data.""" metrics: Optional[List[str]] = None - """(Optional) Metrics to include in the report. Examples: "spend", "impressions", "clicks", - "conversion", "cpc" If not specified, defaults to basic metrics (spend, impressions, clicks, - etc.)""" + """Deprecated. Use custom_report_options.metrics instead.""" query_lifetime: Optional[bool] = None - """(Optional) Whether to request lifetime metrics (all-time aggregated data). When true, the report - returns all-time data. If not specified, defaults to false.""" + """Deprecated. Use custom_report_options.query_lifetime instead.""" report_type: Optional[TikTokAdsOptionsTikTokReportType] = None - """(Optional) Report type for the TikTok Ads API. If not specified, defaults to BASIC.""" + """Deprecated. Use custom_report_options.report_type instead.""" sync_start_date: Optional[str] = None """(Optional) Start date for the initial sync of report tables in YYYY-MM-DD format. This - determines the earliest date from which to sync historical data. 
If not specified, defaults to 1 - year of historical data for daily reports and 30 days for hourly reports.""" + determines the earliest date from which to sync historical data.""" def as_dict(self) -> dict: """Serializes the TikTokAdsOptions into a dictionary suitable for use as a JSON request body.""" diff --git a/databricks/sdk/service/postgres.py b/databricks/sdk/service/postgres.py index 94858aaf6..badd749c0 100755 --- a/databricks/sdk/service/postgres.py +++ b/databricks/sdk/service/postgres.py @@ -33,6 +33,9 @@ @dataclass class Branch: + branch_id: Optional[str] = None + """The part of the name, chosen by the user when the resource was created.""" + create_time: Optional[Timestamp] = None """A timestamp indicating when the branch was created.""" @@ -61,6 +64,8 @@ class Branch: def as_dict(self) -> dict: """Serializes the Branch into a dictionary suitable for use as a JSON request body.""" body = {} + if self.branch_id is not None: + body["branch_id"] = self.branch_id if self.create_time is not None: body["create_time"] = self.create_time.ToJsonString() if self.name is not None: @@ -80,6 +85,8 @@ def as_dict(self) -> dict: def as_shallow_dict(self) -> dict: """Serializes the Branch into a shallow dictionary of its immediate attributes.""" body = {} + if self.branch_id is not None: + body["branch_id"] = self.branch_id if self.create_time is not None: body["create_time"] = self.create_time if self.name is not None: @@ -100,6 +107,7 @@ def as_shallow_dict(self) -> dict: def from_dict(cls, d: Dict[str, Any]) -> Branch: """Deserializes the Branch from a dictionary.""" return cls( + branch_id=d.get("branch_id", None), create_time=_timestamp(d, "create_time"), name=d.get("name", None), parent=d.get("parent", None), @@ -349,6 +357,9 @@ class BranchStatusState(Enum): @dataclass class Catalog: + catalog_id: Optional[str] = None + """The part of the name, chosen by the user when the resource was created.""" + create_time: Optional[Timestamp] = None """A timestamp 
indicating when the catalog was created.""" @@ -372,6 +383,8 @@ class Catalog: def as_dict(self) -> dict: """Serializes the Catalog into a dictionary suitable for use as a JSON request body.""" body = {} + if self.catalog_id is not None: + body["catalog_id"] = self.catalog_id if self.create_time is not None: body["create_time"] = self.create_time.ToJsonString() if self.name is not None: @@ -389,6 +402,8 @@ def as_dict(self) -> dict: def as_shallow_dict(self) -> dict: """Serializes the Catalog into a shallow dictionary of its immediate attributes.""" body = {} + if self.catalog_id is not None: + body["catalog_id"] = self.catalog_id if self.create_time is not None: body["create_time"] = self.create_time if self.name is not None: @@ -407,6 +422,7 @@ def as_shallow_dict(self) -> dict: def from_dict(cls, d: Dict[str, Any]) -> Catalog: """Deserializes the Catalog from a dictionary.""" return cls( + catalog_id=d.get("catalog_id", None), create_time=_timestamp(d, "create_time"), name=d.get("name", None), spec=_from_dict(d, "spec", CatalogCatalogSpec), @@ -555,6 +571,9 @@ class Database: create_time: Optional[Timestamp] = None """A timestamp indicating when the database was created.""" + database_id: Optional[str] = None + """The part of the name, chosen by the user when the resource was created.""" + name: Optional[str] = None """The resource name of the database. 
Format: projects/{project_id}/branches/{branch_id}/databases/{database_id}""" @@ -576,6 +595,8 @@ def as_dict(self) -> dict: body = {} if self.create_time is not None: body["create_time"] = self.create_time.ToJsonString() + if self.database_id is not None: + body["database_id"] = self.database_id if self.name is not None: body["name"] = self.name if self.parent is not None: @@ -593,6 +614,8 @@ def as_shallow_dict(self) -> dict: body = {} if self.create_time is not None: body["create_time"] = self.create_time + if self.database_id is not None: + body["database_id"] = self.database_id if self.name is not None: body["name"] = self.name if self.parent is not None: @@ -610,6 +633,7 @@ def from_dict(cls, d: Dict[str, Any]) -> Database: """Deserializes the Database from a dictionary.""" return cls( create_time=_timestamp(d, "create_time"), + database_id=d.get("database_id", None), name=d.get("name", None), parent=d.get("parent", None), spec=_from_dict(d, "spec", DatabaseDatabaseSpec), @@ -845,6 +869,9 @@ class Endpoint: create_time: Optional[Timestamp] = None """A timestamp indicating when the compute endpoint was created.""" + endpoint_id: Optional[str] = None + """The part of the name, chosen by the user when the resource was created.""" + name: Optional[str] = None """Output only. The full resource path of the endpoint. 
Format: projects/{project_id}/branches/{branch_id}/endpoints/{endpoint_id}""" @@ -871,6 +898,8 @@ def as_dict(self) -> dict: body = {} if self.create_time is not None: body["create_time"] = self.create_time.ToJsonString() + if self.endpoint_id is not None: + body["endpoint_id"] = self.endpoint_id if self.name is not None: body["name"] = self.name if self.parent is not None: @@ -890,6 +919,8 @@ def as_shallow_dict(self) -> dict: body = {} if self.create_time is not None: body["create_time"] = self.create_time + if self.endpoint_id is not None: + body["endpoint_id"] = self.endpoint_id if self.name is not None: body["name"] = self.name if self.parent is not None: @@ -909,6 +940,7 @@ def from_dict(cls, d: Dict[str, Any]) -> Endpoint: """Deserializes the Endpoint from a dictionary.""" return cls( create_time=_timestamp(d, "create_time"), + endpoint_id=d.get("endpoint_id", None), name=d.get("name", None), parent=d.get("parent", None), spec=_from_dict(d, "spec", EndpointSpec), @@ -1713,6 +1745,9 @@ class Project: name: Optional[str] = None """Output only. The full resource path of the project. Format: projects/{project_id}""" + project_id: Optional[str] = None + """The part of the name, chosen by the user when the resource was created.""" + purge_time: Optional[Timestamp] = None """A timestamp indicating when the project is scheduled for permanent deletion. 
Empty if the project is not deleted, otherwise set to a timestamp in the future.""" @@ -1741,6 +1776,8 @@ def as_dict(self) -> dict: body["initial_endpoint_spec"] = self.initial_endpoint_spec.as_dict() if self.name is not None: body["name"] = self.name + if self.project_id is not None: + body["project_id"] = self.project_id if self.purge_time is not None: body["purge_time"] = self.purge_time.ToJsonString() if self.spec: @@ -1764,6 +1801,8 @@ def as_shallow_dict(self) -> dict: body["initial_endpoint_spec"] = self.initial_endpoint_spec if self.name is not None: body["name"] = self.name + if self.project_id is not None: + body["project_id"] = self.project_id if self.purge_time is not None: body["purge_time"] = self.purge_time if self.spec: @@ -1784,6 +1823,7 @@ def from_dict(cls, d: Dict[str, Any]) -> Project: delete_time=_timestamp(d, "delete_time"), initial_endpoint_spec=_from_dict(d, "initial_endpoint_spec", InitialEndpointSpec), name=d.get("name", None), + project_id=d.get("project_id", None), purge_time=_timestamp(d, "purge_time"), spec=_from_dict(d, "spec", ProjectSpec), status=_from_dict(d, "status", ProjectStatus), @@ -2204,6 +2244,9 @@ class Role: parent: Optional[str] = None """The Branch where this Role exists. 
Format: projects/{project_id}/branches/{branch_id}""" + role_id: Optional[str] = None + """The part of the name, chosen by the user when the resource was created.""" + spec: Optional[RoleRoleSpec] = None """The spec contains the role configuration, including identity type, authentication method, and role attributes.""" @@ -2223,6 +2266,8 @@ def as_dict(self) -> dict: body["name"] = self.name if self.parent is not None: body["parent"] = self.parent + if self.role_id is not None: + body["role_id"] = self.role_id if self.spec: body["spec"] = self.spec.as_dict() if self.status: @@ -2240,6 +2285,8 @@ def as_shallow_dict(self) -> dict: body["name"] = self.name if self.parent is not None: body["parent"] = self.parent + if self.role_id is not None: + body["role_id"] = self.role_id if self.spec: body["spec"] = self.spec if self.status: @@ -2255,6 +2302,7 @@ def from_dict(cls, d: Dict[str, Any]) -> Role: create_time=_timestamp(d, "create_time"), name=d.get("name", None), parent=d.get("parent", None), + role_id=d.get("role_id", None), spec=_from_dict(d, "spec", RoleRoleSpec), status=_from_dict(d, "status", RoleRoleStatus), update_time=_timestamp(d, "update_time"), @@ -2520,6 +2568,9 @@ class SyncedTable: status: Optional[SyncedTableSyncedTableStatus] = None """Synced Table data synchronization status.""" + synced_table_id: Optional[str] = None + """The part of the name, chosen by the user when the resource was created.""" + uid: Optional[str] = None """The Unity Catalog table ID for this synced table.""" @@ -2534,6 +2585,8 @@ def as_dict(self) -> dict: body["spec"] = self.spec.as_dict() if self.status: body["status"] = self.status.as_dict() + if self.synced_table_id is not None: + body["synced_table_id"] = self.synced_table_id if self.uid is not None: body["uid"] = self.uid return body @@ -2549,6 +2602,8 @@ def as_shallow_dict(self) -> dict: body["spec"] = self.spec if self.status: body["status"] = self.status + if self.synced_table_id is not None: + body["synced_table_id"] = 
self.synced_table_id if self.uid is not None: body["uid"] = self.uid return body @@ -2561,6 +2616,7 @@ def from_dict(cls, d: Dict[str, Any]) -> SyncedTable: name=d.get("name", None), spec=_from_dict(d, "spec", SyncedTableSyncedTableSpec), status=_from_dict(d, "status", SyncedTableSyncedTableStatus), + synced_table_id=d.get("synced_table_id", None), uid=d.get("uid", None), ) diff --git a/databricks/sdk/service/settings.py b/databricks/sdk/service/settings.py index 9b24b5d59..036e9f7cc 100755 --- a/databricks/sdk/service/settings.py +++ b/databricks/sdk/service/settings.py @@ -3159,6 +3159,10 @@ class EgressNetworkPolicyNetworkAccessPolicy: restriction_mode: EgressNetworkPolicyNetworkAccessPolicyRestrictionMode """The restriction mode that controls how serverless workloads can access the internet.""" + allowed_databricks_destinations: Optional[List[EgressNetworkPolicyNetworkAccessPolicyDatabricksDestination]] = None + """List of Databricks workspace destinations that serverless workloads are allowed to access when + in RESTRICTED_ACCESS mode.""" + allowed_internet_destinations: Optional[List[EgressNetworkPolicyNetworkAccessPolicyInternetDestination]] = None """List of internet destinations that serverless workloads are allowed to access when in RESTRICTED_ACCESS mode.""" @@ -3178,6 +3182,8 @@ class EgressNetworkPolicyNetworkAccessPolicy: def as_dict(self) -> dict: """Serializes the EgressNetworkPolicyNetworkAccessPolicy into a dictionary suitable for use as a JSON request body.""" body = {} + if self.allowed_databricks_destinations: + body["allowed_databricks_destinations"] = [v.as_dict() for v in self.allowed_databricks_destinations] if self.allowed_internet_destinations: body["allowed_internet_destinations"] = [v.as_dict() for v in self.allowed_internet_destinations] if self.allowed_storage_destinations: @@ -3193,6 +3199,8 @@ def as_dict(self) -> dict: def as_shallow_dict(self) -> dict: """Serializes the EgressNetworkPolicyNetworkAccessPolicy into a shallow 
dictionary of its immediate attributes.""" body = {} + if self.allowed_databricks_destinations: + body["allowed_databricks_destinations"] = self.allowed_databricks_destinations if self.allowed_internet_destinations: body["allowed_internet_destinations"] = self.allowed_internet_destinations if self.allowed_storage_destinations: @@ -3209,6 +3217,9 @@ def as_shallow_dict(self) -> dict: def from_dict(cls, d: Dict[str, Any]) -> EgressNetworkPolicyNetworkAccessPolicy: """Deserializes the EgressNetworkPolicyNetworkAccessPolicy from a dictionary.""" return cls( + allowed_databricks_destinations=_repeated_dict( + d, "allowed_databricks_destinations", EgressNetworkPolicyNetworkAccessPolicyDatabricksDestination + ), allowed_internet_destinations=_repeated_dict( d, "allowed_internet_destinations", EgressNetworkPolicyNetworkAccessPolicyInternetDestination ), @@ -3225,6 +3236,31 @@ def from_dict(cls, d: Dict[str, Any]) -> EgressNetworkPolicyNetworkAccessPolicy: ) +@dataclass +class EgressNetworkPolicyNetworkAccessPolicyDatabricksDestination: + workspace_ids: Optional[List[int]] = None + """The workspace IDs to allow egress traffic to.""" + + def as_dict(self) -> dict: + """Serializes the EgressNetworkPolicyNetworkAccessPolicyDatabricksDestination into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.workspace_ids: + body["workspace_ids"] = [v for v in self.workspace_ids] + return body + + def as_shallow_dict(self) -> dict: + """Serializes the EgressNetworkPolicyNetworkAccessPolicyDatabricksDestination into a shallow dictionary of its immediate attributes.""" + body = {} + if self.workspace_ids: + body["workspace_ids"] = self.workspace_ids + return body + + @classmethod + def from_dict(cls, d: Dict[str, Any]) -> EgressNetworkPolicyNetworkAccessPolicyDatabricksDestination: + """Deserializes the EgressNetworkPolicyNetworkAccessPolicyDatabricksDestination from a dictionary.""" + return cls(workspace_ids=d.get("workspace_ids", None)) + + @dataclass class 
EgressNetworkPolicyNetworkAccessPolicyInternetDestination: """Users can specify accessible internet destinations when outbound access is restricted. We only diff --git a/databricks/sdk/service/sql.py b/databricks/sdk/service/sql.py index 36683ab1c..886259b65 100755 --- a/databricks/sdk/service/sql.py +++ b/databricks/sdk/service/sql.py @@ -10466,7 +10466,6 @@ def start(self, id: str) -> Wait[GetWarehouseResponse]: headers = { "Accept": "application/json", - "Content-Type": "application/json", } cfg = self._api._cfg @@ -10492,7 +10491,6 @@ def stop(self, id: str) -> Wait[GetWarehouseResponse]: headers = { "Accept": "application/json", - "Content-Type": "application/json", } cfg = self._api._cfg diff --git a/databricks/sdk/service/vectorsearch.py b/databricks/sdk/service/vectorsearch.py index dab82f802..9d38c06cb 100755 --- a/databricks/sdk/service/vectorsearch.py +++ b/databricks/sdk/service/vectorsearch.py @@ -706,6 +706,40 @@ class EndpointType(Enum): STORAGE_OPTIMIZED = "STORAGE_OPTIMIZED" +@dataclass +class FacetResultData: + """Facet aggregation rows returned by a query.""" + + facet_array: Optional[List[List[str]]] = None + """Facet rows. 
Each row is `[facet_column_name, value_or_range, count]`.""" + + facet_row_count: Optional[int] = None + """Number of facet rows returned.""" + + def as_dict(self) -> dict: + """Serializes the FacetResultData into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.facet_array: + body["facet_array"] = [v for v in self.facet_array] + if self.facet_row_count is not None: + body["facet_row_count"] = self.facet_row_count + return body + + def as_shallow_dict(self) -> dict: + """Serializes the FacetResultData into a shallow dictionary of its immediate attributes.""" + body = {} + if self.facet_array: + body["facet_array"] = self.facet_array + if self.facet_row_count is not None: + body["facet_row_count"] = self.facet_row_count + return body + + @classmethod + def from_dict(cls, d: Dict[str, Any]) -> FacetResultData: + """Deserializes the FacetResultData from a dictionary.""" + return cls(facet_array=d.get("facet_array", None), facet_row_count=d.get("facet_row_count", None)) + + @dataclass class GetVectorSearchEndpointPermissionLevelsResponse: permission_levels: Optional[List[VectorSearchEndpointPermissionsDescription]] = None @@ -1131,6 +1165,9 @@ class PipelineType(Enum): @dataclass class QueryVectorIndexResponse: + facet_result: Optional[FacetResultData] = None + """Facet aggregation rows returned by a query.""" + manifest: Optional[ResultManifest] = None """Metadata about the result set.""" @@ -1145,6 +1182,8 @@ class QueryVectorIndexResponse: def as_dict(self) -> dict: """Serializes the QueryVectorIndexResponse into a dictionary suitable for use as a JSON request body.""" body = {} + if self.facet_result: + body["facet_result"] = self.facet_result.as_dict() if self.manifest: body["manifest"] = self.manifest.as_dict() if self.next_page_token is not None: @@ -1156,6 +1195,8 @@ def as_dict(self) -> dict: def as_shallow_dict(self) -> dict: """Serializes the QueryVectorIndexResponse into a shallow dictionary of its immediate attributes.""" body 
= {} + if self.facet_result: + body["facet_result"] = self.facet_result if self.manifest: body["manifest"] = self.manifest if self.next_page_token is not None: @@ -1168,6 +1209,7 @@ def as_shallow_dict(self) -> dict: def from_dict(cls, d: Dict[str, Any]) -> QueryVectorIndexResponse: """Deserializes the QueryVectorIndexResponse from a dictionary.""" return cls( + facet_result=_from_dict(d, "facet_result", FacetResultData), manifest=_from_dict(d, "manifest", ResultManifest), next_page_token=d.get("next_page_token", None), result=_from_dict(d, "result", ResultData), @@ -1275,6 +1317,12 @@ class ResultManifest: columns: Optional[List[ColumnInfo]] = None """Information about each column in the result set.""" + facet_column_count: Optional[int] = None + """Number of columns in `facet_result`.""" + + facet_columns: Optional[List[ColumnInfo]] = None + """Information about each column in `facet_result`.""" + def as_dict(self) -> dict: """Serializes the ResultManifest into a dictionary suitable for use as a JSON request body.""" body = {} @@ -1282,6 +1330,10 @@ def as_dict(self) -> dict: body["column_count"] = self.column_count if self.columns: body["columns"] = [v.as_dict() for v in self.columns] + if self.facet_column_count is not None: + body["facet_column_count"] = self.facet_column_count + if self.facet_columns: + body["facet_columns"] = [v.as_dict() for v in self.facet_columns] return body def as_shallow_dict(self) -> dict: @@ -1291,12 +1343,21 @@ def as_shallow_dict(self) -> dict: body["column_count"] = self.column_count if self.columns: body["columns"] = self.columns + if self.facet_column_count is not None: + body["facet_column_count"] = self.facet_column_count + if self.facet_columns: + body["facet_columns"] = self.facet_columns return body @classmethod def from_dict(cls, d: Dict[str, Any]) -> ResultManifest: """Deserializes the ResultManifest from a dictionary.""" - return cls(column_count=d.get("column_count", None), columns=_repeated_dict(d, "columns", 
ColumnInfo)) + return cls( + column_count=d.get("column_count", None), + columns=_repeated_dict(d, "columns", ColumnInfo), + facet_column_count=d.get("facet_column_count", None), + facet_columns=_repeated_dict(d, "facet_columns", ColumnInfo), + ) @dataclass @@ -2561,13 +2622,16 @@ def query_index( columns: List[str], *, columns_to_rerank: Optional[List[str]] = None, + facets: Optional[List[str]] = None, filters_json: Optional[str] = None, num_results: Optional[int] = None, + query_columns: Optional[List[str]] = None, query_text: Optional[str] = None, query_type: Optional[str] = None, query_vector: Optional[List[float]] = None, reranker: Optional[RerankerConfig] = None, score_threshold: Optional[float] = None, + sort_columns: Optional[List[str]] = None, ) -> QueryVectorIndexResponse: """Query the specified vector index. @@ -2577,6 +2641,11 @@ def query_index( List of column names to include in the response. :param columns_to_rerank: List[str] (optional) Column names used to retrieve data to send to the reranker. + :param facets: List[str] (optional) + Facets to compute over the matched results. Each entry has one of these forms: `""` - top 10 + distinct values by count `" TOP "` - top n distinct values, where n > 0 `" + BUCKETS [[from,to],...]"` - inclusive numeric ranges `TOP` and `BUCKETS` are case-insensitive. A + column may appear at most once. :param filters_json: str (optional) JSON string representing query filters. @@ -2587,6 +2656,8 @@ def query_index( 5. - `{"id": 5}`: Filter for id equal to 5. :param num_results: int (optional) Number of results to return. Defaults to 10. + :param query_columns: List[str] (optional) + Text columns to search for `query_text`. When empty, all text columns are searched. :param query_text: str (optional) Query text. Required for Delta Sync Index using model endpoint. :param query_type: str (optional) @@ -2602,6 +2673,9 @@ def query_index( more information. 
:param score_threshold: float (optional) Threshold for the approximate nearest neighbor search. Defaults to 0.0. + :param sort_columns: List[str] (optional) + Sort results by column values instead of the default relevance ordering. Each clause has the form + `" ASC"` or `" DESC"`, for example `["rating DESC", "price ASC"]`. :returns: :class:`QueryVectorIndexResponse` """ @@ -2611,10 +2685,14 @@ def query_index( body["columns"] = [v for v in columns] if columns_to_rerank is not None: body["columns_to_rerank"] = [v for v in columns_to_rerank] + if facets is not None: + body["facets"] = [v for v in facets] if filters_json is not None: body["filters_json"] = filters_json if num_results is not None: body["num_results"] = num_results + if query_columns is not None: + body["query_columns"] = [v for v in query_columns] if query_text is not None: body["query_text"] = query_text if query_type is not None: @@ -2625,6 +2703,8 @@ def query_index( body["reranker"] = reranker.as_dict() if score_threshold is not None: body["score_threshold"] = score_threshold + if sort_columns is not None: + body["sort_columns"] = [v for v in sort_columns] headers = { "Accept": "application/json", "Content-Type": "application/json", diff --git a/databricks/sdk/service/workspace.py b/databricks/sdk/service/workspace.py index e1678b081..e6bff5f50 100755 --- a/databricks/sdk/service/workspace.py +++ b/databricks/sdk/service/workspace.py @@ -2159,6 +2159,7 @@ def update( repo_id: int, *, branch: Optional[str] = None, + dangerously_force_discard_all: Optional[bool] = None, sparse_checkout: Optional[SparseCheckoutUpdate] = None, tag: Optional[str] = None, ): @@ -2169,6 +2170,17 @@ def update( ID of the Git folder (repo) object in the workspace. :param branch: str (optional) Branch that the local version of the repo is checked out to. + :param dangerously_force_discard_all: bool (optional) + WARNING: DESTRUCTIVE AND IRREVERSIBLE. 
If true, permanently deletes ALL uncommitted changes in the + Git folder — staged, unstaged, and untracked files — before updating. Lost data CANNOT be + recovered. + + NEVER use this on Git folders where users author or edit files. This flag is intended ONLY for + automated jobs that treat the Git folder as a read-only mirror of a remote branch and need to + force-sync it. If any user has uncommitted work in the Git folder, that work will be permanently + destroyed without warning. + + Local commits that have been made but not yet pushed to the remote are preserved. :param sparse_checkout: :class:`SparseCheckoutUpdate` (optional) If specified, update the sparse checkout settings. The update will fail if sparse checkout is not enabled for the repo. @@ -2183,6 +2195,8 @@ def update( body = {} if branch is not None: body["branch"] = branch + if dangerously_force_discard_all is not None: + body["dangerously_force_discard_all"] = dangerously_force_discard_all if sparse_checkout is not None: body["sparse_checkout"] = sparse_checkout.as_dict() if tag is not None: diff --git a/docs/dbdataclasses/aisearch.rst b/docs/dbdataclasses/aisearch.rst new file mode 100644 index 000000000..34fa4dfbb --- /dev/null +++ b/docs/dbdataclasses/aisearch.rst @@ -0,0 +1,228 @@ +AISearch +======== + +These dataclasses are used in the SDK to represent API requests and responses for services in the ``databricks.sdk.service.aisearch`` module. + +.. py:currentmodule:: databricks.sdk.service.aisearch +.. autoclass:: ColumnInfo + :members: + :undoc-members: + +.. autoclass:: CustomTag + :members: + :undoc-members: + +.. autoclass:: DataModificationResult + :members: + :undoc-members: + +.. py:class:: DataModificationStatus + + Overall outcome of a data-plane upsert or delete. Mirrors the legacy `databricks.brickindexscheduler.UpsertDeleteDataStatus` value-for-value. + + .. py:attribute:: FAILURE + :value: "FAILURE" + + .. py:attribute:: PARTIAL_SUCCESS + :value: "PARTIAL_SUCCESS" + + .. 
py:attribute:: SUCCESS + :value: "SUCCESS" + +.. autoclass:: DeltaSyncIndexSpec + :members: + :undoc-members: + +.. autoclass:: DirectAccessIndexSpec + :members: + :undoc-members: + +.. autoclass:: EmbeddingSourceColumn + :members: + :undoc-members: + +.. autoclass:: EmbeddingVectorColumn + :members: + :undoc-members: + +.. autoclass:: Endpoint + :members: + :undoc-members: + +.. autoclass:: EndpointScalingInfo + :members: + :undoc-members: + +.. autoclass:: EndpointStatus + :members: + :undoc-members: + +.. py:class:: EndpointStatusState + + Lifecycle state of an AI Search endpoint, used by both Standard and Storage Optimized SKUs. + + .. py:attribute:: DELETED + :value: "DELETED" + + .. py:attribute:: OFFLINE + :value: "OFFLINE" + + .. py:attribute:: ONLINE + :value: "ONLINE" + + .. py:attribute:: PROVISIONING + :value: "PROVISIONING" + + .. py:attribute:: RED_STATE + :value: "RED_STATE" + + .. py:attribute:: YELLOW_STATE + :value: "YELLOW_STATE" + +.. autoclass:: EndpointThroughputInfo + :members: + :undoc-members: + +.. py:class:: EndpointType + + Type of endpoint. + + .. py:attribute:: STANDARD + :value: "STANDARD" + + .. py:attribute:: STORAGE_OPTIMIZED + :value: "STORAGE_OPTIMIZED" + +.. autoclass:: FacetResultData + :members: + :undoc-members: + +.. autoclass:: Index + :members: + :undoc-members: + +.. autoclass:: IndexStatus + :members: + :undoc-members: + +.. py:class:: IndexSubtype + + The subtype of the AI Search index, determining the indexing and retrieval strategy. - `VECTOR`: Not a supported create value — do not select it. Use `HYBRID` (vector + hybrid search) or `FULL_TEXT` (full-text only). It is the proto2 default (`= 0`) solely to mirror the legacy `index_v2.proto` enum value-for-value; it is not an offered index subtype. - `FULL_TEXT`: An index that uses full-text search without vector embeddings. - `HYBRID`: An index that uses vector embeddings for similarity search and hybrid search. + + .. 
py:attribute:: FULL_TEXT + :value: "FULL_TEXT" + + .. py:attribute:: HYBRID + :value: "HYBRID" + + .. py:attribute:: VECTOR + :value: "VECTOR" + +.. py:class:: IndexType + + There are 2 types of AI Search indexes: - `DELTA_SYNC`: An index that automatically syncs with a source Delta Table, automatically and incrementally updating the index as the underlying data in the Delta Table changes. - `DIRECT_ACCESS`: An index that supports direct read and write of vectors and metadata through our REST and SDK APIs. With this model, the user manages index updates. + + .. py:attribute:: DELTA_SYNC + :value: "DELTA_SYNC" + + .. py:attribute:: DIRECT_ACCESS + :value: "DIRECT_ACCESS" + +.. autoclass:: ListEndpointsResponse + :members: + :undoc-members: + +.. autoclass:: ListIndexesResponse + :members: + :undoc-members: + +.. py:class:: PipelineType + + Pipeline execution mode for a Delta Sync index. Required on create for Delta Sync indexes; the legacy backend rejects an unset value with INVALID_PARAMETER_VALUE. - `TRIGGERED`: the pipeline stops after refreshing the source table once, using the data available when the update started. - `CONTINUOUS`: the pipeline processes new data as it arrives in the source table to keep the index fresh. + + .. py:attribute:: CONTINUOUS + :value: "CONTINUOUS" + + .. py:attribute:: TRIGGERED + :value: "TRIGGERED" + +.. autoclass:: QueryIndexResponse + :members: + :undoc-members: + +.. autoclass:: RemoveDataResponse + :members: + :undoc-members: + +.. autoclass:: RerankerConfig + :members: + :undoc-members: + +.. py:class:: RerankerConfigModelType + + How the `model` field is interpreted. + + .. py:attribute:: MODEL_TYPE_BASE + :value: "MODEL_TYPE_BASE" + + .. py:attribute:: MODEL_TYPE_FINETUNED + :value: "MODEL_TYPE_FINETUNED" + +.. autoclass:: RerankerConfigRerankerParameters + :members: + :undoc-members: + +.. autoclass:: ResultData + :members: + :undoc-members: + +.. autoclass:: ResultManifest + :members: + :undoc-members: + +.. 
py:class:: ScalingChangeState + + State of the most recent scaling change request for a Storage Optimized endpoint. + + .. py:attribute:: SCALING_CHANGE_APPLIED + :value: "SCALING_CHANGE_APPLIED" + + .. py:attribute:: SCALING_CHANGE_IN_PROGRESS + :value: "SCALING_CHANGE_IN_PROGRESS" + + .. py:attribute:: SCALING_CHANGE_UNSPECIFIED + :value: "SCALING_CHANGE_UNSPECIFIED" + +.. autoclass:: ScanIndexResponse + :members: + :undoc-members: + +.. autoclass:: SyncIndexResponse + :members: + :undoc-members: + +.. py:class:: ThroughputChangeRequestState + + State of the most recent throughput change request issued against a Storage Optimized endpoint. Surfaced on `EndpointThroughputInfo.change_request_state`. + + .. py:attribute:: CHANGE_ADJUSTED + :value: "CHANGE_ADJUSTED" + + .. py:attribute:: CHANGE_FAILED + :value: "CHANGE_FAILED" + + .. py:attribute:: CHANGE_IN_PROGRESS + :value: "CHANGE_IN_PROGRESS" + + .. py:attribute:: CHANGE_REACHED_MAXIMUM + :value: "CHANGE_REACHED_MAXIMUM" + + .. py:attribute:: CHANGE_REACHED_MINIMUM + :value: "CHANGE_REACHED_MINIMUM" + + .. py:attribute:: CHANGE_SUCCESS + :value: "CHANGE_SUCCESS" + +.. autoclass:: UpsertDataResponse + :members: + :undoc-members: diff --git a/docs/dbdataclasses/bundledeployments.rst b/docs/dbdataclasses/bundledeployments.rst new file mode 100644 index 000000000..cba2afa97 --- /dev/null +++ b/docs/dbdataclasses/bundledeployments.rst @@ -0,0 +1,225 @@ +Bundle Deployments +================== + +These dataclasses are used in the SDK to represent API requests and responses for services in the ``databricks.sdk.service.bundledeployments`` module. + +.. py:currentmodule:: databricks.sdk.service.bundledeployments +.. autoclass:: Deployment + :members: + :undoc-members: + +.. py:class:: DeploymentMode + + Bundle target deployment mode. Mirrors the `mode` field on a bundle target in `databricks.yml` (see https://docs.databricks.com/dev-tools/bundles/deployment-modes). + + .. 
py:attribute:: DEPLOYMENT_MODE_DEVELOPMENT + :value: "DEPLOYMENT_MODE_DEVELOPMENT" + + .. py:attribute:: DEPLOYMENT_MODE_PRODUCTION + :value: "DEPLOYMENT_MODE_PRODUCTION" + +.. py:class:: DeploymentResourceType + + Type of a deployment resource. + + .. py:attribute:: DEPLOYMENT_RESOURCE_TYPE_ALERT + :value: "DEPLOYMENT_RESOURCE_TYPE_ALERT" + + .. py:attribute:: DEPLOYMENT_RESOURCE_TYPE_APP + :value: "DEPLOYMENT_RESOURCE_TYPE_APP" + + .. py:attribute:: DEPLOYMENT_RESOURCE_TYPE_CATALOG + :value: "DEPLOYMENT_RESOURCE_TYPE_CATALOG" + + .. py:attribute:: DEPLOYMENT_RESOURCE_TYPE_CLUSTER + :value: "DEPLOYMENT_RESOURCE_TYPE_CLUSTER" + + .. py:attribute:: DEPLOYMENT_RESOURCE_TYPE_DASHBOARD + :value: "DEPLOYMENT_RESOURCE_TYPE_DASHBOARD" + + .. py:attribute:: DEPLOYMENT_RESOURCE_TYPE_DATABASE_CATALOG + :value: "DEPLOYMENT_RESOURCE_TYPE_DATABASE_CATALOG" + + .. py:attribute:: DEPLOYMENT_RESOURCE_TYPE_DATABASE_INSTANCE + :value: "DEPLOYMENT_RESOURCE_TYPE_DATABASE_INSTANCE" + + .. py:attribute:: DEPLOYMENT_RESOURCE_TYPE_EXPERIMENT + :value: "DEPLOYMENT_RESOURCE_TYPE_EXPERIMENT" + + .. py:attribute:: DEPLOYMENT_RESOURCE_TYPE_EXTERNAL_LOCATION + :value: "DEPLOYMENT_RESOURCE_TYPE_EXTERNAL_LOCATION" + + .. py:attribute:: DEPLOYMENT_RESOURCE_TYPE_JOB + :value: "DEPLOYMENT_RESOURCE_TYPE_JOB" + + .. py:attribute:: DEPLOYMENT_RESOURCE_TYPE_MODEL + :value: "DEPLOYMENT_RESOURCE_TYPE_MODEL" + + .. py:attribute:: DEPLOYMENT_RESOURCE_TYPE_MODEL_SERVING_ENDPOINT + :value: "DEPLOYMENT_RESOURCE_TYPE_MODEL_SERVING_ENDPOINT" + + .. py:attribute:: DEPLOYMENT_RESOURCE_TYPE_PIPELINE + :value: "DEPLOYMENT_RESOURCE_TYPE_PIPELINE" + + .. py:attribute:: DEPLOYMENT_RESOURCE_TYPE_POSTGRES_BRANCH + :value: "DEPLOYMENT_RESOURCE_TYPE_POSTGRES_BRANCH" + + .. py:attribute:: DEPLOYMENT_RESOURCE_TYPE_POSTGRES_ENDPOINT + :value: "DEPLOYMENT_RESOURCE_TYPE_POSTGRES_ENDPOINT" + + .. py:attribute:: DEPLOYMENT_RESOURCE_TYPE_POSTGRES_PROJECT + :value: "DEPLOYMENT_RESOURCE_TYPE_POSTGRES_PROJECT" + + .. 
py:attribute:: DEPLOYMENT_RESOURCE_TYPE_QUALITY_MONITOR + :value: "DEPLOYMENT_RESOURCE_TYPE_QUALITY_MONITOR" + + .. py:attribute:: DEPLOYMENT_RESOURCE_TYPE_REGISTERED_MODEL + :value: "DEPLOYMENT_RESOURCE_TYPE_REGISTERED_MODEL" + + .. py:attribute:: DEPLOYMENT_RESOURCE_TYPE_SCHEMA + :value: "DEPLOYMENT_RESOURCE_TYPE_SCHEMA" + + .. py:attribute:: DEPLOYMENT_RESOURCE_TYPE_SECRET_SCOPE + :value: "DEPLOYMENT_RESOURCE_TYPE_SECRET_SCOPE" + + .. py:attribute:: DEPLOYMENT_RESOURCE_TYPE_SQL_WAREHOUSE + :value: "DEPLOYMENT_RESOURCE_TYPE_SQL_WAREHOUSE" + + .. py:attribute:: DEPLOYMENT_RESOURCE_TYPE_SYNCED_DATABASE_TABLE + :value: "DEPLOYMENT_RESOURCE_TYPE_SYNCED_DATABASE_TABLE" + + .. py:attribute:: DEPLOYMENT_RESOURCE_TYPE_VOLUME + :value: "DEPLOYMENT_RESOURCE_TYPE_VOLUME" + +.. py:class:: DeploymentStatus + + Status of a deployment. + + .. py:attribute:: DEPLOYMENT_STATUS_ACTIVE + :value: "DEPLOYMENT_STATUS_ACTIVE" + + .. py:attribute:: DEPLOYMENT_STATUS_DELETED + :value: "DEPLOYMENT_STATUS_DELETED" + + .. py:attribute:: DEPLOYMENT_STATUS_FAILED + :value: "DEPLOYMENT_STATUS_FAILED" + + .. py:attribute:: DEPLOYMENT_STATUS_IN_PROGRESS + :value: "DEPLOYMENT_STATUS_IN_PROGRESS" + +.. autoclass:: GitInfo + :members: + :undoc-members: + +.. autoclass:: HeartbeatResponse + :members: + :undoc-members: + +.. autoclass:: ListDeploymentsResponse + :members: + :undoc-members: + +.. autoclass:: ListOperationsResponse + :members: + :undoc-members: + +.. autoclass:: ListResourcesResponse + :members: + :undoc-members: + +.. autoclass:: ListVersionsResponse + :members: + :undoc-members: + +.. autoclass:: Operation + :members: + :undoc-members: + +.. py:class:: OperationActionType + + Type of action performed on a resource during a deployment. + + .. py:attribute:: OPERATION_ACTION_TYPE_BIND + :value: "OPERATION_ACTION_TYPE_BIND" + + .. py:attribute:: OPERATION_ACTION_TYPE_BIND_AND_UPDATE + :value: "OPERATION_ACTION_TYPE_BIND_AND_UPDATE" + + .. 
py:attribute:: OPERATION_ACTION_TYPE_CREATE + :value: "OPERATION_ACTION_TYPE_CREATE" + + .. py:attribute:: OPERATION_ACTION_TYPE_DELETE + :value: "OPERATION_ACTION_TYPE_DELETE" + + .. py:attribute:: OPERATION_ACTION_TYPE_INITIAL_REGISTER + :value: "OPERATION_ACTION_TYPE_INITIAL_REGISTER" + + .. py:attribute:: OPERATION_ACTION_TYPE_RECREATE + :value: "OPERATION_ACTION_TYPE_RECREATE" + + .. py:attribute:: OPERATION_ACTION_TYPE_RESIZE + :value: "OPERATION_ACTION_TYPE_RESIZE" + + .. py:attribute:: OPERATION_ACTION_TYPE_UPDATE + :value: "OPERATION_ACTION_TYPE_UPDATE" + + .. py:attribute:: OPERATION_ACTION_TYPE_UPDATE_WITH_ID + :value: "OPERATION_ACTION_TYPE_UPDATE_WITH_ID" + +.. py:class:: OperationStatus + + Status of a resource operation. + + .. py:attribute:: OPERATION_STATUS_FAILED + :value: "OPERATION_STATUS_FAILED" + + .. py:attribute:: OPERATION_STATUS_SUCCEEDED + :value: "OPERATION_STATUS_SUCCEEDED" + +.. autoclass:: Resource + :members: + :undoc-members: + +.. autoclass:: Version + :members: + :undoc-members: + +.. py:class:: VersionComplete + + Reason why a version was completed. + + .. py:attribute:: VERSION_COMPLETE_FAILURE + :value: "VERSION_COMPLETE_FAILURE" + + .. py:attribute:: VERSION_COMPLETE_FORCE_ABORT + :value: "VERSION_COMPLETE_FORCE_ABORT" + + .. py:attribute:: VERSION_COMPLETE_LEASE_EXPIRED + :value: "VERSION_COMPLETE_LEASE_EXPIRED" + + .. py:attribute:: VERSION_COMPLETE_SUCCESS + :value: "VERSION_COMPLETE_SUCCESS" + +.. py:class:: VersionStatus + + Status of a version. + + .. py:attribute:: VERSION_STATUS_COMPLETED + :value: "VERSION_STATUS_COMPLETED" + + .. py:attribute:: VERSION_STATUS_IN_PROGRESS + :value: "VERSION_STATUS_IN_PROGRESS" + +.. py:class:: VersionType + + Type of version. + + .. py:attribute:: VERSION_TYPE_DEPLOY + :value: "VERSION_TYPE_DEPLOY" + + .. py:attribute:: VERSION_TYPE_DESTROY + :value: "VERSION_TYPE_DESTROY" + +.. 
autoclass:: WorkspaceInfo + :members: + :undoc-members: diff --git a/docs/dbdataclasses/catalog.rst b/docs/dbdataclasses/catalog.rst index 6064f7bbe..b5f52d058 100755 --- a/docs/dbdataclasses/catalog.rst +++ b/docs/dbdataclasses/catalog.rst @@ -721,6 +721,10 @@ These dataclasses are used in the SDK to represent API requests and responses fo :members: :undoc-members: +.. autoclass:: EnvironmentSettings + :members: + :undoc-members: + .. autoclass:: ExternalLineageExternalMetadata :members: :undoc-members: @@ -1548,7 +1552,7 @@ These dataclasses are used in the SDK to represent API requests and responses fo .. py:class:: SecurableKind - Latest kind: MEMORY_STORE_STANDARD = 342; Next id: 343. Reserved numbers: 316, 317, 327, 330, 341 (former ENDPOINT_LLM_*, MODEL_SERVICE_STANDARD, MODEL_SERVICE_SYSTEM_DELTASHARING, MCP_SERVICE_STANDARD). + Latest kind: CONNECTION_ADOBE_CAMPAIGNS_OAUTH_M2M = 345; Next id: 346. Reserved numbers: 316, 317, 327, 330, 341 (former ENDPOINT_LLM_*, MODEL_SERVICE_STANDARD, MODEL_SERVICE_SYSTEM_DELTASHARING, MCP_SERVICE_STANDARD). .. py:attribute:: TABLE_DB_STORAGE :value: "TABLE_DB_STORAGE" diff --git a/docs/dbdataclasses/index.rst b/docs/dbdataclasses/index.rst index 7edd53f2b..252130109 100755 --- a/docs/dbdataclasses/index.rst +++ b/docs/dbdataclasses/index.rst @@ -6,9 +6,10 @@ Dataclasses :maxdepth: 1 agentbricks + aisearch apps billing - bundle + bundledeployments catalog cleanrooms compute diff --git a/docs/dbdataclasses/ml.rst b/docs/dbdataclasses/ml.rst index ffb95b157..8c158745c 100755 --- a/docs/dbdataclasses/ml.rst +++ b/docs/dbdataclasses/ml.rst @@ -1098,6 +1098,10 @@ These dataclasses are used in the SDK to represent API requests and responses fo :members: :undoc-members: +.. autoclass:: StreamSource + :members: + :undoc-members: + .. 
autoclass:: StreamSourceConfig :members: :undoc-members: diff --git a/docs/dbdataclasses/settings.rst b/docs/dbdataclasses/settings.rst index 5b5dece3f..7e69e8e23 100755 --- a/docs/dbdataclasses/settings.rst +++ b/docs/dbdataclasses/settings.rst @@ -548,6 +548,10 @@ These dataclasses are used in the SDK to represent API requests and responses fo :members: :undoc-members: +.. autoclass:: EgressNetworkPolicyNetworkAccessPolicyDatabricksDestination + :members: + :undoc-members: + .. autoclass:: EgressNetworkPolicyNetworkAccessPolicyInternetDestination :members: :undoc-members: diff --git a/docs/dbdataclasses/vectorsearch.rst b/docs/dbdataclasses/vectorsearch.rst index b00e2d99d..01b28923f 100755 --- a/docs/dbdataclasses/vectorsearch.rst +++ b/docs/dbdataclasses/vectorsearch.rst @@ -103,6 +103,10 @@ These dataclasses are used in the SDK to represent API requests and responses fo .. py:attribute:: STORAGE_OPTIMIZED :value: "STORAGE_OPTIMIZED" +.. autoclass:: FacetResultData + :members: + :undoc-members: + .. autoclass:: GetVectorSearchEndpointPermissionLevelsResponse :members: :undoc-members: diff --git a/docs/packages.py b/docs/packages.py index 0bd6416a7..bb8181cc4 100755 --- a/docs/packages.py +++ b/docs/packages.py @@ -118,9 +118,10 @@ class Package: # into MANUAL_PACKAGES above to override. AUTO_PACKAGES = [ Package("agentbricks", "Agent Bricks Service", "The Custom LLMs service manages state and powers the UI for the Custom LLM product."), + Package("aisearch", "AISearch", "**AI Search Endpoint**: Represents the compute resources to host AI Search indexes. 
AIP-conformant replacement for the legacy VectorSearchEndpoints API; functionally equivalent."), Package("apps", "", ""), Package("billing", "", ""), - Package("bundle", "Bundle", "Service for managing bundle deployment metadata."), + Package("bundledeployments", "Bundle Deployments", "Service for managing bundle deployment metadata."), Package("catalog", "", ""), Package("cleanrooms", "", ""), Package("compute", "", ""), diff --git a/docs/workspace/aisearch/ai_search.rst b/docs/workspace/aisearch/ai_search.rst new file mode 100644 index 000000000..72e240130 --- /dev/null +++ b/docs/workspace/aisearch/ai_search.rst @@ -0,0 +1,229 @@ +``w.ai_search``: AISearch +========================= +.. currentmodule:: databricks.sdk.service.aisearch + +.. py:class:: AiSearchAPI + + **AI Search Endpoint**: Represents the compute resources to host AI Search indexes. AIP-conformant + replacement for the legacy VectorSearchEndpoints API; functionally equivalent. + + .. py:method:: create_endpoint(parent: str, endpoint: Endpoint [, endpoint_id: Optional[str]]) -> Endpoint + + Create a new AI Search endpoint. + + :param parent: str + The Workspace where this Endpoint will be created. Format: `workspaces/{workspace_id}` + :param endpoint: :class:`Endpoint` + The Endpoint resource to create. Fields other than `endpoint.name` carry the desired configuration; + `endpoint.name` is server-assigned from `parent` and `endpoint_id`. + :param endpoint_id: str (optional) + The user-supplied short name for the Endpoint, per AIP-133. The server composes the full + `Endpoint.name` as `{parent}/endpoints/{endpoint_id}`. AIP-133 does not list `endpoint_id` as a + fields-may-be-required entry, so we annotate it OPTIONAL on the wire; the server still rejects empty + values with INVALID_PARAMETER_VALUE. + + :returns: :class:`Endpoint` + + + .. py:method:: create_index(parent: str, index: Index [, index_id: Optional[str]]) -> Index + + Create a new AI Search index. 
+ + :param parent: str + The Endpoint where this Index will be created. Format: + `workspaces/{workspace_id}/endpoints/{endpoint_id}` + :param index: :class:`Index` + The Index resource to create. Fields other than `index.name` carry the desired configuration; + `index.name` is server-assigned from `parent` and `index_id`. + :param index_id: str (optional) + The user-supplied Unity Catalog table name for the Index, per AIP-133. The server composes the full + `Index.name` as `{parent}/indexes/{index_id}`. AIP-133 does not list `index_id` as a + fields-may-be-required entry, so we annotate it OPTIONAL on the wire; the server still rejects empty + values with INVALID_PARAMETER_VALUE. + + :returns: :class:`Index` + + + .. py:method:: delete_endpoint(name: str) + + Delete an AI Search endpoint. + + :param name: str + Full resource name of the endpoint to delete. Format: + `workspaces/{workspace_id}/endpoints/{endpoint_id}` + + + + + .. py:method:: delete_index(name: str) + + Delete an AI Search index. + + :param name: str + Full resource name of the index to delete. Format: + `workspaces/{workspace_id}/endpoints/{endpoint_id}/indexes/{index_id}` + + + + + .. py:method:: get_endpoint(name: str) -> Endpoint + + Get details for a single AI Search endpoint. + + :param name: str + Full resource name of the endpoint. Format: `workspaces/{workspace_id}/endpoints/{endpoint_id}` + + :returns: :class:`Endpoint` + + + .. py:method:: get_index(name: str) -> Index + + Get details for a single AI Search index. + + :param name: str + Full resource name of the index. Format: + `workspaces/{workspace_id}/endpoints/{endpoint_id}/indexes/{index_id}` + + :returns: :class:`Index` + + + .. py:method:: list_endpoints(parent: str [, page_size: Optional[int], page_token: Optional[str]]) -> Iterator[Endpoint] + + List AI Search endpoints in a workspace. + + :param parent: str + The Workspace that owns this collection of endpoints. 
Format: `workspaces/{workspace_id}` + :param page_size: int (optional) + Best-effort upper bound on the number of results to return. Honored as an upper bound by the shim: + `page_size` only narrows the legacy backend's response, never widens it, so the practical cap is + `min(page_size, legacy_fixed_page_size)`. + :param page_token: str (optional) + Page token from a previous response. If not provided, returns the first page. + + :returns: Iterator over :class:`Endpoint` + + + .. py:method:: list_indexes(parent: str [, page_size: Optional[int], page_token: Optional[str]]) -> Iterator[Index] + + List AI Search indexes on an endpoint. + + :param parent: str + The Endpoint that owns this collection of indexes. Format: + `workspaces/{workspace_id}/endpoints/{endpoint_id}` + :param page_size: int (optional) + Best-effort upper bound on the number of results to return. Honored as an upper bound by the shim: + `page_size` only narrows the legacy backend's response, never widens it, so the practical cap is + `min(page_size, legacy_fixed_page_size)`. + :param page_token: str (optional) + Page token from a previous response. If not provided, returns the first page. + + :returns: Iterator over :class:`Index` + + + .. py:method:: query_index(name: str, columns: List[str] [, columns_to_rerank: Optional[List[str]], facets: Optional[List[str]], filters_json: Optional[str], max_results: Optional[int], query_columns: Optional[List[str]], query_text: Optional[str], query_type: Optional[str], query_vector: Optional[List[float]], reranker: Optional[RerankerConfig], score_threshold: Optional[float], sort_columns: Optional[List[str]]]) -> QueryIndexResponse + + Query (search) an AI Search index. Read-only, so a read-scoped token may invoke it. + + :param name: str + Full resource name of the index to query. Format: + `workspaces/{workspace_id}/endpoints/{endpoint_id}/indexes/{index_id}` + :param columns: List[str] + Column names to include in each result row. 
+ :param columns_to_rerank: List[str] (optional) + Columns whose values are sent to the reranker. + :param facets: List[str] (optional) + Facets to compute over the matched results (e.g. `"category TOP 5"`). + :param filters_json: str (optional) + JSON string describing query filters (e.g. `{"id >": 5}`). + :param max_results: int (optional) + Maximum number of results to return (the legacy `num_results`). Defaults to 10. + :param query_columns: List[str] (optional) + Text columns to search for `query_text`. When empty, all text columns are searched. + :param query_text: str (optional) + Query text. Required for Delta Sync indexes that compute embeddings from a model endpoint. + :param query_type: str (optional) + Query type: `ANN`, `HYBRID`, or `FULL_TEXT`. Defaults to `ANN`. + :param query_vector: List[float] (optional) + Query vector. Required for Direct Access indexes and Delta Sync indexes with self-managed vectors. + :param reranker: :class:`RerankerConfig` (optional) + If set, results are reranked before being returned. + :param score_threshold: float (optional) + Score threshold for the approximate nearest-neighbor search. Defaults to 0.0. + :param sort_columns: List[str] (optional) + Sort clauses, e.g. `["rating DESC", "price ASC"]`. Overrides relevance ordering. + + :returns: :class:`QueryIndexResponse` + + + .. py:method:: remove_data(name: str, primary_keys: List[str]) -> RemoveDataResponse + + Remove rows by primary key from a Direct Access AI Search index. + + :param name: str + Full resource name of the index. Must be a Direct Access index. Format: + `workspaces/{workspace_id}/endpoints/{endpoint_id}/indexes/{index_id}` + :param primary_keys: List[str] + Primary keys of the rows to remove. + + :returns: :class:`RemoveDataResponse` + + + .. py:method:: scan_index(name: str [, page_size: Optional[int], page_token: Optional[str]]) -> ScanIndexResponse + + Scan (paginate over) the rows of an AI Search index. 
+ + :param name: str + Full resource name of the index to scan. Format: + `workspaces/{workspace_id}/endpoints/{endpoint_id}/indexes/{index_id}` + :param page_size: int (optional) + Maximum number of rows to return in this page. + :param page_token: str (optional) + Page token from a previous response; if unset, scanning starts from the beginning. + + :returns: :class:`ScanIndexResponse` + + + .. py:method:: sync_index(name: str) -> SyncIndexResponse + + Synchronize a Delta Sync AI Search index with its source Delta table. Applies only to Delta Sync + indexes; Direct Access indexes are written via the data-plane upsert path. + + :param name: str + Full resource name of the index to synchronize. Must be a Delta Sync index. Format: + `workspaces/{workspace_id}/endpoints/{endpoint_id}/indexes/{index_id}` + + :returns: :class:`SyncIndexResponse` + + + .. py:method:: update_endpoint(name: str, endpoint: Endpoint, update_mask: FieldMask) -> Endpoint + + Update an existing AI Search endpoint. Multi-bucket masks are supported and dispatched in + deterministic bucket order: budget policy, custom tags, throughput, then scaling/replicas. Per-bucket + dispatch is not atomic across buckets — if a later bucket fails, earlier buckets may already have + been applied. + + :param name: str + Name of the AI Search endpoint. Server-assigned full resource path + (`workspaces/{workspace}/endpoints/{endpoint}`) on output. On create, the user-supplied short name + is conveyed via `CreateEndpointRequest.endpoint_id`; the server composes the full `name` and returns + it on the response. + :param endpoint: :class:`Endpoint` + The Endpoint resource to update. `endpoint.name` carries the full resource path. + :param update_mask: FieldMask + The list of fields to update. + + :returns: :class:`Endpoint` + + + .. py:method:: upsert_data(name: str, inputs_json: str) -> UpsertDataResponse + + Upsert rows into a Direct Access AI Search index. + + :param name: str + Full resource name of the index. 
Must be a Direct Access index. Format: + `workspaces/{workspace_id}/endpoints/{endpoint_id}/indexes/{index_id}` + :param inputs_json: str + JSON document describing the rows to upsert. + + :returns: :class:`UpsertDataResponse` + \ No newline at end of file diff --git a/docs/workspace/aisearch/index.rst b/docs/workspace/aisearch/index.rst new file mode 100644 index 000000000..82902a527 --- /dev/null +++ b/docs/workspace/aisearch/index.rst @@ -0,0 +1,10 @@ + +AISearch +======== + +**AI Search Endpoint**: Represents the compute resources to host AI Search indexes. AIP-conformant replacement for the legacy VectorSearchEndpoints API; functionally equivalent. + +.. toctree:: + :maxdepth: 1 + + ai_search \ No newline at end of file diff --git a/docs/workspace/bundledeployments/bundle_deployments.rst b/docs/workspace/bundledeployments/bundle_deployments.rst new file mode 100644 index 000000000..90b06a38f --- /dev/null +++ b/docs/workspace/bundledeployments/bundle_deployments.rst @@ -0,0 +1,219 @@ +``w.bundle_deployments``: BundleDeployments.v1 +============================================== +.. currentmodule:: databricks.sdk.service.bundledeployments + +.. py:class:: BundleDeploymentsAPI + + Service for managing bundle deployment metadata. + + .. py:method:: complete_version(name: str, completion_reason: VersionComplete [, force: Optional[bool]]) -> Version + + Marks a version as complete and releases the deployment lock. + + The server atomically: 1. Sets the version status to the provided terminal status. 2. Sets + `complete_time` to the current server timestamp. 3. Releases the lock on the parent deployment. 4. + Updates the parent deployment's `status` and `last_version_id`. + + :param name: str + The name of the version to complete. Format: deployments/{deployment_id}/versions/{version_id} + :param completion_reason: :class:`VersionComplete` + The reason for completing the version. 
Must be a terminal reason: VERSION_COMPLETE_SUCCESS, + VERSION_COMPLETE_FAILURE, or VERSION_COMPLETE_FORCE_ABORT. + :param force: bool (optional) + If true, force-completes the version even if the caller is not the original creator. The + completion_reason must be VERSION_COMPLETE_FORCE_ABORT when force is true. + + :returns: :class:`Version` + + + .. py:method:: create_deployment(deployment: Deployment, deployment_id: str) -> Deployment + + Creates a new deployment in the workspace. + + The caller must provide a `deployment_id` which becomes the final component of the deployment's + resource name. If a deployment with the same ID already exists, the server returns `ALREADY_EXISTS`. + + :param deployment: :class:`Deployment` + The deployment to create. + :param deployment_id: str + The ID to use for the deployment, which will become the final component of the deployment's resource + name (i.e. `deployments/{deployment_id}`). + + :returns: :class:`Deployment` + + + .. py:method:: create_operation(parent: str, operation: Operation, resource_key: str) -> Operation + + Creates a resource operation under a version. + + The caller must provide a `resource_key` which becomes the final component of the operation's name. If + an operation with the same key already exists under the version, the server returns `ALREADY_EXISTS`. + + On success the server also updates the corresponding deployment-level Resource (creating it if this is + the first operation for that resource_key, or removing it if action_type is DELETE). + + :param parent: str + The parent version where this operation will be recorded. Format: + deployments/{deployment_id}/versions/{version_id} + :param operation: :class:`Operation` + The resource operation to create. + :param resource_key: str + The key identifying the resource this operation applies to. Becomes the final component of the + operation's name. + + :returns: :class:`Operation` + + + .. 
py:method:: create_version(parent: str, version: Version, version_id: str) -> Version + + Creates a new version under a deployment. + + Creating a version acquires an exclusive lock on the deployment, preventing concurrent deploys. The + caller provides a `version_id` which the server validates equals `last_version_id + 1` on the + deployment. + + :param parent: str + The parent deployment where this version will be created. Format: deployments/{deployment_id} + :param version: :class:`Version` + The version to create. + :param version_id: str + The version ID the caller expects to create. The server validates this equals `last_version_id + 1` + on the deployment. If it doesn't match, the server returns `ABORTED`. + + :returns: :class:`Version` + + + .. py:method:: delete_deployment(name: str) + + Deletes a deployment. + + The deployment is marked as deleted. It and all its children (versions and their operations) will be + permanently deleted after the retention policy expires. If the deployment has an in-progress version, + the server returns `RESOURCE_CONFLICT`. + + :param name: str + Resource name of the deployment to delete. Format: deployments/{deployment_id} + + + + + .. py:method:: get_deployment(name: str) -> Deployment + + Retrieves a deployment by its resource name. + + :param name: str + Resource name of the deployment to retrieve. Format: deployments/{deployment_id} + + :returns: :class:`Deployment` + + + .. py:method:: get_operation(name: str) -> Operation + + Retrieves a resource operation by its resource name. + + :param name: str + The name of the resource operation to retrieve. Format: + deployments/{deployment_id}/versions/{version_id}/operations/{resource_key} + + :returns: :class:`Operation` + + + .. py:method:: get_resource(name: str) -> Resource + + Retrieves a deployment resource by its resource name. + + :param name: str + The name of the resource to retrieve. 
Format: deployments/{deployment_id}/resources/{resource_key} + + :returns: :class:`Resource` + + + .. py:method:: get_version(name: str) -> Version + + Retrieves a version by its resource name. + + :param name: str + The name of the version to retrieve. Format: deployments/{deployment_id}/versions/{version_id} + + :returns: :class:`Version` + + + .. py:method:: heartbeat(name: str) -> HeartbeatResponse + + Sends a heartbeat to renew the lock held by a version. + + The server validates that the version is the active (non-terminal) version on the parent deployment + and resets the lock expiry. If the lock has already expired or the version is no longer active, the + server returns `ABORTED`. + + :param name: str + The version whose lock to renew. Format: deployments/{deployment_id}/versions/{version_id} + + :returns: :class:`HeartbeatResponse` + + + .. py:method:: list_deployments( [, page_size: Optional[int], page_token: Optional[str]]) -> Iterator[Deployment] + + Lists deployments in the workspace. + + :param page_size: int (optional) + The maximum number of deployments to return. The service may return fewer than this value. If + unspecified, at most 50 deployments will be returned. The maximum value is 1000; values above 1000 + will be coerced to 1000. + :param page_token: str (optional) + A page token, received from a previous `ListDeployments` call. Provide this to retrieve the + subsequent page. + + :returns: Iterator over :class:`Deployment` + + + .. py:method:: list_operations(parent: str [, page_size: Optional[int], page_token: Optional[str]]) -> Iterator[Operation] + + Lists resource operations under a version. + + :param parent: str + The parent version. Format: deployments/{deployment_id}/versions/{version_id} + :param page_size: int (optional) + The maximum number of operations to return. The service may return fewer than this value. If + unspecified, at most 50 operations will be returned. 
The maximum value is 1000; values above 1000 + will be coerced to 1000. + :param page_token: str (optional) + A page token, received from a previous `ListOperations` call. Provide this to retrieve the + subsequent page. + + :returns: Iterator over :class:`Operation` + + + .. py:method:: list_resources(parent: str [, page_size: Optional[int], page_token: Optional[str]]) -> Iterator[Resource] + + Lists resources under a deployment. + + :param parent: str + The parent deployment. Format: deployments/{deployment_id} + :param page_size: int (optional) + The maximum number of resources to return. The service may return fewer than this value. If + unspecified, at most 50 resources will be returned. The maximum value is 1000; values above 1000 + will be coerced to 1000. + :param page_token: str (optional) + A page token, received from a previous `ListResources` call. Provide this to retrieve the subsequent + page. + + :returns: Iterator over :class:`Resource` + + + .. py:method:: list_versions(parent: str [, page_size: Optional[int], page_token: Optional[str]]) -> Iterator[Version] + + Lists versions under a deployment, ordered by version_id descending (most recent first). + + :param parent: str + The parent deployment. Format: deployments/{deployment_id} + :param page_size: int (optional) + The maximum number of versions to return. The service may return fewer than this value. If + unspecified, at most 50 versions will be returned. The maximum value is 1000; values above 1000 will + be coerced to 1000. + :param page_token: str (optional) + A page token, received from a previous `ListVersions` call. Provide this to retrieve the subsequent + page. 
+ + :returns: Iterator over :class:`Version` + \ No newline at end of file diff --git a/docs/workspace/bundledeployments/index.rst b/docs/workspace/bundledeployments/index.rst new file mode 100644 index 000000000..121f2ec56 --- /dev/null +++ b/docs/workspace/bundledeployments/index.rst @@ -0,0 +1,10 @@ + +Bundle Deployments +================== + +Service for managing bundle deployment metadata. + +.. toctree:: + :maxdepth: 1 + + bundle_deployments \ No newline at end of file diff --git a/docs/workspace/catalog/catalogs.rst b/docs/workspace/catalog/catalogs.rst index 22de6e65c..ae77bc760 100755 --- a/docs/workspace/catalog/catalogs.rst +++ b/docs/workspace/catalog/catalogs.rst @@ -11,7 +11,7 @@ the workspaces in a Databricks account. Users in different workspaces can share access to the same data, depending on privileges granted centrally in Unity Catalog. - .. py:method:: create(name: str [, comment: Optional[str], connection_name: Optional[str], managed_encryption_settings: Optional[EncryptionSettings], options: Optional[Dict[str, str]], properties: Optional[Dict[str, str]], provider_name: Optional[str], share_name: Optional[str], storage_root: Optional[str]]) -> CatalogInfo + .. 
py:method:: create(name: str [, comment: Optional[str], connection_name: Optional[str], custom_max_retention_hours: Optional[int], managed_encryption_settings: Optional[EncryptionSettings], options: Optional[Dict[str, str]], properties: Optional[Dict[str, str]], provider_name: Optional[str], share_name: Optional[str], storage_root: Optional[str]]) -> CatalogInfo Usage: @@ -24,10 +24,10 @@ w = WorkspaceClient() - created_catalog = w.catalogs.create(name=f"sdk-{time.time_ns()}") + new_catalog = w.catalogs.create(name=f"sdk-{time.time_ns()}") # cleanup - w.catalogs.delete(name=created_catalog.name, force=True) + w.catalogs.delete(name=new_catalog.name, force=True) Creates a new catalog instance in the parent metastore if the caller is a metastore admin or has the **CREATE_CATALOG** privilege. @@ -38,6 +38,8 @@ User-provided free-form text description. :param connection_name: str (optional) The name of the connection to an external data source. + :param custom_max_retention_hours: int (optional) + Custom maximum retention period in hours for the catalog :param managed_encryption_settings: :class:`EncryptionSettings` (optional) Control CMK encryption for managed catalog data :param options: Dict[str,str] (optional) @@ -147,7 +149,7 @@ :returns: Iterator over :class:`CatalogInfo` - .. py:method:: update(name: str [, comment: Optional[str], enable_predictive_optimization: Optional[EnablePredictiveOptimization], isolation_mode: Optional[CatalogIsolationMode], managed_encryption_settings: Optional[EncryptionSettings], new_name: Optional[str], options: Optional[Dict[str, str]], owner: Optional[str], properties: Optional[Dict[str, str]]]) -> CatalogInfo + .. 
py:method:: update(name: str [, comment: Optional[str], custom_max_retention_hours: Optional[int], enable_predictive_optimization: Optional[EnablePredictiveOptimization], isolation_mode: Optional[CatalogIsolationMode], managed_encryption_settings: Optional[EncryptionSettings], new_name: Optional[str], options: Optional[Dict[str, str]], owner: Optional[str], properties: Optional[Dict[str, str]]]) -> CatalogInfo Usage: @@ -175,6 +177,8 @@ The name of the catalog. :param comment: str (optional) User-provided free-form text description. + :param custom_max_retention_hours: int (optional) + Custom maximum retention period in hours for the catalog :param enable_predictive_optimization: :class:`EnablePredictiveOptimization` (optional) Whether predictive optimization should be enabled for this object and objects under it. :param isolation_mode: :class:`CatalogIsolationMode` (optional) diff --git a/docs/workspace/catalog/connections.rst b/docs/workspace/catalog/connections.rst index acfeecd53..0674d749d 100644 --- a/docs/workspace/catalog/connections.rst +++ b/docs/workspace/catalog/connections.rst @@ -13,7 +13,7 @@ objects based on cloud storage. Users may create different types of connections with each connection having a unique set of configuration options to support credential management and other settings. - .. py:method:: create(name: str, connection_type: ConnectionType, options: Dict[str, str] [, comment: Optional[str], properties: Optional[Dict[str, str]], read_only: Optional[bool]]) -> ConnectionInfo + .. py:method:: create(name: str, connection_type: ConnectionType, options: Dict[str, str] [, comment: Optional[str], environment_settings: Optional[EnvironmentSettings], properties: Optional[Dict[str, str]], read_only: Optional[bool]]) -> ConnectionInfo Usage: @@ -54,6 +54,8 @@ A map of key-value properties attached to the securable. :param comment: str (optional) User-provided free-form text description. 
+ :param environment_settings: :class:`EnvironmentSettings` (optional) + [Create,Update:OPT] Connection environment settings as an EnvironmentSettings object. :param properties: Dict[str,str] (optional) A map of key-value properties attached to the securable. :param read_only: bool (optional) @@ -153,7 +155,7 @@ :returns: Iterator over :class:`ConnectionInfo` - .. py:method:: update(name: str, options: Dict[str, str] [, new_name: Optional[str], owner: Optional[str]]) -> ConnectionInfo + .. py:method:: update(name: str, options: Dict[str, str] [, environment_settings: Optional[EnvironmentSettings], new_name: Optional[str], owner: Optional[str]]) -> ConnectionInfo Usage: @@ -196,6 +198,8 @@ Name of the connection. :param options: Dict[str,str] A map of key-value properties attached to the securable. + :param environment_settings: :class:`EnvironmentSettings` (optional) + [Create,Update:OPT] Connection environment settings as an EnvironmentSettings object. :param new_name: str (optional) New name for the connection.
:param owner: str (optional) diff --git a/docs/workspace/catalog/external_locations.rst b/docs/workspace/catalog/external_locations.rst index 0578df8b4..41716522a 100755 --- a/docs/workspace/catalog/external_locations.rst +++ b/docs/workspace/catalog/external_locations.rst @@ -115,20 +115,20 @@ credential = w.storage_credentials.create( name=f"sdk-{time.time_ns()}", - aws_iam_role=catalog.AwsIamRoleRequest(role_arn=os.environ["TEST_METASTORE_DATA_ACCESS_ARN"]), + aws_iam_role=catalog.AwsIamRole(role_arn=os.environ["TEST_METASTORE_DATA_ACCESS_ARN"]), ) created = w.external_locations.create( name=f"sdk-{time.time_ns()}", credential_name=credential.name, - url="s3://%s/%s" % (os.environ["TEST_BUCKET"], f"sdk-{time.time_ns()}"), + url=f's3://{os.environ["TEST_BUCKET"]}/sdk-{time.time_ns()}', ) - _ = w.external_locations.get(name=created.name) + _ = w.external_locations.get(name=created.name) # cleanup - w.storage_credentials.delete(name=credential.name) - w.external_locations.delete(name=created.name) + w.storage_credentials.delete(name=credential.name) + w.external_locations.delete(name=created.name) Gets an external location from the metastore. The caller must be either a metastore admin, the owner of the external location, or a user that has some privilege on the external location.
@@ -200,24 +200,24 @@ credential = w.storage_credentials.create( name=f"sdk-{time.time_ns()}", - aws_iam_role=catalog.AwsIamRoleRequest(role_arn=os.environ["TEST_METASTORE_DATA_ACCESS_ARN"]), + aws_iam_role=catalog.AwsIamRole(role_arn=os.environ["TEST_METASTORE_DATA_ACCESS_ARN"]), ) created = w.external_locations.create( name=f"sdk-{time.time_ns()}", credential_name=credential.name, - url="s3://%s/%s" % (os.environ["TEST_BUCKET"], f"sdk-{time.time_ns()}"), + url=f's3://{os.environ["TEST_BUCKET"]}/sdk-{time.time_ns()}', ) _ = w.external_locations.update( name=created.name, credential_name=credential.name, - url="s3://%s/%s" % (os.environ["TEST_BUCKET"], f"sdk-{time.time_ns()}"), + url=f's3://{os.environ["TEST_BUCKET"]}/sdk-{time.time_ns()}', ) # cleanup - w.storage_credentials.delete(name=credential.name) - w.external_locations.delete(name=created.name) + w.storage_credentials.delete(delete=credential.name) + w.external_locations.delete(delete=created.name) Updates an external location in the metastore. The caller must be the owner of the external location, or be a metastore admin. In the second case, the admin can only update the name of the external diff --git a/docs/workspace/catalog/schemas.rst b/docs/workspace/catalog/schemas.rst index 719d5a156..89e06ab3a 100755 --- a/docs/workspace/catalog/schemas.rst +++ b/docs/workspace/catalog/schemas.rst @@ -9,7 +9,7 @@ the USE_SCHEMA data permission on the schema and its parent catalog, and they must have the SELECT permission on the table or view. - .. py:method:: create(name: str, catalog_name: str [, comment: Optional[str], properties: Optional[Dict[str, str]], storage_root: Optional[str]]) -> SchemaInfo + .. 
py:method:: create(name: str, catalog_name: str [, comment: Optional[str], custom_max_retention_hours: Optional[int], properties: Optional[Dict[str, str]], storage_root: Optional[str]]) -> SchemaInfo Usage: @@ -22,13 +22,13 @@ w = WorkspaceClient() - created_catalog = w.catalogs.create(name=f"sdk-{time.time_ns()}") + new_catalog = w.catalogs.create(name=f"sdk-{time.time_ns()}") - created_schema = w.schemas.create(name=f"sdk-{time.time_ns()}", catalog_name=created_catalog.name) + created = w.schemas.create(name=f"sdk-{time.time_ns()}", catalog_name=new_catalog.name) # cleanup - w.catalogs.delete(name=created_catalog.name, force=True) - w.schemas.delete(full_name=created_schema.full_name) + w.catalogs.delete(name=new_catalog.name, force=True) + w.schemas.delete(full_name=created.full_name) Creates a new schema for catalog in the Metastore. The caller must be a metastore admin, or have the **CREATE_SCHEMA** privilege in the parent catalog. @@ -39,6 +39,8 @@ Name of parent catalog. :param comment: str (optional) User-provided free-form text description. + :param custom_max_retention_hours: int (optional) + Custom maximum retention period in hours for the schema. :param properties: Dict[str,str] (optional) A map of key-value properties attached to the securable. :param storage_root: str (optional) @@ -143,7 +145,7 @@ :returns: Iterator over :class:`SchemaInfo` - .. py:method:: update(full_name: str [, comment: Optional[str], enable_predictive_optimization: Optional[EnablePredictiveOptimization], new_name: Optional[str], owner: Optional[str], properties: Optional[Dict[str, str]]]) -> SchemaInfo + .. py:method:: update(full_name: str [, comment: Optional[str], custom_max_retention_hours: Optional[int], enable_predictive_optimization: Optional[EnablePredictiveOptimization], new_name: Optional[str], owner: Optional[str], properties: Optional[Dict[str, str]]]) -> SchemaInfo Usage: @@ -175,6 +177,8 @@ Full name of the schema. 
:param comment: str (optional) User-provided free-form text description. + :param custom_max_retention_hours: int (optional) + Custom maximum retention period in hours for the schema. :param enable_predictive_optimization: :class:`EnablePredictiveOptimization` (optional) Whether predictive optimization should be enabled for this object and objects under it. :param new_name: str (optional) diff --git a/docs/workspace/catalog/storage_credentials.rst b/docs/workspace/catalog/storage_credentials.rst index c174e87a3..92da2c568 100755 --- a/docs/workspace/catalog/storage_credentials.rst +++ b/docs/workspace/catalog/storage_credentials.rst @@ -30,14 +30,13 @@ w = WorkspaceClient() - storage_credential = w.storage_credentials.create( + created = w.storage_credentials.create( name=f"sdk-{time.time_ns()}", aws_iam_role=catalog.AwsIamRoleRequest(role_arn=os.environ["TEST_METASTORE_DATA_ACCESS_ARN"]), - comment="created via SDK", ) # cleanup - w.storage_credentials.delete(name=storage_credential.name) + w.storage_credentials.delete(name=created.name) Creates a new storage credential. @@ -99,13 +98,13 @@ created = w.storage_credentials.create( name=f"sdk-{time.time_ns()}", - aws_iam_role=catalog.AwsIamRoleRequest(role_arn=os.environ["TEST_METASTORE_DATA_ACCESS_ARN"]), + aws_iam_role=catalog.AwsIamRole(role_arn=os.environ["TEST_METASTORE_DATA_ACCESS_ARN"]), ) - by_name = w.storage_credentials.get(name=created.name) + by_name = w.storage_credentials.get(get=created.name) # cleanup - w.storage_credentials.delete(name=created.name) + w.storage_credentials.delete(delete=created.name) Gets a storage credential from the metastore. The caller must be a metastore admin, the owner of the storage credential, or have some permission on the storage credential. @@ -124,11 +123,10 @@ .. 
code-block:: from databricks.sdk import WorkspaceClient - from databricks.sdk.service import catalog w = WorkspaceClient() - all = w.storage_credentials.list(catalog.ListStorageCredentialsRequest()) + all = w.storage_credentials.list() Gets an array of storage credentials (as __StorageCredentialInfo__ objects). The array is limited to only those storage credentials the caller has permission to access. If the caller is a metastore diff --git a/docs/workspace/iam/permissions.rst b/docs/workspace/iam/permissions.rst index 531208e2f..1f836a4b1 100755 --- a/docs/workspace/iam/permissions.rst +++ b/docs/workspace/iam/permissions.rst @@ -44,7 +44,7 @@ obj = w.workspace.get_status(path=notebook_path) - levels = w.permissions.get_permission_levels(request_object_type="notebooks", request_object_id="%d" % (obj.object_id)) + _ = w.permissions.get(request_object_type="notebooks", request_object_id="%d" % (obj.object_id)) Gets the permissions of an object. Objects can inherit permissions from their parent objects or root object. diff --git a/docs/workspace/index.rst b/docs/workspace/index.rst index 3d3e8cfb9..c30775c41 100755 --- a/docs/workspace/index.rst +++ b/docs/workspace/index.rst @@ -8,8 +8,9 @@ These APIs are available from WorkspaceClient :maxdepth: 1 agentbricks/index + aisearch/index apps/index - bundle/index + bundledeployments/index catalog/index cleanrooms/index compute/index diff --git a/docs/workspace/ml/model_registry.rst b/docs/workspace/ml/model_registry.rst index 46c3a4565..c528f4329 100755 --- a/docs/workspace/ml/model_registry.rst +++ b/docs/workspace/ml/model_registry.rst @@ -90,9 +90,7 @@ w = WorkspaceClient() - model = w.model_registry.create_model(name=f"sdk-{time.time_ns()}") - - created = w.model_registry.create_model_version(name=model.registered_model.name, source="dbfs:/tmp") + created = w.model_registry.create_model(name=f"sdk-{time.time_ns()}") Creates a new registered model with the name specified in the request body. 
Throws `RESOURCE_ALREADY_EXISTS` if a registered model with the given name exists. @@ -122,7 +120,7 @@ model = w.model_registry.create_model(name=f"sdk-{time.time_ns()}") - created = w.model_registry.create_model_version(name=model.registered_model.name, source="dbfs:/tmp") + mv = w.model_registry.create_model_version(name=model.registered_model.name, source="dbfs:/tmp") Creates a model version. @@ -736,13 +734,14 @@ w = WorkspaceClient() - created = w.model_registry.create_model(name=f"sdk-{time.time_ns()}") + model = w.model_registry.create_model(name=f"sdk-{time.time_ns()}") - model = w.model_registry.get_model(name=created.registered_model.name) + created = w.model_registry.create_model_version(name=model.registered_model.name, source="dbfs:/tmp") - w.model_registry.update_model( - name=model.registered_model_databricks.name, + w.model_registry.update_model_version( description=f"sdk-{time.time_ns()}", + name=created.model_version.name, + version=created.model_version.version, ) Updates a registered model. diff --git a/docs/workspace/vectorsearch/vector_search_indexes.rst b/docs/workspace/vectorsearch/vector_search_indexes.rst index ebd31e9d3..2e7439ed4 100755 --- a/docs/workspace/vectorsearch/vector_search_indexes.rst +++ b/docs/workspace/vectorsearch/vector_search_indexes.rst @@ -81,7 +81,7 @@ :returns: Iterator over :class:`MiniVectorIndex` - .. py:method:: query_index(index_name: str, columns: List[str] [, columns_to_rerank: Optional[List[str]], filters_json: Optional[str], num_results: Optional[int], query_text: Optional[str], query_type: Optional[str], query_vector: Optional[List[float]], reranker: Optional[RerankerConfig], score_threshold: Optional[float]]) -> QueryVectorIndexResponse + .. 
py:method:: query_index(index_name: str, columns: List[str] [, columns_to_rerank: Optional[List[str]], facets: Optional[List[str]], filters_json: Optional[str], num_results: Optional[int], query_columns: Optional[List[str]], query_text: Optional[str], query_type: Optional[str], query_vector: Optional[List[float]], reranker: Optional[RerankerConfig], score_threshold: Optional[float], sort_columns: Optional[List[str]]]) -> QueryVectorIndexResponse Query the specified vector index. @@ -91,6 +91,11 @@ List of column names to include in the response. :param columns_to_rerank: List[str] (optional) Column names used to retrieve data to send to the reranker. + :param facets: List[str] (optional) + Facets to compute over the matched results. Each entry has one of these forms: `""` - top 10 + distinct values by count `" TOP "` - top n distinct values, where n > 0 `" + BUCKETS [[from,to],...]"` - inclusive numeric ranges `TOP` and `BUCKETS` are case-insensitive. A + column may appear at most once. :param filters_json: str (optional) JSON string representing query filters. @@ -101,6 +106,8 @@ 5. - `{"id": 5}`: Filter for id equal to 5. :param num_results: int (optional) Number of results to return. Defaults to 10. + :param query_columns: List[str] (optional) + Text columns to search for `query_text`. When empty, all text columns are searched. :param query_text: str (optional) Query text. Required for Delta Sync Index using model endpoint. :param query_type: str (optional) @@ -116,6 +123,9 @@ more information. :param score_threshold: float (optional) Threshold for the approximate nearest neighbor search. Defaults to 0.0. + :param sort_columns: List[str] (optional) + Sort results by column values instead of the default relevance ordering. Each clause has the form + `" ASC"` or `" DESC"`, for example `["rating DESC", "price ASC"]`. 
:returns: :class:`QueryVectorIndexResponse` diff --git a/docs/workspace/workspace/repos.rst b/docs/workspace/workspace/repos.rst index a2db2dc45..563330eef 100755 --- a/docs/workspace/workspace/repos.rst +++ b/docs/workspace/workspace/repos.rst @@ -163,7 +163,7 @@ :returns: :class:`RepoPermissions` - .. py:method:: update(repo_id: int [, branch: Optional[str], sparse_checkout: Optional[SparseCheckoutUpdate], tag: Optional[str]]) + .. py:method:: update(repo_id: int [, branch: Optional[str], dangerously_force_discard_all: Optional[bool], sparse_checkout: Optional[SparseCheckoutUpdate], tag: Optional[str]]) Usage: @@ -196,6 +196,17 @@ ID of the Git folder (repo) object in the workspace. :param branch: str (optional) Branch that the local version of the repo is checked out to. + :param dangerously_force_discard_all: bool (optional) + WARNING: DESTRUCTIVE AND IRREVERSIBLE. If true, permanently deletes ALL uncommitted changes in the + Git folder — staged, unstaged, and untracked files — before updating. Lost data CANNOT be + recovered. + + NEVER use this on Git folders where users author or edit files. This flag is intended ONLY for + automated jobs that treat the Git folder as a read-only mirror of a remote branch and need to + force-sync it. If any user has uncommitted work in the Git folder, that work will be permanently + destroyed without warning. + + Local commits that have been made but not yet pushed to the remote are preserved. :param sparse_checkout: :class:`SparseCheckoutUpdate` (optional) If specified, update the sparse checkout settings. The update will fail if sparse checkout is not enabled for the repo. 
diff --git a/docs/workspace/workspace/workspace.rst b/docs/workspace/workspace/workspace.rst index 23362f2fd..49a62632b 100755 --- a/docs/workspace/workspace/workspace.rst +++ b/docs/workspace/workspace/workspace.rst @@ -150,9 +150,9 @@ w = WorkspaceClient() - notebook = f"/Users/{w.current_user.me().user_name}/sdk-{time.time_ns()}" + notebook_path = f"/Users/{w.current_user.me().user_name}/sdk-{time.time_ns()}" - get_status_response = w.workspace.get_status(path=notebook) + obj = w.workspace.get_status(path=notebook_path) Gets the status of an object or a directory. If `path` does not exist, this call returns an error `RESOURCE_DOES_NOT_EXIST`. @@ -181,18 +181,11 @@ notebook_path = f"/Users/{w.current_user.me().user_name}/sdk-{time.time_ns()}" w.workspace.import_( - path=notebook_path, - overwrite=true_, + content=base64.b64encode(("CREATE LIVE TABLE dlt_sample AS SELECT 1").encode()).decode(), format=workspace.ImportFormat.SOURCE, - language=workspace.Language.PYTHON, - content=base64.b64encode( - ( - """import time - time.sleep(10) - dbutils.notebook.exit('hello') - """ - ).encode() - ).decode(), + language=workspace.Language.SQL, + overwrite=true_, + path=notebook_path, ) Imports a workspace object (for example, a notebook or file) or the contents of an entire directory. @@ -236,14 +229,16 @@ .. 
code-block:: + import os + import time + from databricks.sdk import WorkspaceClient w = WorkspaceClient() - names = [] - for i in w.workspace.list(f"/Users/{w.current_user.me().user_name}", recursive=True): - names.append(i.path) - assert len(names) > 0 + notebook = f"/Users/{w.current_user.me().user_name}/sdk-{time.time_ns()}" + + objects = w.workspace.list(path=os.path.dirname(notebook)) List workspace objects diff --git a/tagging.py b/tagging.py deleted file mode 100755 index a3897bbee..000000000 --- a/tagging.py +++ /dev/null @@ -1,1057 +0,0 @@ -#!/usr/bin/env python3 -# /// script -# dependencies = ["PyGithub>=2,<3", "pyjwt<2.12.0", "charset-normalizer<3.4.6"] -# /// - -import os -import re -import argparse -from typing import Optional, List, Callable, Dict -from dataclasses import dataclass, replace -import subprocess -import time -import json -from github import Github, Repository, InputGitTreeElement, InputGitAuthor -from datetime import datetime, timezone - -NEXT_CHANGELOG_FILE_NAME = "NEXT_CHANGELOG.md" -CHANGELOG_FILE_NAME = "CHANGELOG.md" -PACKAGE_FILE_NAME = ".package.json" -CODEGEN_FILE_NAME = ".codegen.json" -CREATED_TAGS_FILE_NAME = "created_tags.json" -""" -This script tags the release of the SDKs using a combination of the GitHub API and Git commands. -It reads the local repository to determine necessary changes, updates changelogs, and creates tags. - -### How it Works: -- It does **not** modify the local repository directly. -- Instead of committing and pushing changes locally, it uses the **GitHub API** to create commits and tags. -""" - - -@dataclass(frozen=True) -class Version: - """ - A semver 2.0.0-compliant version (https://semver.org). - - Mirrors the API of the `semver` PyPI package so this implementation can be - swapped for that library if it is ever added to the wheelhouse. Supports - parsing, stringification, and the two bumps we need: minor (for stable - releases) and prerelease (for release trains). 
- """ - - # Permissive pattern for locating a semver version string inside larger - # text (e.g. a changelog header). Callers use it in f-strings; strict - # validation happens via Version.parse. - PATTERN = r"\d+\.\d+\.\d+(?:-[0-9A-Za-z.-]+)?(?:\+[0-9A-Za-z.-]+)?" - - # Strict anchored regex per https://semver.org. Rejects leading zeros in - # numeric identifiers and invalid pre-release/build identifier charsets. - _PARSE_REGEX = re.compile( - r"^(0|[1-9]\d*)\.(0|[1-9]\d*)\.(0|[1-9]\d*)" - r"(?:-((?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)" - r"(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?" - r"(?:\+([0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?$" - ) - - major: int - minor: int - patch: int - prerelease: str = "" - build: str = "" - - @classmethod - def parse(cls, text: str) -> "Version": - """Parse a semver string, raising ValueError on malformed input.""" - match = cls._PARSE_REGEX.match(text) - if not match: - raise ValueError(f"Invalid semver version: {text!r}") - major, minor, patch, prerelease, build = match.groups() - return cls( - major=int(major), - minor=int(minor), - patch=int(patch), - prerelease=prerelease or "", - build=build or "", - ) - - def __str__(self) -> str: - result = f"{self.major}.{self.minor}.{self.patch}" - if self.prerelease: - result += f"-{self.prerelease}" - if self.build: - result += f"+{self.build}" - return result - - def bump_minor(self) -> "Version": - """ - Bump the minor version and reset patch. - - Per semver item 9, a pre-release version has lower precedence than - the same MAJOR.MINOR.PATCH, so bumping to a new minor drops any - pre-release and build metadata. - """ - return Version(major=self.major, minor=self.minor + 1, patch=0) - - def bump_prerelease(self) -> "Version": - """ - Increment the rightmost numeric identifier in the pre-release. 
- - Matches the npm `prerelease` bump semantics: - 0.0.0-alpha.1 -> 0.0.0-alpha.2 - 0.0.0-alpha -> 0.0.0-alpha.1 - 0.0.0-rc.1.2 -> 0.0.0-rc.1.3 - - Raises ValueError if the version has no pre-release to bump. - Build metadata is dropped since it does not affect precedence. - """ - if not self.prerelease: - raise ValueError(f"Cannot bump prerelease of {self}: no pre-release component") - parts = self.prerelease.split(".") - for i in range(len(parts) - 1, -1, -1): - if parts[i].isdigit(): - parts[i] = str(int(parts[i]) + 1) - return replace(self, prerelease=".".join(parts), build="") - # No numeric identifier exists; append ".1" to start a counter. - return replace(self, prerelease=f"{self.prerelease}.1", build="") - - def next_release_version(self) -> "Version": - """ - Default next version for the changelog after this one is released. - - If on a pre-release track, stay on it by bumping the pre-release - identifier (npm convention). Otherwise, bump the minor version, - the script's historical default for stable releases. Teams can - override the default in the release PR. - """ - if self.prerelease: - return self.bump_prerelease() - return self.bump_minor() - - -def _read_local_head_sha() -> str: - """ - Returns the SHA of the local working tree's HEAD via ``git rev-parse``. - """ - return subprocess.check_output(["git", "rev-parse", "HEAD"], text=True).strip() - - -class MainAdvancedError(Exception): - """ - Raised when ``origin/main`` has advanced since the workflow's - checkout — i.e., another commit landed during this run. The local - working tree is now stale, so any commit produced from it would - silently revert whatever the concurrent push added. - """ - - -# GitHub does not support signing commits for GitHub Apps directly. -# This class replaces usages for git commands such as "git add", "git commit", and "git push". 
-@dataclass -class GitHubRepo: - def __init__(self, repo: Repository): - self.repo = repo - self.changed_files: list[InputGitTreeElement] = [] - self.ref = "heads/main" - # Anchor ``self.sha`` to the **local checkout** rather than a - # live API call. ``actions/checkout`` populates the working tree - # at this SHA, and every subsequent file read in this run is - # against that tree; the API HEAD is only relevant when we go - # to push. - self.sha = _read_local_head_sha() - - # Replaces "git add file" - def add_file(self, loc: str, content: str): - local_path = os.path.relpath(loc, os.getcwd()) - print(f"Adding file {local_path}") - blob = self.repo.create_git_blob(content=content, encoding="utf-8") - element = InputGitTreeElement(path=local_path, mode="100644", type="blob", sha=blob.sha) - self.changed_files.append(element) - - # Replaces "git commit && git push" - def commit_and_push(self, message: str): - head_ref = self.repo.get_git_ref(self.ref) - if head_ref.object.sha != self.sha: - raise MainAdvancedError( - f"origin/main advanced from {self.sha} to {head_ref.object.sha} " - f"during this run. Local working tree is stale; aborting before " - f"the commit would silently revert the new content. Re-run the " - f"workflow." - ) - base_tree = self.repo.get_git_tree(sha=head_ref.object.sha) - new_tree = self.repo.create_git_tree(self.changed_files, base_tree) - parent_commit = self.repo.get_git_commit(head_ref.object.sha) - - new_commit = self.repo.create_git_commit(message=message, tree=new_tree, parents=[parent_commit]) - # Update branch reference. - head_ref.edit(new_commit.sha) - self.sha = new_commit.sha - - def reset(self, sha: Optional[str] = None): - self.changed_files = [] - if sha: - self.sha = sha - else: - self.sha = _read_local_head_sha() - - def tag(self, tag_name: str, tag_message: str): - # Create a tag pointing to the new commit - # The email MUST be the GitHub Apps email. - # Otherwise, the tag will not be verified. 
- tagger = InputGitAuthor( - name="Databricks SDK Release Bot", email="DECO-SDK-Tagging[bot]@users.noreply.github.com" - ) - - tag = self.repo.create_git_tag(tag=tag_name, message=tag_message, object=self.sha, type="commit", tagger=tagger) - # Create a Git ref (the actual reference for the tag in the repo) - self.repo.create_git_ref(ref=f"refs/tags/{tag_name}", sha=tag.sha) - - -gh: Optional[GitHubRepo] = None - - -@dataclass -class Package: - """ - Represents a package in the repository. - :name: The package name. - :path: The path to the package relative to the repository root. - """ - - name: str - path: str - - -@dataclass -class TagInfo: - """ - Represents all changes on a release. - :package: package info. - :version: release version for the package. Format: v.. - :content: changes for the release, as they appear in the changelog. - When written to CHANGELOG.md, the current date (YYYY-MM-DD) is automatically added. - - Example (from NEXT_CHANGELOG.md): - - ## Release v0.56.0 - - ### New Features and Improvements - * Feature - * Some improvement - - ### Bug Fixes - * Bug fix - - ### Documentation - * Doc Changes - - ### Internal Changes - * More Changes - - ### API Changes - * Add new Service - - Note: When written to CHANGELOG.md, the header becomes: ## Release v0.56.0 (YYYY-MM-DD) - - """ - - package: Package - version: str - content: str - - def tag_name(self) -> str: - return f"{self.package.name}/v{self.version}" if self.package.name else f"v{self.version}" - - -def get_package_name(package_path: str) -> str: - """ - Returns the package name from the package path. - The name is found inside the .package.json file: - { - "package": "package_name" - } - """ - filepath = os.path.join(os.getcwd(), package_path, PACKAGE_FILE_NAME) - with open(filepath, "r") as file: - content = json.load(file) - if "package" in content: - return content["package"] - # Legacy SDKs have no packages. 
- return "" - - -def stage_version_updates(tag_infos: List[TagInfo], packages: List[Package]) -> None: - """ - Stages all version-related edits for the release in a single pass over - every package the workspace already opts in via ``.package.json``. - """ - - # Load patterns from '.codegen.json' at the top level of the repository. - package_file_path = os.path.join(os.getcwd(), CODEGEN_FILE_NAME) - with open(package_file_path, "r") as file: - codegen = json.load(file) - - version_patterns = codegen.get("version", {}) - dep_patterns = codegen.get("dependency_pattern", {}) - name_template = codegen.get("dependency_name_template", "") - - if not version_patterns and not dep_patterns: - print("Neither `version` nor `dependency_pattern` found in .codegen.json. Nothing to update.") - return - - bumped_by_dir: Dict[str, TagInfo] = {info.package.path: info for info in tag_infos} - new_dep_versions = compute_dependency_rewrites(tag_infos, name_template) - - files = sorted(set(version_patterns.keys()) | set(dep_patterns.keys())) - - for pkg in packages: - for filename in files: - loc = os.path.join(os.getcwd(), pkg.path, filename) - - with open(loc, "r") as file: - content = file.read() - original = content - - # Own version (only when this package is being released and the - # file has a version pattern declared). - info = bumped_by_dir.get(pkg.path) - if info is not None and filename in version_patterns: - pattern = version_patterns[filename] - previous_version = pattern.replace("$VERSION", Version.PATTERN) - new_version = pattern.replace("$VERSION", info.version) - content = re.sub(previous_version, new_version, content) - - # Sibling dependency rewrites (only when the file has a - # dependency pattern and there is at least one bumped sibling). 
- if filename in dep_patterns and new_dep_versions: - content = rewrite_dependencies(content, dep_patterns[filename], new_dep_versions) - - if content != original: - gh.add_file(loc, content) - - -def compute_dependency_rewrites( - tag_infos: List[TagInfo], - name_template: str, -) -> Dict[str, str]: - """ - Returns a map of dependency-name to the new semver string for each - bumped package. - """ - if not name_template: - return {} - rewrites: Dict[str, str] = {} - for info in tag_infos: - # Skip legacy releases that don't have a per-package name; their - # tag_info has an empty package.name and they can't be referenced - # as a sibling dep anyway. - if not info.package.name: - continue - dep_name = name_template.replace("$PACKAGE", info.package.name) - rewrites[dep_name] = info.version - return rewrites - - -def rewrite_dependencies(content: str, pattern: str, new_versions: Dict[str, str]) -> str: - """ - Apply ``pattern`` (with ``$DEPENDENCY`` and ``$VERSION`` placeholders) to - rewrite every entry in ``content`` whose dependency name appears in - ``new_versions``. - """ - # Sentinel strings used to protect the placeholders through re.escape: - # we substitute them in, escape the whole template, then swap them out - # for the dep-name literal and Version.PATTERN. Control characters so - # they can't collide with anything in real .codegen.json patterns. - dep_sentinel = "\x01DEPENDENCY\x01" - ver_sentinel = "\x01VERSION\x01" - - for dep_name, new_value in new_versions.items(): - regex = pattern.replace("$DEPENDENCY", dep_sentinel).replace("$VERSION", ver_sentinel) - regex = re.escape(regex) - regex = regex.replace(re.escape(dep_sentinel), re.escape(dep_name)) - regex = regex.replace(re.escape(ver_sentinel), Version.PATTERN) - - # Build the literal replacement text by substituting the same - # placeholders directly. A lambda is used instead of a string to - # avoid re.sub interpreting \1, \g<...>, etc. inside the value. 
- replacement_text = pattern.replace("$DEPENDENCY", dep_name).replace("$VERSION", new_value) - content = re.sub(regex, lambda _m, text=replacement_text: text, content) - return content - - -def clean_next_changelog(package_path: str) -> None: - """ - Cleans the "NEXT_CHANGELOG.md" file. It performs 2 operations: - * Increase the version to the next minor version. - * Remove release notes. Sections names are kept to - keep consistency in the section names between releases. - """ - - file_path = os.path.join(os.getcwd(), package_path, NEXT_CHANGELOG_FILE_NAME) - with open(file_path, "r") as file: - content = file.read() - - # Remove content between ### sections. - cleaned_content = re.sub(r"(### [^\n]+\n)(?:.*?\n?)*?(?=###|$)", r"\1", content) - # Ensure there is exactly one empty line before each section. - cleaned_content = re.sub(r"(\n*)(###[^\n]+)", r"\n\n\2", cleaned_content) - # Find the version number and compute the default next release version. - # Teams can adjust the version in the PR if the default is not desired. - # For stable versions, bump minor (historical default since minor releases - # are more common than patch or major). For pre-release versions, stay on - # the same track by bumping the pre-release identifier (npm convention). - version_match = re.search(rf"Release v({Version.PATTERN})", cleaned_content) - if not version_match: - raise Exception("Version not found in the changelog") - current = Version.parse(version_match.group(1)) - new_header = f"Release v{current.next_release_version()}" - cleaned_content = cleaned_content.replace(version_match.group(0), new_header) - - # Update file with cleaned content - gh.add_file(file_path, cleaned_content) - - -def get_previous_tag_info(package: Package) -> Optional[TagInfo]: - """ - Extracts the previous tag info from the "CHANGELOG.md" file. - Used for failure recovery purposes. 
- """ - changelog_path = os.path.join(os.getcwd(), package.path, CHANGELOG_FILE_NAME) - - with open(changelog_path, "r") as f: - changelog = f.read() - - # Extract the latest release section using regex. - match = re.search( - rf"## (\[Release\] )?Release v{Version.PATTERN}.*?(?=\n## (\[Release\] )?Release v|\Z)", - changelog, - re.S, - ) - - # E.g., for new packages. - if not match: - return None - - latest_release = match.group(0) - version_match = re.search(rf"## (\[Release\] )?Release v({Version.PATTERN})", latest_release) - - if not version_match: - raise Exception("Version not found in the changelog") - - # Validate the extracted string is spec-compliant; fail loudly otherwise. - version = str(Version.parse(version_match.group(2))) - return TagInfo(package=package, version=version, content=latest_release) - - -def _load_codegen_config() -> Dict: - """ - Loads ``.codegen.json`` from the repo root. Returns an empty dict when - the file is missing. - """ - package_file_path = os.path.join(os.getcwd(), CODEGEN_FILE_NAME) - if not os.path.exists(package_file_path): - return {} - with open(package_file_path, "r") as file: - return json.load(file) - - -def get_next_tag_info(package: Package) -> Optional[TagInfo]: - """ - Extracts the changes from the "NEXT_CHANGELOG.md" file. - The result is already processed. 
- """ - next_changelog_path = os.path.join(os.getcwd(), package.path, NEXT_CHANGELOG_FILE_NAME) - # Read NEXT_CHANGELOG.md - with open(next_changelog_path, "r") as f: - next_changelog = f.read() - - # Remove "# NEXT CHANGELOG" line - next_changelog = re.sub(r"^# NEXT CHANGELOG(\n+)", "", next_changelog, flags=re.MULTILINE) - - # Remove empty sections - next_changelog = re.sub(r"###[^\n]+\n+(?=##|\Z)", "", next_changelog) - # Ensure there is exactly one empty line before each section - next_changelog = re.sub(r"(\n*)(###[^\n]+)", r"\n\n\2", next_changelog) - - # By default, packages whose NEXT_CHANGELOG.md has no populated - # sections are skipped — there's nothing meaningful to release. - # Repos like sdk-js which are still in development can opt in - # by setting ``allow_empty_changelog: true`` in .codegen.json. - if not re.search(r"###", next_changelog) and not _load_codegen_config().get("allow_empty_changelog", False): - print("All sections are empty. No changes will be made to the changelog.") - return None - - version_match = re.search(rf"## Release v({Version.PATTERN})", next_changelog) - - if not version_match: - raise Exception("Version not found in the changelog") - - # Validate the extracted string is spec-compliant; fail loudly otherwise. - version = str(Version.parse(version_match.group(1))) - return TagInfo(package=package, version=version, content=next_changelog) - - -def write_changelog(tag_info: TagInfo) -> None: - """ - Updates the changelog with a new tag info. - """ - changelog_path = os.path.join(os.getcwd(), tag_info.package.path, CHANGELOG_FILE_NAME) - with open(changelog_path, "r") as f: - changelog = f.read() - - # Add current date to the release header. 
- current_date = datetime.now(tz=timezone.utc).strftime("%Y-%m-%d") - content_with_date = re.sub( - rf"## Release v({Version.PATTERN})", - rf"## Release v\1 ({current_date})", - tag_info.content.strip(), - ) - - updated_changelog = re.sub(r"(# Version changelog\n\n)", f"\\1{content_with_date}\n\n\n", changelog) - gh.add_file(changelog_path, updated_changelog) - - -def process_package(package: Package) -> TagInfo: - """ - Processes a package's changelog scaffolding for the release. - """ - print(f"Processing package {package.name}") - tag_info = get_next_tag_info(package) - - # If there are no updates, skip. - if tag_info is None: - return - - write_changelog(tag_info) - clean_next_changelog(package.path) - return tag_info - - -def find_packages() -> List[Package]: - """ - Returns all directories which contains a ".package.json" file. - """ - paths = _find_directories_with_file(PACKAGE_FILE_NAME) - return [Package(name=get_package_name(path), path=path) for path in paths] - - -def _find_directories_with_file(target_file: str) -> List[str]: - root_path = os.getcwd() - matching_directories = [] - - for dirpath, _, filenames in os.walk(root_path): - if target_file in filenames: - path = os.path.relpath(dirpath, root_path) - # If the path is the root directory (e.g., SDK V0), set it to an empty string. - if path == ".": - path = "" - matching_directories.append(path) - - return matching_directories - - -def is_tag_applied(tag: TagInfo) -> bool: - """ - Returns whether a tag is already applied in the repository. - - :param tag: The tag to check. - :return: True if the tag is applied, False otherwise. - :raises Exception: If the git command fails. 
- """ - try: - # Check if the specific tag exists - result = subprocess.check_output(["git", "tag", "--list", tag.tag_name()], stderr=subprocess.PIPE, text=True) - return result.strip() == tag.tag_name() - except subprocess.CalledProcessError as e: - # Raise a exception for git command errors - raise Exception(f"Git command failed: {e.stderr.strip() or e}") from e - - -def find_last_release_tag(package: Package) -> Optional[str]: - """ - Returns the most recent ``/v*`` tag in the repository, or - ``None`` if no such tag exists. Tags are sorted by semver ordering - (``--sort=-v:refname``) so pre-releases sort below their stable - counterparts. - - :raises Exception: If the git command fails. - """ - pattern = f"{package.name}/v*" if package.name else "v*" - try: - output = subprocess.check_output( - ["git", "tag", "--list", pattern, "--sort=-v:refname"], - stderr=subprocess.PIPE, - text=True, - ).strip() - except subprocess.CalledProcessError as e: - raise Exception(f"Git command failed: {e.stderr.strip() or e}") from e - if not output: - return None - return output.split("\n")[0].strip() - - -def has_commits_since_tag(tag: str, path: str) -> bool: - """ - Returns True iff at least one commit reachable from HEAD but not from - ``tag`` touches ``path``. Used to detect that a sibling dependency has - unreleased changes that would ship stale if we tagged a dependent - without re-tagging the dependency. - - :raises Exception: If the git command fails. 
- """ - args = ["git", "log", "--oneline", f"{tag}..HEAD", "--", path or "."] - try: - output = subprocess.check_output(args, stderr=subprocess.PIPE, text=True).strip() - except subprocess.CalledProcessError as e: - raise Exception(f"Git command failed: {e.stderr.strip() or e}") from e - return bool(output) - - -def check_dependency_freshness(tag_infos: List[TagInfo], all_packages: List[Package]) -> None: - """ - Hard-fails when a package being released depends on a sibling package - that has unreleased commits since its last tag. - - Why: dependency rewrites (``stage_version_updates``) only fire for - siblings that are *also* being released. Without this check, releasing - package_a alone — when package_b has commits since its last tag — - publishes ``package_a@new`` pinning the *old* package_b artifact, which - won't have the changes package_a's source depends on. The check is - commit-based (not changelog-based) so a missing ``NEXT_CHANGELOG.md`` - entry on package_b is still caught. - - No-op when ``.codegen.json`` declares no dependency pattern (legacy - SDKs without per-package wiring). 
- """ - if not tag_infos: - return - - package_file_path = os.path.join(os.getcwd(), CODEGEN_FILE_NAME) - with open(package_file_path, "r") as file: - codegen = json.load(file) - - name_template = codegen.get("dependency_name_template", "") - dep_patterns = codegen.get("dependency_pattern", {}) - if not name_template or not dep_patterns: - return - - releasing_paths = {info.package.path for info in tag_infos} - by_dep_name: Dict[str, Package] = {} - for pkg in all_packages: - if not pkg.name: - continue - by_dep_name[name_template.replace("$PACKAGE", pkg.name)] = pkg - - issues: List[str] = [] - for info in tag_infos: - for filename, pattern in dep_patterns.items(): - loc = os.path.join(os.getcwd(), info.package.path, filename) - if not os.path.exists(loc): - continue - with open(loc, "r") as f: - content = f.read() - - for dep_name, dep_pkg in by_dep_name.items(): - if dep_pkg.path == info.package.path: - continue - if dep_pkg.path in releasing_paths: - continue - - # Same regex construction used by ``rewrite_dependencies``, - # so "is this dep referenced?" matches "would the rewrite - # touch it?". Keeps the two in lockstep. - regex = ( - re.escape(pattern) - .replace(re.escape("$DEPENDENCY"), re.escape(dep_name)) - .replace(re.escape("$VERSION"), Version.PATTERN) - ) - if not re.search(regex, content): - continue - - last_tag = find_last_release_tag(dep_pkg) - if last_tag is None: - # No prior tag means the dep was never released; we - # can't reason about staleness. Surface it anyway so - # the human resolves it explicitly. - issues.append( - f"{info.package.name} depends on {dep_pkg.name}, " - f"which has never been released. Release " - f"{dep_pkg.name} first or include it in this run." - ) - continue - if has_commits_since_tag(last_tag, dep_pkg.path): - issues.append( - f"{info.package.name} depends on {dep_pkg.name}, " - f"which has commits since {last_tag} but is not " - f"being released. 
Either release {dep_pkg.name} " - f"as well, or hold this release until its changes " - f"are reverted." - ) - - if issues: - raise Exception("Dependency freshness check failed:\n - " + "\n - ".join(issues)) - - -def find_last_tags() -> List[TagInfo]: - """ - Finds the last tags for each package. - - Returns a list of TagInfo objects for each package with a non-None changelog. - """ - packages = find_packages() - - return [info for info in (get_previous_tag_info(package) for package in packages) if info is not None] - - -def find_pending_tags() -> List[TagInfo]: - """ - Finds all tags that are pending to be applied. - """ - tag_infos = find_last_tags() - return [tag for tag in tag_infos if not is_tag_applied(tag)] - - -def generate_commit_message(tag_infos: List[TagInfo]) -> str: - """ - Generates a commit message for the release. - """ - if not tag_infos: - raise Exception("No tag infos provided to generate commit message") - - info = tag_infos[0] - # Legacy mode for SDKs without per service packaging - if not info.package.name: - if len(tag_infos) > 1: - raise Exception("Multiple packages found in legacy mode") - return f"[Release] Release v{info.version}\n\n{info.content}" - - # Sort tag_infos by package name for consistency. 
- tag_infos.sort(key=lambda info: info.package.name) - titles = ", ".join(f"{info.package.name}/v{info.version}" for info in tag_infos) - body = "\n\n".join(f"## {info.package.name}/v{info.version}\n\n{info.content}" for info in tag_infos) - return f"[Release] {titles}\n\n{body}" - - -def push_changes(tag_infos: List[TagInfo]) -> None: - """Pushes changes to the remote repository after handling possible merge conflicts.""" - - commit_message = generate_commit_message(tag_infos) - - # Create the release metadata file - file_name = os.path.join(os.getcwd(), ".release_metadata.json") - metadata = {"timestamp": datetime.now(tz=timezone.utc).strftime("%Y-%m-%d %H:%M:%S%z")} - content = json.dumps(metadata, indent=4) - gh.add_file(file_name, content) - - gh.commit_and_push(commit_message) - - -def reset_repository(hash: Optional[str] = None) -> None: - """ - Reset git to the specified commit. Defaults to HEAD. - - :param hash: The commit hash to reset to. If None, it resets to HEAD. - """ - # Fetch the latest changes from the remote repository. - subprocess.run(["git", "fetch"]) - - # Determine the commit hash (default to origin/main if none is provided). - commit_hash = hash or "origin/main" - - # ``git reset --hard`` must land before ``gh.reset(None)``, since - # ``gh.reset(None)`` reads ``git rev-parse HEAD`` to anchor - # ``self.sha`` to the local working tree. - subprocess.run(["git", "reset", "--hard", commit_hash], check=True) - gh.reset(hash) - - -def retry_function( - func: Callable[[], List[TagInfo]], cleanup: Callable[[], None], max_attempts: int = 5, delay: int = 5 -) -> List[TagInfo]: - """ - Calls a function call up to `max_attempts` times if an exception occurs. - - :param func: The function to call. - :param cleanup: Cleanup function in between retries - :param max_attempts: The maximum number of retries. - :param delay: The delay between retries in seconds. - :return: The return value of the function, or None if all retries fail. 
- """ - attempts = 0 - while attempts <= max_attempts: - try: - return func() # Call the function - except MainAdvancedError: - # Permanent failure: another commit landed on main during - # this run, so the local tree is stale. Retrying with the - # same stale tree would just hit the same mismatch — only - # a fresh workflow run against the new main can recover. - raise - except Exception as e: - attempts += 1 - print(f"Attempt {attempts} failed: {e}") - if attempts < max_attempts: - time.sleep(delay) # Wait before retrying - cleanup() - else: - print("All retry attempts failed.") - raise e # Re-raise the exception after max retries - - -def update_changelogs(selected_packages: List[Package], all_packages: List[Package]) -> List[TagInfo]: - """ - Updates changelogs and pushes the commits. - - ``selected_packages`` are the packages whose ``NEXT_CHANGELOG.md`` is - consulted to decide what gets released this run. ``all_packages`` is - the full repo inventory used for cross-package dep rewrites. - - The freshness check is deliberately *not* called here. ``process`` - runs it before entering the retry loop so a freshness violation - fails fast — the check is deterministic against the same git state, - so wrapping it in retry would just delay the same failure five - times. - """ - tag_infos = [info for info in (process_package(package) for package in selected_packages) if info is not None] - # If any package was changed, stage version updates and push. - if tag_infos: - stage_version_updates(tag_infos, all_packages) - push_changes(tag_infos) - return tag_infos - - -def preview_tag_infos(packages: List[Package]) -> List[TagInfo]: - """ - Read-only sibling of ``process_package``: returns the TagInfos that - would be released for ``packages`` without writing any changelog - edits. ``process`` calls this before the retry loop so the freshness - check has a snapshot to validate against. 
``process_package`` will - re-derive the same TagInfos when ``update_changelogs`` runs; the - duplication is just a couple of NEXT_CHANGELOG.md reads. - """ - return [info for info in (get_next_tag_info(package) for package in packages) if info is not None] - - -def order_tag_infos_by_dependency(tag_infos: List[TagInfo]) -> List[TagInfo]: - """ - Returns ``tag_infos`` in topological order: every package appears - after every sibling it depends on. - """ - if not tag_infos: - return list(tag_infos) - - if any(not info.package.name for info in tag_infos) and len(tag_infos) > 1: - raise Exception("Multiple packages found in legacy mode") - - package_file_path = os.path.join(os.getcwd(), CODEGEN_FILE_NAME) - with open(package_file_path, "r") as file: - codegen = json.load(file) - - name_template = codegen.get("dependency_name_template", "") - dep_patterns = codegen.get("dependency_pattern", {}) - if not name_template or not dep_patterns: - return list(tag_infos) - - by_dep_name: Dict[str, TagInfo] = { - name_template.replace("$PACKAGE", info.package.name): info for info in tag_infos if info.package.name - } - - # Adjacency: path -> set of paths it depends on (within tag_infos). - deps: Dict[str, set] = {info.package.path: set() for info in tag_infos} - for info in tag_infos: - for filename, pattern in dep_patterns.items(): - loc = os.path.join(os.getcwd(), info.package.path, filename) - if not os.path.exists(loc): - continue - with open(loc, "r") as f: - content = f.read() - for dep_name, dep_info in by_dep_name.items(): - if dep_info.package.path == info.package.path: - continue - regex = ( - re.escape(pattern) - .replace(re.escape("$DEPENDENCY"), re.escape(dep_name)) - .replace(re.escape("$VERSION"), Version.PATTERN) - ) - if re.search(regex, content): - deps[info.package.path].add(dep_info.package.path) - - # Stable topological sort: at each step, emit every node whose deps - # are already emitted, alphabetically by package name. 
Ties broken - # alphabetically so the manifest is reproducible across runs. - emitted: set = set() - ordered: List[TagInfo] = [] - while len(ordered) < len(tag_infos): - ready = sorted( - ( - info - for info in tag_infos - if info.package.path not in emitted and deps[info.package.path].issubset(emitted) - ), - key=lambda info: info.package.name, - ) - if not ready: - remaining = [info.package.name for info in tag_infos if info.package.path not in emitted] - raise Exception(f"Cyclic dependency detected among packages: {remaining}") - for info in ready: - ordered.append(info) - emitted.add(info.package.path) - return ordered - - -def push_tags(tag_infos: List[TagInfo]) -> None: - """ - Creates and pushes tags to the repository. - - Tags are emitted in topological order — dependencies before - dependents — so downstream publishing pipelines reading - ``created_tags.json`` can walk it sequentially without re-deriving - the dependency graph. See ``order_tag_infos_by_dependency``. - - As a side effect, writes the names of successfully created tags to - ``./created_tags.json`` so that workflows triggering this script can - discover what was produced (the GitHub Actions workflow uploads this - file as the ``created-tags`` artifact). - - Schema: - {"tags": ["service-a/v1.2.3", "service-b/v0.4.0"]} - - The manifest is written even if tag creation fails partway through: - tags that succeeded before the failure are flushed before the - exception is re-raised, so recovery-mode runs still surface their - output. 
- """ - tag_infos = order_tag_infos_by_dependency(tag_infos) - created: List[str] = [] - try: - for tag_info in tag_infos: - gh.tag(tag_info.tag_name(), tag_info.content) - created.append(tag_info.tag_name()) - finally: - manifest_path = os.path.join(os.getcwd(), CREATED_TAGS_FILE_NAME) - with open(manifest_path, "w") as f: - json.dump({"tags": created}, f) - - -def run_command(command: List[str]) -> str: - """ - Runs a command and returns the output - """ - output = subprocess.check_output(command) - print(f'Running command: {" ".join(command)}') - return output.decode() - - -def pull_last_release_commit() -> None: - """ - Reset the repository to the last release. - Uses commit for last change to .release_metadata.json, since it's only updated on releases. - """ - commit_hash = subprocess.check_output( - ["git", "log", "-n", "1", "--format=%H", "--", ".release_metadata.json"], text=True - ).strip() - - # If no commit is found, raise an exception - if not commit_hash: - raise ValueError("No commit found for .release_metadata.json") - - # Reset the repository to the commit - reset_repository(commit_hash) - - -def get_packages_from_args() -> List[str]: - """ - Retrieves the list of packages to tag. - - python3 ./tagging.py --package # single package - python3 ./tagging.py --package , # multiple packages - - Returns an empty list when --package is omitted, which means all packages - with pending releases will be tagged. - """ - parser = argparse.ArgumentParser(description="Update changelogs and tag the release.") - parser.add_argument( - "--package", - "-p", - type=str, - default="", - help="Comma-separated list of packages to tag. 
Leave empty to tag all packages with pending releases.", - ) - args = parser.parse_args() - return [name.strip() for name in args.package.split(",") if name.strip()] - - -def init_github(): - token = os.environ["GITHUB_TOKEN"] - repo_name = os.environ["GITHUB_REPOSITORY"] - g = Github(token) - repo = g.get_repo(repo_name) - global gh - gh = GitHubRepo(repo) - - -def process(): - """ - Main entry point for tagging process. - - Tagging process consist of multiple steps: - * For each package, update the corresponding CHANGELOG.md file based on the contents of NEXT_CHANGELOG.md file - * If any package has been updated, commit and push the changes. - * Apply and push the new tags matching the version. - - If a specific pagkage is provided as a parameter, only that package will be tagged. - - If any tag are pending from an early process, it will skip updating the CHANGELOG.md files and only apply the tags. - """ - - package_names = get_packages_from_args() - pending_tags = find_pending_tags() - - # pending_tags is non-empty only when the tagging process previously failed or interrupted. - # We must complete the interrupted tagging process before starting a new one to avoid inconsistent states and missing changelog entries. - # Therefore, we don't support specifying packages until the previously started process has been successfully completed. - if pending_tags and package_names: - pending_packages = [tag.package.name for tag in pending_tags] - raise Exception(f"Cannot release packages {package_names}. Pending release for {pending_packages}") - - if pending_tags: - print("Found pending tags from previous executions, entering recovery mode.") - pull_last_release_commit() - push_tags(pending_tags) - return - - all_packages = find_packages() - # If packages are specified as an argument, only release those — but - # dep rewrites and the freshness check still operate over the full - # set. 
- selected_packages = all_packages - if package_names: - selected_packages = [package for package in all_packages if package.name in package_names] - - # Run the freshness check against a read-only preview before the - # retry loop, since the check is deterministic. A freshness - # violation fails the run immediately, with no commits, no tags, no - # retry storm. - check_dependency_freshness(preview_tag_infos(selected_packages), all_packages) - - pending_tags = retry_function( - func=lambda: update_changelogs(selected_packages, all_packages), - cleanup=reset_repository, - ) - push_tags(pending_tags) - - -def validate_git_root(): - """ - Validate that the script is run from the root of the repository. - """ - repo_root = subprocess.check_output(["git", "rev-parse", "--show-toplevel"]).strip().decode("utf-8") - current_dir = subprocess.check_output(["pwd"]).strip().decode("utf-8") - if repo_root != current_dir: - raise Exception("Please run this script from the root of the repository.") - - -if __name__ == "__main__": - validate_git_root() - init_github() - process() diff --git a/tests/test_errors.py b/tests/test_errors.py index d644979ab..57e045c3a 100644 --- a/tests/test_errors.py +++ b/tests/test_errors.py @@ -314,7 +314,7 @@ class TestCase: want_message=( "unable to parse response. This is likely a bug in the Databricks SDK for Python or the underlying API. " "Please report this issue with the following debugging information to the SDK issue tracker at " - "https://github.com/databricks/databricks-sdk-py/issues. Request log:```GET /api/2.0/service\n" + "https://github.com/databricks/databricks-sdk-go/issues. Request log:```GET /api/2.0/service\n" "< 400 Bad Request\n" "< this is not a real response```" ), @@ -343,7 +343,7 @@ class TestCase: response_body=json.dumps("This is JSON but not a dictionary"), ), want_err_type=errors.NotFound, - want_message='unable to parse response. This is likely a bug in the Databricks SDK for Python or the underlying API. 
Please report this issue with the following debugging information to the SDK issue tracker at https://github.com/databricks/databricks-sdk-py/issues. Request log:```GET /api/2.0/service\n< 404 Not Found\n< "This is JSON but not a dictionary"```', + want_message='unable to parse response. This is likely a bug in the Databricks SDK for Python or the underlying API. Please report this issue with the following debugging information to the SDK issue tracker at https://github.com/databricks/databricks-sdk-go/issues. Request log:```GET /api/2.0/service\n< 404 Not Found\n< "This is JSON but not a dictionary"```', ), TestCase( name="unable_to_parse_response3", @@ -353,7 +353,7 @@ class TestCase: response_body=b"\x80", ), want_err_type=errors.NotFound, - want_message="unable to parse response. This is likely a bug in the Databricks SDK for Python or the underlying API. Please report this issue with the following debugging information to the SDK issue tracker at https://github.com/databricks/databricks-sdk-py/issues. Request log:```GET /api/2.0/service\n< 404 Not Found\n< �```", + want_message="unable to parse response. This is likely a bug in the Databricks SDK for Python or the underlying API. Please report this issue with the following debugging information to the SDK issue tracker at https://github.com/databricks/databricks-sdk-go/issues. Request log:```GET /api/2.0/service\n< 404 Not Found\n< �```", ), ]