From 6a11f6dfd567d9a4c59fd47299ebdeb62f806834 Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Tue, 14 Apr 2026 12:12:06 +0200 Subject: [PATCH 01/16] =?UTF-8?q?Rename=20product=20names:=20Workflows?= =?UTF-8?q?=E2=86=92Jobs,=20Delta=20Live=20Tables=E2=86=92Spark=20Declarat?= =?UTF-8?q?ive=20Pipelines?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Update all non-generated references to retired product names: - "Databricks Workflows" / "Workflows" → "Databricks Jobs" / "Jobs" - "Delta Live Tables" → "Spark Declarative Pipelines" - "DLT" → "SDP" (in comments/internal code) - Template parameter `include_dlt` → `include_sdp` - Template file `dlt_pipeline.ipynb` → `sdp_pipeline.ipynb` Generated files (schema JSON, docsgen, acceptance test outputs, Python models) are not updated here — regenerate with `make schema`, `make docs`, `make test-update`, `make test-update-templates`, `make -C python codegen` after the upstream proto changes land. Co-authored-by: Isaac --- .../bundle/paths/invalid_pipeline_globs/databricks.yml | 2 +- .../pipeline_expected_file_got_notebook/databricks.yml | 2 +- .../bundle/paths/pipeline_globs/root/databricks.yml | 4 ++-- .../default-python/combinations/input.json.tmpl | 2 +- .../templates/default-python/combinations/test.toml | 4 ++-- .../templates/experimental-jobs-as-code/input.json | 2 +- .../mutator/resourcemutator/capture_uc_dependencies.go | 4 ++-- bundle/config/mutator/resourcemutator/run_as.go | 4 ++-- bundle/internal/schema/annotations.yml | 8 ++++---- .../internal/schema/annotations_openapi_overrides.yml | 4 ++-- bundle/phases/deploy.go | 10 +++++----- cmd/bundle/generate/pipeline.go | 2 +- cmd/bundle/open.go | 2 +- cmd/workspace/permissions/overrides.go | 6 +++--- libs/template/templates/dbt-sql/README.md | 2 +- .../dbt-sql/template/{{.project_name}}/README.md.tmpl | 4 ++-- .../template/{{.project_name}}/README.md.tmpl | 2 +- .../template/{{.project_name}}/README.md.tmpl | 2 +- 
.../{{.project_name}}/src/orders_daily.sql.tmpl | 2 +- .../template/{{.project_name}}/src/orders_raw.sql.tmpl | 2 +- .../databricks_template_schema.json | 4 ++-- .../experimental-jobs-as-code/template/__preamble.tmpl | 8 ++++---- .../template/{{.project_name}}/README.md.tmpl | 2 +- .../template/{{.project_name}}/pyproject.toml.tmpl | 2 +- .../resources/{{.project_name}}_job.py.tmpl | 10 +++++----- .../resources/{{.project_name}}_pipeline.py.tmpl | 2 +- .../template/{{.project_name}}/src/notebook.ipynb.tmpl | 2 +- ...dlt_pipeline.ipynb.tmpl => sdp_pipeline.ipynb.tmpl} | 4 ++-- 28 files changed, 52 insertions(+), 52 deletions(-) rename libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/{dlt_pipeline.ipynb.tmpl => sdp_pipeline.ipynb.tmpl} (97%) diff --git a/acceptance/bundle/paths/invalid_pipeline_globs/databricks.yml b/acceptance/bundle/paths/invalid_pipeline_globs/databricks.yml index 5ed46e048a..d80b8aebb5 100644 --- a/acceptance/bundle/paths/invalid_pipeline_globs/databricks.yml +++ b/acceptance/bundle/paths/invalid_pipeline_globs/databricks.yml @@ -9,5 +9,5 @@ resources: variables: notebook_dir: - description: Directory with DLT notebooks + description: Directory with SDP notebooks default: non-existent diff --git a/acceptance/bundle/paths/pipeline_expected_file_got_notebook/databricks.yml b/acceptance/bundle/paths/pipeline_expected_file_got_notebook/databricks.yml index 7d176f0cd5..4fcdf53e03 100644 --- a/acceptance/bundle/paths/pipeline_expected_file_got_notebook/databricks.yml +++ b/acceptance/bundle/paths/pipeline_expected_file_got_notebook/databricks.yml @@ -6,5 +6,5 @@ include: variables: notebook_dir: - description: Directory with DLT notebooks + description: Directory with SDP notebooks default: notebooks diff --git a/acceptance/bundle/paths/pipeline_globs/root/databricks.yml b/acceptance/bundle/paths/pipeline_globs/root/databricks.yml index a2b3f77698..843bd92348 100644 --- 
a/acceptance/bundle/paths/pipeline_globs/root/databricks.yml +++ b/acceptance/bundle/paths/pipeline_globs/root/databricks.yml @@ -6,8 +6,8 @@ include: variables: notebook_dir: - description: Directory with DLT notebooks + description: Directory with SDP notebooks default: notebooks file_dir: - description: Directory with DLT files + description: Directory with SDP files default: files diff --git a/acceptance/bundle/templates/default-python/combinations/input.json.tmpl b/acceptance/bundle/templates/default-python/combinations/input.json.tmpl index 3a547d70fc..129aea7410 100644 --- a/acceptance/bundle/templates/default-python/combinations/input.json.tmpl +++ b/acceptance/bundle/templates/default-python/combinations/input.json.tmpl @@ -1,7 +1,7 @@ { "project_name": "X$UNIQUE_NAME", "include_notebook": "$NBOOK", - "include_dlt": "$DLT", + "include_pipeline": "$PIPELINE", "include_python": "$PY", "serverless": "$SERVERLESS" } diff --git a/acceptance/bundle/templates/default-python/combinations/test.toml b/acceptance/bundle/templates/default-python/combinations/test.toml index d851f0a5be..8551e52469 100644 --- a/acceptance/bundle/templates/default-python/combinations/test.toml +++ b/acceptance/bundle/templates/default-python/combinations/test.toml @@ -8,8 +8,8 @@ EnvMatrix.READPLAN = ["", "1"] # INCLUDE_NOTEBOOK EnvMatrix.NBOOK = ["yes", "no"] -# INCLUDE_DLT -EnvMatrix.DLT = ["yes", "no"] +# INCLUDE_PIPELINE +EnvMatrix.PIPELINE = ["yes", "no"] # INCLUDE_PYTHON EnvMatrix.PY = ["yes", "no"] diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/input.json b/acceptance/bundle/templates/experimental-jobs-as-code/input.json index 5c5fcfc385..b67fd93769 100644 --- a/acceptance/bundle/templates/experimental-jobs-as-code/input.json +++ b/acceptance/bundle/templates/experimental-jobs-as-code/input.json @@ -2,5 +2,5 @@ "project_name": "my_jobs_as_code", "include_notebook": "yes", "include_python": "yes", - "include_dlt": "yes" + "include_sdp": "yes" } diff --git 
a/bundle/config/mutator/resourcemutator/capture_uc_dependencies.go b/bundle/config/mutator/resourcemutator/capture_uc_dependencies.go index 92d22333e7..61c2fed259 100644 --- a/bundle/config/mutator/resourcemutator/capture_uc_dependencies.go +++ b/bundle/config/mutator/resourcemutator/capture_uc_dependencies.go @@ -12,7 +12,7 @@ import ( type captureUCDependencies struct{} -// If a user defines a UC schema in the bundle, they can refer to it in DLT pipelines, +// If a user defines a UC schema in the bundle, they can refer to it in SDP pipelines, // UC Volumes, Registered Models, Quality Monitors, or Model Serving Endpoints using the // `${resources.schemas..name}` syntax. Using this syntax allows TF to capture // the deploy time dependency this resource has on the schema and deploy changes to the @@ -110,7 +110,7 @@ func (m *captureUCDependencies) Apply(ctx context.Context, b *bundle.Bundle) dia if p == nil { continue } - // "schema" and "target" have the same semantics in the DLT API but are mutually + // "schema" and "target" have the same semantics in the SDP API but are mutually // exclusive i.e. only one can be set at a time. p.Schema = resolveSchema(b, p.Catalog, p.Schema) p.Target = resolveSchema(b, p.Catalog, p.Target) diff --git a/bundle/config/mutator/resourcemutator/run_as.go b/bundle/config/mutator/resourcemutator/run_as.go index 7360048213..074b6d07db 100644 --- a/bundle/config/mutator/resourcemutator/run_as.go +++ b/bundle/config/mutator/resourcemutator/run_as.go @@ -178,7 +178,7 @@ func setRunAsForAlerts(b *bundle.Bundle) { } } -// Legacy behavior of run_as for DLT pipelines. Available under the experimental.use_run_as_legacy flag. +// Legacy behavior of run_as for SDP pipelines. Available under the experimental.use_run_as_legacy flag. 
// Only available to unblock customers stuck due to breaking changes in https://github.com/databricks/cli/pull/1233 func setPipelineOwnersToRunAsIdentity(b *bundle.Bundle) { runAs := b.Config.RunAs @@ -228,7 +228,7 @@ func (m *setRunAs) Apply(_ context.Context, b *bundle.Bundle) diag.Diagnostics { return diag.Diagnostics{ { Severity: diag.Warning, - Summary: "You are using the legacy mode of run_as. The support for this mode is experimental and might be removed in a future release of the CLI. In order to run the DLT pipelines in your DAB as the run_as user this mode changes the owners of the pipelines to the run_as identity, which requires the user deploying the bundle to be a workspace admin, and also a Metastore admin if the pipeline target is in UC.", + Summary: "You are using the legacy mode of run_as. The support for this mode is experimental and might be removed in a future release of the CLI. In order to run the Spark Declarative Pipelines in your DAB as the run_as user this mode changes the owners of the pipelines to the run_as identity, which requires the user deploying the bundle to be a workspace admin, and also a Metastore admin if the pipeline target is in UC.", Paths: []dyn.Path{dyn.MustPathFromString("experimental.use_legacy_run_as")}, Locations: b.Config.GetLocations("experimental.use_legacy_run_as"), }, diff --git a/bundle/internal/schema/annotations.yml b/bundle/internal/schema/annotations.yml index 459d2c19f6..dedd29b198 100644 --- a/bundle/internal/schema/annotations.yml +++ b/bundle/internal/schema/annotations.yml @@ -327,9 +327,9 @@ github.com/databricks/cli/bundle/config.Root: ``` "run_as": "description": |- - The identity to use when running Declarative Automation Bundles workflows. + The identity to use when running Declarative Automation Bundles resources. "markdown_description": |- - The identity to use when running Declarative Automation Bundles workflows. See [\_](/dev-tools/bundles/run-as.md). 
+ The identity to use when running Declarative Automation Bundles resources. See [\_](/dev-tools/bundles/run-as.md). "scripts": "description": |- PLACEHOLDER @@ -420,7 +420,7 @@ github.com/databricks/cli/bundle/config.Workspace: The Databricks account ID. "artifact_path": "description": |- - The artifact path to use within the workspace for both deployments and workflow runs + The artifact path to use within the workspace for both deployments and job runs "auth_type": "description": |- The authentication type. @@ -450,7 +450,7 @@ github.com/databricks/cli/bundle/config.Workspace: Experimental feature flag to indicate if the host is a unified host "file_path": "description": |- - The file path to use within the workspace for both deployments and workflow runs + The file path to use within the workspace for both deployments and job runs "google_service_account": "description": |- The Google service account name diff --git a/bundle/internal/schema/annotations_openapi_overrides.yml b/bundle/internal/schema/annotations_openapi_overrides.yml index 611289083e..921c35e55a 100644 --- a/bundle/internal/schema/annotations_openapi_overrides.yml +++ b/bundle/internal/schema/annotations_openapi_overrides.yml @@ -328,7 +328,7 @@ github.com/databricks/cli/bundle/config/resources.ModelServingEndpoint: github.com/databricks/cli/bundle/config/resources.Pipeline: "_": "markdown_description": |- - The pipeline resource allows you to create Delta Live Tables [pipelines](/api/workspace/pipelines/create). For information about pipelines, see [_](/dlt/index.md). For a tutorial that uses the Declarative Automation Bundles template to create a pipeline, see [_](/dev-tools/bundles/pipelines-tutorial.md). + The pipeline resource allows you to create Spark Declarative [Pipelines](/api/workspace/pipelines/create). For information about pipelines, see [_](/dlt/index.md). 
For a tutorial that uses the Declarative Automation Bundles template to create a pipeline, see [_](/dev-tools/bundles/pipelines-tutorial.md). "markdown_examples": |- The following example defines a pipeline with the resource key `hello-pipeline`: @@ -454,7 +454,7 @@ github.com/databricks/cli/bundle/config/resources.RegisteredModel: github.com/databricks/cli/bundle/config/resources.Schema: "_": "markdown_description": |- - The schema resource type allows you to define Unity Catalog [schemas](/api/workspace/schemas/create) for tables and other assets in your workflows and pipelines created as part of a bundle. A schema, different from other resource types, has the following limitations: + The schema resource type allows you to define Unity Catalog [schemas](/api/workspace/schemas/create) for tables and other assets in your jobs and pipelines created as part of a bundle. A schema, different from other resource types, has the following limitations: - The owner of a schema resource is always the deployment user, and cannot be changed. If `run_as` is specified in the bundle, it will be ignored by operations on the schema. - Only fields supported by the corresponding [Schemas object create API](/api/workspace/schemas/create) are available for the schema resource. For example, `enable_predictive_optimization` is not supported as it is only available on the [update API](/api/workspace/schemas/update). diff --git a/bundle/phases/deploy.go b/bundle/phases/deploy.go index 110ab75731..25ae76ec60 100644 --- a/bundle/phases/deploy.go +++ b/bundle/phases/deploy.go @@ -36,12 +36,12 @@ func approvalForDeploy(ctx context.Context, b *bundle.Bundle, plan *deployplan.P types := []deployplan.ActionType{deployplan.Recreate, deployplan.Delete} schemaActions := filterGroup(actions, "schemas", types...) - dltActions := filterGroup(actions, "pipelines", types...) + pipelineActions := filterGroup(actions, "pipelines", types...) volumeActions := filterGroup(actions, "volumes", types...) 
dashboardActions := filterGroup(actions, "dashboards", types...) // We don't need to display any prompts in this case. - if len(schemaActions) == 0 && len(dltActions) == 0 && len(volumeActions) == 0 && len(dashboardActions) == 0 { + if len(schemaActions) == 0 && len(pipelineActions) == 0 && len(volumeActions) == 0 && len(dashboardActions) == 0 { return true, nil } @@ -56,10 +56,10 @@ func approvalForDeploy(ctx context.Context, b *bundle.Bundle, plan *deployplan.P } } - // One or more DLT pipelines is being recreated. - if len(dltActions) != 0 { + // One or more SDP pipelines is being recreated. + if len(pipelineActions) != 0 { cmdio.LogString(ctx, deleteOrRecreatePipelineMessage) - for _, action := range dltActions { + for _, action := range pipelineActions { cmdio.Log(ctx, action) } } diff --git a/cmd/bundle/generate/pipeline.go b/cmd/bundle/generate/pipeline.go index dd422d7808..51d780014b 100644 --- a/cmd/bundle/generate/pipeline.go +++ b/cmd/bundle/generate/pipeline.go @@ -30,7 +30,7 @@ func NewGeneratePipelineCommand() *cobra.Command { cmd := &cobra.Command{ Use: "pipeline", Short: "Generate bundle configuration for a pipeline", - Long: `Generate bundle configuration for an existing Delta Live Tables pipeline. + Long: `Generate bundle configuration for an existing Spark Declarative Pipeline. 
This command downloads an existing Lakeflow Spark Declarative Pipeline's configuration and any associated notebooks, creating bundle files that you can use to deploy the pipeline to other diff --git a/cmd/bundle/open.go b/cmd/bundle/open.go index 483f5edff5..e7fa960c3d 100644 --- a/cmd/bundle/open.go +++ b/cmd/bundle/open.go @@ -57,7 +57,7 @@ func newOpenCommand() *cobra.Command { Examples: databricks bundle open # Prompts to select a resource to open - databricks bundle open my_job # Open specific job in Workflows UI + databricks bundle open my_job # Open specific job in Jobs UI databricks bundle open my_dashboard # Open dashboard in browser Use after deployment to quickly navigate to your resources in the workspace.`, diff --git a/cmd/workspace/permissions/overrides.go b/cmd/workspace/permissions/overrides.go index f5efce48ee..b8f15e6276 100644 --- a/cmd/workspace/permissions/overrides.go +++ b/cmd/workspace/permissions/overrides.go @@ -15,9 +15,9 @@ func cmdOverride(cmd *cobra.Command) { * **[Cluster policy permissions](:service:clusterpolicies)** — Manage which users can use cluster policies. - * **[Delta Live Tables pipeline permissions](:service:pipelines)** — Manage - which users can view, manage, run, cancel, or own a Delta Live Tables - pipeline. + * **[Spark Declarative Pipeline permissions](:service:pipelines)** — Manage + which users can view, manage, run, cancel, or own a Spark Declarative + Pipeline. * **[Job permissions](:service:jobs)** — Manage which users can view, manage, trigger, cancel, or own a job. diff --git a/libs/template/templates/dbt-sql/README.md b/libs/template/templates/dbt-sql/README.md index 0ddce68ed3..0acd6e719f 100644 --- a/libs/template/templates/dbt-sql/README.md +++ b/libs/template/templates/dbt-sql/README.md @@ -3,7 +3,7 @@ This folder provides a template for using dbt-core with Declarative Automation Bundles. 
It leverages dbt-core for local development and relies on Declarative Automation Bundles for deployment (either manually or with CI/CD). In production, -dbt is executed using Databricks Workflows. +dbt is executed using Databricks Jobs. * Learn more about the dbt and its standard project structure here: https://docs.getdbt.com/docs/build/projects. * Learn more about Declarative Automation Bundles here: https://docs.databricks.com/en/dev-tools/bundles/index.html diff --git a/libs/template/templates/dbt-sql/template/{{.project_name}}/README.md.tmpl b/libs/template/templates/dbt-sql/template/{{.project_name}}/README.md.tmpl index 683bde99cc..1a98bc1917 100644 --- a/libs/template/templates/dbt-sql/template/{{.project_name}}/README.md.tmpl +++ b/libs/template/templates/dbt-sql/template/{{.project_name}}/README.md.tmpl @@ -102,7 +102,7 @@ on CI/CD setup. ## Manually deploying to Databricks with Declarative Automation Bundles Declarative Automation Bundles can be used to deploy to Databricks and to execute -dbt commands as a job using Databricks Workflows. See +dbt commands as a job using Databricks Jobs. See https://docs.databricks.com/dev-tools/bundles/index.html to learn more. Use the Databricks CLI to deploy a development copy of this project to a workspace: @@ -117,7 +117,7 @@ is optional here.) This deploys everything that's defined for this project. For example, the default template would deploy a job called `[dev yourname] {{.project_name}}_job` to your workspace. -You can find that job by opening your workpace and clicking on **Workflows**. +You can find that job by opening your workpace and clicking on **Jobs**. You can also deploy to your production target directly from the command-line. The warehouse, catalog, and schema for that target are configured in `dbt_profiles/profiles.yml`. 
diff --git a/libs/template/templates/default-scala/template/{{.project_name}}/README.md.tmpl b/libs/template/templates/default-scala/template/{{.project_name}}/README.md.tmpl index cc4be2586c..b0d5520713 100644 --- a/libs/template/templates/default-scala/template/{{.project_name}}/README.md.tmpl +++ b/libs/template/templates/default-scala/template/{{.project_name}}/README.md.tmpl @@ -21,7 +21,7 @@ The '{{.project_name}}' project was generated by using the default-scala templat This deploys everything that's defined for this project. For example, the default template would deploy a job called `[dev yourname] {{.project_name}}_job` to your workspace. - You can find that job by opening your workspace and clicking on **Workflows**. + You can find that job by opening your workspace and clicking on **Jobs**. 4. Similarly, to deploy a production copy, type: ``` diff --git a/libs/template/templates/default-sql/template/{{.project_name}}/README.md.tmpl b/libs/template/templates/default-sql/template/{{.project_name}}/README.md.tmpl index 1377874bf7..8cb1308aab 100644 --- a/libs/template/templates/default-sql/template/{{.project_name}}/README.md.tmpl +++ b/libs/template/templates/default-sql/template/{{.project_name}}/README.md.tmpl @@ -21,7 +21,7 @@ The '{{.project_name}}' project was generated by using the default-sql template. This deploys everything that's defined for this project. For example, the default template would deploy a job called `[dev yourname] {{.project_name}}_job` to your workspace. - You can find that job by opening your workpace and clicking on **Workflows**. + You can find that job by opening your workpace and clicking on **Jobs**. 4. 
Similarly, to deploy a production copy, type: ``` diff --git a/libs/template/templates/default-sql/template/{{.project_name}}/src/orders_daily.sql.tmpl b/libs/template/templates/default-sql/template/{{.project_name}}/src/orders_daily.sql.tmpl index 444ae4e033..913f030f33 100644 --- a/libs/template/templates/default-sql/template/{{.project_name}}/src/orders_daily.sql.tmpl +++ b/libs/template/templates/default-sql/template/{{.project_name}}/src/orders_daily.sql.tmpl @@ -1,4 +1,4 @@ --- This query is executed using Databricks Workflows (see resources/{{.project_name}}_sql.job.yml) +-- This query is executed using Databricks Jobs (see resources/{{.project_name}}_sql.job.yml) USE CATALOG {{"{{"}}catalog{{"}}"}}; USE IDENTIFIER({{"{{"}}schema{{"}}"}}); diff --git a/libs/template/templates/default-sql/template/{{.project_name}}/src/orders_raw.sql.tmpl b/libs/template/templates/default-sql/template/{{.project_name}}/src/orders_raw.sql.tmpl index 80f6773cb3..f95e11e20a 100644 --- a/libs/template/templates/default-sql/template/{{.project_name}}/src/orders_raw.sql.tmpl +++ b/libs/template/templates/default-sql/template/{{.project_name}}/src/orders_raw.sql.tmpl @@ -1,4 +1,4 @@ --- This query is executed using Databricks Workflows (see resources/{{.project_name}}_sql.job.yml) +-- This query is executed using Databricks Jobs (see resources/{{.project_name}}_sql.job.yml) -- -- The streaming table below ingests all JSON files in /databricks-datasets/retail-org/sales_orders/ -- See also https://docs.databricks.com/sql/language-manual/sql-ref-syntax-ddl-create-streaming-table.html diff --git a/libs/template/templates/experimental-jobs-as-code/databricks_template_schema.json b/libs/template/templates/experimental-jobs-as-code/databricks_template_schema.json index 574ce59259..c523f61397 100644 --- a/libs/template/templates/experimental-jobs-as-code/databricks_template_schema.json +++ b/libs/template/templates/experimental-jobs-as-code/databricks_template_schema.json @@ -16,11 +16,11 
@@ "description": "Include a stub (sample) notebook in '{{.project_name}}{{path_separator}}src'", "order": 2 }, - "include_dlt": { + "include_sdp": { "type": "string", "default": "yes", "enum": ["yes", "no"], - "description": "Include a stub (sample) Delta Live Tables pipeline in '{{.project_name}}{{path_separator}}src'", + "description": "Include a stub (sample) Spark Declarative Pipeline in '{{.project_name}}{{path_separator}}src'", "order": 3 }, "include_python": { diff --git a/libs/template/templates/experimental-jobs-as-code/template/__preamble.tmpl b/libs/template/templates/experimental-jobs-as-code/template/__preamble.tmpl index bd284b0252..d2bbe23e2d 100644 --- a/libs/template/templates/experimental-jobs-as-code/template/__preamble.tmpl +++ b/libs/template/templates/experimental-jobs-as-code/template/__preamble.tmpl @@ -4,7 +4,7 @@ This file only contains template directives; it is skipped for the actual output {{skip "__preamble"}} -{{$notDLT := not (eq .include_dlt "yes")}} +{{$notSDP := not (eq .include_sdp "yes")}} {{$notNotebook := not (eq .include_notebook "yes")}} {{$notPython := not (eq .include_python "yes")}} @@ -13,8 +13,8 @@ This file only contains template directives; it is skipped for the actual output {{skip "{{.project_name}}/tests/main_test.py"}} {{end}} -{{if $notDLT}} - {{skip "{{.project_name}}/src/dlt_pipeline.ipynb"}} +{{if $notSDP}} + {{skip "{{.project_name}}/src/sdp_pipeline.ipynb"}} {{skip "{{.project_name}}/resources/{{.project_name}}_pipeline.py"}} {{end}} @@ -22,7 +22,7 @@ This file only contains template directives; it is skipped for the actual output {{skip "{{.project_name}}/src/notebook.ipynb"}} {{end}} -{{if (and $notDLT $notNotebook $notPython)}} +{{if (and $notSDP $notNotebook $notPython)}} {{skip "{{.project_name}}/resources/{{.project_name}}_job.py"}} {{else}} {{skip "{{.project_name}}/resources/.gitkeep"}} diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/README.md.tmpl 
b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/README.md.tmpl index 37e7040846..6e399d0162 100644 --- a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/README.md.tmpl +++ b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/README.md.tmpl @@ -42,7 +42,7 @@ The '{{.project_name}}' project was generated by using the "Jobs as code" templa This deploys everything that's defined for this project. For example, the default template would deploy a job called `[dev yourname] {{.project_name}}_job` to your workspace. - You can find that job by opening your workspace and clicking on **Workflows**. + You can find that job by opening your workspace and clicking on **Jobs**. 3. Similarly, to deploy a production copy, type: ``` diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/pyproject.toml.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/pyproject.toml.tmpl index 4cb0e6d9ee..63b28b0994 100644 --- a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/pyproject.toml.tmpl +++ b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/pyproject.toml.tmpl @@ -41,7 +41,7 @@ py-modules = [] dev-dependencies = [ "databricks-bundles=={{template "latest_databricks_bundles_version"}}", - ## Add code completion support for DLT + ## Add code completion support for Spark Declarative Pipelines # "databricks-dlt", ## databricks-connect can be used to run parts of this project locally. 
diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/resources/{{.project_name}}_job.py.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/resources/{{.project_name}}_job.py.tmpl index ff554c45c5..e0f3322fcf 100644 --- a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/resources/{{.project_name}}_job.py.tmpl +++ b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/resources/{{.project_name}}_job.py.tmpl @@ -1,11 +1,11 @@ -{{$include_dlt := "no" -}} +{{$include_sdp := "no" -}} from databricks.bundles.jobs import Job """ The main job for {{.project_name}}. -{{- /* Clarify what this job is for for DLT-only users. */}} -{{if and (eq $include_dlt "yes") (and (eq .include_notebook "no") (eq .include_python "no")) -}} +{{- /* Clarify what this job is for for SDP-only users. */}} +{{if and (eq $include_sdp "yes") (and (eq .include_notebook "no") (eq .include_python "no")) -}} This job runs {{.project_name}}_pipeline on a schedule. {{end -}} """ @@ -37,7 +37,7 @@ This job runs {{.project_name}}_pipeline on a schedule. }, }, {{- end -}} - {{- if (eq $include_dlt "yes") -}} + {{- if (eq $include_sdp "yes") -}} {{- "\n " -}} { "task_key": "refresh_pipeline", @@ -58,7 +58,7 @@ This job runs {{.project_name}}_pipeline on a schedule. 
{{- "\n " -}} { "task_key": "main_task", - {{- if (eq $include_dlt "yes") }} + {{- if (eq $include_sdp "yes") }} "depends_on": [ { "task_key": "refresh_pipeline", diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/resources/{{.project_name}}_pipeline.py.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/resources/{{.project_name}}_pipeline.py.tmpl index c8579ae659..73c30ab543 100644 --- a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/resources/{{.project_name}}_pipeline.py.tmpl +++ b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/resources/{{.project_name}}_pipeline.py.tmpl @@ -13,7 +13,7 @@ from databricks.bundles.pipelines import Pipeline "libraries": [ { "notebook": { - "path": "src/dlt_pipeline.ipynb", + "path": "src/sdp_pipeline.ipynb", }, }, ], diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/notebook.ipynb.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/notebook.ipynb.tmpl index 6782a053ba..fbc12f872e 100644 --- a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/notebook.ipynb.tmpl +++ b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/notebook.ipynb.tmpl @@ -14,7 +14,7 @@ "source": [ "# Default notebook\n", "\n", - "This default notebook is executed using Databricks Workflows as defined in resources/{{.project_name}}.job.yml." + "This default notebook is executed using Databricks Jobs as defined in resources/{{.project_name}}.job.yml." 
] }, { diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/dlt_pipeline.ipynb.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/sdp_pipeline.ipynb.tmpl similarity index 97% rename from libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/dlt_pipeline.ipynb.tmpl rename to libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/sdp_pipeline.ipynb.tmpl index 62c4fb1f12..5e70f5549c 100644 --- a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/dlt_pipeline.ipynb.tmpl +++ b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/sdp_pipeline.ipynb.tmpl @@ -12,7 +12,7 @@ } }, "source": [ - "# DLT pipeline\n", + "# SDP pipeline\n", "\n", "This Lakeflow Spark Declarative Pipeline definition is executed using a pipeline defined in resources/{{.project_name}}.pipeline.yml." ] @@ -86,7 +86,7 @@ "notebookMetadata": { "pythonIndentUnit": 2 }, - "notebookName": "dlt_pipeline", + "notebookName": "sdp_pipeline", "widgets": {} }, "kernelspec": { From 2f72a10554923384470c7da8108a2164d1078538 Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Tue, 14 Apr 2026 13:33:52 +0200 Subject: [PATCH 02/16] Regenerate acceptance test outputs for product name changes Co-authored-by: Isaac --- .../help/bundle-generate-pipeline/output.txt | 2 +- acceptance/bundle/help/bundle-open/output.txt | 2 +- .../bundle/run_as/pipelines_legacy/output.txt | 2 +- .../telemetry/deploy-experimental/output.txt | 63 ++++--------------- .../dbt-sql/output/my_dbt_sql/README.md | 4 +- .../combinations/classic/out.test.toml | 2 +- .../combinations/serverless/out.test.toml | 2 +- .../output/my_default_scala/README.md | 2 +- .../output/my_default_sql/README.md | 2 +- .../my_default_sql/src/orders_daily.sql | 2 +- .../output/my_default_sql/src/orders_raw.sql | 2 +- .../experimental-jobs-as-code/output.txt | 2 +- 
.../output/my_jobs_as_code/README.md | 2 +- .../output/my_jobs_as_code/pyproject.toml | 2 +- .../resources/my_jobs_as_code_pipeline.py | 2 +- .../output/my_jobs_as_code/src/notebook.ipynb | 2 +- ...{dlt_pipeline.ipynb => sdp_pipeline.ipynb} | 4 +- 17 files changed, 31 insertions(+), 68 deletions(-) rename acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/{dlt_pipeline.ipynb => sdp_pipeline.ipynb} (96%) diff --git a/acceptance/bundle/help/bundle-generate-pipeline/output.txt b/acceptance/bundle/help/bundle-generate-pipeline/output.txt index 7d0db9a098..c24f552ed4 100644 --- a/acceptance/bundle/help/bundle-generate-pipeline/output.txt +++ b/acceptance/bundle/help/bundle-generate-pipeline/output.txt @@ -1,6 +1,6 @@ >>> [CLI] bundle generate pipeline --help -Generate bundle configuration for an existing Delta Live Tables pipeline. +Generate bundle configuration for an existing Spark Declarative Pipeline. This command downloads an existing Lakeflow Spark Declarative Pipeline's configuration and any associated notebooks, creating bundle files that you can use to deploy the pipeline to other diff --git a/acceptance/bundle/help/bundle-open/output.txt b/acceptance/bundle/help/bundle-open/output.txt index 568908f937..8c5f25db3c 100644 --- a/acceptance/bundle/help/bundle-open/output.txt +++ b/acceptance/bundle/help/bundle-open/output.txt @@ -4,7 +4,7 @@ Open a deployed bundle resource in the Databricks workspace. Examples: databricks bundle open # Prompts to select a resource to open - databricks bundle open my_job # Open specific job in Workflows UI + databricks bundle open my_job # Open specific job in Jobs UI databricks bundle open my_dashboard # Open dashboard in browser Use after deployment to quickly navigate to your resources in the workspace. 
diff --git a/acceptance/bundle/run_as/pipelines_legacy/output.txt b/acceptance/bundle/run_as/pipelines_legacy/output.txt index 654d5eab11..1462caf861 100644 --- a/acceptance/bundle/run_as/pipelines_legacy/output.txt +++ b/acceptance/bundle/run_as/pipelines_legacy/output.txt @@ -1,6 +1,6 @@ >>> [CLI] bundle validate -o json -Warning: You are using the legacy mode of run_as. The support for this mode is experimental and might be removed in a future release of the CLI. In order to run the DLT pipelines in your DAB as the run_as user this mode changes the owners of the pipelines to the run_as identity, which requires the user deploying the bundle to be a workspace admin, and also a Metastore admin if the pipeline target is in UC. +Warning: You are using the legacy mode of run_as. The support for this mode is experimental and might be removed in a future release of the CLI. In order to run the Spark Declarative Pipelines in your DAB as the run_as user this mode changes the owners of the pipelines to the run_as identity, which requires the user deploying the bundle to be a workspace admin, and also a Metastore admin if the pipeline target is in UC. at experimental.use_legacy_run_as in databricks.yml:8:22 diff --git a/acceptance/bundle/telemetry/deploy-experimental/output.txt b/acceptance/bundle/telemetry/deploy-experimental/output.txt index d96e688b0a..229e784db6 100644 --- a/acceptance/bundle/telemetry/deploy-experimental/output.txt +++ b/acceptance/bundle/telemetry/deploy-experimental/output.txt @@ -1,56 +1,19 @@ >>> [CLI] bundle deploy -Warning: You are using the legacy mode of run_as. The support for this mode is experimental and might be removed in a future release of the CLI. In order to run the DLT pipelines in your DAB as the run_as user this mode changes the owners of the pipelines to the run_as identity, which requires the user deploying the bundle to be a workspace admin, and also a Metastore admin if the pipeline target is in UC. 
+Warning: You are using the legacy mode of run_as. The support for this mode is experimental and might be removed in a future release of the CLI. In order to run the Spark Declarative Pipelines in your DAB as the run_as user this mode changes the owners of the pipelines to the run_as identity, which requires the user deploying the bundle to be a workspace admin, and also a Metastore admin if the pipeline target is in UC. at experimental.use_legacy_run_as in databricks.yml:5:22 Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/test-bundle/default/files... -Deploying resources... -Updating deployment state... -Deployment complete! - ->>> cat out.requests.txt -{ - "bool_values": [ - { - "key": "local.cache.attempt", - "value": true - }, - { - "key": "local.cache.miss", - "value": true - }, - { - "key": "experimental.use_legacy_run_as", - "value": true - }, - { - "key": "run_as_set", - "value": true - }, - { - "key": "presets_name_prefix_is_set", - "value": false - }, - { - "key": "python_wheel_wrapper_is_set", - "value": false - }, - { - "key": "skip_artifact_cleanup", - "value": false - }, - { - "key": "has_serverless_compute", - "value": false - }, - { - "key": "has_classic_job_compute", - "value": false - }, - { - "key": "has_classic_interactive_compute", - "value": true - } - ] -} +Error: terraform init: exit status 1 + +Error: Failed to install provider + +Error while installing databricks/databricks v1.111.0: provider binary not +found: could not find executable file starting with +terraform-provider-databricks + + + + +Exit code: 1 diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/README.md b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/README.md index 6fd15788a5..1793627340 100644 --- a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/README.md +++ b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/README.md @@ -102,7 +102,7 @@ on CI/CD setup. 
## Manually deploying to Databricks with Declarative Automation Bundles Declarative Automation Bundles can be used to deploy to Databricks and to execute -dbt commands as a job using Databricks Workflows. See +dbt commands as a job using Databricks Jobs. See https://docs.databricks.com/dev-tools/bundles/index.html to learn more. Use the Databricks CLI to deploy a development copy of this project to a workspace: @@ -117,7 +117,7 @@ is optional here.) This deploys everything that's defined for this project. For example, the default template would deploy a job called `[dev yourname] my_dbt_sql_job` to your workspace. -You can find that job by opening your workpace and clicking on **Workflows**. +You can find that job by opening your workpace and clicking on **Jobs**. You can also deploy to your production target directly from the command-line. The warehouse, catalog, and schema for that target are configured in `dbt_profiles/profiles.yml`. diff --git a/acceptance/bundle/templates/default-python/combinations/classic/out.test.toml b/acceptance/bundle/templates/default-python/combinations/classic/out.test.toml index 3d911317b6..892807aa9b 100644 --- a/acceptance/bundle/templates/default-python/combinations/classic/out.test.toml +++ b/acceptance/bundle/templates/default-python/combinations/classic/out.test.toml @@ -3,7 +3,7 @@ Cloud = true [EnvMatrix] DATABRICKS_BUNDLE_ENGINE = ["terraform", "direct"] - DLT = ["yes", "no"] NBOOK = ["yes", "no"] + PIPELINE = ["yes", "no"] PY = ["yes", "no"] READPLAN = ["", "1"] diff --git a/acceptance/bundle/templates/default-python/combinations/serverless/out.test.toml b/acceptance/bundle/templates/default-python/combinations/serverless/out.test.toml index 3d911317b6..892807aa9b 100644 --- a/acceptance/bundle/templates/default-python/combinations/serverless/out.test.toml +++ b/acceptance/bundle/templates/default-python/combinations/serverless/out.test.toml @@ -3,7 +3,7 @@ Cloud = true [EnvMatrix] DATABRICKS_BUNDLE_ENGINE = ["terraform", 
"direct"] - DLT = ["yes", "no"] NBOOK = ["yes", "no"] + PIPELINE = ["yes", "no"] PY = ["yes", "no"] READPLAN = ["", "1"] diff --git a/acceptance/bundle/templates/default-scala/output/my_default_scala/README.md b/acceptance/bundle/templates/default-scala/output/my_default_scala/README.md index 9bc393514c..7a02014993 100644 --- a/acceptance/bundle/templates/default-scala/output/my_default_scala/README.md +++ b/acceptance/bundle/templates/default-scala/output/my_default_scala/README.md @@ -21,7 +21,7 @@ The 'my_default_scala' project was generated by using the default-scala template This deploys everything that's defined for this project. For example, the default template would deploy a job called `[dev yourname] my_default_scala_job` to your workspace. - You can find that job by opening your workspace and clicking on **Workflows**. + You can find that job by opening your workspace and clicking on **Jobs**. 4. Similarly, to deploy a production copy, type: ``` diff --git a/acceptance/bundle/templates/default-sql/output/my_default_sql/README.md b/acceptance/bundle/templates/default-sql/output/my_default_sql/README.md index 9d915327db..5d55c2cf74 100644 --- a/acceptance/bundle/templates/default-sql/output/my_default_sql/README.md +++ b/acceptance/bundle/templates/default-sql/output/my_default_sql/README.md @@ -21,7 +21,7 @@ The 'my_default_sql' project was generated by using the default-sql template. This deploys everything that's defined for this project. For example, the default template would deploy a job called `[dev yourname] my_default_sql_job` to your workspace. - You can find that job by opening your workpace and clicking on **Workflows**. + You can find that job by opening your workpace and clicking on **Jobs**. 4. 
Similarly, to deploy a production copy, type: ``` diff --git a/acceptance/bundle/templates/default-sql/output/my_default_sql/src/orders_daily.sql b/acceptance/bundle/templates/default-sql/output/my_default_sql/src/orders_daily.sql index ea7b80b54f..27bf1eed46 100644 --- a/acceptance/bundle/templates/default-sql/output/my_default_sql/src/orders_daily.sql +++ b/acceptance/bundle/templates/default-sql/output/my_default_sql/src/orders_daily.sql @@ -1,4 +1,4 @@ --- This query is executed using Databricks Workflows (see resources/my_default_sql_sql.job.yml) +-- This query is executed using Databricks Jobs (see resources/my_default_sql_sql.job.yml) USE CATALOG {{catalog}}; USE IDENTIFIER({{schema}}); diff --git a/acceptance/bundle/templates/default-sql/output/my_default_sql/src/orders_raw.sql b/acceptance/bundle/templates/default-sql/output/my_default_sql/src/orders_raw.sql index 79b1354cf4..d0d1afa660 100644 --- a/acceptance/bundle/templates/default-sql/output/my_default_sql/src/orders_raw.sql +++ b/acceptance/bundle/templates/default-sql/output/my_default_sql/src/orders_raw.sql @@ -1,4 +1,4 @@ --- This query is executed using Databricks Workflows (see resources/my_default_sql_sql.job.yml) +-- This query is executed using Databricks Jobs (see resources/my_default_sql_sql.job.yml) -- -- The streaming table below ingests all JSON files in /databricks-datasets/retail-org/sales_orders/ -- See also https://docs.databricks.com/sql/language-manual/sql-ref-syntax-ddl-create-streaming-table.html diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output.txt b/acceptance/bundle/templates/experimental-jobs-as-code/output.txt index 089a5c53a4..19931a2f73 100644 --- a/acceptance/bundle/templates/experimental-jobs-as-code/output.txt +++ b/acceptance/bundle/templates/experimental-jobs-as-code/output.txt @@ -95,7 +95,7 @@ Warning: Ignoring Databricks CLI version constraint for development build. 
Requi "libraries": [ { "notebook": { - "path": "/Workspace/Users/[USERNAME]/.bundle/my_jobs_as_code/dev/files/src/dlt_pipeline" + "path": "/Workspace/Users/[USERNAME]/.bundle/my_jobs_as_code/dev/files/src/sdp_pipeline" } } ], diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/README.md b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/README.md index 6bfac07da0..1eddbfdc8e 100644 --- a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/README.md +++ b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/README.md @@ -40,7 +40,7 @@ The 'my_jobs_as_code' project was generated by using the "Jobs as code" template This deploys everything that's defined for this project. For example, the default template would deploy a job called `[dev yourname] my_jobs_as_code_job` to your workspace. - You can find that job by opening your workspace and clicking on **Workflows**. + You can find that job by opening your workspace and clicking on **Jobs**. 3. Similarly, to deploy a production copy, type: ``` diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/pyproject.toml b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/pyproject.toml index 4478dace35..06b7cde899 100644 --- a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/pyproject.toml +++ b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/pyproject.toml @@ -32,7 +32,7 @@ where = ["src"] dev-dependencies = [ "databricks-bundles==x.y.z", - ## Add code completion support for DLT + ## Add code completion support for Spark Declarative Pipelines # "databricks-dlt", ## databricks-connect can be used to run parts of this project locally. 
diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/resources/my_jobs_as_code_pipeline.py b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/resources/my_jobs_as_code_pipeline.py index 9d83e573a9..5e86c5c232 100644 --- a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/resources/my_jobs_as_code_pipeline.py +++ b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/resources/my_jobs_as_code_pipeline.py @@ -9,7 +9,7 @@ "libraries": [ { "notebook": { - "path": "src/dlt_pipeline.ipynb", + "path": "src/sdp_pipeline.ipynb", }, }, ], diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/notebook.ipynb b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/notebook.ipynb index 227c7cc558..247706b44f 100644 --- a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/notebook.ipynb +++ b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/notebook.ipynb @@ -14,7 +14,7 @@ "source": [ "# Default notebook\n", "\n", - "This default notebook is executed using Databricks Workflows as defined in resources/my_jobs_as_code.job.yml." + "This default notebook is executed using Databricks Jobs as defined in resources/my_jobs_as_code.job.yml." 
] }, { diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/dlt_pipeline.ipynb b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/sdp_pipeline.ipynb similarity index 96% rename from acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/dlt_pipeline.ipynb rename to acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/sdp_pipeline.ipynb index d651c00422..eec7c3f2da 100644 --- a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/dlt_pipeline.ipynb +++ b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/sdp_pipeline.ipynb @@ -12,7 +12,7 @@ } }, "source": [ - "# DLT pipeline\n", + "# SDP pipeline\n", "\n", "This Lakeflow Spark Declarative Pipeline definition is executed using a pipeline defined in resources/my_jobs_as_code.pipeline.yml." ] @@ -72,7 +72,7 @@ "notebookMetadata": { "pythonIndentUnit": 2 }, - "notebookName": "dlt_pipeline", + "notebookName": "sdp_pipeline", "widgets": {} }, "kernelspec": { From d4c33786d13328c98febf34d153d55bd6dcf6abe Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Tue, 14 Apr 2026 13:43:59 +0200 Subject: [PATCH 03/16] Regenerate JSON schema for annotation changes Co-authored-by: Isaac --- bundle/schema/jsonschema.json | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/bundle/schema/jsonschema.json b/bundle/schema/jsonschema.json index 993adec793..9284467c8f 100644 --- a/bundle/schema/jsonschema.json +++ b/bundle/schema/jsonschema.json @@ -1322,7 +1322,7 @@ } }, "additionalProperties": false, - "markdownDescription": "The pipeline resource allows you to create Delta Live Tables [pipelines](https://docs.databricks.com/api/workspace/pipelines/create). For information about pipelines, see [link](https://docs.databricks.com/dlt/index.html). 
For a tutorial that uses the Declarative Automation Bundles template to create a pipeline, see [link](https://docs.databricks.com/dev-tools/bundles/pipelines-tutorial.html)." + "markdownDescription": "The pipeline resource allows you to create Spark Declarative [Pipelines](https://docs.databricks.com/api/workspace/pipelines/create). For information about pipelines, see [link](https://docs.databricks.com/dlt/index.html). For a tutorial that uses the Declarative Automation Bundles template to create a pipeline, see [link](https://docs.databricks.com/dev-tools/bundles/pipelines-tutorial.html)." }, { "type": "string", @@ -1696,7 +1696,7 @@ "catalog_name", "name" ], - "markdownDescription": "The schema resource type allows you to define Unity Catalog [schemas](https://docs.databricks.com/api/workspace/schemas/create) for tables and other assets in your workflows and pipelines created as part of a bundle. A schema, different from other resource types, has the following limitations:\n\n- The owner of a schema resource is always the deployment user, and cannot be changed. If `run_as` is specified in the bundle, it will be ignored by operations on the schema.\n- Only fields supported by the corresponding [Schemas object create API](https://docs.databricks.com/api/workspace/schemas/create) are available for the schema resource. For example, `enable_predictive_optimization` is not supported as it is only available on the [update API](https://docs.databricks.com/api/workspace/schemas/update)." + "markdownDescription": "The schema resource type allows you to define Unity Catalog [schemas](https://docs.databricks.com/api/workspace/schemas/create) for tables and other assets in your jobs and pipelines created as part of a bundle. A schema, different from other resource types, has the following limitations:\n\n- The owner of a schema resource is always the deployment user, and cannot be changed. 
If `run_as` is specified in the bundle, it will be ignored by operations on the schema.\n- Only fields supported by the corresponding [Schemas object create API](https://docs.databricks.com/api/workspace/schemas/create) are available for the schema resource. For example, `enable_predictive_optimization` is not supported as it is only available on the [update API](https://docs.databricks.com/api/workspace/schemas/update)." }, { "type": "string", @@ -2654,7 +2654,7 @@ "$ref": "#/$defs/string" }, "artifact_path": { - "description": "The artifact path to use within the workspace for both deployments and workflow runs", + "description": "The artifact path to use within the workspace for both deployments and job runs", "$ref": "#/$defs/string" }, "auth_type": { @@ -2694,7 +2694,7 @@ "$ref": "#/$defs/bool" }, "file_path": { - "description": "The file path to use within the workspace for both deployments and workflow runs", + "description": "The file path to use within the workspace for both deployments and job runs", "$ref": "#/$defs/string" }, "google_service_account": { @@ -11907,9 +11907,9 @@ "markdownDescription": "A Map that defines the resources for the bundle, where each key is the name of the resource, and the value is a Map that defines the resource. 
For more information about Declarative Automation Bundles supported resources, and resource definition reference, see [link](https://docs.databricks.com/dev-tools/bundles/resources.html).\n\n```yaml\nresources:\n \u003cresource-type\u003e:\n \u003cresource-name\u003e:\n \u003cresource-field-name\u003e: \u003cresource-field-value\u003e\n```" }, "run_as": { - "description": "The identity to use when running Declarative Automation Bundles workflows.", + "description": "The identity to use when running Declarative Automation Bundles resources.", "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.JobRunAs", - "markdownDescription": "The identity to use when running Declarative Automation Bundles workflows. See [link](https://docs.databricks.com/dev-tools/bundles/run-as.html)." + "markdownDescription": "The identity to use when running Declarative Automation Bundles resources. See [link](https://docs.databricks.com/dev-tools/bundles/run-as.html)." }, "scripts": { "$ref": "#/$defs/map/github.com/databricks/cli/bundle/config.Script" From ecf3c20cf3d9a6a9aaa0fa43a8985f126b0311bc Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Tue, 14 Apr 2026 14:07:31 +0200 Subject: [PATCH 04/16] Fix combinations test: remove !0 unchanged assertion With include_pipeline properly wired (was silently ignored as include_dlt), PIPELINE=no now excludes the pipeline resource. With only a job resource, dynamic_version causes 1 change and 0 unchanged, which is correct behavior. 
Co-authored-by: Isaac --- .../bundle/templates/default-python/combinations/classic/script | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/acceptance/bundle/templates/default-python/combinations/classic/script b/acceptance/bundle/templates/default-python/combinations/classic/script index 589290b77c..c380daea6d 100644 --- a/acceptance/bundle/templates/default-python/combinations/classic/script +++ b/acceptance/bundle/templates/default-python/combinations/classic/script @@ -9,7 +9,7 @@ trace $CLI bundle validate -t prod trace $CLI bundle plan -t dev -o json > tmp.plan.json 2> LOG.plan1.error $TESTDIR/../check_output.py $CLI bundle deploy -t dev $(readplanarg tmp.plan.json) # check no unexpected drift after deploy; due to dynamic_version whl is different so job is updated -$CLI bundle plan -t dev 2> LOG.plan2.error | contains.py '0 to add' '0 to delete' '!0 unchanged' > /dev/null +$CLI bundle plan -t dev 2> LOG.plan2.error | contains.py '0 to add' '0 to delete' > /dev/null trace $CLI bundle destroy -t dev --auto-approve 2>&1 | tail -n 2 trace $CLI bundle plan -t prod -o json > tmp.plan.json 2> LOG.plan3.error From 37c7dd48fda8df28e77276934766fbb6bbc800b8 Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Tue, 14 Apr 2026 14:32:01 +0200 Subject: [PATCH 05/16] Revert default-python combinations test parameter rename The template renamed include_dlt to include_pipeline in a prior PR, but the combinations test intentionally still passes include_dlt (which gets silently ignored, defaulting to yes). Renaming to include_pipeline makes PIPELINE=no actually exclude pipelines, causing divergent output across variants which the combinations framework doesn't support. 
Co-authored-by: Isaac --- .../default-python/combinations/classic/out.test.toml | 2 +- .../templates/default-python/combinations/classic/script | 2 +- .../templates/default-python/combinations/input.json.tmpl | 2 +- .../default-python/combinations/serverless/out.test.toml | 2 +- .../bundle/templates/default-python/combinations/test.toml | 4 ++-- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/acceptance/bundle/templates/default-python/combinations/classic/out.test.toml b/acceptance/bundle/templates/default-python/combinations/classic/out.test.toml index 892807aa9b..3d911317b6 100644 --- a/acceptance/bundle/templates/default-python/combinations/classic/out.test.toml +++ b/acceptance/bundle/templates/default-python/combinations/classic/out.test.toml @@ -3,7 +3,7 @@ Cloud = true [EnvMatrix] DATABRICKS_BUNDLE_ENGINE = ["terraform", "direct"] + DLT = ["yes", "no"] NBOOK = ["yes", "no"] - PIPELINE = ["yes", "no"] PY = ["yes", "no"] READPLAN = ["", "1"] diff --git a/acceptance/bundle/templates/default-python/combinations/classic/script b/acceptance/bundle/templates/default-python/combinations/classic/script index c380daea6d..589290b77c 100644 --- a/acceptance/bundle/templates/default-python/combinations/classic/script +++ b/acceptance/bundle/templates/default-python/combinations/classic/script @@ -9,7 +9,7 @@ trace $CLI bundle validate -t prod trace $CLI bundle plan -t dev -o json > tmp.plan.json 2> LOG.plan1.error $TESTDIR/../check_output.py $CLI bundle deploy -t dev $(readplanarg tmp.plan.json) # check no unexpected drift after deploy; due to dynamic_version whl is different so job is updated -$CLI bundle plan -t dev 2> LOG.plan2.error | contains.py '0 to add' '0 to delete' > /dev/null +$CLI bundle plan -t dev 2> LOG.plan2.error | contains.py '0 to add' '0 to delete' '!0 unchanged' > /dev/null trace $CLI bundle destroy -t dev --auto-approve 2>&1 | tail -n 2 trace $CLI bundle plan -t prod -o json > tmp.plan.json 2> LOG.plan3.error diff --git 
a/acceptance/bundle/templates/default-python/combinations/input.json.tmpl b/acceptance/bundle/templates/default-python/combinations/input.json.tmpl index 129aea7410..3a547d70fc 100644 --- a/acceptance/bundle/templates/default-python/combinations/input.json.tmpl +++ b/acceptance/bundle/templates/default-python/combinations/input.json.tmpl @@ -1,7 +1,7 @@ { "project_name": "X$UNIQUE_NAME", "include_notebook": "$NBOOK", - "include_pipeline": "$PIPELINE", + "include_dlt": "$DLT", "include_python": "$PY", "serverless": "$SERVERLESS" } diff --git a/acceptance/bundle/templates/default-python/combinations/serverless/out.test.toml b/acceptance/bundle/templates/default-python/combinations/serverless/out.test.toml index 892807aa9b..3d911317b6 100644 --- a/acceptance/bundle/templates/default-python/combinations/serverless/out.test.toml +++ b/acceptance/bundle/templates/default-python/combinations/serverless/out.test.toml @@ -3,7 +3,7 @@ Cloud = true [EnvMatrix] DATABRICKS_BUNDLE_ENGINE = ["terraform", "direct"] + DLT = ["yes", "no"] NBOOK = ["yes", "no"] - PIPELINE = ["yes", "no"] PY = ["yes", "no"] READPLAN = ["", "1"] diff --git a/acceptance/bundle/templates/default-python/combinations/test.toml b/acceptance/bundle/templates/default-python/combinations/test.toml index 8551e52469..d851f0a5be 100644 --- a/acceptance/bundle/templates/default-python/combinations/test.toml +++ b/acceptance/bundle/templates/default-python/combinations/test.toml @@ -8,8 +8,8 @@ EnvMatrix.READPLAN = ["", "1"] # INCLUDE_NOTEBOOK EnvMatrix.NBOOK = ["yes", "no"] -# INCLUDE_PIPELINE -EnvMatrix.PIPELINE = ["yes", "no"] +# INCLUDE_DLT +EnvMatrix.DLT = ["yes", "no"] # INCLUDE_PYTHON EnvMatrix.PY = ["yes", "no"] From 4c772d86aa66c1f4b74dba0f2bd2dd707a435f9d Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Tue, 14 Apr 2026 17:10:10 +0200 Subject: [PATCH 06/16] Restore deploy-experimental output.txt corrupted by local test run MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit The output was corrupted when running tests locally without terraform, replacing the successful deployment output with terraform init errors. Restores correct output from main and applies DLT→SDP string change. Co-authored-by: Isaac --- .../telemetry/deploy-experimental/output.txt | 61 +++++++++++++++---- 1 file changed, 49 insertions(+), 12 deletions(-) diff --git a/acceptance/bundle/telemetry/deploy-experimental/output.txt b/acceptance/bundle/telemetry/deploy-experimental/output.txt index 229e784db6..a65a9b2e8a 100644 --- a/acceptance/bundle/telemetry/deploy-experimental/output.txt +++ b/acceptance/bundle/telemetry/deploy-experimental/output.txt @@ -5,15 +5,52 @@ Warning: You are using the legacy mode of run_as. The support for this mode is e in databricks.yml:5:22 Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/test-bundle/default/files... -Error: terraform init: exit status 1 - -Error: Failed to install provider - -Error while installing databricks/databricks v1.111.0: provider binary not -found: could not find executable file starting with -terraform-provider-databricks - - - - -Exit code: 1 +Deploying resources... +Updating deployment state... +Deployment complete! 
+ +>>> cat out.requests.txt +{ + "bool_values": [ + { + "key": "local.cache.attempt", + "value": true + }, + { + "key": "local.cache.miss", + "value": true + }, + { + "key": "experimental.use_legacy_run_as", + "value": true + }, + { + "key": "run_as_set", + "value": true + }, + { + "key": "presets_name_prefix_is_set", + "value": false + }, + { + "key": "python_wheel_wrapper_is_set", + "value": false + }, + { + "key": "skip_artifact_cleanup", + "value": false + }, + { + "key": "has_serverless_compute", + "value": false + }, + { + "key": "has_classic_job_compute", + "value": false + }, + { + "key": "has_classic_interactive_compute", + "value": true + } + ] +} From 05ee7488f12349b499dc6e831dfad0a6846ee6c9 Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Thu, 16 Apr 2026 13:20:42 +0200 Subject: [PATCH 07/16] Simplify pipeline comment, rename dltActions to pipelineActions in destroy.go Address review feedback: drop "SDP" from comment since the variable name already says "pipeline", and rename the missed dltActions variable. Co-authored-by: Isaac --- bundle/phases/deploy.go | 2 +- bundle/phases/destroy.go | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/bundle/phases/deploy.go b/bundle/phases/deploy.go index 25ae76ec60..3ba4f6608c 100644 --- a/bundle/phases/deploy.go +++ b/bundle/phases/deploy.go @@ -56,7 +56,7 @@ func approvalForDeploy(ctx context.Context, b *bundle.Bundle, plan *deployplan.P } } - // One or more SDP pipelines is being recreated. + // One or more pipelines is being recreated. if len(pipelineActions) != 0 { cmdio.LogString(ctx, deleteOrRecreatePipelineMessage) for _, action := range pipelineActions { diff --git a/bundle/phases/destroy.go b/bundle/phases/destroy.go index 12720f1dc5..27afec5f42 100644 --- a/bundle/phases/destroy.go +++ b/bundle/phases/destroy.go @@ -52,7 +52,7 @@ func approvalForDestroy(ctx context.Context, b *bundle.Bundle, plan *deployplan. 
} schemaActions := filterGroup(deleteActions, "schemas", deployplan.Delete) - dltActions := filterGroup(deleteActions, "pipelines", deployplan.Delete) + pipelineActions := filterGroup(deleteActions, "pipelines", deployplan.Delete) volumeActions := filterGroup(deleteActions, "volumes", deployplan.Delete) if len(schemaActions) > 0 { @@ -63,9 +63,9 @@ func approvalForDestroy(ctx context.Context, b *bundle.Bundle, plan *deployplan. cmdio.LogString(ctx, "") } - if len(dltActions) > 0 { + if len(pipelineActions) > 0 { cmdio.LogString(ctx, deletePipelineMessage) - for _, a := range dltActions { + for _, a := range pipelineActions { cmdio.Log(ctx, a) } cmdio.LogString(ctx, "") From 58904184cbdbc60f0adbddb5ad803f0e8c3e2734 Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Thu, 16 Apr 2026 13:24:02 +0200 Subject: [PATCH 08/16] Regenerate schema and docs from annotations instead of manual edits Address review feedback: edit annotations source files and regenerate via make schema/schema-for-docs/docs instead of manually editing generated files. Also picks up other schema changes from the codebase. Co-authored-by: Isaac --- bundle/docsgen/output/reference.md | 3119 +++++++- bundle/docsgen/output/resources.md | 9096 +++++++++++++++--------- bundle/schema/jsonschema_for_docs.json | 69 +- 3 files changed, 8536 insertions(+), 3748 deletions(-) diff --git a/bundle/docsgen/output/reference.md b/bundle/docsgen/output/reference.md index ca4a347e1a..3ace4ef309 100644 --- a/bundle/docsgen/output/reference.md +++ b/bundle/docsgen/output/reference.md @@ -1,7 +1,7 @@ --- description: 'Configuration reference for databricks.yml' last_update: - date: 2025-09-13 + date: 2026-04-16 --- @@ -122,6 +122,10 @@ The bundle attributes when deploying to this target, - Map - The definition of the bundle deployment. For supported attributes see [\_](/dev-tools/bundles/deployment-modes.md). See [\_](#bundledeployment). +- - `engine` + - String + - The deployment engine to use. 
Valid values are `terraform` and `direct`. Takes priority over `DATABRICKS_BUNDLE_ENGINE` environment variable. Default is "terraform". + - - `git` - Map - The Git version control details that are associated with your bundle. For supported attributes see [\_](/dev-tools/bundles/settings.md#git). See [\_](#bundlegit). @@ -385,6 +389,35 @@ Defines bundle deployment presets. See [\_](/dev-tools/bundles/deployment-modes. ::: +## python + +**`Type: Map`** + + + + + +:::list-table + +- - Key + - Type + - Description + +- - `mutators` + - Sequence + - Mutators contains a list of fully qualified function paths to mutator functions. Example: ["my_project.mutators:add_default_cluster"] + +- - `resources` + - Sequence + - Resources contains a list of fully qualified function paths to load resources defined in Python code. Example: ["my_project.resources:load_resources"] + +- - `venv_path` + - String + - VEnvPath is path to the virtual environment. If enabled, Python code will execute within this environment. If disabled, it defaults to using the Python interpreter available in the current shell. + +::: + + ## resources **`Type: Map`** @@ -406,9 +439,17 @@ resources: - Type - Description +- - `alerts` + - Map + - See [\_](#resourcesalerts). + - - `apps` - Map - - The app resource defines a [Databricks app](/api/workspace/apps/create). For information about Databricks Apps, see [\_](/dev-tools/databricks-apps/index.md). + - The app resource defines a [Databricks app](/api/workspace/apps/create). For information about Databricks Apps, see [\_](/dev-tools/databricks-apps/index.md). See [\_](#resourcesapps). + +- - `catalogs` + - Map + - See [\_](#resourcescatalogs). - - `clusters` - Map @@ -416,7 +457,7 @@ resources: - - `dashboards` - Map - - The dashboard definitions for the bundle, where each key is the name of the dashboard. See [\_](/dev-tools/bundles/resources.md#dashboards). + - The dashboard definitions for the bundle, where each key is the name of the dashboard. 
See [\_](/dev-tools/bundles/resources.md#dashboards). See [\_](#resourcesdashboards). - - `database_catalogs` - Map @@ -424,12 +465,16 @@ resources: - - `database_instances` - Map - - + - See [\_](#resourcesdatabase_instances). - - `experiments` - Map - The experiment definitions for the bundle, where each key is the name of the experiment. See [\_](/dev-tools/bundles/resources.md#experiments). +- - `external_locations` + - Map + - See [\_](#resourcesexternal_locations). + - - `jobs` - Map - The job definitions for the bundle, where each key is the name of the job. See [\_](/dev-tools/bundles/resources.md#jobs). @@ -446,6 +491,18 @@ resources: - Map - The pipeline definitions for the bundle, where each key is the name of the pipeline. See [\_](/dev-tools/bundles/resources.md#pipelines). +- - `postgres_branches` + - Map + - See [\_](#resourcespostgres_branches). + +- - `postgres_endpoints` + - Map + - See [\_](#resourcespostgres_endpoints). + +- - `postgres_projects` + - Map + - See [\_](#resourcespostgres_projects). + - - `quality_monitors` - Map - The quality monitor definitions for the bundle, where each key is the name of the quality monitor. See [\_](/dev-tools/bundles/resources.md#quality_monitors). @@ -477,16 +534,16 @@ resources: ::: -### resources.secret_scopes +### resources.alerts **`Type: Map`** -The secret scope definitions for the bundle, where each key is the name of the secret scope. See [\_](/dev-tools/bundles/resources.md#secret_scopes). + ```yaml -secret_scopes: - : - : +alerts: + : + : ``` @@ -496,34 +553,90 @@ secret_scopes: - Type - Description -- - `backend_type` +- - `create_time` - String - - The backend type the scope will be created with. If not specified, will default to `DATABRICKS` + - -- - `keyvault_metadata` +- - `custom_description` + - String + - + +- - `custom_summary` + - String + - + +- - `display_name` + - String + - + +- - `effective_run_as` - Map - - The metadata for the secret scope if the `backend_type` is `AZURE_KEYVAULT`. 
See [\_](#resourcessecret_scopesnamekeyvault_metadata). + - See [\_](#resourcesalertsnameeffective_run_as). + +- - `evaluation` + - Map + - See [\_](#resourcesalertsnameevaluation). + +- - `file_path` + - String + - + +- - `id` + - String + - - - `lifecycle` - Map - - Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. See [\_](#resourcessecret_scopesnamelifecycle). + - See [\_](#resourcesalertsnamelifecycle). -- - `name` +- - `lifecycle_state` - String - - Scope name requested by the user. Scope names are unique. + - + +- - `owner_user_name` + - String + - + +- - `parent_path` + - String + - - - `permissions` - Sequence - - The permissions to apply to the secret scope. Permissions are managed via secret scope ACLs. See [\_](#resourcessecret_scopesnamepermissions). + - See [\_](#resourcesalertsnamepermissions). + +- - `query_text` + - String + - + +- - `run_as` + - Map + - See [\_](#resourcesalertsnamerun_as). + +- - `run_as_user_name` + - String + - + +- - `schedule` + - Map + - See [\_](#resourcesalertsnameschedule). + +- - `update_time` + - String + - + +- - `warehouse_id` + - String + - ::: -### resources.secret_scopes._name_.lifecycle +### resources.alerts._name_.lifecycle **`Type: Map`** -Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. + @@ -540,11 +653,11 @@ Lifecycle is a struct that contains the lifecycle settings for a resource. It co ::: -### resources.secret_scopes._name_.permissions +### resources.alerts._name_.permissions **`Type: Sequence`** -The permissions to apply to the secret scope. Permissions are managed via secret scope ACLs. + @@ -556,7 +669,7 @@ The permissions to apply to the secret scope. Permissions are managed via secret - - `group_name` - String - - The name of the group that has the permission set in level. 
This field translates to a `principal` field in secret scope ACL. + - The name of the group that has the permission set in level. - - `level` - String @@ -564,25 +677,25 @@ The permissions to apply to the secret scope. Permissions are managed via secret - - `service_principal_name` - String - - The application ID of an active service principal. This field translates to a `principal` field in secret scope ACL. + - The name of the service principal that has the permission set in level. - - `user_name` - String - - The name of the user that has the permission set in level. This field translates to a `principal` field in secret scope ACL. + - The name of the user that has the permission set in level. ::: -### resources.synced_database_tables +### resources.apps **`Type: Map`** - +The app resource defines a [Databricks app](/api/workspace/apps/create). For information about Databricks Apps, see [\_](/dev-tools/databricks-apps/index.md). ```yaml -synced_database_tables: - : - : +apps: + : + : ``` @@ -592,71 +705,150 @@ synced_database_tables: - Type - Description -- - `data_synchronization_status` +- - `active_deployment` - Map - - See [\_](#resourcessynced_database_tablesnamedata_synchronization_status). + - See [\_](#resourcesappsnameactive_deployment). -- - `database_instance_name` +- - `app_status` + - Map + - See [\_](#resourcesappsnameapp_status). + +- - `budget_policy_id` - String - -- - `effective_database_instance_name` +- - `compute_size` - String - -- - `effective_logical_database_name` +- - `compute_status` + - Map + - See [\_](#resourcesappsnamecompute_status). + +- - `config` + - Map + - See [\_](#resourcesappsnameconfig). 
+ +- - `create_time` - String - -- - `lifecycle` +- - `creator` + - String + - + +- - `default_source_code_path` + - String + - + +- - `description` + - String + - + +- - `effective_budget_policy_id` + - String + - + +- - `effective_usage_policy_id` + - String + - + +- - `effective_user_api_scopes` + - Sequence + - + +- - `git_repository` - Map - - Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. See [\_](#resourcessynced_database_tablesnamelifecycle). + - See [\_](#resourcesappsnamegit_repository). -- - `logical_database_name` +- - `git_source` + - Map + - Git source configuration for app deployments. Specifies which git reference (branch, tag, or commit) to use when deploying the app. Used in conjunction with git_repository to deploy code directly from git. The source_code_path within git_source specifies the relative path to the app code within the repository. See [\_](#resourcesappsnamegit_source). + +- - `id` - String - +- - `lifecycle` + - Map + - See [\_](#resourcesappsnamelifecycle). + - - `name` - String - -- - `spec` +- - `oauth2_app_client_id` + - String + - + +- - `oauth2_app_integration_id` + - String + - + +- - `pending_deployment` - Map - - See [\_](#resourcessynced_database_tablesnamespec). + - See [\_](#resourcesappsnamepending_deployment). -- - `unity_catalog_provisioning_state` +- - `permissions` + - Sequence + - See [\_](#resourcesappsnamepermissions). + +- - `resources` + - Sequence + - See [\_](#resourcesappsnameresources). + +- - `service_principal_client_id` - String - -::: +- - `service_principal_id` + - Integer + - +- - `service_principal_name` + - String + - -### resources.synced_database_tables._name_.lifecycle +- - `source_code_path` + - String + - -**`Type: Map`** +- - `space` + - String + - -Lifecycle is a struct that contains the lifecycle settings for a resource. 
It controls the behavior of the resource when it is deployed or destroyed. +- - `telemetry_export_destinations` + - Sequence + - See [\_](#resourcesappsnametelemetry_export_destinations). +- - `update_time` + - String + - +- - `updater` + - String + - -:::list-table +- - `url` + - String + - -- - Key - - Type - - Description +- - `usage_policy_id` + - String + - -- - `prevent_destroy` - - Boolean - - Lifecycle setting to prevent the resource from being destroyed. +- - `user_api_scopes` + - Sequence + - ::: -## run_as +### resources.apps._name_.config **`Type: Map`** -The identity to use when running Declarative Automation Bundles workflows. See [\_](/dev-tools/bundles/run-as.md). + @@ -666,28 +858,23 @@ The identity to use when running Declarative Automation Bundles workflows. See [ - Type - Description -- - `service_principal_name` - - String - - The application ID of an active service principal. Setting this field requires the `servicePrincipal/user` role. +- - `command` + - Sequence + - -- - `user_name` - - String - - The email of an active workspace user. Non-admin users can only set this field to their own email. +- - `env` + - Sequence + - See [\_](#resourcesappsnameconfigenv). ::: -## scripts +### resources.apps._name_.config.env -**`Type: Map`** +**`Type: Sequence`** -```yaml -scripts: - : - : -``` :::list-table @@ -696,18 +883,26 @@ scripts: - Type - Description -- - `content` +- - `name` + - String + - + +- - `value` + - String + - + +- - `value_from` - String - ::: -## sync +### resources.apps._name_.lifecycle **`Type: Map`** -The files and file paths to include or exclude in the bundle. See [\_](/dev-tools/bundles/settings.md#sync). + @@ -717,32 +912,23 @@ The files and file paths to include or exclude in the bundle. See [\_](/dev-tool - Type - Description -- - `exclude` - - Sequence - - A list of files or folders to exclude from the bundle. - -- - `include` - - Sequence - - A list of files or folders to include in the bundle. 
+- - `prevent_destroy` + - Boolean + - Lifecycle setting to prevent the resource from being destroyed. -- - `paths` - - Sequence - - The local folder paths, which can be outside the bundle root, to synchronize to the workspace when the bundle is deployed. +- - `started` + - Boolean + - Lifecycle setting to deploy the resource in started mode. Only supported for apps, clusters, and sql_warehouses in direct deployment mode. ::: -## targets +### resources.apps._name_.permissions -**`Type: Map`** +**`Type: Sequence`** -Defines deployment targets for the bundle. See [\_](/dev-tools/bundles/settings.md#targets) + -```yaml -targets: - : - : -``` :::list-table @@ -751,75 +937,118 @@ targets: - Type - Description -- - `artifacts` - - Map - - The artifacts to include in the target deployment. See [\_](#targetsnameartifacts). +- - `group_name` + - String + - -- - `bundle` - - Map - - The bundle attributes when deploying to this target. See [\_](#targetsnamebundle). +- - `level` + - String + - -- - `cluster_id` +- - `service_principal_name` - String - - The ID of the cluster to use for this target. + - -- - `compute_id` +- - `user_name` - String - - Deprecated: please use cluster_id instead + - -- - `default` - - Boolean - - Whether this target is the default target. +::: -- - `git` - - Map - - The Git version control settings for the target. See [\_](#targetsnamegit). -- - `mode` +### resources.catalogs + +**`Type: Map`** + + + +```yaml +catalogs: + : + : +``` + + +:::list-table + +- - Key + - Type + - Description + +- - `comment` - String - - The deployment mode for the target. Valid values are `development` or `production`. See [\_](/dev-tools/bundles/deployment-modes.md). + - -- - `permissions` +- - `connection_name` + - String + - + +- - `grants` - Sequence - - The permissions for deploying and running the bundle in the target. See [\_](#targetsnamepermissions). + - See [\_](#resourcescatalogsnamegrants). 
-- - `presets` +- - `lifecycle` - Map - - The deployment presets for the target. See [\_](#targetsnamepresets). + - See [\_](#resourcescatalogsnamelifecycle). -- - `resources` - - Map - - The resource definitions for the target. See [\_](#targetsnameresources). +- - `name` + - String + - -- - `run_as` +- - `options` - Map - - The identity to use to run the bundle, see [\_](/dev-tools/bundles/run-as.md). See [\_](#targetsnamerun_as). + - -- - `sync` +- - `properties` - Map - - The local paths to sync to the target workspace when a bundle is run or deployed. See [\_](#targetsnamesync). + - -- - `variables` - - Map - - The custom variable definitions for the target. See [\_](#targetsnamevariables). +- - `provider_name` + - String + - -- - `workspace` - - Map - - The Databricks workspace for the target. See [\_](#targetsnameworkspace). +- - `share_name` + - String + - + +- - `storage_root` + - String + - ::: -### targets._name_.artifacts +### resources.catalogs._name_.lifecycle **`Type: Map`** -The artifacts to include in the target deployment. + + + + +:::list-table + +- - Key + - Type + - Description + +- - `prevent_destroy` + - Boolean + - Lifecycle setting to prevent the resource from being destroyed. + +::: + + +### resources.dashboards + +**`Type: Map`** + +The dashboard definitions for the bundle, where each key is the name of the dashboard. See [\_](/dev-tools/bundles/resources.md#dashboards). ```yaml -artifacts: - : - : +dashboards: + : + : ``` @@ -829,38 +1058,99 @@ artifacts: - Type - Description -- - `build` +- - `create_time` - String - - An optional set of build commands to run locally before deployment. + - -- - `dynamic_version` +- - `dashboard_id` + - String + - + +- - `dataset_catalog` + - String + - Sets the default catalog for all datasets in this dashboard. When set, this overrides the catalog specified in individual dataset definitions. + +- - `dataset_schema` + - String + - Sets the default schema for all datasets in this dashboard. 
When set, this overrides the schema specified in individual dataset definitions. + +- - `display_name` + - String + - + +- - `embed_credentials` - Boolean - - Whether to patch the wheel version dynamically based on the timestamp of the whl file. If this is set to `true`, new code can be deployed without having to update the version in `setup.py` or `pyproject.toml`. This setting is only valid when `type` is set to `whl`. See [\_](/dev-tools/bundles/settings.md#bundle-syntax-mappings-artifacts). + - -- - `executable` +- - `etag` - String - - The executable type. Valid values are `bash`, `sh`, and `cmd`. + - -- - `files` - - Sequence - - The relative or absolute path to the built artifact files. See [\_](#targetsnameartifactsnamefiles). +- - `file_path` + - String + - + +- - `lifecycle` + - Map + - See [\_](#resourcesdashboardsnamelifecycle). + +- - `lifecycle_state` + - String + - + +- - `parent_path` + - String + - - - `path` - String - - The local path of the directory for the artifact. + - -- - `type` +- - `permissions` + - Sequence + - See [\_](#resourcesdashboardsnamepermissions). + +- - `serialized_dashboard` + - Any + - + +- - `update_time` - String - - Required if the artifact is a Python wheel. The type of the artifact. Valid values are `whl` and `jar`. + - + +- - `warehouse_id` + - String + - ::: -### targets._name_.artifacts._name_.files +### resources.dashboards._name_.lifecycle + +**`Type: Map`** + + + + + +:::list-table + +- - Key + - Type + - Description + +- - `prevent_destroy` + - Boolean + - Lifecycle setting to prevent the resource from being destroyed. + +::: + + +### resources.dashboards._name_.permissions **`Type: Sequence`** -The relative or absolute path to the built artifact files. + @@ -870,19 +1160,36 @@ The relative or absolute path to the built artifact files. - Type - Description -- - `source` +- - `group_name` - String - - Required. The artifact source file. + - The name of the group that has the permission set in level. 
+ +- - `level` + - String + - The allowed permission for user, group, service principal defined for this permission. + +- - `service_principal_name` + - String + - The name of the service principal that has the permission set in level. + +- - `user_name` + - String + - The name of the user that has the permission set in level. ::: -### targets._name_.bundle +### resources.database_instances **`Type: Map`** -The bundle attributes when deploying to this target. + +```yaml +database_instances: + : + : +``` :::list-table @@ -891,42 +1198,2208 @@ The bundle attributes when deploying to this target. - Type - Description -- - `cluster_id` +- - `capacity` - String - - The ID of a cluster to use to run the bundle. See [\_](/dev-tools/bundles/settings.md#cluster_id). + - -- - `compute_id` +- - `child_instance_refs` + - Sequence + - See [\_](#resourcesdatabase_instancesnamechild_instance_refs). + +- - `creation_time` - String - - Deprecated. The ID of the compute to use to run the bundle. + - -- - `databricks_cli_version` +- - `creator` - String - - The Databricks CLI version to use for the bundle. See [\_](/dev-tools/bundles/settings.md#databricks_cli_version). + - -- - `deployment` - - Map - - The definition of the bundle deployment. For supported attributes see [\_](/dev-tools/bundles/deployment-modes.md). See [\_](#targetsnamebundledeployment). +- - `custom_tags` + - Sequence + - See [\_](#resourcesdatabase_instancesnamecustom_tags). -- - `git` +- - `effective_capacity` + - String + - + +- - `effective_custom_tags` + - Sequence + - See [\_](#resourcesdatabase_instancesnameeffective_custom_tags). 
+ +- - `effective_enable_pg_native_login` + - Boolean + - + +- - `effective_enable_readable_secondaries` + - Boolean + - + +- - `effective_node_count` + - Integer + - + +- - `effective_retention_window_in_days` + - Integer + - + +- - `effective_stopped` + - Boolean + - + +- - `effective_usage_policy_id` + - String + - + +- - `enable_pg_native_login` + - Boolean + - + +- - `enable_readable_secondaries` + - Boolean + - + +- - `lifecycle` - Map - - The Git version control details that are associated with your bundle. For supported attributes see [\_](/dev-tools/bundles/settings.md#git). See [\_](#targetsnamebundlegit). + - See [\_](#resourcesdatabase_instancesnamelifecycle). - - `name` - String - - The name of the bundle. + - + +- - `node_count` + - Integer + - + +- - `parent_instance_ref` + - Map + - See [\_](#resourcesdatabase_instancesnameparent_instance_ref). + +- - `permissions` + - Sequence + - See [\_](#resourcesdatabase_instancesnamepermissions). + +- - `pg_version` + - String + - + +- - `read_only_dns` + - String + - + +- - `read_write_dns` + - String + - + +- - `retention_window_in_days` + - Integer + - + +- - `state` + - String + - + +- - `stopped` + - Boolean + - + +- - `uid` + - String + - + +- - `usage_policy_id` + - String + - + +::: + + +### resources.database_instances._name_.lifecycle + +**`Type: Map`** + + + + + +:::list-table + +- - Key + - Type + - Description + +- - `prevent_destroy` + - Boolean + - Lifecycle setting to prevent the resource from being destroyed. + +::: + + +### resources.database_instances._name_.permissions + +**`Type: Sequence`** + + + + + +:::list-table + +- - Key + - Type + - Description + +- - `group_name` + - String + - The name of the group that has the permission set in level. + +- - `level` + - String + - The allowed permission for user, group, service principal defined for this permission. + +- - `service_principal_name` + - String + - The name of the service principal that has the permission set in level. 
+ +- - `user_name` + - String + - The name of the user that has the permission set in level. + +::: + + +### resources.external_locations + +**`Type: Map`** + + + +```yaml +external_locations: + : + : +``` + + +:::list-table + +- - Key + - Type + - Description + +- - `comment` + - String + - + +- - `credential_name` + - String + - + +- - `effective_enable_file_events` + - Boolean + - + +- - `enable_file_events` + - Boolean + - + +- - `encryption_details` + - Map + - See [\_](#resourcesexternal_locationsnameencryption_details). + +- - `fallback` + - Boolean + - + +- - `file_event_queue` + - Map + - See [\_](#resourcesexternal_locationsnamefile_event_queue). + +- - `grants` + - Sequence + - See [\_](#resourcesexternal_locationsnamegrants). + +- - `lifecycle` + - Map + - See [\_](#resourcesexternal_locationsnamelifecycle). + +- - `name` + - String + - + +- - `read_only` + - Boolean + - + +- - `skip_validation` + - Boolean + - + +- - `url` + - String + - + +::: + + +### resources.external_locations._name_.lifecycle + +**`Type: Map`** + + + + + +:::list-table + +- - Key + - Type + - Description + +- - `prevent_destroy` + - Boolean + - Lifecycle setting to prevent the resource from being destroyed. + +::: + + +### resources.postgres_branches + +**`Type: Map`** + + + +```yaml +postgres_branches: + : + : +``` + + +:::list-table + +- - Key + - Type + - Description + +- - `branch_id` + - String + - + +- - `expire_time` + - Map + - + +- - `is_protected` + - Boolean + - + +- - `lifecycle` + - Map + - See [\_](#resourcespostgres_branchesnamelifecycle). 
+ +- - `no_expiry` + - Boolean + - + +- - `parent` + - String + - + +- - `source_branch` + - String + - + +- - `source_branch_lsn` + - String + - + +- - `source_branch_time` + - Map + - + +- - `ttl` + - String + - + +::: + + +### resources.postgres_branches._name_.lifecycle + +**`Type: Map`** + + + + + +:::list-table + +- - Key + - Type + - Description + +- - `prevent_destroy` + - Boolean + - Lifecycle setting to prevent the resource from being destroyed. + +::: + + +### resources.postgres_endpoints + +**`Type: Map`** + + + +```yaml +postgres_endpoints: + : + : +``` + + +:::list-table + +- - Key + - Type + - Description + +- - `autoscaling_limit_max_cu` + - Any + - + +- - `autoscaling_limit_min_cu` + - Any + - + +- - `disabled` + - Boolean + - + +- - `endpoint_id` + - String + - + +- - `endpoint_type` + - String + - + +- - `group` + - Map + - See [\_](#resourcespostgres_endpointsnamegroup). + +- - `lifecycle` + - Map + - See [\_](#resourcespostgres_endpointsnamelifecycle). + +- - `no_suspension` + - Boolean + - + +- - `parent` + - String + - + +- - `settings` + - Map + - See [\_](#resourcespostgres_endpointsnamesettings). + +- - `suspend_timeout_duration` + - String + - + +::: + + +### resources.postgres_endpoints._name_.lifecycle + +**`Type: Map`** + + + + + +:::list-table + +- - Key + - Type + - Description + +- - `prevent_destroy` + - Boolean + - Lifecycle setting to prevent the resource from being destroyed. + +::: + + +### resources.postgres_projects + +**`Type: Map`** + + + +```yaml +postgres_projects: + : + : +``` + + +:::list-table + +- - Key + - Type + - Description + +- - `budget_policy_id` + - String + - + +- - `custom_tags` + - Sequence + - See [\_](#resourcespostgres_projectsnamecustom_tags). + +- - `default_endpoint_settings` + - Map + - See [\_](#resourcespostgres_projectsnamedefault_endpoint_settings). 
+ +- - `display_name` + - String + - + +- - `enable_pg_native_login` + - Boolean + - + +- - `history_retention_duration` + - String + - + +- - `lifecycle` + - Map + - See [\_](#resourcespostgres_projectsnamelifecycle). + +- - `permissions` + - Sequence + - See [\_](#resourcespostgres_projectsnamepermissions). + +- - `pg_version` + - Integer + - + +- - `project_id` + - String + - + +::: + + +### resources.postgres_projects._name_.lifecycle + +**`Type: Map`** + + + + + +:::list-table + +- - Key + - Type + - Description + +- - `prevent_destroy` + - Boolean + - Lifecycle setting to prevent the resource from being destroyed. + +::: + + +### resources.postgres_projects._name_.permissions + +**`Type: Sequence`** + + + + + +:::list-table + +- - Key + - Type + - Description + +- - `group_name` + - String + - The name of the group that has the permission set in level. + +- - `level` + - String + - The allowed permission for user, group, service principal defined for this permission. + +- - `service_principal_name` + - String + - The name of the service principal that has the permission set in level. + +- - `user_name` + - String + - The name of the user that has the permission set in level. + +::: + + +### resources.secret_scopes + +**`Type: Map`** + +The secret scope definitions for the bundle, where each key is the name of the secret scope. See [\_](/dev-tools/bundles/resources.md#secret_scopes). + +```yaml +secret_scopes: + : + : +``` + + +:::list-table + +- - Key + - Type + - Description + +- - `backend_type` + - String + - The backend type the scope will be created with. If not specified, will default to `DATABRICKS` + +- - `keyvault_metadata` + - Map + - The metadata for the secret scope if the `backend_type` is `AZURE_KEYVAULT`. See [\_](#resourcessecret_scopesnamekeyvault_metadata). + +- - `lifecycle` + - Map + - Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. 
See [\_](#resourcessecret_scopesnamelifecycle). + +- - `name` + - String + - Scope name requested by the user. Scope names are unique. + +- - `permissions` + - Sequence + - The permissions to apply to the secret scope. Permissions are managed via secret scope ACLs. See [\_](#resourcessecret_scopesnamepermissions). + +::: + + +### resources.secret_scopes._name_.lifecycle + +**`Type: Map`** + +Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. + + + +:::list-table + +- - Key + - Type + - Description + +- - `prevent_destroy` + - Boolean + - Lifecycle setting to prevent the resource from being destroyed. + +::: + + +### resources.secret_scopes._name_.permissions + +**`Type: Sequence`** + +The permissions to apply to the secret scope. Permissions are managed via secret scope ACLs. + + + +:::list-table + +- - Key + - Type + - Description + +- - `group_name` + - String + - The name of the group that has the permission set in level. This field translates to a `principal` field in secret scope ACL. + +- - `level` + - String + - The allowed permission for user, group, service principal defined for this permission. + +- - `service_principal_name` + - String + - The application ID of an active service principal. This field translates to a `principal` field in secret scope ACL. + +- - `user_name` + - String + - The name of the user that has the permission set in level. This field translates to a `principal` field in secret scope ACL. + +::: + + +### resources.synced_database_tables + +**`Type: Map`** + + + +```yaml +synced_database_tables: + : + : +``` + + +:::list-table + +- - Key + - Type + - Description + +- - `data_synchronization_status` + - Map + - See [\_](#resourcessynced_database_tablesnamedata_synchronization_status). 
+ +- - `database_instance_name` + - String + - + +- - `effective_database_instance_name` + - String + - + +- - `effective_logical_database_name` + - String + - + +- - `lifecycle` + - Map + - See [\_](#resourcessynced_database_tablesnamelifecycle). + +- - `logical_database_name` + - String + - + +- - `name` + - String + - + +- - `spec` + - Map + - See [\_](#resourcessynced_database_tablesnamespec). + +- - `unity_catalog_provisioning_state` + - String + - + +::: + + +### resources.synced_database_tables._name_.lifecycle + +**`Type: Map`** + + + + + +:::list-table + +- - Key + - Type + - Description + +- - `prevent_destroy` + - Boolean + - Lifecycle setting to prevent the resource from being destroyed. + +::: + + +## run_as + +**`Type: Map`** + +The identity to use when running Declarative Automation Bundles resources. See [\_](/dev-tools/bundles/run-as.md). + + + +:::list-table + +- - Key + - Type + - Description + +- - `group_name` + - String + - + +- - `service_principal_name` + - String + - The application ID of an active service principal. Setting this field requires the `servicePrincipal/user` role. + +- - `user_name` + - String + - The email of an active workspace user. Non-admin users can only set this field to their own email. + +::: + + +## scripts + +**`Type: Map`** + + + +```yaml +scripts: + : + : +``` + + +:::list-table + +- - Key + - Type + - Description + +- - `content` + - String + - + +::: + + +## sync + +**`Type: Map`** + +The files and file paths to include or exclude in the bundle. See [\_](/dev-tools/bundles/settings.md#sync). + + + +:::list-table + +- - Key + - Type + - Description + +- - `exclude` + - Sequence + - A list of files or folders to exclude from the bundle. + +- - `include` + - Sequence + - A list of files or folders to include in the bundle. + +- - `paths` + - Sequence + - The local folder paths, which can be outside the bundle root, to synchronize to the workspace when the bundle is deployed. 
+ +::: + + +## targets + +**`Type: Map`** + +Defines deployment targets for the bundle. See [\_](/dev-tools/bundles/settings.md#targets) + +```yaml +targets: + : + : +``` + + +:::list-table + +- - Key + - Type + - Description + +- - `artifacts` + - Map + - The artifacts to include in the target deployment. See [\_](#targetsnameartifacts). + +- - `bundle` + - Map + - The bundle attributes when deploying to this target. See [\_](#targetsnamebundle). + +- - `cluster_id` + - String + - The ID of the cluster to use for this target. + +- - `compute_id` + - String + - Deprecated: please use cluster_id instead + +- - `default` + - Boolean + - Whether this target is the default target. + +- - `git` + - Map + - The Git version control settings for the target. See [\_](#targetsnamegit). + +- - `mode` + - String + - The deployment mode for the target. Valid values are `development` or `production`. See [\_](/dev-tools/bundles/deployment-modes.md). + +- - `permissions` + - Sequence + - The permissions for deploying and running the bundle in the target. See [\_](#targetsnamepermissions). + +- - `presets` + - Map + - The deployment presets for the target. See [\_](#targetsnamepresets). + +- - `resources` + - Map + - The resource definitions for the target. See [\_](#targetsnameresources). + +- - `run_as` + - Map + - The identity to use to run the bundle, see [\_](/dev-tools/bundles/run-as.md). See [\_](#targetsnamerun_as). + +- - `sync` + - Map + - The local paths to sync to the target workspace when a bundle is run or deployed. See [\_](#targetsnamesync). + +- - `variables` + - Map + - The custom variable definitions for the target. See [\_](#targetsnamevariables). + +- - `workspace` + - Map + - The Databricks workspace for the target. See [\_](#targetsnameworkspace). + +::: + + +### targets._name_.artifacts + +**`Type: Map`** + +The artifacts to include in the target deployment. 
+ +```yaml +artifacts: + : + : +``` + + +:::list-table + +- - Key + - Type + - Description + +- - `build` + - String + - An optional set of build commands to run locally before deployment. + +- - `dynamic_version` + - Boolean + - Whether to patch the wheel version dynamically based on the timestamp of the whl file. If this is set to `true`, new code can be deployed without having to update the version in `setup.py` or `pyproject.toml`. This setting is only valid when `type` is set to `whl`. See [\_](/dev-tools/bundles/settings.md#bundle-syntax-mappings-artifacts). + +- - `executable` + - String + - The executable type. Valid values are `bash`, `sh`, and `cmd`. + +- - `files` + - Sequence + - The relative or absolute path to the built artifact files. See [\_](#targetsnameartifactsnamefiles). + +- - `path` + - String + - The local path of the directory for the artifact. + +- - `type` + - String + - Required if the artifact is a Python wheel. The type of the artifact. Valid values are `whl` and `jar`. + +::: + + +### targets._name_.artifacts._name_.files + +**`Type: Sequence`** + +The relative or absolute path to the built artifact files. + + + +:::list-table + +- - Key + - Type + - Description + +- - `source` + - String + - Required. The artifact source file. + +::: + + +### targets._name_.bundle + +**`Type: Map`** + +The bundle attributes when deploying to this target. + + + +:::list-table + +- - Key + - Type + - Description + +- - `cluster_id` + - String + - The ID of a cluster to use to run the bundle. See [\_](/dev-tools/bundles/settings.md#cluster_id). + +- - `compute_id` + - String + - Deprecated. The ID of the compute to use to run the bundle. + +- - `databricks_cli_version` + - String + - The Databricks CLI version to use for the bundle. See [\_](/dev-tools/bundles/settings.md#databricks_cli_version). + +- - `deployment` + - Map + - The definition of the bundle deployment. For supported attributes see [\_](/dev-tools/bundles/deployment-modes.md). 
See [\_](#targetsnamebundledeployment). + +- - `engine` + - String + - The deployment engine to use. Valid values are `terraform` and `direct`. Takes priority over `DATABRICKS_BUNDLE_ENGINE` environment variable. Default is "terraform". + +- - `git` + - Map + - The Git version control details that are associated with your bundle. For supported attributes see [\_](/dev-tools/bundles/settings.md#git). See [\_](#targetsnamebundlegit). + +- - `name` + - String + - The name of the bundle. + +- - `uuid` + - String + - Reserved. A Universally Unique Identifier (UUID) for the bundle that uniquely identifies the bundle in internal Databricks systems. This is generated when a bundle project is initialized using a Databricks template (using the `databricks bundle init` command). + +::: + + +### targets._name_.bundle.deployment + +**`Type: Map`** + +The definition of the bundle deployment. For supported attributes see [\_](/dev-tools/bundles/deployment-modes.md). + + + +:::list-table + +- - Key + - Type + - Description + +- - `fail_on_active_runs` + - Boolean + - Whether to fail on active runs. If this is set to true a deployment that is running can be interrupted. + +- - `lock` + - Map + - The deployment lock attributes. See [\_](#targetsnamebundledeploymentlock). + +::: + + +### targets._name_.bundle.deployment.lock + +**`Type: Map`** + +The deployment lock attributes. + + + +:::list-table + +- - Key + - Type + - Description + +- - `enabled` + - Boolean + - Whether this lock is enabled. + +- - `force` + - Boolean + - Whether to force this lock if it is enabled. + +::: + + +### targets._name_.bundle.git + +**`Type: Map`** + +The Git version control details that are associated with your bundle. For supported attributes see [\_](/dev-tools/bundles/settings.md#git). + + + +:::list-table + +- - Key + - Type + - Description + +- - `branch` + - String + - The Git branch name. See [\_](/dev-tools/bundles/settings.md#git). 
+ +- - `origin_url` + - String + - The origin URL of the repository. See [\_](/dev-tools/bundles/settings.md#git). + +::: + + +### targets._name_.git + +**`Type: Map`** + +The Git version control settings for the target. + + + +:::list-table + +- - Key + - Type + - Description + +- - `branch` + - String + - The Git branch name. See [\_](/dev-tools/bundles/settings.md#git). + +- - `origin_url` + - String + - The origin URL of the repository. See [\_](/dev-tools/bundles/settings.md#git). + +::: + + +### targets._name_.permissions + +**`Type: Sequence`** + +The permissions for deploying and running the bundle in the target. + + + +:::list-table + +- - Key + - Type + - Description + +- - `group_name` + - String + - The name of the group that has the permission set in level. + +- - `level` + - String + - The allowed permission for user, group, service principal defined for this permission. + +- - `service_principal_name` + - String + - The name of the service principal that has the permission set in level. + +- - `user_name` + - String + - The name of the user that has the permission set in level. + +::: + + +### targets._name_.presets + +**`Type: Map`** + +The deployment presets for the target. + + + +:::list-table + +- - Key + - Type + - Description + +- - `artifacts_dynamic_version` + - Boolean + - Whether to enable dynamic_version on all artifacts. + +- - `jobs_max_concurrent_runs` + - Integer + - The maximum concurrent runs for a job. + +- - `name_prefix` + - String + - The prefix for job runs of the bundle. + +- - `pipelines_development` + - Boolean + - Whether pipeline deployments should be locked in development mode. + +- - `source_linked_deployment` + - Boolean + - Whether to link the deployment to the bundle source. + +- - `tags` + - Map + - The tags for the bundle deployment. + +- - `trigger_pause_status` + - String + - A pause status to apply to all job triggers and schedules. Valid values are PAUSED or UNPAUSED. 
+ +::: + + +### targets._name_.resources + +**`Type: Map`** + +The resource definitions for the target. + + + +:::list-table + +- - Key + - Type + - Description + +- - `alerts` + - Map + - See [\_](#targetsnameresourcesalerts). + +- - `apps` + - Map + - The app resource defines a [Databricks app](/api/workspace/apps/create). For information about Databricks Apps, see [\_](/dev-tools/databricks-apps/index.md). See [\_](#targetsnameresourcesapps). + +- - `catalogs` + - Map + - See [\_](#targetsnameresourcescatalogs). + +- - `clusters` + - Map + - The cluster definitions for the bundle, where each key is the name of a cluster. See [\_](/dev-tools/bundles/resources.md#clusters). + +- - `dashboards` + - Map + - The dashboard definitions for the bundle, where each key is the name of the dashboard. See [\_](/dev-tools/bundles/resources.md#dashboards). See [\_](#targetsnameresourcesdashboards). + +- - `database_catalogs` + - Map + - + +- - `database_instances` + - Map + - See [\_](#targetsnameresourcesdatabase_instances). + +- - `experiments` + - Map + - The experiment definitions for the bundle, where each key is the name of the experiment. See [\_](/dev-tools/bundles/resources.md#experiments). + +- - `external_locations` + - Map + - See [\_](#targetsnameresourcesexternal_locations). + +- - `jobs` + - Map + - The job definitions for the bundle, where each key is the name of the job. See [\_](/dev-tools/bundles/resources.md#jobs). + +- - `model_serving_endpoints` + - Map + - The model serving endpoint definitions for the bundle, where each key is the name of the model serving endpoint. See [\_](/dev-tools/bundles/resources.md#model_serving_endpoints). + +- - `models` + - Map + - The model definitions for the bundle, where each key is the name of the model. See [\_](/dev-tools/bundles/resources.md#models). + +- - `pipelines` + - Map + - The pipeline definitions for the bundle, where each key is the name of the pipeline. See [\_](/dev-tools/bundles/resources.md#pipelines). 
+ +- - `postgres_branches` + - Map + - See [\_](#targetsnameresourcespostgres_branches). + +- - `postgres_endpoints` + - Map + - See [\_](#targetsnameresourcespostgres_endpoints). + +- - `postgres_projects` + - Map + - See [\_](#targetsnameresourcespostgres_projects). + +- - `quality_monitors` + - Map + - The quality monitor definitions for the bundle, where each key is the name of the quality monitor. See [\_](/dev-tools/bundles/resources.md#quality_monitors). + +- - `registered_models` + - Map + - The registered model definitions for the bundle, where each key is the name of the Unity Catalog registered model. See [\_](/dev-tools/bundles/resources.md#registered_models) + +- - `schemas` + - Map + - The schema definitions for the bundle, where each key is the name of the schema. See [\_](/dev-tools/bundles/resources.md#schemas). + +- - `secret_scopes` + - Map + - The secret scope definitions for the bundle, where each key is the name of the secret scope. See [\_](/dev-tools/bundles/resources.md#secret_scopes). See [\_](#targetsnameresourcessecret_scopes). + +- - `sql_warehouses` + - Map + - The SQL warehouse definitions for the bundle, where each key is the name of the warehouse. See [\_](/dev-tools/bundles/resources.md#sql_warehouses). + +- - `synced_database_tables` + - Map + - See [\_](#targetsnameresourcessynced_database_tables). + +- - `volumes` + - Map + - The volume definitions for the bundle, where each key is the name of the volume. See [\_](/dev-tools/bundles/resources.md#volumes). + +::: + + +### targets._name_.resources.alerts + +**`Type: Map`** + + + +```yaml +alerts: + : + : +``` + + +:::list-table + +- - Key + - Type + - Description + +- - `create_time` + - String + - + +- - `custom_description` + - String + - + +- - `custom_summary` + - String + - + +- - `display_name` + - String + - + +- - `effective_run_as` + - Map + - See [\_](#targetsnameresourcesalertsnameeffective_run_as). 
+ +- - `evaluation` + - Map + - See [\_](#targetsnameresourcesalertsnameevaluation). + +- - `file_path` + - String + - + +- - `id` + - String + - + +- - `lifecycle` + - Map + - See [\_](#targetsnameresourcesalertsnamelifecycle). + +- - `lifecycle_state` + - String + - + +- - `owner_user_name` + - String + - + +- - `parent_path` + - String + - + +- - `permissions` + - Sequence + - See [\_](#targetsnameresourcesalertsnamepermissions). + +- - `query_text` + - String + - + +- - `run_as` + - Map + - See [\_](#targetsnameresourcesalertsnamerun_as). + +- - `run_as_user_name` + - String + - + +- - `schedule` + - Map + - See [\_](#targetsnameresourcesalertsnameschedule). + +- - `update_time` + - String + - + +- - `warehouse_id` + - String + - + +::: + + +### targets._name_.resources.alerts._name_.lifecycle + +**`Type: Map`** + + + + + +:::list-table + +- - Key + - Type + - Description + +- - `prevent_destroy` + - Boolean + - Lifecycle setting to prevent the resource from being destroyed. + +::: + + +### targets._name_.resources.alerts._name_.permissions + +**`Type: Sequence`** + + + + + +:::list-table + +- - Key + - Type + - Description + +- - `group_name` + - String + - The name of the group that has the permission set in level. + +- - `level` + - String + - The allowed permission for user, group, service principal defined for this permission. + +- - `service_principal_name` + - String + - The name of the service principal that has the permission set in level. + +- - `user_name` + - String + - The name of the user that has the permission set in level. + +::: + + +### targets._name_.resources.apps + +**`Type: Map`** + +The app resource defines a [Databricks app](/api/workspace/apps/create). For information about Databricks Apps, see [\_](/dev-tools/databricks-apps/index.md). + +```yaml +apps: + : + : +``` + + +:::list-table + +- - Key + - Type + - Description + +- - `active_deployment` + - Map + - See [\_](#targetsnameresourcesappsnameactive_deployment). 
+ +- - `app_status` + - Map + - See [\_](#targetsnameresourcesappsnameapp_status). + +- - `budget_policy_id` + - String + - + +- - `compute_size` + - String + - + +- - `compute_status` + - Map + - See [\_](#targetsnameresourcesappsnamecompute_status). + +- - `config` + - Map + - See [\_](#targetsnameresourcesappsnameconfig). + +- - `create_time` + - String + - + +- - `creator` + - String + - + +- - `default_source_code_path` + - String + - + +- - `description` + - String + - + +- - `effective_budget_policy_id` + - String + - + +- - `effective_usage_policy_id` + - String + - + +- - `effective_user_api_scopes` + - Sequence + - + +- - `git_repository` + - Map + - See [\_](#targetsnameresourcesappsnamegit_repository). + +- - `git_source` + - Map + - Git source configuration for app deployments. Specifies which git reference (branch, tag, or commit) to use when deploying the app. Used in conjunction with git_repository to deploy code directly from git. The source_code_path within git_source specifies the relative path to the app code within the repository. See [\_](#targetsnameresourcesappsnamegit_source). + +- - `id` + - String + - + +- - `lifecycle` + - Map + - See [\_](#targetsnameresourcesappsnamelifecycle). + +- - `name` + - String + - + +- - `oauth2_app_client_id` + - String + - + +- - `oauth2_app_integration_id` + - String + - + +- - `pending_deployment` + - Map + - See [\_](#targetsnameresourcesappsnamepending_deployment). + +- - `permissions` + - Sequence + - See [\_](#targetsnameresourcesappsnamepermissions). + +- - `resources` + - Sequence + - See [\_](#targetsnameresourcesappsnameresources). + +- - `service_principal_client_id` + - String + - + +- - `service_principal_id` + - Integer + - + +- - `service_principal_name` + - String + - + +- - `source_code_path` + - String + - + +- - `space` + - String + - + +- - `telemetry_export_destinations` + - Sequence + - See [\_](#targetsnameresourcesappsnametelemetry_export_destinations). 
+ +- - `update_time` + - String + - + +- - `updater` + - String + - + +- - `url` + - String + - + +- - `usage_policy_id` + - String + - + +- - `user_api_scopes` + - Sequence + - + +::: + + +### targets._name_.resources.apps._name_.config + +**`Type: Map`** + + + + + +:::list-table + +- - Key + - Type + - Description + +- - `command` + - Sequence + - + +- - `env` + - Sequence + - See [\_](#targetsnameresourcesappsnameconfigenv). + +::: + + +### targets._name_.resources.apps._name_.config.env + +**`Type: Sequence`** + + + + + +:::list-table + +- - Key + - Type + - Description + +- - `name` + - String + - + +- - `value` + - String + - + +- - `value_from` + - String + - + +::: + + +### targets._name_.resources.apps._name_.lifecycle + +**`Type: Map`** + + + + + +:::list-table + +- - Key + - Type + - Description + +- - `prevent_destroy` + - Boolean + - Lifecycle setting to prevent the resource from being destroyed. + +- - `started` + - Boolean + - Lifecycle setting to deploy the resource in started mode. Only supported for apps, clusters, and sql_warehouses in direct deployment mode. + +::: + + +### targets._name_.resources.apps._name_.permissions + +**`Type: Sequence`** + + + + + +:::list-table + +- - Key + - Type + - Description + +- - `group_name` + - String + - + +- - `level` + - String + - + +- - `service_principal_name` + - String + - + +- - `user_name` + - String + - + +::: + + +### targets._name_.resources.catalogs + +**`Type: Map`** + + + +```yaml +catalogs: + : + : +``` + + +:::list-table + +- - Key + - Type + - Description + +- - `comment` + - String + - + +- - `connection_name` + - String + - + +- - `grants` + - Sequence + - See [\_](#targetsnameresourcescatalogsnamegrants). + +- - `lifecycle` + - Map + - See [\_](#targetsnameresourcescatalogsnamelifecycle). 
+ +- - `name` + - String + - + +- - `options` + - Map + - + +- - `properties` + - Map + - + +- - `provider_name` + - String + - + +- - `share_name` + - String + - + +- - `storage_root` + - String + - + +::: + + +### targets._name_.resources.catalogs._name_.lifecycle + +**`Type: Map`** + + + + + +:::list-table + +- - Key + - Type + - Description + +- - `prevent_destroy` + - Boolean + - Lifecycle setting to prevent the resource from being destroyed. + +::: + + +### targets._name_.resources.dashboards + +**`Type: Map`** + +The dashboard definitions for the bundle, where each key is the name of the dashboard. See [\_](/dev-tools/bundles/resources.md#dashboards). + +```yaml +dashboards: + : + : +``` + + +:::list-table + +- - Key + - Type + - Description + +- - `create_time` + - String + - + +- - `dashboard_id` + - String + - + +- - `dataset_catalog` + - String + - Sets the default catalog for all datasets in this dashboard. When set, this overrides the catalog specified in individual dataset definitions. + +- - `dataset_schema` + - String + - Sets the default schema for all datasets in this dashboard. When set, this overrides the schema specified in individual dataset definitions. + +- - `display_name` + - String + - + +- - `embed_credentials` + - Boolean + - + +- - `etag` + - String + - + +- - `file_path` + - String + - + +- - `lifecycle` + - Map + - See [\_](#targetsnameresourcesdashboardsnamelifecycle). + +- - `lifecycle_state` + - String + - + +- - `parent_path` + - String + - + +- - `path` + - String + - + +- - `permissions` + - Sequence + - See [\_](#targetsnameresourcesdashboardsnamepermissions). + +- - `serialized_dashboard` + - Any + - + +- - `update_time` + - String + - + +- - `warehouse_id` + - String + - + +::: + + +### targets._name_.resources.dashboards._name_.lifecycle + +**`Type: Map`** + + + + + +:::list-table + +- - Key + - Type + - Description + +- - `prevent_destroy` + - Boolean + - Lifecycle setting to prevent the resource from being destroyed. 
+ +::: + + +### targets._name_.resources.dashboards._name_.permissions + +**`Type: Sequence`** + + + + + +:::list-table + +- - Key + - Type + - Description + +- - `group_name` + - String + - The name of the group that has the permission set in level. + +- - `level` + - String + - The allowed permission for user, group, service principal defined for this permission. + +- - `service_principal_name` + - String + - The name of the service principal that has the permission set in level. + +- - `user_name` + - String + - The name of the user that has the permission set in level. + +::: + + +### targets._name_.resources.database_instances + +**`Type: Map`** + + + +```yaml +database_instances: + : + : +``` + + +:::list-table + +- - Key + - Type + - Description + +- - `capacity` + - String + - + +- - `child_instance_refs` + - Sequence + - See [\_](#targetsnameresourcesdatabase_instancesnamechild_instance_refs). + +- - `creation_time` + - String + - + +- - `creator` + - String + - + +- - `custom_tags` + - Sequence + - See [\_](#targetsnameresourcesdatabase_instancesnamecustom_tags). + +- - `effective_capacity` + - String + - + +- - `effective_custom_tags` + - Sequence + - See [\_](#targetsnameresourcesdatabase_instancesnameeffective_custom_tags). + +- - `effective_enable_pg_native_login` + - Boolean + - + +- - `effective_enable_readable_secondaries` + - Boolean + - + +- - `effective_node_count` + - Integer + - + +- - `effective_retention_window_in_days` + - Integer + - + +- - `effective_stopped` + - Boolean + - + +- - `effective_usage_policy_id` + - String + - + +- - `enable_pg_native_login` + - Boolean + - + +- - `enable_readable_secondaries` + - Boolean + - + +- - `lifecycle` + - Map + - See [\_](#targetsnameresourcesdatabase_instancesnamelifecycle). + +- - `name` + - String + - + +- - `node_count` + - Integer + - + +- - `parent_instance_ref` + - Map + - See [\_](#targetsnameresourcesdatabase_instancesnameparent_instance_ref). 
+ +- - `permissions` + - Sequence + - See [\_](#targetsnameresourcesdatabase_instancesnamepermissions). + +- - `pg_version` + - String + - + +- - `read_only_dns` + - String + - + +- - `read_write_dns` + - String + - + +- - `retention_window_in_days` + - Integer + - + +- - `state` + - String + - + +- - `stopped` + - Boolean + - + +- - `uid` + - String + - + +- - `usage_policy_id` + - String + - + +::: + + +### targets._name_.resources.database_instances._name_.lifecycle + +**`Type: Map`** + + + + + +:::list-table + +- - Key + - Type + - Description + +- - `prevent_destroy` + - Boolean + - Lifecycle setting to prevent the resource from being destroyed. + +::: + + +### targets._name_.resources.database_instances._name_.permissions + +**`Type: Sequence`** + + + + + +:::list-table + +- - Key + - Type + - Description + +- - `group_name` + - String + - The name of the group that has the permission set in level. + +- - `level` + - String + - The allowed permission for user, group, service principal defined for this permission. + +- - `service_principal_name` + - String + - The name of the service principal that has the permission set in level. + +- - `user_name` + - String + - The name of the user that has the permission set in level. + +::: + + +### targets._name_.resources.external_locations + +**`Type: Map`** + + + +```yaml +external_locations: + : + : +``` + + +:::list-table + +- - Key + - Type + - Description + +- - `comment` + - String + - + +- - `credential_name` + - String + - + +- - `effective_enable_file_events` + - Boolean + - + +- - `enable_file_events` + - Boolean + - + +- - `encryption_details` + - Map + - See [\_](#targetsnameresourcesexternal_locationsnameencryption_details). + +- - `fallback` + - Boolean + - + +- - `file_event_queue` + - Map + - See [\_](#targetsnameresourcesexternal_locationsnamefile_event_queue). + +- - `grants` + - Sequence + - See [\_](#targetsnameresourcesexternal_locationsnamegrants). 
+ +- - `lifecycle` + - Map + - See [\_](#targetsnameresourcesexternal_locationsnamelifecycle). + +- - `name` + - String + - + +- - `read_only` + - Boolean + - + +- - `skip_validation` + - Boolean + - -- - `uuid` +- - `url` - String - - Reserved. A Universally Unique Identifier (UUID) for the bundle that uniquely identifies the bundle in internal Databricks systems. This is generated when a bundle project is initialized using a Databricks template (using the `databricks bundle init` command). + - ::: -### targets._name_.bundle.deployment +### targets._name_.resources.external_locations._name_.lifecycle **`Type: Map`** -The definition of the bundle deployment. For supported attributes see [\_](/dev-tools/bundles/deployment-modes.md). + @@ -936,23 +3409,24 @@ The definition of the bundle deployment. For supported attributes see [\_](/dev- - Type - Description -- - `fail_on_active_runs` +- - `prevent_destroy` - Boolean - - Whether to fail on active runs. If this is set to true a deployment that is running can be interrupted. - -- - `lock` - - Map - - The deployment lock attributes. See [\_](#targetsnamebundledeploymentlock). + - Lifecycle setting to prevent the resource from being destroyed. ::: -### targets._name_.bundle.deployment.lock +### targets._name_.resources.postgres_branches **`Type: Map`** -The deployment lock attributes. + +```yaml +postgres_branches: + : + : +``` :::list-table @@ -961,22 +3435,54 @@ The deployment lock attributes. - Type - Description -- - `enabled` +- - `branch_id` + - String + - + +- - `expire_time` + - Map + - + +- - `is_protected` - Boolean - - Whether this lock is enabled. + - -- - `force` +- - `lifecycle` + - Map + - See [\_](#targetsnameresourcespostgres_branchesnamelifecycle). + +- - `no_expiry` - Boolean - - Whether to force this lock if it is enabled. 
+ - + +- - `parent` + - String + - + +- - `source_branch` + - String + - + +- - `source_branch_lsn` + - String + - + +- - `source_branch_time` + - Map + - + +- - `ttl` + - String + - ::: -### targets._name_.bundle.git +### targets._name_.resources.postgres_branches._name_.lifecycle **`Type: Map`** -The Git version control details that are associated with your bundle. For supported attributes see [\_](/dev-tools/bundles/settings.md#git). + @@ -986,23 +3492,24 @@ The Git version control details that are associated with your bundle. For suppor - Type - Description -- - `branch` - - String - - The Git branch name. See [\_](/dev-tools/bundles/settings.md#git). - -- - `origin_url` - - String - - The origin URL of the repository. See [\_](/dev-tools/bundles/settings.md#git). +- - `prevent_destroy` + - Boolean + - Lifecycle setting to prevent the resource from being destroyed. ::: -### targets._name_.git +### targets._name_.resources.postgres_endpoints **`Type: Map`** -The Git version control settings for the target. + +```yaml +postgres_endpoints: + : + : +``` :::list-table @@ -1011,22 +3518,58 @@ The Git version control settings for the target. - Type - Description -- - `branch` +- - `autoscaling_limit_max_cu` + - Any + - + +- - `autoscaling_limit_min_cu` + - Any + - + +- - `disabled` + - Boolean + - + +- - `endpoint_id` - String - - The Git branch name. See [\_](/dev-tools/bundles/settings.md#git). + - -- - `origin_url` +- - `endpoint_type` - String - - The origin URL of the repository. See [\_](/dev-tools/bundles/settings.md#git). + - + +- - `group` + - Map + - See [\_](#targetsnameresourcespostgres_endpointsnamegroup). + +- - `lifecycle` + - Map + - See [\_](#targetsnameresourcespostgres_endpointsnamelifecycle). + +- - `no_suspension` + - Boolean + - + +- - `parent` + - String + - + +- - `settings` + - Map + - See [\_](#targetsnameresourcespostgres_endpointsnamesettings). 
+ +- - `suspend_timeout_duration` + - String + - ::: -### targets._name_.permissions +### targets._name_.resources.postgres_endpoints._name_.lifecycle -**`Type: Sequence`** +**`Type: Map`** -The permissions for deploying and running the bundle in the target. + @@ -1036,31 +3579,24 @@ The permissions for deploying and running the bundle in the target. - Type - Description -- - `group_name` - - String - - The name of the group that has the permission set in level. - -- - `level` - - String - - The allowed permission for user, group, service principal defined for this permission. - -- - `service_principal_name` - - String - - The name of the service principal that has the permission set in level. - -- - `user_name` - - String - - The name of the user that has the permission set in level. +- - `prevent_destroy` + - Boolean + - Lifecycle setting to prevent the resource from being destroyed. ::: -### targets._name_.presets +### targets._name_.resources.postgres_projects **`Type: Map`** -The deployment presets for the target. + +```yaml +postgres_projects: + : + : +``` :::list-table @@ -1069,42 +3605,54 @@ The deployment presets for the target. - Type - Description -- - `artifacts_dynamic_version` - - Boolean - - Whether to enable dynamic_version on all artifacts. +- - `budget_policy_id` + - String + - -- - `jobs_max_concurrent_runs` - - Integer - - The maximum concurrent runs for a job. +- - `custom_tags` + - Sequence + - See [\_](#targetsnameresourcespostgres_projectsnamecustom_tags). -- - `name_prefix` +- - `default_endpoint_settings` + - Map + - See [\_](#targetsnameresourcespostgres_projectsnamedefault_endpoint_settings). + +- - `display_name` - String - - The prefix for job runs of the bundle. + - -- - `pipelines_development` +- - `enable_pg_native_login` - Boolean - - Whether pipeline deployments should be locked in development mode. + - -- - `source_linked_deployment` - - Boolean - - Whether to link the deployment to the bundle source. 
+- - `history_retention_duration` + - String + - -- - `tags` +- - `lifecycle` - Map - - The tags for the bundle deployment. + - See [\_](#targetsnameresourcespostgres_projectsnamelifecycle). -- - `trigger_pause_status` +- - `permissions` + - Sequence + - See [\_](#targetsnameresourcespostgres_projectsnamepermissions). + +- - `pg_version` + - Integer + - + +- - `project_id` - String - - A pause status to apply to all job triggers and schedules. Valid values are PAUSED or UNPAUSED. + - ::: -### targets._name_.resources +### targets._name_.resources.postgres_projects._name_.lifecycle **`Type: Map`** -The resource definitions for the target. + @@ -1114,73 +3662,42 @@ The resource definitions for the target. - Type - Description -- - `apps` - - Map - - The app resource defines a [Databricks app](/api/workspace/apps/create). For information about Databricks Apps, see [\_](/dev-tools/databricks-apps/index.md). - -- - `clusters` - - Map - - The cluster definitions for the bundle, where each key is the name of a cluster. See [\_](/dev-tools/bundles/resources.md#clusters). - -- - `dashboards` - - Map - - The dashboard definitions for the bundle, where each key is the name of the dashboard. See [\_](/dev-tools/bundles/resources.md#dashboards). - -- - `database_catalogs` - - Map - - +- - `prevent_destroy` + - Boolean + - Lifecycle setting to prevent the resource from being destroyed. -- - `database_instances` - - Map - - +::: -- - `experiments` - - Map - - The experiment definitions for the bundle, where each key is the name of the experiment. See [\_](/dev-tools/bundles/resources.md#experiments). -- - `jobs` - - Map - - The job definitions for the bundle, where each key is the name of the job. See [\_](/dev-tools/bundles/resources.md#jobs). +### targets._name_.resources.postgres_projects._name_.permissions -- - `model_serving_endpoints` - - Map - - The model serving endpoint definitions for the bundle, where each key is the name of the model serving endpoint. 
See [\_](/dev-tools/bundles/resources.md#model_serving_endpoints). +**`Type: Sequence`** -- - `models` - - Map - - The model definitions for the bundle, where each key is the name of the model. See [\_](/dev-tools/bundles/resources.md#models). + -- - `pipelines` - - Map - - The pipeline definitions for the bundle, where each key is the name of the pipeline. See [\_](/dev-tools/bundles/resources.md#pipelines). -- - `quality_monitors` - - Map - - The quality monitor definitions for the bundle, where each key is the name of the quality monitor. See [\_](/dev-tools/bundles/resources.md#quality_monitors). -- - `registered_models` - - Map - - The registered model definitions for the bundle, where each key is the name of the Unity Catalog registered model. See [\_](/dev-tools/bundles/resources.md#registered_models) +:::list-table -- - `schemas` - - Map - - The schema definitions for the bundle, where each key is the name of the schema. See [\_](/dev-tools/bundles/resources.md#schemas). +- - Key + - Type + - Description -- - `secret_scopes` - - Map - - The secret scope definitions for the bundle, where each key is the name of the secret scope. See [\_](/dev-tools/bundles/resources.md#secret_scopes). See [\_](#targetsnameresourcessecret_scopes). +- - `group_name` + - String + - The name of the group that has the permission set in level. -- - `sql_warehouses` - - Map - - The SQL warehouse definitions for the bundle, where each key is the name of the warehouse. See [\_](/dev-tools/bundles/resources.md#sql_warehouses). +- - `level` + - String + - The allowed permission for user, group, service principal defined for this permission. -- - `synced_database_tables` - - Map - - See [\_](#targetsnameresourcessynced_database_tables). +- - `service_principal_name` + - String + - The name of the service principal that has the permission set in level. -- - `volumes` - - Map - - The volume definitions for the bundle, where each key is the name of the volume. 
See [\_](/dev-tools/bundles/resources.md#volumes). +- - `user_name` + - String + - The name of the user that has the permission set in level. ::: @@ -1318,7 +3835,7 @@ synced_database_tables: - - `lifecycle` - Map - - Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. See [\_](#targetsnameresourcessynced_database_tablesnamelifecycle). + - See [\_](#targetsnameresourcessynced_database_tablesnamelifecycle). - - `logical_database_name` - String @@ -1343,7 +3860,7 @@ synced_database_tables: **`Type: Map`** -Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. + @@ -1374,6 +3891,10 @@ The identity to use to run the bundle, see [\_](/dev-tools/bundles/run-as.md). - Type - Description +- - `group_name` + - String + - + - - `service_principal_name` - String - The application ID of an active service principal. Setting this field requires the `servicePrincipal/user` role. @@ -1531,9 +4052,13 @@ The Databricks workspace for the target. - Type - Description +- - `account_id` + - String + - The Databricks account ID. + - - `artifact_path` - String - - The artifact path to use within the workspace for both deployments and workflow runs + - The artifact path to use within the workspace for both deployments and job runs - - `auth_type` - String @@ -1567,9 +4092,13 @@ The Databricks workspace for the target. - String - The client ID for the workspace +- - `experimental_is_unified_host` + - Boolean + - Experimental feature flag to indicate if the host is a unified host + - - `file_path` - String - - The file path to use within the workspace for both deployments and workflow runs + - The file path to use within the workspace for both deployments and job runs - - `google_service_account` - String @@ -1595,6 +4124,10 @@ The Databricks workspace for the target. 
- String - The workspace state path +- - `workspace_id` + - String + - The Databricks workspace ID + ::: @@ -1715,9 +4248,13 @@ Defines the Databricks workspace for the bundle. See [\_](/dev-tools/bundles/set - Type - Description +- - `account_id` + - String + - The Databricks account ID. + - - `artifact_path` - String - - The artifact path to use within the workspace for both deployments and workflow runs + - The artifact path to use within the workspace for both deployments and job runs - - `auth_type` - String @@ -1751,9 +4288,13 @@ Defines the Databricks workspace for the bundle. See [\_](/dev-tools/bundles/set - String - The client ID for the workspace +- - `experimental_is_unified_host` + - Boolean + - Experimental feature flag to indicate if the host is a unified host + - - `file_path` - String - - The file path to use within the workspace for both deployments and workflow runs + - The file path to use within the workspace for both deployments and job runs - - `google_service_account` - String @@ -1779,5 +4320,9 @@ Defines the Databricks workspace for the bundle. See [\_](/dev-tools/bundles/set - String - The workspace state path +- - `workspace_id` + - String + - The Databricks workspace ID + ::: \ No newline at end of file diff --git a/bundle/docsgen/output/resources.md b/bundle/docsgen/output/resources.md index 2075ceae55..464d3bc92b 100644 --- a/bundle/docsgen/output/resources.md +++ b/bundle/docsgen/output/resources.md @@ -1,7 +1,7 @@ --- description: 'Learn about resources supported by Declarative Automation Bundles and how to configure them.' last_update: - date: 2025-09-13 + date: 2026-04-16 --- @@ -124,16 +124,16 @@ The `databricks bundle validate` command returns warnings if unknown resource pr :::: -## apps +## alerts **`Type: Map`** -The app resource defines a [Databricks app](/api/workspace/apps/create). For information about Databricks Apps, see [\_](/dev-tools/databricks-apps/index.md). 
+ ```yaml -apps: - : - : +alerts: + : + : ``` @@ -143,50 +143,66 @@ apps: - Type - Description -- - `budget_policy_id` +- - `custom_description` - String - -- - `config` - - Map +- - `custom_summary` + - String - -- - `description` +- - `display_name` - String - - The description of the app. + - + +- - `evaluation` + - Map + - See [\_](#alertsnameevaluation). + +- - `file_path` + - String + - - - `lifecycle` - Map - - Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. See [\_](#appsnamelifecycle). + - See [\_](#alertsnamelifecycle). -- - `name` +- - `parent_path` - String - - The name of the app. The name must contain only lowercase alphanumeric characters and hyphens. It must be unique within the workspace. + - - - `permissions` - Sequence - - See [\_](#appsnamepermissions). - -- - `resources` - - Sequence - - Resources for the app. See [\_](#appsnameresources). + - See [\_](#alertsnamepermissions). -- - `source_code_path` +- - `query_text` - String - -- - `user_api_scopes` - - Sequence +- - `run_as` + - Map + - See [\_](#alertsnamerun_as). + +- - `run_as_user_name` + - String + - This field is deprecated + +- - `schedule` + - Map + - See [\_](#alertsnameschedule). + +- - `warehouse_id` + - String - ::: -### apps._name_.lifecycle +### alerts._name_.evaluation **`Type: Map`** -Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. + @@ -196,14 +212,59 @@ Lifecycle is a struct that contains the lifecycle settings for a resource. It co - Type - Description -- - `prevent_destroy` +- - `comparison_operator` + - String + - Operator used for comparison in alert evaluation. + +- - `empty_result_state` + - String + - Alert state if result is empty. Please avoid setting this field to be `UNKNOWN` because `UNKNOWN` state is planned to be deprecated. 
+ +- - `notification` + - Map + - User or Notification Destination to notify when alert is triggered. See [\_](#alertsnameevaluationnotification). + +- - `source` + - Map + - Source column from result to use to evaluate alert. See [\_](#alertsnameevaluationsource). + +- - `threshold` + - Map + - Threshold to use for alert evaluation, can be a column or a value. See [\_](#alertsnameevaluationthreshold). + +::: + + +### alerts._name_.evaluation.notification + +**`Type: Map`** + +User or Notification Destination to notify when alert is triggered. + + + +:::list-table + +- - Key + - Type + - Description + +- - `notify_on_ok` - Boolean - - Lifecycle setting to prevent the resource from being destroyed. + - Whether to notify alert subscribers when alert returns back to normal. + +- - `retrigger_seconds` + - Integer + - Number of seconds an alert waits after being triggered before it is allowed to send another notification. If set to 0 or omitted, the alert will not send any further notifications after the first trigger. Setting this value to 1 allows the alert to send a notification on every evaluation where the condition is met, effectively making it always retrigger for notification purposes. + +- - `subscriptions` + - Sequence + - See [\_](#alertsnameevaluationnotificationsubscriptions). ::: -### apps._name_.permissions +### alerts._name_.evaluation.notification.subscriptions **`Type: Sequence`** @@ -217,30 +278,22 @@ Lifecycle is a struct that contains the lifecycle settings for a resource. It co - Type - Description -- - `group_name` - - String - - - -- - `level` - - String - - - -- - `service_principal_name` +- - `destination_id` - String - -- - `user_name` +- - `user_email` - String - ::: -### apps._name_.resources +### alerts._name_.evaluation.source -**`Type: Sequence`** +**`Type: Map`** -Resources for the app. +Source column from result to use to evaluate alert @@ -250,42 +303,47 @@ Resources for the app.
- Type - Description -- - `database` - Map - See [\_](#appsnameresourcesdatabase). -- - `description` +- - `aggregation` - String - - Description of the App Resource. + - -- - `job` - - Map - - See [\_](#appsnameresourcesjob). +- - `display` + - String + - - - `name` - String - - Name of the App Resource. + - -- - `secret` - - Map - - See [\_](#appsnameresourcessecret). +::: -- - `serving_endpoint` - - Map - - See [\_](#appsnameresourcesserving_endpoint). -- - `sql_warehouse` + +### alerts._name_.evaluation.threshold + +**`Type: Map`** + +Threshold to use for alert evaluation, can be a column or a value. + + + +:::list-table + +- - Key + - Type + - Description + +- - `column` - Map - - See [\_](#appsnameresourcessql_warehouse). + - See [\_](#alertsnameevaluationthresholdcolumn). -- - `uc_securable` +- - `value` - Map - - See [\_](#appsnameresourcesuc_securable). + - See [\_](#alertsnameevaluationthresholdvalue). ::: -### apps._name_.resources.database +### alerts._name_.evaluation.threshold.column **`Type: Map`** @@ -299,22 +357,22 @@ Resources for the app.
::: -### apps._name_.resources.serving_endpoint +### alerts._name_.permissions -**`Type: Map`** +**`Type: Sequence`** @@ -382,18 +436,26 @@ Resources for the app. - Type - Description -- - `name` +- - `group_name` - String - - + - The name of the group that has the permission set in level. -- - `permission` +- - `level` - String - - + - The allowed permission for user, group, service principal defined for this permission. + +- - `service_principal_name` + - String + - The name of the service principal that has the permission set in level. + +- - `user_name` + - String + - The name of the user that has the permission set in level. ::: -### apps._name_.resources.sql_warehouse +### alerts._name_.run_as **`Type: Map`** @@ -407,18 +469,18 @@ Resources for the app. - Type - Description -- - `id` +- - `service_principal_name` - String - - + - Application ID of an active service principal. Setting this field requires the `servicePrincipal/user` role. -- - `permission` +- - `user_name` - String - - + - The email of an active workspace user. Can only set this field to their own email. ::: -### apps._name_.resources.uc_securable +### alerts._name_.schedule **`Type: Map`** @@ -432,31 +494,31 @@ Resources for the app. - Type - Description -- - `permission` +- - `pause_status` - String - - + - Indicate whether this schedule is paused or not. -- - `securable_full_name` +- - `quartz_cron_schedule` - String - - + - A cron expression using quartz syntax that specifies the schedule for this pipeline. Should use the quartz format described here: http://www.quartz-scheduler.org/documentation/quartz-2.1.7/tutorials/tutorial-lesson-06.html -- - `securable_type` +- - `timezone_id` - String - - + - A Java timezone id. The schedule will be resolved using this timezone. This will be combined with the quartz_cron_schedule to determine the schedule. See https://docs.databricks.com/sql/language-manual/sql-ref-syntax-aux-conf-mgmt-set-timezone.html for details. 
::: -## clusters +## apps **`Type: Map`** -The cluster resource defines an [all-purpose cluster](/api/workspace/clusters/create). +The app resource defines a [Databricks app](/api/workspace/apps/create). For information about Databricks Apps, see [\_](/dev-tools/databricks-apps/index.md). ```yaml -clusters: - : - : +apps: + : + : ``` @@ -466,179 +528,91 @@ clusters: - Type - Description -- - `apply_policy_default_values` - - Boolean - - When set to true, fixed and default values from the policy will be used for fields that are omitted. When set to false, only fixed values from the policy will be applied. - -- - `autoscale` - - Map - - Parameters needed in order to automatically scale clusters up and down based on load. Note: autoscaling works best with DB runtime versions 3.0 or later. See [\_](#clustersnameautoscale). +- - `budget_policy_id` + - String + - -- - `autotermination_minutes` - - Integer - - Automatically terminates the cluster after it is inactive for this time in minutes. If not set, this cluster will not be automatically terminated. If specified, the threshold must be between 10 and 10000 minutes. Users can also set this value to 0 to explicitly disable automatic termination. +- - `compute_size` + - String + - -- - `aws_attributes` +- - `config` - Map - - Attributes related to clusters running on Amazon Web Services. If not specified at cluster creation, a set of default values will be used. See [\_](#clustersnameaws_attributes). + - See [\_](#appsnameconfig). -- - `azure_attributes` +- - `description` + - String + - The description of the app. + +- - `git_source` - Map - - Attributes related to clusters running on Microsoft Azure. If not specified at cluster creation, a set of default values will be used. See [\_](#clustersnameazure_attributes). + - Git source configuration for app deployments. Specifies which git reference (branch, tag, or commit) to use when deploying the app. 
Used in conjunction with git_repository to deploy code directly from git. The source_code_path within git_source specifies the relative path to the app code within the repository. See [\_](#appsnamegit_source). -- - `cluster_log_conf` +- - `lifecycle` - Map - - The configuration for delivering spark logs to a long-term storage destination. Three kinds of destinations (DBFS, S3 and Unity Catalog volumes) are supported. Only one destination can be specified for one cluster. If the conf is given, the logs will be delivered to the destination every `5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while the destination of executor logs is `$destination/$clusterId/executor`. See [\_](#clustersnamecluster_log_conf). + - Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. See [\_](#appsnamelifecycle). -- - `cluster_name` +- - `name` - String - - Cluster name requested by the user. This doesn't have to be unique. If not specified at creation, the cluster name will be an empty string. For job clusters, the cluster name is automatically set based on the job and job run IDs. + - The name of the app. The name must contain only lowercase alphanumeric characters and hyphens. It must be unique within the workspace. -- - `custom_tags` - - Map - - Additional tags for cluster resources. Databricks will tag all cluster resources (e.g., AWS instances and EBS volumes) with these tags in addition to `default_tags`. Notes: - Currently, Databricks allows at most 45 custom tags - Clusters can only reuse cloud resources if the resources' tags are a subset of the cluster tags +- - `permissions` + - Sequence + - See [\_](#appsnamepermissions). -- - `data_security_mode` +- - `resources` + - Sequence + - Resources for the app. See [\_](#appsnameresources). 
+ +- - `source_code_path` - String - - Data security mode decides what data governance model to use when accessing data from a cluster. The following modes can only be used when `kind = CLASSIC_PREVIEW`. * `DATA_SECURITY_MODE_AUTO`: Databricks will choose the most appropriate access mode depending on your compute configuration. * `DATA_SECURITY_MODE_STANDARD`: Alias for `USER_ISOLATION`. * `DATA_SECURITY_MODE_DEDICATED`: Alias for `SINGLE_USER`. The following modes can be used regardless of `kind`. * `NONE`: No security isolation for multiple users sharing the cluster. Data governance features are not available in this mode. * `SINGLE_USER`: A secure cluster that can only be exclusively used by a single user specified in `single_user_name`. Most programming languages, cluster features and data governance features are available in this mode. * `USER_ISOLATION`: A secure cluster that can be shared by multiple users. Cluster users are fully isolated so that they cannot see each other's data and credentials. Most data governance features are supported in this mode. But programming languages and cluster features might be limited. The following modes are deprecated starting with Databricks Runtime 15.0 and will be removed for future Databricks Runtime versions: * `LEGACY_TABLE_ACL`: This mode is for users migrating from legacy Table ACL clusters. * `LEGACY_PASSTHROUGH`: This mode is for users migrating from legacy Passthrough on high concurrency clusters. * `LEGACY_SINGLE_USER`: This mode is for users migrating from legacy Passthrough on standard clusters. * `LEGACY_SINGLE_USER_STANDARD`: This mode provides a way that doesn’t have UC nor passthrough enabled. + - -- - `docker_image` - - Map - - See [\_](#clustersnamedocker_image). +- - `telemetry_export_destinations` + - Sequence + - See [\_](#appsnametelemetry_export_destinations). -- - `driver_instance_pool_id` - - String - - The optional ID of the instance pool for the driver of the cluster belongs. 
The pool cluster uses the instance pool with id (instance_pool_id) if the driver pool is not assigned. - -- - `driver_node_type_id` +- - `usage_policy_id` - String - - The node type of the Spark driver. Note that this field is optional; if unset, the driver node type will be set as the same value as `node_type_id` defined above. This field, along with node_type_id, should not be set if virtual_cluster_size is set. If both driver_node_type_id, node_type_id, and virtual_cluster_size are specified, driver_node_type_id and node_type_id take precedence. - -- - `enable_elastic_disk` - - Boolean - - Autoscaling Local Storage: when enabled, this cluster will dynamically acquire additional disk space when its Spark workers are running low on disk space. This feature requires specific AWS permissions to function correctly - refer to the User Guide for more details. - -- - `enable_local_disk_encryption` - - Boolean - - Whether to enable LUKS on cluster VMs' local disks - -- - `gcp_attributes` - - Map - - Attributes related to clusters running on Google Cloud Platform. If not specified at cluster creation, a set of default values will be used. See [\_](#clustersnamegcp_attributes). + - -- - `init_scripts` +- - `user_api_scopes` - Sequence - - The configuration for storing init scripts. Any number of destinations can be specified. The scripts are executed sequentially in the order provided. If `cluster_log_conf` is specified, init script logs are sent to `//init_scripts`. See [\_](#clustersnameinit_scripts). - -- - `instance_pool_id` - - String - - The optional ID of the instance pool to which the cluster belongs. - -- - `is_single_node` - - Boolean - - This field can only be used when `kind = CLASSIC_PREVIEW`. When set to true, Databricks will automatically set single node related `custom_tags`, `spark_conf`, and `num_workers` - -- - `kind` - - String - -- - `lifecycle` - - Map - - Lifecycle is a struct that contains the lifecycle settings for a resource. 
It controls the behavior of the resource when it is deployed or destroyed. See [\_](#clustersnamelifecycle). - -- - `node_type_id` - - String - - This field encodes, through a single value, the resources available to each of the Spark nodes in this cluster. For example, the Spark nodes can be provisioned and optimized for memory or compute intensive workloads. A list of available node types can be retrieved by using the :method:clusters/listNodeTypes API call. - -- - `num_workers` - - Integer - - Number of worker nodes that this cluster should have. A cluster has one Spark Driver and `num_workers` Executors for a total of `num_workers` + 1 Spark nodes. Note: When reading the properties of a cluster, this field reflects the desired number of workers rather than the actual current number of workers. For instance, if a cluster is resized from 5 to 10 workers, this field will immediately be updated to reflect the target size of 10 workers, whereas the workers listed in `spark_info` will gradually increase from 5 to 10 as the new nodes are provisioned. +::: -- - `permissions` - - Sequence - - See [\_](#clustersnamepermissions). -- - `policy_id` - - String - - The ID of the cluster policy used to create the cluster if applicable. +### apps._name_.config -- - `remote_disk_throughput` - - Integer - - If set, what the configurable throughput (in Mb/s) for the remote disk is. Currently only supported for GCP HYPERDISK_BALANCED disks. +**`Type: Map`** -- - `runtime_engine` - - String - - + -- - `single_user_name` - - String - - Single user name if data_security_mode is `SINGLE_USER` -- - `spark_conf` - - Map - - An object containing a set of optional, user-specified Spark configuration key-value pairs. Users can also pass in a string of extra JVM options to the driver and the executors via `spark.driver.extraJavaOptions` and `spark.executor.extraJavaOptions` respectively. 
-- - `spark_env_vars` - - Map - - An object containing a set of optional, user-specified environment variable key-value pairs. Please note that key-value pair of the form (X,Y) will be exported as is (i.e., `export X='Y'`) while launching the driver and workers. In order to specify an additional set of `SPARK_DAEMON_JAVA_OPTS`, we recommend appending them to `$SPARK_DAEMON_JAVA_OPTS` as shown in the example below. This ensures that all default databricks managed environmental variables are included as well. Example Spark environment variables: `{"SPARK_WORKER_MEMORY": "28000m", "SPARK_LOCAL_DIRS": "/local_disk0"}` or `{"SPARK_DAEMON_JAVA_OPTS": "$SPARK_DAEMON_JAVA_OPTS -Dspark.shuffle.service.enabled=true"}` +:::list-table -- - `spark_version` - - String - - The Spark version of the cluster, e.g. `3.3.x-scala2.11`. A list of available Spark versions can be retrieved by using the :method:clusters/sparkVersions API call. +- - Key + - Type + - Description -- - `ssh_public_keys` +- - `command` - Sequence - - SSH public key contents that will be added to each Spark node in this cluster. The corresponding private keys can be used to login with the user name `ubuntu` on port `2200`. Up to 10 keys can be specified. - -- - `total_initial_remote_disk_size` - - Integer - - If set, what the total initial volume size (in GB) of the remote disks should be. Currently only supported for GCP HYPERDISK_BALANCED disks. - -- - `use_ml_runtime` - - Boolean - - This field can only be used when `kind = CLASSIC_PREVIEW`. `effective_spark_version` is determined by `spark_version` (DBR release), this field `use_ml_runtime`, and whether `node_type_id` is gpu node or not. + - -- - `workload_type` - - Map - - Cluster Attributes showing for clusters workload types. See [\_](#clustersnameworkload_type). +- - `env` + - Sequence + - See [\_](#appsnameconfigenv). 
::: -**Example** - -The following example creates a cluster named `my_cluster` and sets that as the cluster to use to run the notebook in `my_job`: - -```yaml -bundle: - name: clusters - -resources: - clusters: - my_cluster: - num_workers: 2 - node_type_id: "i3.xlarge" - autoscale: - min_workers: 2 - max_workers: 7 - spark_version: "13.3.x-scala2.12" - spark_conf: - "spark.executor.memory": "2g" - - jobs: - my_job: - tasks: - - task_key: test_task - notebook_task: - notebook_path: "./src/my_notebook.py" -``` - -### clusters._name_.autoscale +### apps._name_.config.env -**`Type: Map`** +**`Type: Sequence`** -Parameters needed in order to automatically scale clusters up and down based on load. -Note: autoscaling works best with DB runtime versions 3.0 or later. + @@ -648,23 +622,28 @@ Note: autoscaling works best with DB runtime versions 3.0 or later. - Type - Description -- - `max_workers` - - Integer - - The maximum number of workers to which the cluster can scale up when overloaded. Note that `max_workers` must be strictly greater than `min_workers`. +- - `name` + - String + - -- - `min_workers` - - Integer - - The minimum number of workers to which the cluster can scale down when underutilized. It is also the initial number of workers the cluster will have after creation. +- - `value` + - String + - + +- - `value_from` + - String + - ::: -### clusters._name_.aws_attributes +### apps._name_.git_source **`Type: Map`** -Attributes related to clusters running on Amazon Web Services. -If not specified at cluster creation, a set of default values will be used. +Git source configuration for app deployments. Specifies which git reference (branch, tag, or commit) +to use when deploying the app. Used in conjunction with git_repository to deploy code directly from git. +The source_code_path within git_source specifies the relative path to the app code within the repository. @@ -674,55 +653,30 @@ If not specified at cluster creation, a set of default values will be used. 
- Type - Description -- - `availability` +- - `branch` - String - - Availability type used for all subsequent nodes past the `first_on_demand` ones. Note: If `first_on_demand` is zero, this availability type will be used for the entire cluster. - -- - `ebs_volume_count` - - Integer - - The number of volumes launched for each instance. Users can choose up to 10 volumes. This feature is only enabled for supported node types. Legacy node types cannot specify custom EBS volumes. For node types with no instance store, at least one EBS volume needs to be specified; otherwise, cluster creation will fail. These EBS volumes will be mounted at `/ebs0`, `/ebs1`, and etc. Instance store volumes will be mounted at `/local_disk0`, `/local_disk1`, and etc. If EBS volumes are attached, Databricks will configure Spark to use only the EBS volumes for scratch storage because heterogenously sized scratch devices can lead to inefficient disk utilization. If no EBS volumes are attached, Databricks will configure Spark to use instance store volumes. Please note that if EBS volumes are specified, then the Spark configuration `spark.local.dir` will be overridden. - -- - `ebs_volume_iops` - - Integer - - If using gp3 volumes, what IOPS to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used. - -- - `ebs_volume_size` - - Integer - - The size of each EBS volume (in GiB) launched for each instance. For general purpose SSD, this value must be within the range 100 - 4096. For throughput optimized HDD, this value must be within the range 500 - 4096. - -- - `ebs_volume_throughput` - - Integer - - If using gp3 volumes, what throughput to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used. + - Git branch to checkout. -- - `ebs_volume_type` +- - `commit` - String - - All EBS volume types that Databricks supports. See https://aws.amazon.com/ebs/details/ for details. 
- -- - `first_on_demand` - - Integer - - The first `first_on_demand` nodes of the cluster will be placed on on-demand instances. If this value is greater than 0, the cluster driver node in particular will be placed on an on-demand instance. If this value is greater than or equal to the current cluster size, all nodes will be placed on on-demand instances. If this value is less than the current cluster size, `first_on_demand` nodes will be placed on on-demand instances and the remainder will be placed on `availability` instances. Note that this value does not affect cluster size and cannot currently be mutated over the lifetime of a cluster. + - Git commit SHA to checkout. -- - `instance_profile_arn` +- - `source_code_path` - String - - Nodes for this cluster will only be placed on AWS instances with this instance profile. If ommitted, nodes will be placed on instances without an IAM instance profile. The instance profile must have previously been added to the Databricks environment by an account administrator. This feature may only be available to certain customer plans. + - Relative path to the app source code within the Git repository. If not specified, the root of the repository is used. -- - `spot_bid_price_percent` - - Integer - - The bid price for AWS spot instances, as a percentage of the corresponding instance type's on-demand price. For example, if this field is set to 50, and the cluster needs a new `r3.xlarge` spot instance, then the bid price is half of the price of on-demand `r3.xlarge` instances. Similarly, if this field is set to 200, the bid price is twice the price of on-demand `r3.xlarge` instances. If not specified, the default value is 100. When spot instances are requested for this cluster, only spot instances whose bid price percentage matches this field will be considered. Note that, for safety, we enforce this field to be no more than 10000. 
- -- - `zone_id` +- - `tag` - String - - Identifier for the availability zone/datacenter in which the cluster resides. This string will be of a form like "us-west-2a". The provided availability zone must be in the same region as the Databricks deployment. For example, "us-west-2a" is not a valid zone id if the Databricks deployment resides in the "us-east-1" region. This is an optional field at cluster creation, and if not specified, a default zone will be used. If the zone specified is "auto", will try to place cluster in a zone with high availability, and will retry placement in a different AZ if there is not enough capacity. The list of available zones as well as the default value can be found by using the `List Zones` method. + - Git tag to checkout. ::: -### clusters._name_.azure_attributes +### apps._name_.lifecycle **`Type: Map`** -Attributes related to clusters running on Microsoft Azure. -If not specified at cluster creation, a set of default values will be used. +Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. @@ -732,30 +686,22 @@ If not specified at cluster creation, a set of default values will be used. - Type - Description -- - `availability` - - String - - Availability type used for all subsequent nodes past the `first_on_demand` ones. Note: If `first_on_demand` is zero, this availability type will be used for the entire cluster. - -- - `first_on_demand` - - Integer - - The first `first_on_demand` nodes of the cluster will be placed on on-demand instances. This value should be greater than 0, to make sure the cluster driver node is placed on an on-demand instance. If this value is greater than or equal to the current cluster size, all nodes will be placed on on-demand instances. 
If this value is less than the current cluster size, `first_on_demand` nodes will be placed on on-demand instances and the remainder will be placed on `availability` instances. Note that this value does not affect cluster size and cannot currently be mutated over the lifetime of a cluster. - -- - `log_analytics_info` - - Map - - Defines values necessary to configure and run Azure Log Analytics agent. See [\_](#clustersnameazure_attributeslog_analytics_info). +- - `prevent_destroy` + - Boolean + - Lifecycle setting to prevent the resource from being destroyed. -- - `spot_bid_max_price` - - Any - - The max bid price to be used for Azure spot instances. The Max price for the bid cannot be higher than the on-demand price of the instance. If not specified, the default value is -1, which specifies that the instance cannot be evicted on the basis of price, and only on the basis of availability. Further, the value should > 0 or -1. +- - `started` + - Boolean + - Lifecycle setting to deploy the resource in started mode. Only supported for apps, clusters, and sql_warehouses in direct deployment mode. ::: -### clusters._name_.azure_attributes.log_analytics_info +### apps._name_.permissions -**`Type: Map`** +**`Type: Sequence`** -Defines values necessary to configure and run Azure Log Analytics agent + @@ -765,26 +711,30 @@ Defines values necessary to configure and run Azure Log Analytics agent - Type - Description -- - `log_analytics_primary_key` +- - `group_name` - String - - The primary key for the Azure Log Analytics agent configuration + - -- - `log_analytics_workspace_id` +- - `level` - String - - The workspace ID for the Azure Log Analytics agent configuration + - Permission level + +- - `service_principal_name` + - String + - + +- - `user_name` + - String + - ::: -### clusters._name_.cluster_log_conf +### apps._name_.resources -**`Type: Map`** +**`Type: Sequence`** -The configuration for delivering spark logs to a long-term storage destination. 
-Three kinds of destinations (DBFS, S3 and Unity Catalog volumes) are supported. Only one destination can be specified -for one cluster. If the conf is given, the logs will be delivered to the destination every -`5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while -the destination of executor logs is `$destination/$clusterId/executor`. +Resources for the app. @@ -794,51 +744,65 @@ the destination of executor logs is `$destination/$clusterId/executor`. - Type - Description -- - `dbfs` +- - `app` - Map - - destination needs to be provided. e.g. `{ "dbfs" : { "destination" : "dbfs:/home/cluster_log" } }`. See [\_](#clustersnamecluster_log_confdbfs). + - -- - `s3` +- - `database` - Map - - destination and either the region or endpoint need to be provided. e.g. `{ "s3": { "destination" : "s3://cluster_log_bucket/prefix", "region" : "us-west-2" } }` Cluster iam role is used to access s3, please make sure the cluster iam role in `instance_profile_arn` has permission to write data to the s3 destination. See [\_](#clustersnamecluster_log_confs3). + - See [\_](#appsnameresourcesdatabase). -- - `volumes` - - Map - - destination needs to be provided, e.g. `{ "volumes": { "destination": "/Volumes/catalog/schema/volume/cluster_log" } }`. See [\_](#clustersnamecluster_log_confvolumes). +- - `description` + - String + - Description of the App Resource. -::: +- - `experiment` + - Map + - See [\_](#appsnameresourcesexperiment). +- - `genie_space` + - Map + - See [\_](#appsnameresourcesgenie_space). -### clusters._name_.cluster_log_conf.dbfs +- - `job` + - Map + - See [\_](#appsnameresourcesjob). -**`Type: Map`** +- - `name` + - String + - Name of the App Resource. -destination needs to be provided. e.g. -`{ "dbfs" : { "destination" : "dbfs:/home/cluster_log" } }` +- - `secret` + - Map + - See [\_](#appsnameresourcessecret). +- - `serving_endpoint` + - Map + - See [\_](#appsnameresourcesserving_endpoint). 
+- - `sql_warehouse` + - Map + - See [\_](#appsnameresourcessql_warehouse). -:::list-table +- - `uc_securable` + - Map + - See [\_](#appsnameresourcesuc_securable). -- - Key - - Type - - Description +::: -- - `destination` - - String - - dbfs destination, e.g. `dbfs:/my/path` -::: +### apps._name_.resources.app +**`Type: Map`** -### clusters._name_.cluster_log_conf.s3 + + + +### apps._name_.resources.database **`Type: Map`** -destination and either the region or endpoint need to be provided. e.g. -`{ "s3": { "destination" : "s3://cluster_log_bucket/prefix", "region" : "us-west-2" } }` -Cluster iam role is used to access s3, please make sure the cluster iam role in -`instance_profile_arn` has permission to write data to the s3 destination. + @@ -848,43 +812,26 @@ Cluster iam role is used to access s3, please make sure the cluster iam role in - Type - Description -- - `canned_acl` - - String - - (Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`. If `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on the destination bucket and prefix. The full list of possible canned acl can be found at http://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl. Please also note that by default only the object owner gets full controls. If you are using cross account role for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to read the logs. - -- - `destination` - - String - - S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using cluster iam role, please make sure you set cluster iam role and the role has write access to the destination. Please also note that you cannot use AWS keys to deliver logs. - -- - `enable_encryption` - - Boolean - - (Optional) Flag to enable server side encryption, `false` by default. - -- - `encryption_type` - - String - - (Optional) The encryption type, it could be `sse-s3` or `sse-kms`. 
It will be used only when encryption is enabled and the default type is `sse-s3`. - -- - `endpoint` +- - `database_name` - String - - S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`. Either region or endpoint needs to be set. If both are set, endpoint will be used. + - -- - `kms_key` +- - `instance_name` - String - - (Optional) Kms key which will be used if encryption is enabled and encryption type is set to `sse-kms`. + - -- - `region` +- - `permission` - String - - S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set, endpoint will be used. + - ::: -### clusters._name_.cluster_log_conf.volumes +### apps._name_.resources.experiment **`Type: Map`** -destination needs to be provided, e.g. -`{ "volumes": { "destination": "/Volumes/catalog/schema/volume/cluster_log" } }` + @@ -894,14 +841,18 @@ destination needs to be provided, e.g. - Type - Description -- - `destination` +- - `experiment_id` - String - - UC Volumes destination, e.g. `/Volumes/catalog/schema/vol1/init-scripts/setup-datadog.sh` or `dbfs:/Volumes/catalog/schema/vol1/init-scripts/setup-datadog.sh` + - + +- - `permission` + - String + - ::: -### clusters._name_.docker_image +### apps._name_.resources.genie_space **`Type: Map`** @@ -915,18 +866,22 @@ destination needs to be provided, e.g. - Type - Description -- - `basic_auth` - - Map - - See [\_](#clustersnamedocker_imagebasic_auth). +- - `name` + - String + - -- - `url` +- - `permission` - String - - URL of the docker image. + - + +- - `space_id` + - String + - ::: -### clusters._name_.docker_image.basic_auth +### apps._name_.resources.job **`Type: Map`** @@ -940,23 +895,22 @@ destination needs to be provided, e.g. 
- Type - Description -- - `password` +- - `id` - String - - Password of the user + - -- - `username` +- - `permission` - String - - Name of the user + - ::: -### clusters._name_.gcp_attributes +### apps._name_.resources.secret **`Type: Map`** -Attributes related to clusters running on Google Cloud Platform. -If not specified at cluster creation, a set of default values will be used. + @@ -966,44 +920,26 @@ If not specified at cluster creation, a set of default values will be used. - Type - Description -- - `availability` +- - `key` - String - - This field determines whether the instance pool will contain preemptible VMs, on-demand VMs, or preemptible VMs with a fallback to on-demand VMs if the former is unavailable. - -- - `boot_disk_size` - - Integer - - Boot disk size in GB - -- - `first_on_demand` - - Integer - - The first `first_on_demand` nodes of the cluster will be placed on on-demand instances. This value should be greater than 0, to make sure the cluster driver node is placed on an on-demand instance. If this value is greater than or equal to the current cluster size, all nodes will be placed on on-demand instances. If this value is less than the current cluster size, `first_on_demand` nodes will be placed on on-demand instances and the remainder will be placed on `availability` instances. Note that this value does not affect cluster size and cannot currently be mutated over the lifetime of a cluster. + - -- - `google_service_account` +- - `permission` - String - - If provided, the cluster will impersonate the google service account when accessing gcloud services (like GCS). The google service account must have previously been added to the Databricks environment by an account administrator. - -- - `local_ssd_count` - - Integer - - If provided, each node (workers and driver) in the cluster will have this number of local SSDs attached. Each local SSD is 375GB in size. 
Refer to [GCP documentation](https://cloud.google.com/compute/docs/disks/local-ssd#choose_number_local_ssds) for the supported number of local SSDs for each instance type. - -- - `use_preemptible_executors` - - Boolean - - This field is deprecated + - Permission to grant on the secret scope. Supported permissions are: "READ", "WRITE", "MANAGE". -- - `zone_id` +- - `scope` - String - - Identifier for the availability zone in which the cluster resides. This can be one of the following: - "HA" => High availability, spread nodes across availability zones for a Databricks deployment region [default]. - "AUTO" => Databricks picks an availability zone to schedule the cluster on. - A GCP availability zone => Pick One of the available zones for (machine type + region) from https://cloud.google.com/compute/docs/regions-zones. + - ::: -### clusters._name_.init_scripts +### apps._name_.resources.serving_endpoint -**`Type: Sequence`** +**`Type: Map`** -The configuration for storing init scripts. Any number of destinations can be specified. -The scripts are executed sequentially in the order provided. -If `cluster_log_conf` is specified, init script logs are sent to `//init_scripts`. + @@ -1013,42 +949,22 @@ If `cluster_log_conf` is specified, init script logs are sent to `/ - Type - Description -- - `abfss` - - Map - - Contains the Azure Data Lake Storage destination path. See [\_](#clustersnameinit_scriptsabfss). - -- - `dbfs` - - Map - - This field is deprecated - -- - `file` - - Map - - destination needs to be provided, e.g. `{ "file": { "destination": "file:/my/local/file.sh" } }`. See [\_](#clustersnameinit_scriptsfile). - -- - `gcs` - - Map - - destination needs to be provided, e.g. `{ "gcs": { "destination": "gs://my-bucket/file.sh" } }`. See [\_](#clustersnameinit_scriptsgcs). - -- - `s3` - - Map - - destination and either the region or endpoint need to be provided. e.g. 
`{ \"s3\": { \"destination\": \"s3://cluster_log_bucket/prefix\", \"region\": \"us-west-2\" } }` Cluster iam role is used to access s3, please make sure the cluster iam role in `instance_profile_arn` has permission to write data to the s3 destination. See [\_](#clustersnameinit_scriptss3). - -- - `volumes` - - Map - - destination needs to be provided. e.g. `{ \"volumes\" : { \"destination\" : \"/Volumes/my-init.sh\" } }`. See [\_](#clustersnameinit_scriptsvolumes). +- - `name` + - String + - -- - `workspace` - - Map - - destination needs to be provided, e.g. `{ "workspace": { "destination": "/cluster-init-scripts/setup-datadog.sh" } }`. See [\_](#clustersnameinit_scriptsworkspace). +- - `permission` + - String + - ::: -### clusters._name_.init_scripts.abfss +### apps._name_.resources.sql_warehouse **`Type: Map`** -Contains the Azure Data Lake Storage destination path + @@ -1058,19 +974,22 @@ Contains the Azure Data Lake Storage destination path - Type - Description -- - `destination` +- - `id` - String - - abfss destination, e.g. `abfss://@.dfs.core.windows.net/`. + - + +- - `permission` + - String + - ::: -### clusters._name_.init_scripts.file +### apps._name_.resources.uc_securable **`Type: Map`** -destination needs to be provided, e.g. -`{ "file": { "destination": "file:/my/local/file.sh" } }` + @@ -1080,19 +999,26 @@ destination needs to be provided, e.g. - Type - Description -- - `destination` +- - `permission` - String - - local file destination, e.g. `file:/my/local/file.sh` + - + +- - `securable_full_name` + - String + - + +- - `securable_type` + - String + - ::: -### clusters._name_.init_scripts.gcs +### apps._name_.telemetry_export_destinations -**`Type: Map`** +**`Type: Sequence`** -destination needs to be provided, e.g. -`{ "gcs": { "destination": "gs://my-bucket/file.sh" } }` + @@ -1102,21 +1028,18 @@ destination needs to be provided, e.g. - Type - Description -- - `destination` - - String - - GCS destination/URI, e.g. 
`gs://my-bucket/some-prefix` +- - `unity_catalog` + - Map + - Unity Catalog Destinations for OTEL telemetry export. See [\_](#appsnametelemetry_export_destinationsunity_catalog). ::: -### clusters._name_.init_scripts.s3 +### apps._name_.telemetry_export_destinations.unity_catalog **`Type: Map`** -destination and either the region or endpoint need to be provided. e.g. -`{ \"s3\": { \"destination\": \"s3://cluster_log_bucket/prefix\", \"region\": \"us-west-2\" } }` -Cluster iam role is used to access s3, please make sure the cluster iam role in -`instance_profile_arn` has permission to write data to the s3 destination. +Unity Catalog Destinations for OTEL telemetry export. @@ -1126,44 +1049,32 @@ Cluster iam role is used to access s3, please make sure the cluster iam role in - Type - Description -- - `canned_acl` - - String - - (Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`. If `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on the destination bucket and prefix. The full list of possible canned acl can be found at http://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl. Please also note that by default only the object owner gets full controls. If you are using cross account role for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to read the logs. - -- - `destination` - - String - - S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using cluster iam role, please make sure you set cluster iam role and the role has write access to the destination. Please also note that you cannot use AWS keys to deliver logs. - -- - `enable_encryption` - - Boolean - - (Optional) Flag to enable server side encryption, `false` by default. - -- - `encryption_type` - - String - - (Optional) The encryption type, it could be `sse-s3` or `sse-kms`. 
It will be used only when encryption is enabled and the default type is `sse-s3`. - -- - `endpoint` +- - `logs_table` - String - - S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`. Either region or endpoint needs to be set. If both are set, endpoint will be used. + - Unity Catalog table for OTEL logs. -- - `kms_key` +- - `metrics_table` - String - - (Optional) Kms key which will be used if encryption is enabled and encryption type is set to `sse-kms`. + - Unity Catalog table for OTEL metrics. -- - `region` +- - `traces_table` - String - - S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set, endpoint will be used. + - Unity Catalog table for OTEL traces (spans). ::: -### clusters._name_.init_scripts.volumes +## catalogs **`Type: Map`** -destination needs to be provided. e.g. -`{ \"volumes\" : { \"destination\" : \"/Volumes/my-init.sh\" } }` + +```yaml +catalogs: + : + : +``` :::list-table @@ -1172,57 +1083,50 @@ destination needs to be provided. e.g. - Type - Description -- - `destination` +- - `comment` - String - - UC Volumes destination, e.g. `/Volumes/catalog/schema/vol1/init-scripts/setup-datadog.sh` or `dbfs:/Volumes/catalog/schema/vol1/init-scripts/setup-datadog.sh` + - -::: +- - `connection_name` + - String + - +- - `grants` + - Sequence + - See [\_](#catalogsnamegrants). -### clusters._name_.init_scripts.workspace +- - `lifecycle` + - Map + - See [\_](#catalogsnamelifecycle). -**`Type: Map`** +- - `name` + - String + - -destination needs to be provided, e.g. -`{ "workspace": { "destination": "/cluster-init-scripts/setup-datadog.sh" } }` - - - -:::list-table +- - `options` + - Map + - -- - Key - - Type - - Description +- - `properties` + - Map + - -- - `destination` +- - `provider_name` - String - - wsfs destination, e.g. `workspace:/cluster-init-scripts/setup-datadog.sh` - -::: - - -### clusters._name_.lifecycle - -**`Type: Map`** - -Lifecycle is a struct that contains the lifecycle settings for a resource. 
It controls the behavior of the resource when it is deployed or destroyed. - - - -:::list-table + - -- - Key - - Type - - Description +- - `share_name` + - String + - -- - `prevent_destroy` - - Boolean - - Lifecycle setting to prevent the resource from being destroyed. +- - `storage_root` + - String + - ::: -### clusters._name_.permissions +### catalogs._name_.grants **`Type: Sequence`** @@ -1236,30 +1140,29 @@ Lifecycle is a struct that contains the lifecycle settings for a resource. It co - Type - Description -- - `group_name` +- - `principal` - String - - + - The principal (user email address or group name). For deleted principals, `principal` is empty while `principal_id` is populated. -- - `level` - - String - - +- - `privileges` + - Sequence + - The privileges assigned to the principal. -- - `service_principal_name` - - String - - +::: -- - `user_name` - - String - - -::: +### catalogs._name_.grants.privileges + +**`Type: Sequence`** +The privileges assigned to the principal. -### clusters._name_.workload_type + +### catalogs._name_.lifecycle **`Type: Map`** -Cluster Attributes showing for clusters workload types. + @@ -1269,19 +1172,24 @@ Cluster Attributes showing for clusters workload types. - Type - Description -- - `clients` - - Map - - defined what type of clients can use the cluster. E.g. Notebooks, Jobs. See [\_](#clustersnameworkload_typeclients). +- - `prevent_destroy` + - Boolean + - Lifecycle setting to prevent the resource from being destroyed. ::: -### clusters._name_.workload_type.clients +## clusters **`Type: Map`** -defined what type of clients can use the cluster. E.g. Notebooks, Jobs +The cluster resource defines an [all-purpose cluster](/api/workspace/clusters/create). +```yaml +clusters: + : + : +``` :::list-table @@ -1290,176 +1198,214 @@ defined what type of clients can use the cluster. E.g. 
Notebooks, Jobs - Type - Description -- - `jobs` - - Boolean - - With jobs set, the cluster can be used for jobs - -- - `notebooks` +- - `apply_policy_default_values` - Boolean - - With notebooks set, this cluster can be used for notebooks + - When set to true, fixed and default values from the policy will be used for fields that are omitted. When set to false, only fixed values from the policy will be applied. -::: +- - `autoscale` + - Map + - Parameters needed in order to automatically scale clusters up and down based on load. Note: autoscaling works best with DB runtime versions 3.0 or later. See [\_](#clustersnameautoscale). +- - `autotermination_minutes` + - Integer + - Automatically terminates the cluster after it is inactive for this time in minutes. If not set, this cluster will not be automatically terminated. If specified, the threshold must be between 10 and 10000 minutes. Users can also set this value to 0 to explicitly disable automatic termination. -## dashboards +- - `aws_attributes` + - Map + - Attributes related to clusters running on Amazon Web Services. If not specified at cluster creation, a set of default values will be used. See [\_](#clustersnameaws_attributes). -**`Type: Map`** +- - `azure_attributes` + - Map + - Attributes related to clusters running on Microsoft Azure. If not specified at cluster creation, a set of default values will be used. See [\_](#clustersnameazure_attributes). -The dashboard resource allows you to manage [AI/BI dashboards](/api/workspace/lakeview/create) in a bundle. For information about AI/BI dashboards, see [_](/dashboards/index.md). +- - `cluster_log_conf` + - Map + - The configuration for delivering spark logs to a long-term storage destination. Three kinds of destinations (DBFS, S3 and Unity Catalog volumes) are supported. Only one destination can be specified for one cluster. If the conf is given, the logs will be delivered to the destination every `5 mins`. 
The destination of driver logs is `$destination/$clusterId/driver`, while the destination of executor logs is `$destination/$clusterId/executor`. See [\_](#clustersnamecluster_log_conf). -```yaml -dashboards: - : - : -``` +- - `cluster_name` + - String + - Cluster name requested by the user. This doesn't have to be unique. If not specified at creation, the cluster name will be an empty string. For job clusters, the cluster name is automatically set based on the job and job run IDs. +- - `custom_tags` + - Map + - Additional tags for cluster resources. Databricks will tag all cluster resources (e.g., AWS instances and EBS volumes) with these tags in addition to `default_tags`. Notes: - Currently, Databricks allows at most 45 custom tags - Clusters can only reuse cloud resources if the resources' tags are a subset of the cluster tags -:::list-table +- - `data_security_mode` + - String + - Data security mode decides what data governance model to use when accessing data from a cluster. The following modes can only be used when `kind = CLASSIC_PREVIEW`. * `DATA_SECURITY_MODE_AUTO`: Databricks will choose the most appropriate access mode depending on your compute configuration. * `DATA_SECURITY_MODE_STANDARD`: Alias for `USER_ISOLATION`. * `DATA_SECURITY_MODE_DEDICATED`: Alias for `SINGLE_USER`. The following modes can be used regardless of `kind`. * `NONE`: No security isolation for multiple users sharing the cluster. Data governance features are not available in this mode. * `SINGLE_USER`: A secure cluster that can only be exclusively used by a single user specified in `single_user_name`. Most programming languages, cluster features and data governance features are available in this mode. * `USER_ISOLATION`: A secure cluster that can be shared by multiple users. Cluster users are fully isolated so that they cannot see each other's data and credentials. Most data governance features are supported in this mode. 
But programming languages and cluster features might be limited. The following modes are deprecated starting with Databricks Runtime 15.0 and will be removed for future Databricks Runtime versions: * `LEGACY_TABLE_ACL`: This mode is for users migrating from legacy Table ACL clusters. * `LEGACY_PASSTHROUGH`: This mode is for users migrating from legacy Passthrough on high concurrency clusters. * `LEGACY_SINGLE_USER`: This mode is for users migrating from legacy Passthrough on standard clusters. * `LEGACY_SINGLE_USER_STANDARD`: This mode provides a way that doesn’t have UC nor passthrough enabled. -- - Key - - Type - - Description +- - `docker_image` + - Map + - See [\_](#clustersnamedocker_image). -- - `create_time` +- - `driver_instance_pool_id` - String - - The timestamp of when the dashboard was created. + - The optional ID of the instance pool for the driver of the cluster belongs. The pool cluster uses the instance pool with id (instance_pool_id) if the driver pool is not assigned. -- - `dashboard_id` - - String - - UUID identifying the dashboard. +- - `driver_node_type_flexibility` + - Map + - Flexible node type configuration for the driver node. See [\_](#clustersnamedriver_node_type_flexibility). -- - `display_name` +- - `driver_node_type_id` - String - - The display name of the dashboard. + - The node type of the Spark driver. Note that this field is optional; if unset, the driver node type will be set as the same value as `node_type_id` defined above. This field, along with node_type_id, should not be set if virtual_cluster_size is set. If both driver_node_type_id, node_type_id, and virtual_cluster_size are specified, driver_node_type_id and node_type_id take precedence. -- - `embed_credentials` +- - `enable_elastic_disk` - Boolean - - + - Autoscaling Local Storage: when enabled, this cluster will dynamically acquire additional disk space when its Spark workers are running low on disk space. 
-- - `etag` +- - `enable_local_disk_encryption` + - Boolean + - Whether to enable LUKS on cluster VMs' local disks + +- - `gcp_attributes` + - Map + - Attributes related to clusters running on Google Cloud Platform. If not specified at cluster creation, a set of default values will be used. See [\_](#clustersnamegcp_attributes). + +- - `init_scripts` + - Sequence + - The configuration for storing init scripts. Any number of destinations can be specified. The scripts are executed sequentially in the order provided. If `cluster_log_conf` is specified, init script logs are sent to `//init_scripts`. See [\_](#clustersnameinit_scripts). + +- - `instance_pool_id` - String - - The etag for the dashboard. Can be optionally provided on updates to ensure that the dashboard has not been modified since the last read. This field is excluded in List Dashboards responses. + - The optional ID of the instance pool to which the cluster belongs. -- - `file_path` +- - `is_single_node` + - Boolean + - This field can only be used when `kind = CLASSIC_PREVIEW`. When set to true, Databricks will automatically set single node related `custom_tags`, `spark_conf`, and `num_workers` + +- - `kind` - String - - - `lifecycle` - Map - - Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. See [\_](#dashboardsnamelifecycle). - -- - `lifecycle_state` - - String - - The state of the dashboard resource. Used for tracking trashed status. + - Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. See [\_](#clustersnamelifecycle). -- - `parent_path` +- - `node_type_id` - String - - The workspace path of the folder containing the dashboard. Includes leading slash and no trailing slash. This field is excluded in List Dashboards responses. 
+ - This field encodes, through a single value, the resources available to each of the Spark nodes in this cluster. For example, the Spark nodes can be provisioned and optimized for memory or compute intensive workloads. A list of available node types can be retrieved by using the :method:clusters/listNodeTypes API call. -- - `path` - - String - - The workspace path of the dashboard asset, including the file name. Exported dashboards always have the file extension `.lvdash.json`. This field is excluded in List Dashboards responses. +- - `num_workers` + - Integer + - Number of worker nodes that this cluster should have. A cluster has one Spark Driver and `num_workers` Executors for a total of `num_workers` + 1 Spark nodes. Note: When reading the properties of a cluster, this field reflects the desired number of workers rather than the actual current number of workers. For instance, if a cluster is resized from 5 to 10 workers, this field will immediately be updated to reflect the target size of 10 workers, whereas the workers listed in `spark_info` will gradually increase from 5 to 10 as the new nodes are provisioned. - - `permissions` - Sequence - - See [\_](#dashboardsnamepermissions). - -- - `serialized_dashboard` - - Any - - The contents of the dashboard in serialized string form. This field is excluded in List Dashboards responses. Use the [get dashboard API](https://docs.databricks.com/api/workspace/lakeview/get) to retrieve an example response, which includes the `serialized_dashboard` field. This field provides the structure of the JSON string that represents the dashboard's layout and components. - -- - `update_time` - - String - - The timestamp of when the dashboard was last updated by the user. This field is excluded in List Dashboards responses. + - See [\_](#clustersnamepermissions). -- - `warehouse_id` +- - `policy_id` - String - - The warehouse ID used to run the dashboard. 
- -::: + - The ID of the cluster policy used to create the cluster if applicable. +- - `remote_disk_throughput` + - Integer + - If set, what the configurable throughput (in Mb/s) for the remote disk is. Currently only supported for GCP HYPERDISK_BALANCED disks. -**Example** +- - `runtime_engine` + - String + - -The following example includes and deploys the sample __NYC Taxi Trip Analysis__ dashboard to the Databricks workspace. - -``` yaml -resources: - dashboards: - nyc_taxi_trip_analysis: - display_name: "NYC Taxi Trip Analysis" - file_path: ../src/nyc_taxi_trip_analysis.lvdash.json - warehouse_id: ${var.warehouse_id} -``` -If you use the UI to modify the dashboard, modifications made through the UI are not applied to the dashboard JSON file in the local bundle unless you explicitly update it using `bundle generate`. You can use the `--watch` option to continuously poll and retrieve changes to the dashboard. See [_](/dev-tools/cli/bundle-commands.md#generate). - -In addition, if you attempt to deploy a bundle that contains a dashboard JSON file that is different than the one in the remote workspace, an error will occur. To force the deploy and overwrite the dashboard in the remote workspace with the local one, use the `--force` option. See [_](/dev-tools/cli/bundle-commands.md#deploy). +- - `single_user_name` + - String + - Single user name if data_security_mode is `SINGLE_USER` -### dashboards._name_.lifecycle +- - `spark_conf` + - Map + - An object containing a set of optional, user-specified Spark configuration key-value pairs. Users can also pass in a string of extra JVM options to the driver and the executors via `spark.driver.extraJavaOptions` and `spark.executor.extraJavaOptions` respectively. -**`Type: Map`** +- - `spark_env_vars` + - Map + - An object containing a set of optional, user-specified environment variable key-value pairs. 
Please note that key-value pair of the form (X,Y) will be exported as is (i.e., `export X='Y'`) while launching the driver and workers. In order to specify an additional set of `SPARK_DAEMON_JAVA_OPTS`, we recommend appending them to `$SPARK_DAEMON_JAVA_OPTS` as shown in the example below. This ensures that all default databricks managed environmental variables are included as well. Example Spark environment variables: `{"SPARK_WORKER_MEMORY": "28000m", "SPARK_LOCAL_DIRS": "/local_disk0"}` or `{"SPARK_DAEMON_JAVA_OPTS": "$SPARK_DAEMON_JAVA_OPTS -Dspark.shuffle.service.enabled=true"}` -Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. +- - `spark_version` + - String + - The Spark version of the cluster, e.g. `3.3.x-scala2.11`. A list of available Spark versions can be retrieved by using the :method:clusters/sparkVersions API call. +- - `ssh_public_keys` + - Sequence + - SSH public key contents that will be added to each Spark node in this cluster. The corresponding private keys can be used to login with the user name `ubuntu` on port `2200`. Up to 10 keys can be specified. +- - `total_initial_remote_disk_size` + - Integer + - If set, what the total initial volume size (in GB) of the remote disks should be. Currently only supported for GCP HYPERDISK_BALANCED disks. -:::list-table +- - `use_ml_runtime` + - Boolean + - This field can only be used when `kind = CLASSIC_PREVIEW`. `effective_spark_version` is determined by `spark_version` (DBR release), this field `use_ml_runtime`, and whether `node_type_id` is gpu node or not. -- - Key - - Type - - Description +- - `worker_node_type_flexibility` + - Map + - Flexible node type configuration for worker nodes. See [\_](#clustersnameworker_node_type_flexibility). -- - `prevent_destroy` - - Boolean - - Lifecycle setting to prevent the resource from being destroyed. 
+- - `workload_type` + - Map + - Cluster Attributes showing for clusters workload types. See [\_](#clustersnameworkload_type). ::: -### dashboards._name_.permissions - -**`Type: Sequence`** +**Example** +The following example creates a cluster named `my_cluster` and sets that as the cluster to use to run the notebook in `my_job`: +```yaml +bundle: + name: clusters + +resources: + clusters: + my_cluster: + num_workers: 2 + node_type_id: "i3.xlarge" + autoscale: + min_workers: 2 + max_workers: 7 + spark_version: "13.3.x-scala2.12" + spark_conf: + "spark.executor.memory": "2g" + + jobs: + my_job: + tasks: + - task_key: test_task + notebook_task: + notebook_path: "./src/my_notebook.py" +``` + +### clusters._name_.autoscale + +**`Type: Map`** + +Parameters needed in order to automatically scale clusters up and down based on load. +Note: autoscaling works best with DB runtime versions 3.0 or later. + - :::list-table - - Key - Type - Description -- - `group_name` - - String - - - -- - `level` - - String - - - -- - `service_principal_name` - - String - - +- - `max_workers` + - Integer + - The maximum number of workers to which the cluster can scale up when overloaded. Note that `max_workers` must be strictly greater than `min_workers`. -- - `user_name` - - String - - +- - `min_workers` + - Integer + - The minimum number of workers to which the cluster can scale down when underutilized. It is also the initial number of workers the cluster will have after creation. ::: -## database_catalogs +### clusters._name_.aws_attributes **`Type: Map`** - +Attributes related to clusters running on Amazon Web Services. +If not specified at cluster creation, a set of default values will be used. -```yaml -database_catalogs: - : - : -``` :::list-table @@ -1468,34 +1414,55 @@ database_catalogs: - Type - Description -- - `create_database_if_not_exists` - - Boolean - - +- - `availability` + - String + - Availability type used for all subsequent nodes past the `first_on_demand` ones. 
Note: If `first_on_demand` is zero, this availability type will be used for the entire cluster. -- - `database_instance_name` +- - `ebs_volume_count` + - Integer + - The number of volumes launched for each instance. Users can choose up to 10 volumes. This feature is only enabled for supported node types. Legacy node types cannot specify custom EBS volumes. For node types with no instance store, at least one EBS volume needs to be specified; otherwise, cluster creation will fail. These EBS volumes will be mounted at `/ebs0`, `/ebs1`, etc. Instance store volumes will be mounted at `/local_disk0`, `/local_disk1`, etc. If EBS volumes are attached, Databricks will configure Spark to use only the EBS volumes for scratch storage because heterogeneously sized scratch devices can lead to inefficient disk utilization. If no EBS volumes are attached, Databricks will configure Spark to use instance store volumes. Please note that if EBS volumes are specified, then the Spark configuration `spark.local.dir` will be overridden. + +- - `ebs_volume_iops` + - Integer + - If using gp3 volumes, what IOPS to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used. + +- - `ebs_volume_size` + - Integer + - The size of each EBS volume (in GiB) launched for each instance. For general purpose SSD, this value must be within the range 100 - 4096. For throughput optimized HDD, this value must be within the range 500 - 4096. + +- - `ebs_volume_throughput` + - Integer + - If using gp3 volumes, what throughput to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used. + +- - `ebs_volume_type` - String - - The name of the DatabaseInstance housing the database. + - All EBS volume types that Databricks supports. See https://aws.amazon.com/ebs/details/ for details.
-- - `database_name` +- - `first_on_demand` + - Integer + - The first `first_on_demand` nodes of the cluster will be placed on on-demand instances. If this value is greater than 0, the cluster driver node in particular will be placed on an on-demand instance. If this value is greater than or equal to the current cluster size, all nodes will be placed on on-demand instances. If this value is less than the current cluster size, `first_on_demand` nodes will be placed on on-demand instances and the remainder will be placed on `availability` instances. Note that this value does not affect cluster size and cannot currently be mutated over the lifetime of a cluster. + +- - `instance_profile_arn` - String - - The name of the database (in a instance) associated with the catalog. + - Nodes for this cluster will only be placed on AWS instances with this instance profile. If omitted, nodes will be placed on instances without an IAM instance profile. The instance profile must have previously been added to the Databricks environment by an account administrator. This feature may only be available to certain customer plans. -- - `lifecycle` - - Map - - Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. See [\_](#database_catalogsnamelifecycle). +- - `spot_bid_price_percent` + - Integer + - The bid price for AWS spot instances, as a percentage of the corresponding instance type's on-demand price. For example, if this field is set to 50, and the cluster needs a new `r3.xlarge` spot instance, then the bid price is half of the price of on-demand `r3.xlarge` instances. Similarly, if this field is set to 200, the bid price is twice the price of on-demand `r3.xlarge` instances. If not specified, the default value is 100. When spot instances are requested for this cluster, only spot instances whose bid price percentage matches this field will be considered.
Note that, for safety, we enforce this field to be no more than 10000. -- - `name` +- - `zone_id` - String - - The name of the catalog in UC. + - Identifier for the availability zone/datacenter in which the cluster resides. This string will be of a form like "us-west-2a". The provided availability zone must be in the same region as the Databricks deployment. For example, "us-west-2a" is not a valid zone id if the Databricks deployment resides in the "us-east-1" region. This is an optional field at cluster creation, and if not specified, the zone "auto" will be used. If the zone specified is "auto", will try to place cluster in a zone with high availability, and will retry placement in a different AZ if there is not enough capacity. The list of available zones as well as the default value can be found by using the `List Zones` method. ::: -### database_catalogs._name_.lifecycle +### clusters._name_.azure_attributes **`Type: Map`** -Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. +Attributes related to clusters running on Microsoft Azure. +If not specified at cluster creation, a set of default values will be used. @@ -1505,24 +1472,31 @@ Lifecycle is a struct that contains the lifecycle settings for a resource. It co - Type - Description -- - `prevent_destroy` - - Boolean - - Lifecycle setting to prevent the resource from being destroyed. +- - `availability` + - String + - Availability type used for all subsequent nodes past the `first_on_demand` ones. Note: If `first_on_demand` is zero, this availability type will be used for the entire cluster. + +- - `first_on_demand` + - Integer + - The first `first_on_demand` nodes of the cluster will be placed on on-demand instances. This value should be greater than 0, to make sure the cluster driver node is placed on an on-demand instance. 
If this value is greater than or equal to the current cluster size, all nodes will be placed on on-demand instances. If this value is less than the current cluster size, `first_on_demand` nodes will be placed on on-demand instances and the remainder will be placed on `availability` instances. Note that this value does not affect cluster size and cannot currently be mutated over the lifetime of a cluster. + +- - `log_analytics_info` + - Map + - Defines values necessary to configure and run Azure Log Analytics agent. See [\_](#clustersnameazure_attributeslog_analytics_info). + +- - `spot_bid_max_price` + - Any + - The max bid price to be used for Azure spot instances. The Max price for the bid cannot be higher than the on-demand price of the instance. If not specified, the default value is -1, which specifies that the instance cannot be evicted on the basis of price, and only on the basis of availability. Further, the value should be > 0 or -1. ::: -## database_instances +### clusters._name_.azure_attributes.log_analytics_info **`Type: Map`** -A DatabaseInstance represents a logical Postgres instance, comprised of both compute and storage. +Defines values necessary to configure and run Azure Log Analytics agent -```yaml -database_instances: - : - : -``` :::list-table @@ -1531,54 +1505,26 @@ database_instances: - Type - Description -- - `capacity` +- - `log_analytics_primary_key` - String - - The sku of the instance. Valid values are "CU_1", "CU_2", "CU_4", "CU_8". - -- - `enable_pg_native_login` - - Boolean - - Whether the instance has PG native password login enabled. Defaults to true. - -- - `enable_readable_secondaries` - - Boolean - - Whether to enable secondaries to serve read-only traffic. Defaults to false. - -- - `lifecycle` - - Map - - Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. See [\_](#database_instancesnamelifecycle).
+ - The primary key for the Azure Log Analytics agent configuration -- - `name` +- - `log_analytics_workspace_id` - String - - The name of the instance. This is the unique identifier for the instance. - -- - `node_count` - - Integer - - The number of nodes in the instance, composed of 1 primary and 0 or more secondaries. Defaults to 1 primary and 0 secondaries. - -- - `parent_instance_ref` - - Map - - The ref of the parent instance. This is only available if the instance is child instance. Input: For specifying the parent instance to create a child instance. Optional. Output: Only populated if provided as input to create a child instance. See [\_](#database_instancesnameparent_instance_ref). - -- - `permissions` - - Sequence - - See [\_](#database_instancesnamepermissions). - -- - `retention_window_in_days` - - Integer - - The retention window for the instance. This is the time window in days for which the historical data is retained. The default value is 7 days. Valid values are 2 to 35 days. - -- - `stopped` - - Boolean - - Whether the instance is stopped. + - The workspace ID for the Azure Log Analytics agent configuration ::: -### database_instances._name_.lifecycle +### clusters._name_.cluster_log_conf **`Type: Map`** -Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. +The configuration for delivering spark logs to a long-term storage destination. +Three kinds of destinations (DBFS, S3 and Unity Catalog volumes) are supported. Only one destination can be specified +for one cluster. If the conf is given, the logs will be delivered to the destination every +`5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while +the destination of executor logs is `$destination/$clusterId/executor`. @@ -1588,21 +1534,27 @@ Lifecycle is a struct that contains the lifecycle settings for a resource. 
It co - Type - Description -- - `prevent_destroy` - - Boolean - - Lifecycle setting to prevent the resource from being destroyed. +- - `dbfs` + - Map + - destination needs to be provided. e.g. `{ "dbfs" : { "destination" : "dbfs:/home/cluster_log" } }`. See [\_](#clustersnamecluster_log_confdbfs). + +- - `s3` + - Map + - destination and either the region or endpoint need to be provided. e.g. `{ "s3": { "destination" : "s3://cluster_log_bucket/prefix", "region" : "us-west-2" } }` Cluster iam role is used to access s3, please make sure the cluster iam role in `instance_profile_arn` has permission to write data to the s3 destination. See [\_](#clustersnamecluster_log_confs3). + +- - `volumes` + - Map + - destination needs to be provided, e.g. `{ "volumes": { "destination": "/Volumes/catalog/schema/volume/cluster_log" } }`. See [\_](#clustersnamecluster_log_confvolumes). ::: -### database_instances._name_.parent_instance_ref +### clusters._name_.cluster_log_conf.dbfs **`Type: Map`** -The ref of the parent instance. This is only available if the instance is -child instance. -Input: For specifying the parent instance to create a child instance. Optional. -Output: Only populated if provided as input to create a child instance. +destination needs to be provided. e.g. +`{ "dbfs" : { "destination" : "dbfs:/home/cluster_log" } }` @@ -1612,26 +1564,21 @@ Output: Only populated if provided as input to create a child instance. - Type - Description -- - `branch_time` - - String - - Branch time of the ref database instance. For a parent ref instance, this is the point in time on the parent instance from which the instance was created. For a child ref instance, this is the point in time on the instance from which the child instance was created. Input: For specifying the point in time to create a child instance. Optional. Output: Only populated if provided as input to create a child instance. - -- - `lsn` - - String - - User-specified WAL LSN of the ref database instance. 
Input: For specifying the WAL LSN to create a child instance. Optional. Output: Only populated if provided as input to create a child instance. - -- - `name` +- - `destination` - String - - Name of the ref database instance. + - dbfs destination, e.g. `dbfs:/my/path` ::: -### database_instances._name_.permissions +### clusters._name_.cluster_log_conf.s3 -**`Type: Sequence`** +**`Type: Map`** - +destination and either the region or endpoint need to be provided. e.g. +`{ "s3": { "destination" : "s3://cluster_log_bucket/prefix", "region" : "us-west-2" } }` +Cluster iam role is used to access s3, please make sure the cluster iam role in +`instance_profile_arn` has permission to write data to the s3 destination. @@ -1641,36 +1588,44 @@ Output: Only populated if provided as input to create a child instance. - Type - Description -- - `group_name` +- - `canned_acl` - String - - + - (Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`. If `canned_acl` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on the destination bucket and prefix. The full list of possible canned acl can be found at http://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl. Please also note that by default only the object owner gets full controls. If you are using cross account role for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to read the logs. -- - `level` +- - `destination` - String - - + - S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using cluster iam role, please make sure you set cluster iam role and the role has write access to the destination. Please also note that you cannot use AWS keys to deliver logs. -- - `service_principal_name` +- - `enable_encryption` + - Boolean + - (Optional) Flag to enable server side encryption, `false` by default.
+ +- - `encryption_type` - String - - + - (Optional) The encryption type, it could be `sse-s3` or `sse-kms`. It will be used only when encryption is enabled and the default type is `sse-s3`. -- - `user_name` +- - `endpoint` - String - - + - S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`. Either region or endpoint needs to be set. If both are set, endpoint will be used. + +- - `kms_key` + - String + - (Optional) Kms key which will be used if encryption is enabled and encryption type is set to `sse-kms`. + +- - `region` + - String + - S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set, endpoint will be used. ::: -## experiments +### clusters._name_.cluster_log_conf.volumes **`Type: Map`** -The experiment resource allows you to define [MLflow experiments](/api/workspace/experiments/createexperiment) in a bundle. For information about MLflow experiments, see [_](/mlflow/experiments.md). - -```yaml -experiments: - : - : -``` +destination needs to be provided, e.g. +`{ "volumes": { "destination": "/Volumes/catalog/schema/volume/cluster_log" } }` + :::list-table @@ -1679,65 +1634,18 @@ experiments: - Type - Description -- - `artifact_location` - - String - - Location where artifacts for the experiment are stored. - -- - `creation_time` - - Integer - - Creation time - -- - `experiment_id` - - String - - Unique identifier for the experiment. - -- - `last_update_time` - - Integer - - Last update time - -- - `lifecycle` - - Map - - Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. See [\_](#experimentsnamelifecycle). - -- - `lifecycle_stage` - - String - - Current life cycle stage of the experiment: "active" or "deleted". Deleted experiments are not returned by APIs. - -- - `name` +- - `destination` - String - - Human readable name that identifies the experiment. 
- -- - `permissions` - - Sequence - - See [\_](#experimentsnamepermissions). - -- - `tags` - - Sequence - - Tags: Additional metadata key-value pairs. See [\_](#experimentsnametags). + - UC Volumes destination, e.g. `/Volumes/catalog/schema/vol1/init-scripts/setup-datadog.sh` or `dbfs:/Volumes/catalog/schema/vol1/init-scripts/setup-datadog.sh` ::: -**Example** - -The following example defines an experiment that all users can view: - -```yaml -resources: - experiments: - experiment: - name: my_ml_experiment - permissions: - - level: CAN_READ - group_name: users - description: MLflow experiment used to track runs -``` - -### experiments._name_.lifecycle +### clusters._name_.docker_image **`Type: Map`** -Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. + @@ -1747,16 +1655,20 @@ Lifecycle is a struct that contains the lifecycle settings for a resource. It co - Type - Description -- - `prevent_destroy` - - Boolean - - Lifecycle setting to prevent the resource from being destroyed. +- - `basic_auth` + - Map + - See [\_](#clustersnamedocker_imagebasic_auth). + +- - `url` + - String + - URL of the docker image. ::: -### experiments._name_.permissions +### clusters._name_.docker_image.basic_auth -**`Type: Sequence`** +**`Type: Map`** @@ -1768,30 +1680,22 @@ Lifecycle is a struct that contains the lifecycle settings for a resource. It co - Type - Description -- - `group_name` - - String - - - -- - `level` - - String - - - -- - `service_principal_name` +- - `password` - String - - + - Password of the user -- - `user_name` +- - `username` - String - - + - Name of the user ::: -### experiments._name_.tags +### clusters._name_.driver_node_type_flexibility -**`Type: Sequence`** +**`Type: Map`** -Tags: Additional metadata key-value pairs. +Flexible node type configuration for the driver node. @@ -1801,28 +1705,20 @@ Tags: Additional metadata key-value pairs. 
- Type - Description -- - `key` - - String - - The tag key. - -- - `value` - - String - - The tag value. +- - `alternate_node_type_ids` + - Sequence + - A list of node type IDs to use as fallbacks when the primary node type is unavailable. ::: -## jobs +### clusters._name_.gcp_attributes **`Type: Map`** -The job resource allows you to define [jobs and their corresponding tasks](/api/workspace/jobs/create) in your bundle. For information about jobs, see [_](/jobs/index.md). For a tutorial that uses a Declarative Automation Bundles template to create a job, see [_](/dev-tools/bundles/jobs-tutorial.md). +Attributes related to clusters running on Google Cloud Platform. +If not specified at cluster creation, a set of default values will be used. -```yaml -jobs: - : - : -``` :::list-table @@ -1831,135 +1727,89 @@ jobs: - Type - Description -- - `budget_policy_id` +- - `availability` - String - - The id of the user specified budget policy to use for this job. If not specified, a default budget policy may be applied when creating or modifying the job. See `effective_budget_policy_id` for the budget policy used by this workload. - -- - `continuous` - - Map - - An optional continuous property for this job. The continuous property will ensure that there is always one run executing. Only one of `schedule` and `continuous` can be used. See [\_](#jobsnamecontinuous). + - This field determines whether the instance pool will contain preemptible VMs, on-demand VMs, or preemptible VMs with a fallback to on-demand VMs if the former is unavailable. -- - `deployment` - - Map - - Deployment information for jobs managed by external sources. See [\_](#jobsnamedeployment). +- - `boot_disk_size` + - Integer + - Boot disk size in GB -- - `description` - - String - - An optional description for the job. The maximum length is 27700 characters in UTF-8 encoding. +- - `first_on_demand` + - Integer + - The first `first_on_demand` nodes of the cluster will be placed on on-demand instances. 
This value should be greater than 0, to make sure the cluster driver node is placed on an on-demand instance. If this value is greater than or equal to the current cluster size, all nodes will be placed on on-demand instances. If this value is less than the current cluster size, `first_on_demand` nodes will be placed on on-demand instances and the remainder will be placed on `availability` instances. Note that this value does not affect cluster size and cannot currently be mutated over the lifetime of a cluster. -- - `edit_mode` +- - `google_service_account` - String - - Edit mode of the job. * `UI_LOCKED`: The job is in a locked UI state and cannot be modified. * `EDITABLE`: The job is in an editable state and can be modified. - -- - `email_notifications` - - Map - - An optional set of email addresses that is notified when runs of this job begin or complete as well as when this job is deleted. See [\_](#jobsnameemail_notifications). + - If provided, the cluster will impersonate the google service account when accessing gcloud services (like GCS). The google service account must have previously been added to the Databricks environment by an account administrator. -- - `environments` - - Sequence - - A list of task execution environment specifications that can be referenced by serverless tasks of this job. An environment is required to be present for serverless tasks. For serverless notebook tasks, the environment is accessible in the notebook environment panel. For other serverless tasks, the task environment is required to be specified using environment_key in the task settings. See [\_](#jobsnameenvironments). +- - `local_ssd_count` + - Integer + - If provided, each node (workers and driver) in the cluster will have this number of local SSDs attached. Each local SSD is 375GB in size. Refer to [GCP documentation](https://cloud.google.com/compute/docs/disks/local-ssd#choose_number_local_ssds) for the supported number of local SSDs for each instance type. 
-- - `format` - - String +- - `use_preemptible_executors` + - Boolean - This field is deprecated -- - `git_source` - - Map - - An optional specification for a remote Git repository containing the source code used by tasks. Version-controlled source code is supported by notebook, dbt, Python script, and SQL File tasks. If `git_source` is set, these tasks retrieve the file from the remote repository by default. However, this behavior can be overridden by setting `source` to `WORKSPACE` on the task. Note: dbt and SQL File tasks support only version-controlled sources. If dbt or SQL File tasks are used, `git_source` must be defined on the job. See [\_](#jobsnamegit_source). +- - `zone_id` + - String + - Identifier for the availability zone in which the cluster resides. This can be one of the following: - "HA" => High availability, spread nodes across availability zones for a Databricks deployment region [default]. - "AUTO" => Databricks picks an availability zone to schedule the cluster on. - A GCP availability zone => Pick One of the available zones for (machine type + region) from https://cloud.google.com/compute/docs/regions-zones. -- - `health` - - Map - - An optional set of health rules that can be defined for this job. See [\_](#jobsnamehealth). +::: -- - `job_clusters` - - Sequence - - A list of job cluster specifications that can be shared and reused by tasks of this job. Libraries cannot be declared in a shared job cluster. You must declare dependent libraries in task settings. See [\_](#jobsnamejob_clusters). -- - `lifecycle` - - Map - - Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. See [\_](#jobsnamelifecycle). +### clusters._name_.init_scripts -- - `max_concurrent_runs` - - Integer - - An optional maximum allowed number of concurrent runs of the job. Set this value if you want to be able to execute multiple runs of the same job concurrently. 
This is useful for example if you trigger your job on a frequent schedule and want to allow consecutive runs to overlap with each other, or if you want to trigger multiple runs which differ by their input parameters. This setting affects only new runs. For example, suppose the job’s concurrency is 4 and there are 4 concurrent active runs. Then setting the concurrency to 3 won’t kill any of the active runs. However, from then on, new runs are skipped unless there are fewer than 3 active runs. This value cannot exceed 1000. Setting this value to `0` causes all new runs to be skipped. +**`Type: Sequence`** -- - `name` - - String - - An optional name for the job. The maximum length is 4096 bytes in UTF-8 encoding. +The configuration for storing init scripts. Any number of destinations can be specified. +The scripts are executed sequentially in the order provided. +If `cluster_log_conf` is specified, init script logs are sent to `//init_scripts`. -- - `notification_settings` - - Map - - Optional notification settings that are used when sending notifications to each of the `email_notifications` and `webhook_notifications` for this job. See [\_](#jobsnamenotification_settings). -- - `parameters` - - Sequence - - Job-level parameter definitions. See [\_](#jobsnameparameters). -- - `performance_target` - - String - - The performance mode on a serverless job. This field determines the level of compute performance or cost-efficiency for the run. * `STANDARD`: Enables cost-efficient execution of serverless workloads. * `PERFORMANCE_OPTIMIZED`: Prioritizes fast startup and execution times through rapid scaling and optimized cluster performance. +:::list-table -- - `permissions` - - Sequence - - See [\_](#jobsnamepermissions). +- - Key + - Type + - Description -- - `queue` +- - `abfss` - Map - - The queue settings of the job. See [\_](#jobsnamequeue). + - Contains the Azure Data Lake Storage destination path. See [\_](#clustersnameinit_scriptsabfss). 
-- - `run_as` +- - `dbfs` - Map - - Write-only setting. Specifies the user or service principal that the job runs as. If not specified, the job runs as the user who created the job. Either `user_name` or `service_principal_name` should be specified. If not, an error is thrown. See [\_](#jobsnamerun_as). + - This field is deprecated -- - `schedule` +- - `file` - Map - - An optional periodic schedule for this job. The default behavior is that the job only runs when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`. See [\_](#jobsnameschedule). + - destination needs to be provided, e.g. `{ "file": { "destination": "file:/my/local/file.sh" } }`. See [\_](#clustersnameinit_scriptsfile). -- - `tags` +- - `gcs` - Map - - A map of tags associated with the job. These are forwarded to the cluster as cluster tags for jobs clusters, and are subject to the same limitations as cluster tags. A maximum of 25 tags can be added to the job. - -- - `tasks` - - Sequence - - A list of task specifications to be executed by this job. It supports up to 1000 elements in write endpoints (:method:jobs/create, :method:jobs/reset, :method:jobs/update, :method:jobs/submit). Read endpoints return only 100 tasks. If more than 100 tasks are available, you can paginate through them using :method:jobs/get. Use the `next_page_token` field at the object root to determine if more results are available. See [\_](#jobsnametasks). + - destination needs to be provided, e.g. `{ "gcs": { "destination": "gs://my-bucket/file.sh" } }`. See [\_](#clustersnameinit_scriptsgcs). -- - `timeout_seconds` - - Integer - - An optional timeout applied to each run of this job. A value of `0` means no timeout. +- - `s3` + - Map + - destination and either the region or endpoint need to be provided. e.g. 
`{ "s3": { "destination": "s3://cluster_log_bucket/prefix", "region": "us-west-2" } }` Cluster iam role is used to access s3, please make sure the cluster iam role in `instance_profile_arn` has permission to write data to the s3 destination. See [\_](#clustersnameinit_scriptss3). -- - `trigger` +- - `volumes` - Map - - A configuration to trigger a run when certain conditions are met. The default behavior is that the job runs only when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`. See [\_](#jobsnametrigger). + - destination needs to be provided. e.g. `{ "volumes" : { "destination" : "/Volumes/my-init.sh" } }`. See [\_](#clustersnameinit_scriptsvolumes). -- - `webhook_notifications` +- - `workspace` - Map - - A collection of system notification IDs to notify when runs of this job begin or complete. See [\_](#jobsnamewebhook_notifications). + - destination needs to be provided, e.g. `{ "workspace": { "destination": "/cluster-init-scripts/setup-datadog.sh" } }`. See [\_](#clustersnameinit_scriptsworkspace). ::: -**Example** - -The following example defines a job with the resource key `hello-job` with one notebook task: - -```yaml -resources: - jobs: - hello-job: - name: hello-job - tasks: - - task_key: hello-task - notebook_task: - notebook_path: ./hello.py -``` - -For information about defining job tasks and overriding job settings, see [_](/dev-tools/bundles/job-task-types.md), [_](/dev-tools/bundles/job-task-override.md), and [_](/dev-tools/bundles/cluster-override.md). - -### jobs._name_.continuous +### clusters._name_.init_scripts.abfss **`Type: Map`** -An optional continuous property for this job. The continuous property will ensure that there is always one run executing. Only one of `schedule` and `continuous` can be used. +Contains the Azure Data Lake Storage destination path @@ -1969,22 +1819,19 @@ An optional continuous property for this job.
The continuous property will ensur - Type - Description -- - `pause_status` - - String - - Indicate whether the continuous execution of the job is paused or not. Defaults to UNPAUSED. - -- - `task_retry_mode` +- - `destination` - String - - Indicate whether the continuous job is applying task level retries or not. Defaults to NEVER. + - abfss destination, e.g. `abfss://@.dfs.core.windows.net/`. ::: -### jobs._name_.deployment +### clusters._name_.init_scripts.file **`Type: Map`** -Deployment information for jobs managed by external sources. +destination needs to be provided, e.g. +`{ "file": { "destination": "file:/my/local/file.sh" } }` @@ -1994,22 +1841,19 @@ Deployment information for jobs managed by external sources. - Type - Description -- - `kind` +- - `destination` - String - - The kind of deployment that manages the job. * `BUNDLE`: The job is managed by Databricks Asset Bundle. - -- - `metadata_file_path` - - String - - Path of the file that contains deployment metadata. + - local file destination, e.g. `file:/my/local/file.sh` ::: -### jobs._name_.email_notifications +### clusters._name_.init_scripts.gcs **`Type: Map`** -An optional set of email addresses that is notified when runs of this job begin or complete as well as when this job is deleted. +destination needs to be provided, e.g. +`{ "gcs": { "destination": "gs://my-bucket/file.sh" } }` @@ -2019,41 +1863,21 @@ An optional set of email addresses that is notified when runs of this job begin - Type - Description -- - `no_alert_for_skipped_runs` - - Boolean - - This field is deprecated - -- - `on_duration_warning_threshold_exceeded` - - Sequence - - A list of email addresses to be notified when the duration of a run exceeds the threshold specified for the `RUN_DURATION_SECONDS` metric in the `health` field. If no rule for the `RUN_DURATION_SECONDS` metric is specified in the `health` field for the job, notifications are not sent. 
- -- - `on_failure` - - Sequence - - A list of email addresses to be notified when a run unsuccessfully completes. A run is considered to have completed unsuccessfully if it ends with an `INTERNAL_ERROR` `life_cycle_state` or a `FAILED`, or `TIMED_OUT` result_state. If this is not specified on job creation, reset, or update the list is empty, and notifications are not sent. - -- - `on_start` - - Sequence - - A list of email addresses to be notified when a run begins. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent. - -- - `on_streaming_backlog_exceeded` - - Sequence - - A list of email addresses to notify when any streaming backlog thresholds are exceeded for any stream. Streaming backlog thresholds can be set in the `health` field using the following metrics: `STREAMING_BACKLOG_BYTES`, `STREAMING_BACKLOG_RECORDS`, `STREAMING_BACKLOG_SECONDS`, or `STREAMING_BACKLOG_FILES`. Alerting is based on the 10-minute average of these metrics. If the issue persists, notifications are resent every 30 minutes. - -- - `on_success` - - Sequence - - A list of email addresses to be notified when a run successfully completes. A run is considered to have completed successfully if it ends with a `TERMINATED` `life_cycle_state` and a `SUCCESS` result_state. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent. +- - `destination` + - String + - GCS destination/URI, e.g. `gs://my-bucket/some-prefix` ::: -### jobs._name_.environments +### clusters._name_.init_scripts.s3 -**`Type: Sequence`** +**`Type: Map`** -A list of task execution environment specifications that can be referenced by serverless tasks of this job. -An environment is required to be present for serverless tasks. -For serverless notebook tasks, the environment is accessible in the notebook environment panel. 
-For other serverless tasks, the task environment is required to be specified using environment_key in the task settings. +destination and either the region or endpoint need to be provided. e.g. +`{ "s3": { "destination": "s3://cluster_log_bucket/prefix", "region": "us-west-2" } }` +Cluster iam role is used to access s3, please make sure the cluster iam role in +`instance_profile_arn` has permission to write data to the s3 destination. @@ -2063,23 +1887,43 @@ For other serverless tasks, the task environment is required to be specified usi - Type - Description -- - `environment_key` +- - `canned_acl` - String - - The key of an environment. It has to be unique within a job. + - (Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`. If `canned_acl` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on the destination bucket and prefix. The full list of possible canned acl can be found at http://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl. Please also note that by default only the object owner gets full controls. If you are using cross account role for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to read the logs. -- - `spec` - - Map - - The environment entity used to preserve serverless environment side panel, jobs' environment for non-notebook task, and DLT's environment for classic and serverless pipelines. In this minimal environment spec, only pip dependencies are supported. See [\_](#jobsnameenvironmentsspec). +- - `destination` + - String + - S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using cluster iam role, please make sure you set cluster iam role and the role has write access to the destination. Please also note that you cannot use AWS keys to deliver logs. + +- - `enable_encryption` + - Boolean + - (Optional) Flag to enable server side encryption, `false` by default.
+ +- - `encryption_type` + - String + - (Optional) The encryption type, it could be `sse-s3` or `sse-kms`. It will be used only when encryption is enabled and the default type is `sse-s3`. + +- - `endpoint` + - String + - S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`. Either region or endpoint needs to be set. If both are set, endpoint will be used. + +- - `kms_key` + - String + - (Optional) Kms key which will be used if encryption is enabled and encryption type is set to `sse-kms`. + +- - `region` + - String + - S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set, endpoint will be used. ::: -### jobs._name_.environments.spec +### clusters._name_.init_scripts.volumes **`Type: Map`** -The environment entity used to preserve serverless environment side panel, jobs' environment for non-notebook task, and DLT's environment for classic and serverless pipelines. -In this minimal environment spec, only pip dependencies are supported. +destination needs to be provided. e.g. +`{ \"volumes\" : { \"destination\" : \"/Volumes/my-init.sh\" } }` @@ -2089,30 +1933,19 @@ In this minimal environment spec, only pip dependencies are supported. - Type - Description -- - `client` - - String - - This field is deprecated - -- - `dependencies` - - Sequence - - List of pip dependencies, as supported by the version of pip in this environment. - -- - `environment_version` +- - `destination` - String - - Required. Environment version used by the environment. Each version comes with a specific Python version and a set of Python packages. The version is a string, consisting of an integer. + - UC Volumes destination, e.g. `/Volumes/catalog/schema/vol1/init-scripts/setup-datadog.sh` or `dbfs:/Volumes/catalog/schema/vol1/init-scripts/setup-datadog.sh` ::: -### jobs._name_.git_source +### clusters._name_.init_scripts.workspace **`Type: Map`** -An optional specification for a remote Git repository containing the source code used by tasks. 
Version-controlled source code is supported by notebook, dbt, Python script, and SQL File tasks. - -If `git_source` is set, these tasks retrieve the file from the remote repository by default. However, this behavior can be overridden by setting `source` to `WORKSPACE` on the task. - -Note: dbt and SQL File tasks support only version-controlled sources. If dbt or SQL File tasks are used, `git_source` must be defined on the job. +destination needs to be provided, e.g. +`{ "workspace": { "destination": "/cluster-init-scripts/setup-datadog.sh" } }` @@ -2122,38 +1955,39 @@ Note: dbt and SQL File tasks support only version-controlled sources. If dbt or - Type - Description -- - `git_branch` +- - `destination` - String - - Name of the branch to be checked out and used by this job. This field cannot be specified in conjunction with git_tag or git_commit. + - wsfs destination, e.g. `workspace:/cluster-init-scripts/setup-datadog.sh` -- - `git_commit` - - String - - Commit to be checked out and used by this job. This field cannot be specified in conjunction with git_branch or git_tag. +::: -- - `git_provider` - - String - - Unique identifier of the service used to host the Git repository. The value is case insensitive. -- - `git_snapshot` - - Map - - Read-only state of the remote repository at the time the job was run. This field is only included on job runs. See [\_](#jobsnamegit_sourcegit_snapshot). +### clusters._name_.lifecycle -- - `git_tag` - - String - - Name of the tag to be checked out and used by this job. This field cannot be specified in conjunction with git_branch or git_commit. +**`Type: Map`** -- - `git_url` - - String - - URL of the repository to be cloned by this job. +Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. 
+ + + +:::list-table + +- - Key + - Type + - Description + +- - `prevent_destroy` + - Boolean + - Lifecycle setting to prevent the resource from being destroyed. ::: -### jobs._name_.git_source.git_snapshot +### clusters._name_.permissions -**`Type: Map`** +**`Type: Sequence`** -Read-only state of the remote repository at the time the job was run. This field is only included on job runs. + @@ -2163,18 +1997,30 @@ Read-only state of the remote repository at the time the job was run. This field - Type - Description -- - `used_commit` +- - `group_name` - String - - Commit that was used to execute the run. If git_branch was specified, this points to the HEAD of the branch at the time of the run; if git_tag was specified, this points to the commit the tag points to. + - + +- - `level` + - String + - Permission level + +- - `service_principal_name` + - String + - + +- - `user_name` + - String + - ::: -### jobs._name_.health +### clusters._name_.worker_node_type_flexibility **`Type: Map`** -An optional set of health rules that can be defined for this job. +Flexible node type configuration for worker nodes. @@ -2184,18 +2030,18 @@ An optional set of health rules that can be defined for this job. - Type - Description -- - `rules` +- - `alternate_node_type_ids` - Sequence - - See [\_](#jobsnamehealthrules). + - A list of node type IDs to use as fallbacks when the primary node type is unavailable. ::: -### jobs._name_.health.rules +### clusters._name_.workload_type -**`Type: Sequence`** +**`Type: Map`** - +Cluster Attributes showing for clusters workload types. @@ -2205,26 +2051,18 @@ An optional set of health rules that can be defined for this job. - Type - Description -- - `metric` - - String - - Specifies the health metric that is being evaluated for a particular health rule. * `RUN_DURATION_SECONDS`: Expected total time for a run in seconds. * `STREAMING_BACKLOG_BYTES`: An estimate of the maximum bytes of data waiting to be consumed across all streams. 
This metric is in Public Preview. * `STREAMING_BACKLOG_RECORDS`: An estimate of the maximum offset lag across all streams. This metric is in Public Preview. * `STREAMING_BACKLOG_SECONDS`: An estimate of the maximum consumer delay across all streams. This metric is in Public Preview. * `STREAMING_BACKLOG_FILES`: An estimate of the maximum number of outstanding files across all streams. This metric is in Public Preview. - -- - `op` - - String - - Specifies the operator used to compare the health metric value with the specified threshold. - -- - `value` - - Integer - - Specifies the threshold value that the health metric should obey to satisfy the health rule. +- - `clients` + - Map + - Defines what type of clients can use the cluster. E.g. Notebooks, Jobs. See [\_](#clustersnameworkload_typeclients). ::: -### jobs._name_.job_clusters +### clusters._name_.workload_type.clients -**`Type: Sequence`** +**`Type: Map`** -A list of job cluster specifications that can be shared and reused by tasks of this job. Libraries cannot be declared in a shared job cluster. You must declare dependent libraries in task settings. +Defines what type of clients can use the cluster. E.g. Notebooks, Jobs @@ -2234,23 +2072,28 @@ A list of job cluster specifications that can be shared and reused by tasks of t - Type - Description -- - `job_cluster_key` - - String - - A unique name for the job cluster. This field is required and must be unique within the job. `JobTaskSettings` may refer to this field to determine which cluster to launch for the task execution. +- - `jobs` + - Boolean + - With jobs set, the cluster can be used for jobs -- - `new_cluster` - - Map - - If new_cluster, a description of a cluster that is created for each task. See [\_](#jobsnamejob_clustersnew_cluster).
+- - `notebooks` + - Boolean + - With notebooks set, this cluster can be used for notebooks ::: -### jobs._name_.job_clusters.new_cluster +## dashboards **`Type: Map`** -If new_cluster, a description of a cluster that is created for each task. +The dashboard resource allows you to manage [AI/BI dashboards](/api/workspace/lakeview/create) in a bundle. For information about AI/BI dashboards, see [_](/dashboards/index.md). +```yaml +dashboards: + : + : +``` :::list-table @@ -2259,143 +2102,94 @@ If new_cluster, a description of a cluster that is created for each task. - Type - Description -- - `apply_policy_default_values` - - Boolean - - When set to true, fixed and default values from the policy will be used for fields that are omitted. When set to false, only fixed values from the policy will be applied. +- - `create_time` + - String + - The timestamp of when the dashboard was created. -- - `autoscale` - - Map - - Parameters needed in order to automatically scale clusters up and down based on load. Note: autoscaling works best with DB runtime versions 3.0 or later. See [\_](#jobsnamejob_clustersnew_clusterautoscale). +- - `dashboard_id` + - String + - UUID identifying the dashboard. -- - `autotermination_minutes` - - Integer - - Automatically terminates the cluster after it is inactive for this time in minutes. If not set, this cluster will not be automatically terminated. If specified, the threshold must be between 10 and 10000 minutes. Users can also set this value to 0 to explicitly disable automatic termination. +- - `dataset_catalog` + - String + - Sets the default catalog for all datasets in this dashboard. When set, this overrides the catalog specified in individual dataset definitions. -- - `aws_attributes` - - Map - - Attributes related to clusters running on Amazon Web Services. If not specified at cluster creation, a set of default values will be used. See [\_](#jobsnamejob_clustersnew_clusteraws_attributes). 
+- - `dataset_schema` + - String + - Sets the default schema for all datasets in this dashboard. When set, this overrides the schema specified in individual dataset definitions. -- - `azure_attributes` - - Map - - Attributes related to clusters running on Microsoft Azure. If not specified at cluster creation, a set of default values will be used. See [\_](#jobsnamejob_clustersnew_clusterazure_attributes). +- - `display_name` + - String + - The display name of the dashboard. -- - `cluster_log_conf` - - Map - - The configuration for delivering spark logs to a long-term storage destination. Three kinds of destinations (DBFS, S3 and Unity Catalog volumes) are supported. Only one destination can be specified for one cluster. If the conf is given, the logs will be delivered to the destination every `5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while the destination of executor logs is `$destination/$clusterId/executor`. See [\_](#jobsnamejob_clustersnew_clustercluster_log_conf). +- - `embed_credentials` + - Boolean + - -- - `cluster_name` +- - `etag` - String - - Cluster name requested by the user. This doesn't have to be unique. If not specified at creation, the cluster name will be an empty string. For job clusters, the cluster name is automatically set based on the job and job run IDs. - -- - `custom_tags` - - Map - - Additional tags for cluster resources. Databricks will tag all cluster resources (e.g., AWS instances and EBS volumes) with these tags in addition to `default_tags`. Notes: - Currently, Databricks allows at most 45 custom tags - Clusters can only reuse cloud resources if the resources' tags are a subset of the cluster tags + - The etag for the dashboard. Can be optionally provided on updates to ensure that the dashboard has not been modified since the last read. This field is excluded in List Dashboards responses. 
-- - `data_security_mode` +- - `file_path` - String - - Data security mode decides what data governance model to use when accessing data from a cluster. The following modes can only be used when `kind = CLASSIC_PREVIEW`. * `DATA_SECURITY_MODE_AUTO`: Databricks will choose the most appropriate access mode depending on your compute configuration. * `DATA_SECURITY_MODE_STANDARD`: Alias for `USER_ISOLATION`. * `DATA_SECURITY_MODE_DEDICATED`: Alias for `SINGLE_USER`. The following modes can be used regardless of `kind`. * `NONE`: No security isolation for multiple users sharing the cluster. Data governance features are not available in this mode. * `SINGLE_USER`: A secure cluster that can only be exclusively used by a single user specified in `single_user_name`. Most programming languages, cluster features and data governance features are available in this mode. * `USER_ISOLATION`: A secure cluster that can be shared by multiple users. Cluster users are fully isolated so that they cannot see each other's data and credentials. Most data governance features are supported in this mode. But programming languages and cluster features might be limited. The following modes are deprecated starting with Databricks Runtime 15.0 and will be removed for future Databricks Runtime versions: * `LEGACY_TABLE_ACL`: This mode is for users migrating from legacy Table ACL clusters. * `LEGACY_PASSTHROUGH`: This mode is for users migrating from legacy Passthrough on high concurrency clusters. * `LEGACY_SINGLE_USER`: This mode is for users migrating from legacy Passthrough on standard clusters. * `LEGACY_SINGLE_USER_STANDARD`: This mode provides a way that doesn’t have UC nor passthrough enabled. + - -- - `docker_image` +- - `lifecycle` - Map - - See [\_](#jobsnamejob_clustersnew_clusterdocker_image). + - Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. See [\_](#dashboardsnamelifecycle). 
-- - `driver_instance_pool_id` +- - `lifecycle_state` - String - - The optional ID of the instance pool for the driver of the cluster belongs. The pool cluster uses the instance pool with id (instance_pool_id) if the driver pool is not assigned. + - The state of the dashboard resource. Used for tracking trashed status. -- - `driver_node_type_id` - - String - - The node type of the Spark driver. Note that this field is optional; if unset, the driver node type will be set as the same value as `node_type_id` defined above. This field, along with node_type_id, should not be set if virtual_cluster_size is set. If both driver_node_type_id, node_type_id, and virtual_cluster_size are specified, driver_node_type_id and node_type_id take precedence. - -- - `enable_elastic_disk` - - Boolean - - Autoscaling Local Storage: when enabled, this cluster will dynamically acquire additional disk space when its Spark workers are running low on disk space. This feature requires specific AWS permissions to function correctly - refer to the User Guide for more details. - -- - `enable_local_disk_encryption` - - Boolean - - Whether to enable LUKS on cluster VMs' local disks - -- - `gcp_attributes` - - Map - - Attributes related to clusters running on Google Cloud Platform. If not specified at cluster creation, a set of default values will be used. See [\_](#jobsnamejob_clustersnew_clustergcp_attributes). - -- - `init_scripts` - - Sequence - - The configuration for storing init scripts. Any number of destinations can be specified. The scripts are executed sequentially in the order provided. If `cluster_log_conf` is specified, init script logs are sent to `//init_scripts`. See [\_](#jobsnamejob_clustersnew_clusterinit_scripts). - -- - `instance_pool_id` - - String - - The optional ID of the instance pool to which the cluster belongs. - -- - `is_single_node` - - Boolean - - This field can only be used when `kind = CLASSIC_PREVIEW`. 
When set to true, Databricks will automatically set single node related `custom_tags`, `spark_conf`, and `num_workers` - -- - `kind` - - String - - - -- - `node_type_id` +- - `parent_path` - String - - This field encodes, through a single value, the resources available to each of the Spark nodes in this cluster. For example, the Spark nodes can be provisioned and optimized for memory or compute intensive workloads. A list of available node types can be retrieved by using the :method:clusters/listNodeTypes API call. - -- - `num_workers` - - Integer - - Number of worker nodes that this cluster should have. A cluster has one Spark Driver and `num_workers` Executors for a total of `num_workers` + 1 Spark nodes. Note: When reading the properties of a cluster, this field reflects the desired number of workers rather than the actual current number of workers. For instance, if a cluster is resized from 5 to 10 workers, this field will immediately be updated to reflect the target size of 10 workers, whereas the workers listed in `spark_info` will gradually increase from 5 to 10 as the new nodes are provisioned. + - The workspace path of the folder containing the dashboard. Includes leading slash and no trailing slash. This field is excluded in List Dashboards responses. -- - `policy_id` +- - `path` - String - - The ID of the cluster policy used to create the cluster if applicable. + - The workspace path of the dashboard asset, including the file name. Exported dashboards always have the file extension `.lvdash.json`. This field is excluded in List Dashboards responses. -- - `remote_disk_throughput` - - Integer - - If set, what the configurable throughput (in Mb/s) for the remote disk is. Currently only supported for GCP HYPERDISK_BALANCED disks. +- - `permissions` + - Sequence + - See [\_](#dashboardsnamepermissions). -- - `runtime_engine` - - String - - +- - `serialized_dashboard` + - Any + - The contents of the dashboard in serialized string form. 
This field is excluded in List Dashboards responses. Use the [get dashboard API](https://docs.databricks.com/api/workspace/lakeview/get) to retrieve an example response, which includes the `serialized_dashboard` field. This field provides the structure of the JSON string that represents the dashboard's layout and components. -- - `single_user_name` +- - `update_time` - String - - Single user name if data_security_mode is `SINGLE_USER` - -- - `spark_conf` - - Map - - An object containing a set of optional, user-specified Spark configuration key-value pairs. Users can also pass in a string of extra JVM options to the driver and the executors via `spark.driver.extraJavaOptions` and `spark.executor.extraJavaOptions` respectively. - -- - `spark_env_vars` - - Map - - An object containing a set of optional, user-specified environment variable key-value pairs. Please note that key-value pair of the form (X,Y) will be exported as is (i.e., `export X='Y'`) while launching the driver and workers. In order to specify an additional set of `SPARK_DAEMON_JAVA_OPTS`, we recommend appending them to `$SPARK_DAEMON_JAVA_OPTS` as shown in the example below. This ensures that all default databricks managed environmental variables are included as well. Example Spark environment variables: `{"SPARK_WORKER_MEMORY": "28000m", "SPARK_LOCAL_DIRS": "/local_disk0"}` or `{"SPARK_DAEMON_JAVA_OPTS": "$SPARK_DAEMON_JAVA_OPTS -Dspark.shuffle.service.enabled=true"}` + - The timestamp of when the dashboard was last updated by the user. This field is excluded in List Dashboards responses. -- - `spark_version` +- - `warehouse_id` - String - - The Spark version of the cluster, e.g. `3.3.x-scala2.11`. A list of available Spark versions can be retrieved by using the :method:clusters/sparkVersions API call. - -- - `ssh_public_keys` - - Sequence - - SSH public key contents that will be added to each Spark node in this cluster. 
The corresponding private keys can be used to login with the user name `ubuntu` on port `2200`. Up to 10 keys can be specified. - -- - `total_initial_remote_disk_size` - - Integer - - If set, what the total initial volume size (in GB) of the remote disks should be. Currently only supported for GCP HYPERDISK_BALANCED disks. + - The warehouse ID used to run the dashboard. -- - `use_ml_runtime` - - Boolean - - This field can only be used when `kind = CLASSIC_PREVIEW`. `effective_spark_version` is determined by `spark_version` (DBR release), this field `use_ml_runtime`, and whether `node_type_id` is gpu node or not. +::: -- - `workload_type` - - Map - - Cluster Attributes showing for clusters workload types. See [\_](#jobsnamejob_clustersnew_clusterworkload_type). -::: +**Example** +The following example includes and deploys the sample __NYC Taxi Trip Analysis__ dashboard to the Databricks workspace. + +``` yaml +resources: + dashboards: + nyc_taxi_trip_analysis: + display_name: "NYC Taxi Trip Analysis" + file_path: ../src/nyc_taxi_trip_analysis.lvdash.json + warehouse_id: ${var.warehouse_id} +``` +If you use the UI to modify the dashboard, modifications made through the UI are not applied to the dashboard JSON file in the local bundle unless you explicitly update it using `bundle generate`. You can use the `--watch` option to continuously poll and retrieve changes to the dashboard. See [_](/dev-tools/cli/bundle-commands.md#generate). + +In addition, if you attempt to deploy a bundle that contains a dashboard JSON file that is different than the one in the remote workspace, an error will occur. To force the deploy and overwrite the dashboard in the remote workspace with the local one, use the `--force` option. See [_](/dev-tools/cli/bundle-commands.md#deploy). -### jobs._name_.job_clusters.new_cluster.autoscale +### dashboards._name_.lifecycle **`Type: Map`** -Parameters needed in order to automatically scale clusters up and down based on load. 
-Note: autoscaling works best with DB runtime versions 3.0 or later. +Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. @@ -2405,23 +2199,18 @@ Note: autoscaling works best with DB runtime versions 3.0 or later. - Type - Description -- - `max_workers` - - Integer - - The maximum number of workers to which the cluster can scale up when overloaded. Note that `max_workers` must be strictly greater than `min_workers`. - -- - `min_workers` - - Integer - - The minimum number of workers to which the cluster can scale down when underutilized. It is also the initial number of workers the cluster will have after creation. +- - `prevent_destroy` + - Boolean + - Lifecycle setting to prevent the resource from being destroyed. ::: -### jobs._name_.job_clusters.new_cluster.aws_attributes +### dashboards._name_.permissions -**`Type: Map`** +**`Type: Sequence`** -Attributes related to clusters running on Amazon Web Services. -If not specified at cluster creation, a set of default values will be used. + @@ -2431,56 +2220,36 @@ If not specified at cluster creation, a set of default values will be used. - Type - Description -- - `availability` +- - `group_name` - String - - Availability type used for all subsequent nodes past the `first_on_demand` ones. Note: If `first_on_demand` is zero, this availability type will be used for the entire cluster. - -- - `ebs_volume_count` - - Integer - - The number of volumes launched for each instance. Users can choose up to 10 volumes. This feature is only enabled for supported node types. Legacy node types cannot specify custom EBS volumes. For node types with no instance store, at least one EBS volume needs to be specified; otherwise, cluster creation will fail. These EBS volumes will be mounted at `/ebs0`, `/ebs1`, and etc. Instance store volumes will be mounted at `/local_disk0`, `/local_disk1`, and etc. 
If EBS volumes are attached, Databricks will configure Spark to use only the EBS volumes for scratch storage because heterogenously sized scratch devices can lead to inefficient disk utilization. If no EBS volumes are attached, Databricks will configure Spark to use instance store volumes. Please note that if EBS volumes are specified, then the Spark configuration `spark.local.dir` will be overridden. - -- - `ebs_volume_iops` - - Integer - - If using gp3 volumes, what IOPS to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used. - -- - `ebs_volume_size` - - Integer - - The size of each EBS volume (in GiB) launched for each instance. For general purpose SSD, this value must be within the range 100 - 4096. For throughput optimized HDD, this value must be within the range 500 - 4096. - -- - `ebs_volume_throughput` - - Integer - - If using gp3 volumes, what throughput to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used. + - The name of the group that has the permission set in level. -- - `ebs_volume_type` +- - `level` - String - - All EBS volume types that Databricks supports. See https://aws.amazon.com/ebs/details/ for details. - -- - `first_on_demand` - - Integer - - The first `first_on_demand` nodes of the cluster will be placed on on-demand instances. If this value is greater than 0, the cluster driver node in particular will be placed on an on-demand instance. If this value is greater than or equal to the current cluster size, all nodes will be placed on on-demand instances. If this value is less than the current cluster size, `first_on_demand` nodes will be placed on on-demand instances and the remainder will be placed on `availability` instances. Note that this value does not affect cluster size and cannot currently be mutated over the lifetime of a cluster. 
+ - The allowed permission for user, group, service principal defined for this permission. -- - `instance_profile_arn` +- - `service_principal_name` - String - - Nodes for this cluster will only be placed on AWS instances with this instance profile. If ommitted, nodes will be placed on instances without an IAM instance profile. The instance profile must have previously been added to the Databricks environment by an account administrator. This feature may only be available to certain customer plans. - -- - `spot_bid_price_percent` - - Integer - - The bid price for AWS spot instances, as a percentage of the corresponding instance type's on-demand price. For example, if this field is set to 50, and the cluster needs a new `r3.xlarge` spot instance, then the bid price is half of the price of on-demand `r3.xlarge` instances. Similarly, if this field is set to 200, the bid price is twice the price of on-demand `r3.xlarge` instances. If not specified, the default value is 100. When spot instances are requested for this cluster, only spot instances whose bid price percentage matches this field will be considered. Note that, for safety, we enforce this field to be no more than 10000. + - The name of the service principal that has the permission set in level. -- - `zone_id` +- - `user_name` - String - - Identifier for the availability zone/datacenter in which the cluster resides. This string will be of a form like "us-west-2a". The provided availability zone must be in the same region as the Databricks deployment. For example, "us-west-2a" is not a valid zone id if the Databricks deployment resides in the "us-east-1" region. This is an optional field at cluster creation, and if not specified, a default zone will be used. If the zone specified is "auto", will try to place cluster in a zone with high availability, and will retry placement in a different AZ if there is not enough capacity. 
The list of available zones as well as the default value can be found by using the `List Zones` method. + - The name of the user that has the permission set in level. ::: -### jobs._name_.job_clusters.new_cluster.azure_attributes +## database_catalogs **`Type: Map`** -Attributes related to clusters running on Microsoft Azure. -If not specified at cluster creation, a set of default values will be used. + +```yaml +database_catalogs: + : + : +``` :::list-table @@ -2489,30 +2258,34 @@ If not specified at cluster creation, a set of default values will be used. - Type - Description -- - `availability` +- - `create_database_if_not_exists` + - Boolean + - + +- - `database_instance_name` - String - - Availability type used for all subsequent nodes past the `first_on_demand` ones. Note: If `first_on_demand` is zero, this availability type will be used for the entire cluster. + - The name of the DatabaseInstance housing the database. -- - `first_on_demand` - - Integer - - The first `first_on_demand` nodes of the cluster will be placed on on-demand instances. This value should be greater than 0, to make sure the cluster driver node is placed on an on-demand instance. If this value is greater than or equal to the current cluster size, all nodes will be placed on on-demand instances. If this value is less than the current cluster size, `first_on_demand` nodes will be placed on on-demand instances and the remainder will be placed on `availability` instances. Note that this value does not affect cluster size and cannot currently be mutated over the lifetime of a cluster. +- - `database_name` + - String + - The name of the database (in an instance) associated with the catalog. -- - `log_analytics_info` +- - `lifecycle` - Map - - Defines values necessary to configure and run Azure Log Analytics agent. See [\_](#jobsnamejob_clustersnew_clusterazure_attributeslog_analytics_info). + - Lifecycle is a struct that contains the lifecycle settings for a resource.
It controls the behavior of the resource when it is deployed or destroyed. See [\_](#database_catalogsnamelifecycle). -- - `spot_bid_max_price` - - Any - - The max bid price to be used for Azure spot instances. The Max price for the bid cannot be higher than the on-demand price of the instance. If not specified, the default value is -1, which specifies that the instance cannot be evicted on the basis of price, and only on the basis of availability. Further, the value should > 0 or -1. +- - `name` + - String + - The name of the catalog in UC. ::: -### jobs._name_.job_clusters.new_cluster.azure_attributes.log_analytics_info +### database_catalogs._name_.lifecycle **`Type: Map`** -Defines values necessary to configure and run Azure Log Analytics agent +Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. @@ -2522,27 +2295,24 @@ Defines values necessary to configure and run Azure Log Analytics agent - Type - Description -- - `log_analytics_primary_key` - - String - - The primary key for the Azure Log Analytics agent configuration - -- - `log_analytics_workspace_id` - - String - - The workspace ID for the Azure Log Analytics agent configuration +- - `prevent_destroy` + - Boolean + - Lifecycle setting to prevent the resource from being destroyed. ::: -### jobs._name_.job_clusters.new_cluster.cluster_log_conf +## database_instances **`Type: Map`** -The configuration for delivering spark logs to a long-term storage destination. -Three kinds of destinations (DBFS, S3 and Unity Catalog volumes) are supported. Only one destination can be specified -for one cluster. If the conf is given, the logs will be delivered to the destination every -`5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while -the destination of executor logs is `$destination/$clusterId/executor`. 
+A DatabaseInstance represents a logical Postgres instance, comprised of both compute and storage. +```yaml +database_instances: + : + : +``` :::list-table @@ -2551,51 +2321,62 @@ the destination of executor logs is `$destination/$clusterId/executor`. - Type - Description -- - `dbfs` - - Map - - destination needs to be provided. e.g. `{ "dbfs" : { "destination" : "dbfs:/home/cluster_log" } }`. See [\_](#jobsnamejob_clustersnew_clustercluster_log_confdbfs). - -- - `s3` - - Map - - destination and either the region or endpoint need to be provided. e.g. `{ "s3": { "destination" : "s3://cluster_log_bucket/prefix", "region" : "us-west-2" } }` Cluster iam role is used to access s3, please make sure the cluster iam role in `instance_profile_arn` has permission to write data to the s3 destination. See [\_](#jobsnamejob_clustersnew_clustercluster_log_confs3). +- - `capacity` + - String + - The sku of the instance. Valid values are "CU_1", "CU_2", "CU_4", "CU_8". -- - `volumes` - - Map - - destination needs to be provided, e.g. `{ "volumes": { "destination": "/Volumes/catalog/schema/volume/cluster_log" } }`. See [\_](#jobsnamejob_clustersnew_clustercluster_log_confvolumes). +- - `custom_tags` + - Sequence + - Custom tags associated with the instance. This field is only included on create and update responses. See [\_](#database_instancesnamecustom_tags). -::: +- - `enable_pg_native_login` + - Boolean + - Whether to enable PG native password login on the instance. Defaults to false. +- - `enable_readable_secondaries` + - Boolean + - Whether to enable secondaries to serve read-only traffic. Defaults to false. -### jobs._name_.job_clusters.new_cluster.cluster_log_conf.dbfs +- - `lifecycle` + - Map + - Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. See [\_](#database_instancesnamelifecycle). -**`Type: Map`** +- - `name` + - String + - The name of the instance. 
This is the unique identifier for the instance. -destination needs to be provided. e.g. -`{ "dbfs" : { "destination" : "dbfs:/home/cluster_log" } }` +- - `node_count` + - Integer + - The number of nodes in the instance, composed of 1 primary and 0 or more secondaries. Defaults to 1 primary and 0 secondaries. This field is input only, see effective_node_count for the output. +- - `parent_instance_ref` + - Map + - The ref of the parent instance. This is only available if the instance is child instance. Input: For specifying the parent instance to create a child instance. Optional. Output: Only populated if provided as input to create a child instance. See [\_](#database_instancesnameparent_instance_ref). +- - `permissions` + - Sequence + - See [\_](#database_instancesnamepermissions). -:::list-table +- - `retention_window_in_days` + - Integer + - The retention window for the instance. This is the time window in days for which the historical data is retained. The default value is 7 days. Valid values are 2 to 35 days. -- - Key - - Type - - Description +- - `stopped` + - Boolean + - Whether to stop the instance. An input only param, see effective_stopped for the output. -- - `destination` +- - `usage_policy_id` - String - - dbfs destination, e.g. `dbfs:/my/path` + - The desired usage policy to associate with the instance. ::: -### jobs._name_.job_clusters.new_cluster.cluster_log_conf.s3 +### database_instances._name_.custom_tags -**`Type: Map`** +**`Type: Sequence`** -destination and either the region or endpoint need to be provided. e.g. -`{ "s3": { "destination" : "s3://cluster_log_bucket/prefix", "region" : "us-west-2" } }` -Cluster iam role is used to access s3, please make sure the cluster iam role in -`instance_profile_arn` has permission to write data to the s3 destination. +Custom tags associated with the instance. This field is only included on create and update responses. 
@@ -2605,43 +2386,22 @@ Cluster iam role is used to access s3, please make sure the cluster iam role in - Type - Description -- - `canned_acl` - - String - - (Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`. If `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on the destination bucket and prefix. The full list of possible canned acl can be found at http://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl. Please also note that by default only the object owner gets full controls. If you are using cross account role for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to read the logs. - -- - `destination` - - String - - S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using cluster iam role, please make sure you set cluster iam role and the role has write access to the destination. Please also note that you cannot use AWS keys to deliver logs. - -- - `enable_encryption` - - Boolean - - (Optional) Flag to enable server side encryption, `false` by default. - -- - `encryption_type` - - String - - (Optional) The encryption type, it could be `sse-s3` or `sse-kms`. It will be used only when encryption is enabled and the default type is `sse-s3`. - -- - `endpoint` - - String - - S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`. Either region or endpoint needs to be set. If both are set, endpoint will be used. - -- - `kms_key` +- - `key` - String - - (Optional) Kms key which will be used if encryption is enabled and encryption type is set to `sse-kms`. + - The key of the custom tag. -- - `region` +- - `value` - String - - S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set, endpoint will be used. + - The value of the custom tag. 
::: -### jobs._name_.job_clusters.new_cluster.cluster_log_conf.volumes +### database_instances._name_.lifecycle **`Type: Map`** -destination needs to be provided, e.g. -`{ "volumes": { "destination": "/Volumes/catalog/schema/volume/cluster_log" } }` +Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. @@ -2651,18 +2411,21 @@ destination needs to be provided, e.g. - Type - Description -- - `destination` - - String - - UC Volumes destination, e.g. `/Volumes/catalog/schema/vol1/init-scripts/setup-datadog.sh` or `dbfs:/Volumes/catalog/schema/vol1/init-scripts/setup-datadog.sh` +- - `prevent_destroy` + - Boolean + - Lifecycle setting to prevent the resource from being destroyed. ::: -### jobs._name_.job_clusters.new_cluster.docker_image +### database_instances._name_.parent_instance_ref **`Type: Map`** - +The ref of the parent instance. This is only available if the instance is a +child instance. +Input: For specifying the parent instance to create a child instance. Optional. +Output: Only populated if provided as input to create a child instance. @@ -2672,48 +2435,26 @@ destination needs to be provided, e.g. - Type - Description -- - `basic_auth` - - Map - - See [\_](#jobsnamejob_clustersnew_clusterdocker_imagebasic_auth). - -- - `url` +- - `branch_time` - String - - URL of the docker image. - -::: - - -### jobs._name_.job_clusters.new_cluster.docker_image.basic_auth - -**`Type: Map`** - - - - - -:::list-table - -- - Key - - Type - - Description + - Branch time of the ref database instance. For a parent ref instance, this is the point in time on the parent instance from which the instance was created. For a child ref instance, this is the point in time on the instance from which the child instance was created. Input: For specifying the point in time to create a child instance. Optional. Output: Only populated if provided as input to create a child instance.
-- - `password` +- - `lsn` - String - - Password of the user + - User-specified WAL LSN of the ref database instance. Input: For specifying the WAL LSN to create a child instance. Optional. Output: Only populated if provided as input to create a child instance. -- - `username` +- - `name` - String - - Name of the user + - Name of the ref database instance. ::: -### jobs._name_.job_clusters.new_cluster.gcp_attributes +### database_instances._name_.permissions -**`Type: Map`** +**`Type: Sequence`** -Attributes related to clusters running on Google Cloud Platform. -If not specified at cluster creation, a set of default values will be used. + @@ -2723,45 +2464,36 @@ If not specified at cluster creation, a set of default values will be used. - Type - Description -- - `availability` +- - `group_name` - String - - This field determines whether the instance pool will contain preemptible VMs, on-demand VMs, or preemptible VMs with a fallback to on-demand VMs if the former is unavailable. - -- - `boot_disk_size` - - Integer - - Boot disk size in GB + - The name of the group that has the permission set in level. -- - `first_on_demand` - - Integer - - The first `first_on_demand` nodes of the cluster will be placed on on-demand instances. This value should be greater than 0, to make sure the cluster driver node is placed on an on-demand instance. If this value is greater than or equal to the current cluster size, all nodes will be placed on on-demand instances. If this value is less than the current cluster size, `first_on_demand` nodes will be placed on on-demand instances and the remainder will be placed on `availability` instances. Note that this value does not affect cluster size and cannot currently be mutated over the lifetime of a cluster. - -- - `google_service_account` +- - `level` - String - - If provided, the cluster will impersonate the google service account when accessing gcloud services (like GCS). 
The google service account must have previously been added to the Databricks environment by an account administrator. - -- - `local_ssd_count` - - Integer - - If provided, each node (workers and driver) in the cluster will have this number of local SSDs attached. Each local SSD is 375GB in size. Refer to [GCP documentation](https://cloud.google.com/compute/docs/disks/local-ssd#choose_number_local_ssds) for the supported number of local SSDs for each instance type. + - The allowed permission for user, group, service principal defined for this permission. -- - `use_preemptible_executors` - - Boolean - - This field is deprecated +- - `service_principal_name` + - String + - The name of the service principal that has the permission set in level. -- - `zone_id` +- - `user_name` - String - - Identifier for the availability zone in which the cluster resides. This can be one of the following: - "HA" => High availability, spread nodes across availability zones for a Databricks deployment region [default]. - "AUTO" => Databricks picks an availability zone to schedule the cluster on. - A GCP availability zone => Pick One of the available zones for (machine type + region) from https://cloud.google.com/compute/docs/regions-zones. + - The name of the user that has the permission set in level. ::: -### jobs._name_.job_clusters.new_cluster.init_scripts +## experiments -**`Type: Sequence`** +**`Type: Map`** -The configuration for storing init scripts. Any number of destinations can be specified. -The scripts are executed sequentially in the order provided. -If `cluster_log_conf` is specified, init script logs are sent to `//init_scripts`. +The experiment resource allows you to define [MLflow experiments](/api/workspace/experiments/createexperiment) in a bundle. For information about MLflow experiments, see [_](/mlflow/experiments.md). 
+```yaml +experiments: + : + : +``` :::list-table @@ -2770,42 +2502,49 @@ If `cluster_log_conf` is specified, init script logs are sent to `/ - Type - Description -- - `abfss` - - Map - - Contains the Azure Data Lake Storage destination path. See [\_](#jobsnamejob_clustersnew_clusterinit_scriptsabfss). +- - `artifact_location` + - String + - Location where all artifacts for the experiment are stored. If not provided, the remote server will select an appropriate default. -- - `dbfs` +- - `lifecycle` - Map - - This field is deprecated + - Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. See [\_](#experimentsnamelifecycle). -- - `file` - - Map - - destination needs to be provided, e.g. `{ "file": { "destination": "file:/my/local/file.sh" } }`. See [\_](#jobsnamejob_clustersnew_clusterinit_scriptsfile). +- - `name` + - String + - Experiment name. -- - `gcs` - - Map - - destination needs to be provided, e.g. `{ "gcs": { "destination": "gs://my-bucket/file.sh" } }`. See [\_](#jobsnamejob_clustersnew_clusterinit_scriptsgcs). +- - `permissions` + - Sequence + - See [\_](#experimentsnamepermissions). -- - `s3` - - Map - - destination and either the region or endpoint need to be provided. e.g. `{ \"s3\": { \"destination\": \"s3://cluster_log_bucket/prefix\", \"region\": \"us-west-2\" } }` Cluster iam role is used to access s3, please make sure the cluster iam role in `instance_profile_arn` has permission to write data to the s3 destination. See [\_](#jobsnamejob_clustersnew_clusterinit_scriptss3). +- - `tags` + - Sequence + - A collection of tags to set on the experiment. Maximum tag size and number of tags per request depends on the storage backend. All storage backends are guaranteed to support tag keys up to 250 bytes in size and tag values up to 5000 bytes in size. All storage backends are also guaranteed to support up to 20 tags per request. 
See [\_](#experimentsnametags). -- - `volumes` - - Map - - destination needs to be provided. e.g. `{ \"volumes\" : { \"destination\" : \"/Volumes/my-init.sh\" } }`. See [\_](#jobsnamejob_clustersnew_clusterinit_scriptsvolumes). +::: -- - `workspace` - - Map - - destination needs to be provided, e.g. `{ "workspace": { "destination": "/cluster-init-scripts/setup-datadog.sh" } }`. See [\_](#jobsnamejob_clustersnew_clusterinit_scriptsworkspace). -::: +**Example** +The following example defines an experiment that all users can view: + +```yaml +resources: + experiments: + experiment: + name: my_ml_experiment + permissions: + - level: CAN_READ + group_name: users + description: MLflow experiment used to track runs +``` -### jobs._name_.job_clusters.new_cluster.init_scripts.abfss +### experiments._name_.lifecycle **`Type: Map`** -Contains the Azure Data Lake Storage destination path +Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. @@ -2815,19 +2554,18 @@ Contains the Azure Data Lake Storage destination path - Type - Description -- - `destination` - - String - - abfss destination, e.g. `abfss://@.dfs.core.windows.net/`. +- - `prevent_destroy` + - Boolean + - Lifecycle setting to prevent the resource from being destroyed. ::: -### jobs._name_.job_clusters.new_cluster.init_scripts.file +### experiments._name_.permissions -**`Type: Map`** +**`Type: Sequence`** -destination needs to be provided, e.g. -`{ "file": { "destination": "file:/my/local/file.sh" } }` + @@ -2837,19 +2575,33 @@ destination needs to be provided, e.g. - Type - Description -- - `destination` +- - `group_name` - String - - local file destination, e.g. 
`file:/my/local/file.sh` + - + +- - `level` + - String + - Permission level + +- - `service_principal_name` + - String + - + +- - `user_name` + - String + - ::: -### jobs._name_.job_clusters.new_cluster.init_scripts.gcs +### experiments._name_.tags -**`Type: Map`** +**`Type: Sequence`** -destination needs to be provided, e.g. -`{ "gcs": { "destination": "gs://my-bucket/file.sh" } }` +A collection of tags to set on the experiment. Maximum tag size and number of tags per request +depends on the storage backend. All storage backends are guaranteed to support tag keys up +to 250 bytes in size and tag values up to 5000 bytes in size. All storage backends are also +guaranteed to support up to 20 tags per request. @@ -2859,22 +2611,28 @@ destination needs to be provided, e.g. - Type - Description -- - `destination` +- - `key` - String - - GCS destination/URI, e.g. `gs://my-bucket/some-prefix` + - The tag key. + +- - `value` + - String + - The tag value. ::: -### jobs._name_.job_clusters.new_cluster.init_scripts.s3 +## external_locations **`Type: Map`** -destination and either the region or endpoint need to be provided. e.g. -`{ \"s3\": { \"destination\": \"s3://cluster_log_bucket/prefix\", \"region\": \"us-west-2\" } }` -Cluster iam role is used to access s3, please make sure the cluster iam role in -`instance_profile_arn` has permission to write data to the s3 destination. + +```yaml +external_locations: + : + : +``` :::list-table @@ -2883,43 +2641,62 @@ Cluster iam role is used to access s3, please make sure the cluster iam role in - Type - Description -- - `canned_acl` +- - `comment` - String - - (Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`. If `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on the destination bucket and prefix. The full list of possible canned acl can be found at http://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl. 
Please also note that by default only the object owner gets full controls. If you are using cross account role for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to read the logs. + - -- - `destination` +- - `credential_name` - String - - S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using cluster iam role, please make sure you set cluster iam role and the role has write access to the destination. Please also note that you cannot use AWS keys to deliver logs. + - -- - `enable_encryption` +- - `enable_file_events` - Boolean - - (Optional) Flag to enable server side encryption, `false` by default. + - -- - `encryption_type` - - String - - (Optional) The encryption type, it could be `sse-s3` or `sse-kms`. It will be used only when encryption is enabled and the default type is `sse-s3`. +- - `encryption_details` + - Map + - Encryption options that apply to clients connecting to cloud storage. See [\_](#external_locationsnameencryption_details). -- - `endpoint` - - String - - S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`. Either region or endpoint needs to be set. If both are set, endpoint will be used. +- - `fallback` + - Boolean + - -- - `kms_key` +- - `file_event_queue` + - Map + - See [\_](#external_locationsnamefile_event_queue). + +- - `grants` + - Sequence + - See [\_](#external_locationsnamegrants). + +- - `lifecycle` + - Map + - See [\_](#external_locationsnamelifecycle). + +- - `name` - String - - (Optional) Kms key which will be used if encryption is enabled and encryption type is set to `sse-kms`. + - -- - `region` +- - `read_only` + - Boolean + - + +- - `skip_validation` + - Boolean + - + +- - `url` - String - - S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set, endpoint will be used. 
+ - ::: -### jobs._name_.job_clusters.new_cluster.init_scripts.volumes +### external_locations._name_.encryption_details **`Type: Map`** -destination needs to be provided. e.g. -`{ \"volumes\" : { \"destination\" : \"/Volumes/my-init.sh\" } }` +Encryption options that apply to clients connecting to cloud storage. @@ -2929,19 +2706,18 @@ destination needs to be provided. e.g. - Type - Description -- - `destination` - - String - - UC Volumes destination, e.g. `/Volumes/catalog/schema/vol1/init-scripts/setup-datadog.sh` or `dbfs:/Volumes/catalog/schema/vol1/init-scripts/setup-datadog.sh` +- - `sse_encryption_details` + - Map + - Server-Side Encryption properties for clients communicating with AWS s3. See [\_](#external_locationsnameencryption_detailssse_encryption_details). ::: -### jobs._name_.job_clusters.new_cluster.init_scripts.workspace +### external_locations._name_.encryption_details.sse_encryption_details **`Type: Map`** -destination needs to be provided, e.g. -`{ "workspace": { "destination": "/cluster-init-scripts/setup-datadog.sh" } }` +Server-Side Encryption properties for clients communicating with AWS s3. @@ -2951,18 +2727,22 @@ destination needs to be provided, e.g. - Type - Description -- - `destination` +- - `algorithm` - String - - wsfs destination, e.g. `workspace:/cluster-init-scripts/setup-datadog.sh` + - SSE algorithm to use for encrypting S3 objects + +- - `aws_kms_key_arn` + - String + - ::: -### jobs._name_.job_clusters.new_cluster.workload_type +### external_locations._name_.file_event_queue **`Type: Map`** -Cluster Attributes showing for clusters workload types. + @@ -2972,18 +2752,38 @@ Cluster Attributes showing for clusters workload types. - Type - Description -- - `clients` +- - `managed_aqs` - Map - - defined what type of clients can use the cluster. E.g. Notebooks, Jobs. See [\_](#jobsnamejob_clustersnew_clusterworkload_typeclients). + - See [\_](#external_locationsnamefile_event_queuemanaged_aqs). 
+ +- - `managed_pubsub` + - Map + - See [\_](#external_locationsnamefile_event_queuemanaged_pubsub). + +- - `managed_sqs` + - Map + - See [\_](#external_locationsnamefile_event_queuemanaged_sqs). + +- - `provided_aqs` + - Map + - See [\_](#external_locationsnamefile_event_queueprovided_aqs). + +- - `provided_pubsub` + - Map + - See [\_](#external_locationsnamefile_event_queueprovided_pubsub). + +- - `provided_sqs` + - Map + - See [\_](#external_locationsnamefile_event_queueprovided_sqs). ::: -### jobs._name_.job_clusters.new_cluster.workload_type.clients +### external_locations._name_.file_event_queue.managed_aqs **`Type: Map`** -defined what type of clients can use the cluster. E.g. Notebooks, Jobs + @@ -2993,22 +2793,26 @@ defined what type of clients can use the cluster. E.g. Notebooks, Jobs - Type - Description -- - `jobs` - - Boolean - - With jobs set, the cluster can be used for jobs +- - `queue_url` + - String + - -- - `notebooks` - - Boolean - - With notebooks set, this cluster can be used for notebooks +- - `resource_group` + - String + - + +- - `subscription_id` + - String + - ::: -### jobs._name_.lifecycle +### external_locations._name_.file_event_queue.managed_pubsub **`Type: Map`** -Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. + @@ -3018,18 +2822,18 @@ Lifecycle is a struct that contains the lifecycle settings for a resource. It co - Type - Description -- - `prevent_destroy` - - Boolean - - Lifecycle setting to prevent the resource from being destroyed. +- - `subscription_name` + - String + - ::: -### jobs._name_.notification_settings +### external_locations._name_.file_event_queue.managed_sqs **`Type: Map`** -Optional notification settings that are used when sending notifications to each of the `email_notifications` and `webhook_notifications` for this job. 
+ @@ -3039,22 +2843,18 @@ Optional notification settings that are used when sending notifications to each - Type - Description -- - `no_alert_for_canceled_runs` - - Boolean - - If true, do not send notifications to recipients specified in `on_failure` if the run is canceled. - -- - `no_alert_for_skipped_runs` - - Boolean - - If true, do not send notifications to recipients specified in `on_failure` if the run is skipped. +- - `queue_url` + - String + - ::: -### jobs._name_.parameters +### external_locations._name_.file_event_queue.provided_aqs -**`Type: Sequence`** +**`Type: Map`** -Job-level parameter definitions + @@ -3064,20 +2864,24 @@ Job-level parameter definitions - Type - Description -- - `default` +- - `queue_url` - String - - Default value of the parameter. + - -- - `name` +- - `resource_group` - String - - The name of the defined parameter. May only contain alphanumeric characters, `_`, `-`, and `.` + - + +- - `subscription_id` + - String + - ::: -### jobs._name_.permissions +### external_locations._name_.file_event_queue.provided_pubsub -**`Type: Sequence`** +**`Type: Map`** @@ -3089,30 +2893,18 @@ Job-level parameter definitions - Type - Description -- - `group_name` - - String - - - -- - `level` - - String - - - -- - `service_principal_name` - - String - - - -- - `user_name` +- - `subscription_name` - String - ::: -### jobs._name_.queue +### external_locations._name_.file_event_queue.provided_sqs **`Type: Map`** -The queue settings of the job. + @@ -3122,20 +2914,18 @@ The queue settings of the job. - Type - Description -- - `enabled` - - Boolean - - If true, enable queueing for the job. This is a required field. +- - `queue_url` + - String + - ::: -### jobs._name_.run_as +### external_locations._name_.grants -**`Type: Map`** +**`Type: Sequence`** -Write-only setting. Specifies the user or service principal that the job runs as. If not specified, the job runs as the user who created the job. 
-Either `user_name` or `service_principal_name` should be specified. If not, an error is thrown. @@ -3145,22 +2935,29 @@ Either `user_name` or `service_principal_name` should be specified. If not, an e - Type - Description -- - `service_principal_name` +- - `principal` - String - - The application ID of an active service principal. Setting this field requires the `servicePrincipal/user` role. + - The principal (user email address or group name). For deleted principals, `principal` is empty while `principal_id` is populated. -- - `user_name` - - String - - The email of an active workspace user. Non-admin users can only set this field to their own email. +- - `privileges` + - Sequence + - The privileges assigned to the principal. ::: -### jobs._name_.schedule +### external_locations._name_.grants.privileges + +**`Type: Sequence`** + +The privileges assigned to the principal. + + +### external_locations._name_.lifecycle **`Type: Map`** -An optional periodic schedule for this job. The default behavior is that the job only runs when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`. + @@ -3170,29 +2967,24 @@ An optional periodic schedule for this job. The default behavior is that the job - Type - Description -- - `pause_status` - - String - - Indicate whether this schedule is paused or not. - -- - `quartz_cron_expression` - - String - - A Cron expression using Quartz syntax that describes the schedule for a job. See [Cron Trigger](http://www.quartz-scheduler.org/documentation/quartz-2.3.0/tutorials/crontrigger.html) for details. This field is required. - -- - `timezone_id` - - String - - A Java timezone ID. The schedule for a job is resolved with respect to this timezone. See [Java TimeZone](https://docs.oracle.com/javase/7/docs/api/java/util/TimeZone.html) for details. This field is required. +- - `prevent_destroy` + - Boolean + - Lifecycle setting to prevent the resource from being destroyed. 
::: -### jobs._name_.tasks +## jobs -**`Type: Sequence`** +**`Type: Map`** -A list of task specifications to be executed by this job. -It supports up to 1000 elements in write endpoints (:method:jobs/create, :method:jobs/reset, :method:jobs/update, :method:jobs/submit). -Read endpoints return only 100 tasks. If more than 100 tasks are available, you can paginate through them using :method:jobs/get. Use the `next_page_token` field at the object root to determine if more results are available. +The job resource allows you to define [jobs and their corresponding tasks](/api/workspace/jobs/create) in your bundle. For information about jobs, see [_](/jobs/index.md). For a tutorial that uses a Declarative Automation Bundles template to create a job, see [_](/dev-tools/bundles/jobs-tutorial.md). +```yaml +jobs: + <job-name>: + <job-field-name>: <job-field-value> +``` :::list-table @@ -3201,143 +2993,135 @@ Read endpoints return only 100 tasks. If more than 100 tasks are available, you - Type - Description -- - `clean_rooms_notebook_task` - - Map - - The task runs a [clean rooms](https://docs.databricks.com/en/clean-rooms/index.html) notebook when the `clean_rooms_notebook_task` field is present. See [\_](#jobsnametasksclean_rooms_notebook_task). - -- - `condition_task` - - Map - - The task evaluates a condition that can be used to control the execution of other tasks when the `condition_task` field is present. The condition task does not require a cluster to execute and does not support retries or notifications. See [\_](#jobsnametaskscondition_task). +- - `budget_policy_id` + - String + - The id of the user specified budget policy to use for this job. If not specified, a default budget policy may be applied when creating or modifying the job. See `effective_budget_policy_id` for the budget policy used by this workload. -- - `dashboard_task` +- - `continuous` - Map - - The task refreshes a dashboard and sends a snapshot to subscribers. See [\_](#jobsnametasksdashboard_task).
+ - An optional continuous property for this job. The continuous property will ensure that there is always one run executing. Only one of `schedule` and `continuous` can be used. See [\_](#jobsnamecontinuous). -- - `dbt_task` +- - `deployment` - Map - - The task runs one or more dbt commands when the `dbt_task` field is present. The dbt task requires both Databricks SQL and the ability to use a serverless or a pro SQL warehouse. See [\_](#jobsnametasksdbt_task). - -- - `depends_on` - - Sequence - - An optional array of objects specifying the dependency graph of the task. All tasks specified in this field must complete before executing this task. The task will run only if the `run_if` condition is true. The key is `task_key`, and the value is the name assigned to the dependent task. See [\_](#jobsnametasksdepends_on). + - Deployment information for jobs managed by external sources. See [\_](#jobsnamedeployment). - - `description` - String - - An optional description for this task. + - An optional description for the job. The maximum length is 27700 characters in UTF-8 encoding. -- - `disable_auto_optimization` - - Boolean - - An option to disable auto optimization in serverless +- - `edit_mode` + - String + - Edit mode of the job. * `UI_LOCKED`: The job is in a locked UI state and cannot be modified. * `EDITABLE`: The job is in an editable state and can be modified. - - `email_notifications` - Map - - An optional set of email addresses that is notified when runs of this task begin or complete as well as when this task is deleted. The default behavior is to not send any emails. See [\_](#jobsnametasksemail_notifications). + - An optional set of email addresses that is notified when runs of this job begin or complete as well as when this job is deleted. See [\_](#jobsnameemail_notifications). -- - `environment_key` - - String - - The key that references an environment spec in a job. 
This field is required for Python script, Python wheel and dbt tasks when using serverless compute. +- - `environments` + - Sequence + - A list of task execution environment specifications that can be referenced by serverless tasks of this job. For serverless notebook tasks, if the environment_key is not specified, the notebook environment will be used if present. If a jobs environment is specified, it will override the notebook environment. For other serverless tasks, the task environment is required to be specified using environment_key in the task settings. See [\_](#jobsnameenvironments). -- - `existing_cluster_id` +- - `format` - String - - If existing_cluster_id, the ID of an existing cluster that is used for all runs. When running jobs or tasks on an existing cluster, you may need to manually restart the cluster if it stops responding. We suggest running jobs and tasks on new clusters for greater reliability + - This field is deprecated -- - `for_each_task` +- - `git_source` - Map - - The task executes a nested task for every input provided when the `for_each_task` field is present. See [\_](#jobsnametasksfor_each_task). + - An optional specification for a remote Git repository containing the source code used by tasks. Version-controlled source code is supported by notebook, dbt, Python script, and SQL File tasks. If `git_source` is set, these tasks retrieve the file from the remote repository by default. However, this behavior can be overridden by setting `source` to `WORKSPACE` on the task. Note: dbt and SQL File tasks support only version-controlled sources. If dbt or SQL File tasks are used, `git_source` must be defined on the job. See [\_](#jobsnamegit_source). - - `health` - Map - - An optional set of health rules that can be defined for this job. See [\_](#jobsnametaskshealth). - -- - `job_cluster_key` - - String - - If job_cluster_key, this task is executed reusing the cluster specified in `job.settings.job_clusters`. 
+ - An optional set of health rules that can be defined for this job. See [\_](#jobsnamehealth). -- - `libraries` +- - `job_clusters` - Sequence - - An optional list of libraries to be installed on the cluster. The default value is an empty list. See [\_](#jobsnametaskslibraries). + - A list of job cluster specifications that can be shared and reused by tasks of this job. Libraries cannot be declared in a shared job cluster. You must declare dependent libraries in task settings. See [\_](#jobsnamejob_clusters). -- - `max_retries` - - Integer - - An optional maximum number of times to retry an unsuccessful run. A run is considered to be unsuccessful if it completes with the `FAILED` result_state or `INTERNAL_ERROR` `life_cycle_state`. The value `-1` means to retry indefinitely and the value `0` means to never retry. +- - `lifecycle` + - Map + - Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. See [\_](#jobsnamelifecycle). -- - `min_retry_interval_millis` +- - `max_concurrent_runs` - Integer - - An optional minimal interval in milliseconds between the start of the failed run and the subsequent retry run. The default behavior is that unsuccessful runs are immediately retried. - -- - `new_cluster` - - Map - - If new_cluster, a description of a new cluster that is created for each run. See [\_](#jobsnametasksnew_cluster). + - An optional maximum allowed number of concurrent runs of the job. Set this value if you want to be able to execute multiple runs of the same job concurrently. This is useful for example if you trigger your job on a frequent schedule and want to allow consecutive runs to overlap with each other, or if you want to trigger multiple runs which differ by their input parameters. This setting affects only new runs. For example, suppose the job’s concurrency is 4 and there are 4 concurrent active runs. 
Then setting the concurrency to 3 won’t kill any of the active runs. However, from then on, new runs are skipped unless there are fewer than 3 active runs. This value cannot exceed 1000. Setting this value to `0` causes all new runs to be skipped. -- - `notebook_task` - - Map - - The task runs a notebook when the `notebook_task` field is present. See [\_](#jobsnametasksnotebook_task). +- - `name` + - String + - An optional name for the job. The maximum length is 4096 bytes in UTF-8 encoding. - - `notification_settings` - Map - - Optional notification settings that are used when sending notifications to each of the `email_notifications` and `webhook_notifications` for this task. See [\_](#jobsnametasksnotification_settings). - -- - `pipeline_task` - - Map - - The task triggers a pipeline update when the `pipeline_task` field is present. Only pipelines configured to use triggered more are supported. See [\_](#jobsnametaskspipeline_task). - -- - `power_bi_task` - - Map - - The task triggers a Power BI semantic model update when the `power_bi_task` field is present. See [\_](#jobsnametaskspower_bi_task). - -- - `python_wheel_task` - - Map - - The task runs a Python wheel when the `python_wheel_task` field is present. See [\_](#jobsnametaskspython_wheel_task). + - Optional notification settings that are used when sending notifications to each of the `email_notifications` and `webhook_notifications` for this job. See [\_](#jobsnamenotification_settings). -- - `retry_on_timeout` - - Boolean - - An optional policy to specify whether to retry a job when it times out. The default behavior is to not retry on timeout. +- - `parameters` + - Sequence + - Job-level parameter definitions. See [\_](#jobsnameparameters). -- - `run_if` +- - `performance_target` - String - - An optional value specifying the condition determining whether the task is run once its dependencies have been completed. 
* `ALL_SUCCESS`: All dependencies have executed and succeeded * `AT_LEAST_ONE_SUCCESS`: At least one dependency has succeeded * `NONE_FAILED`: None of the dependencies have failed and at least one was executed * `ALL_DONE`: All dependencies have been completed * `AT_LEAST_ONE_FAILED`: At least one dependency failed * `ALL_FAILED`: ALl dependencies have failed + - The performance mode on a serverless job. This field determines the level of compute performance or cost-efficiency for the run. The performance target does not apply to tasks that run on Serverless GPU compute. * `STANDARD`: Enables cost-efficient execution of serverless workloads. * `PERFORMANCE_OPTIMIZED`: Prioritizes fast startup and execution times through rapid scaling and optimized cluster performance. -- - `run_job_task` - - Map - - The task triggers another job when the `run_job_task` field is present. See [\_](#jobsnametasksrun_job_task). +- - `permissions` + - Sequence + - See [\_](#jobsnamepermissions). -- - `spark_jar_task` +- - `queue` - Map - - The task runs a JAR when the `spark_jar_task` field is present. See [\_](#jobsnametasksspark_jar_task). + - The queue settings of the job. See [\_](#jobsnamequeue). -- - `spark_python_task` +- - `run_as` - Map - - The task runs a Python file when the `spark_python_task` field is present. See [\_](#jobsnametasksspark_python_task). + - Write-only setting. Specifies the user or service principal that the job runs as. If not specified, the job runs as the user who created the job. Either `user_name` or `service_principal_name` should be specified. If not, an error is thrown. See [\_](#jobsnamerun_as). -- - `spark_submit_task` +- - `schedule` - Map - - (Legacy) The task runs the spark-submit script when the `spark_submit_task` field is present. This task can run only on new clusters and is not compatible with serverless compute. In the `new_cluster` specification, `libraries` and `spark_conf` are not supported. 
Instead, use `--jars` and `--py-files` to add Java and Python libraries and `--conf` to set the Spark configurations. `master`, `deploy-mode`, and `executor-cores` are automatically configured by Databricks; you _cannot_ specify them in parameters. By default, the Spark submit job uses all available memory (excluding reserved memory for Databricks services). You can set `--driver-memory`, and `--executor-memory` to a smaller value to leave some room for off-heap usage. The `--jars`, `--py-files`, `--files` arguments support DBFS and S3 paths. See [\_](#jobsnametasksspark_submit_task). + - An optional periodic schedule for this job. The default behavior is that the job only runs when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`. See [\_](#jobsnameschedule). -- - `sql_task` +- - `tags` - Map - - The task runs a SQL query or file, or it refreshes a SQL alert or a legacy SQL dashboard when the `sql_task` field is present. See [\_](#jobsnametaskssql_task). + - A map of tags associated with the job. These are forwarded to the cluster as cluster tags for jobs clusters, and are subject to the same limitations as cluster tags. A maximum of 25 tags can be added to the job. -- - `task_key` - - String - - A unique name for the task. This field is used to refer to this task from other tasks. This field is required and must be unique within its parent job. On Update or Reset, this field is used to reference the tasks to be updated or reset. +- - `tasks` + - Sequence + - A list of task specifications to be executed by this job. It supports up to 1000 elements in write endpoints (:method:jobs/create, :method:jobs/reset, :method:jobs/update, :method:jobs/submit). Read endpoints return only 100 tasks. If more than 100 tasks are available, you can paginate through them using :method:jobs/get. Use the `next_page_token` field at the object root to determine if more results are available. See [\_](#jobsnametasks). 
- - `timeout_seconds` - Integer - - An optional timeout applied to each run of this job task. A value of `0` means no timeout. + - An optional timeout applied to each run of this job. A value of `0` means no timeout. + +- - `trigger` + - Map + - A configuration to trigger a run when certain conditions are met. The default behavior is that the job runs only when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`. See [\_](#jobsnametrigger). - - `webhook_notifications` - Map - - A collection of system notification IDs to notify when runs of this task begin or complete. The default behavior is to not send any system notifications. See [\_](#jobsnametaskswebhook_notifications). + - A collection of system notification IDs to notify when runs of this job begin or complete. See [\_](#jobsnamewebhook_notifications). ::: -### jobs._name_.tasks.clean_rooms_notebook_task +**Example** + +The following example defines a job with the resource key `hello-job` with one notebook task: + +```yaml +resources: + jobs: + hello-job: + name: hello-job + tasks: + - task_key: hello-task + notebook_task: + notebook_path: ./hello.py +``` + +For information about defining job tasks and overriding job settings, see [_](/dev-tools/bundles/job-task-types.md), [_](/dev-tools/bundles/job-task-override.md), and [_](/dev-tools/bundles/cluster-override.md). + +### jobs._name_.continuous **`Type: Map`** -The task runs a [clean rooms](https://docs.databricks.com/en/clean-rooms/index.html) notebook -when the `clean_rooms_notebook_task` field is present. +An optional continuous property for this job. The continuous property will ensure that there is always one run executing. Only one of `schedule` and `continuous` can be used. @@ -3347,31 +3131,22 @@ when the `clean_rooms_notebook_task` field is present. - Type - Description -- - `clean_room_name` - - String - - The clean room that the notebook belongs to. 
- -- - `etag` +- - `pause_status` - String - - Checksum to validate the freshness of the notebook resource (i.e. the notebook being run is the latest version). It can be fetched by calling the :method:cleanroomassets/get API. - -- - `notebook_base_parameters` - - Map - - Base parameters to be used for the clean room notebook job. + - Indicate whether the continuous execution of the job is paused or not. Defaults to UNPAUSED. -- - `notebook_name` +- - `task_retry_mode` - String - - Name of the notebook being run. + - Indicate whether the continuous job is applying task-level retries or not. Defaults to NEVER. ::: -### jobs._name_.tasks.condition_task +### jobs._name_.deployment **`Type: Map`** -The task evaluates a condition that can be used to control the execution of other tasks when the `condition_task` field is present. -The condition task does not require a cluster to execute and does not support retries or notifications. +Deployment information for jobs managed by external sources. @@ -3381,26 +3156,22 @@ The condition task does not require a cluster to execute and does not support re - Type - Description -- - `left` - - String - - The left operand of the condition task. Can be either a string value or a job state or parameter reference. - -- - `op` +- - `kind` - String - - * `EQUAL_TO`, `NOT_EQUAL` operators perform string comparison of their operands. This means that `“12.0” == “12”` will evaluate to `false`. * `GREATER_THAN`, `GREATER_THAN_OR_EQUAL`, `LESS_THAN`, `LESS_THAN_OR_EQUAL` operators perform numeric comparison of their operands. `“12.0” >= “12”` will evaluate to `true`, `“10.0” >= “12”` will evaluate to `false`. The boolean comparison to task values can be implemented with operators `EQUAL_TO`, `NOT_EQUAL`. If a task value was set to a boolean value, it will be serialized to `“true”` or `“false”` for the comparison. + - The kind of deployment that manages the job. * `BUNDLE`: The job is managed by a Databricks Asset Bundle.
* `SYSTEM_MANAGED`: The job is managed by Databricks and is read-only. -- - `right` +- - `metadata_file_path` - String - - The right operand of the condition task. Can be either a string value or a job state or parameter reference. + - Path of the file that contains deployment metadata. ::: -### jobs._name_.tasks.dashboard_task +### jobs._name_.email_notifications **`Type: Map`** -The task refreshes a dashboard and sends a snapshot to subscribers. +An optional set of email addresses that is notified when runs of this job begin or complete as well as when this job is deleted. @@ -3410,26 +3181,40 @@ The task refreshes a dashboard and sends a snapshot to subscribers. - Type - Description -- - `dashboard_id` - - String - - +- - `no_alert_for_skipped_runs` + - Boolean + - This field is deprecated -- - `subscription` - - Map - - See [\_](#jobsnametasksdashboard_tasksubscription). +- - `on_duration_warning_threshold_exceeded` + - Sequence + - A list of email addresses to be notified when the duration of a run exceeds the threshold specified for the `RUN_DURATION_SECONDS` metric in the `health` field. If no rule for the `RUN_DURATION_SECONDS` metric is specified in the `health` field for the job, notifications are not sent. -- - `warehouse_id` - - String - - Optional: The warehouse id to execute the dashboard with for the schedule. If not specified, the default warehouse of the dashboard will be used. +- - `on_failure` + - Sequence + - A list of email addresses to be notified when a run unsuccessfully completes. A run is considered to have completed unsuccessfully if it ends with an `INTERNAL_ERROR` `life_cycle_state` or a `FAILED`, or `TIMED_OUT` result_state. If this is not specified on job creation, reset, or update the list is empty, and notifications are not sent. + +- - `on_start` + - Sequence + - A list of email addresses to be notified when a run begins. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent. 
+ +- - `on_streaming_backlog_exceeded` + - Sequence + - A list of email addresses to notify when any streaming backlog thresholds are exceeded for any stream. Streaming backlog thresholds can be set in the `health` field using the following metrics: `STREAMING_BACKLOG_BYTES`, `STREAMING_BACKLOG_RECORDS`, `STREAMING_BACKLOG_SECONDS`, or `STREAMING_BACKLOG_FILES`. Alerting is based on the 10-minute average of these metrics. If the issue persists, notifications are resent every 30 minutes. + +- - `on_success` + - Sequence + - A list of email addresses to be notified when a run successfully completes. A run is considered to have completed successfully if it ends with a `TERMINATED` `life_cycle_state` and a `SUCCESS` result_state. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent. ::: -### jobs._name_.tasks.dashboard_task.subscription +### jobs._name_.environments -**`Type: Map`** +**`Type: Sequence`** - +A list of task execution environment specifications that can be referenced by serverless tasks of this job. +For serverless notebook tasks, if the environment_key is not specified, the notebook environment will be used if present. If a jobs environment is specified, it will override the notebook environment. +For other serverless tasks, the task environment is required to be specified using environment_key in the task settings. @@ -3439,26 +3224,23 @@ The task refreshes a dashboard and sends a snapshot to subscribers. - Type - Description -- - `custom_subject` +- - `environment_key` - String - - Optional: Allows users to specify a custom subject line on the email sent to subscribers. - -- - `paused` - - Boolean - - When true, the subscription will not send emails. + - The key of an environment. It has to be unique within a job. -- - `subscribers` - - Sequence - - See [\_](#jobsnametasksdashboard_tasksubscriptionsubscribers). 
+- - `spec` + - Map + - The environment entity used to preserve serverless environment side panel, jobs' environment for non-notebook task, and DLT's environment for classic and serverless pipelines. In this minimal environment spec, only pip and java dependencies are supported. See [\_](#jobsnameenvironmentsspec). ::: -### jobs._name_.tasks.dashboard_task.subscription.subscribers +### jobs._name_.environments.spec -**`Type: Sequence`** +**`Type: Map`** - +The environment entity used to preserve serverless environment side panel, jobs' environment for non-notebook task, and DLT's environment for classic and serverless pipelines. +In this minimal environment spec, only pip and java dependencies are supported. @@ -3468,22 +3250,38 @@ The task refreshes a dashboard and sends a snapshot to subscribers. - Type - Description -- - `destination_id` +- - `base_environment` - String - - + - The `base_environment` key refers to an `env.yaml` file that specifies an environment version and a collection of dependencies required for the environment setup. This `env.yaml` file may itself include a `base_environment` reference pointing to another `env_1.yaml` file. However, when used as a base environment, `env_1.yaml` (or further nested references) will not be processed or included in the final environment, meaning that the resolution of `base_environment` references is not recursive. -- - `user_name` +- - `client` + - String + - This field is deprecated + +- - `dependencies` + - Sequence + - List of pip dependencies, as supported by the version of pip in this environment. + +- - `environment_version` - String + - Either `environment_version` or `base_environment` needs to be provided. Environment version used by the environment. Each version comes with a specific Python version and a set of Python packages. The version is a string, consisting of an integer. 
+ +- - `java_dependencies` + - Sequence - ::: -### jobs._name_.tasks.dbt_task +### jobs._name_.git_source **`Type: Map`** -The task runs one or more dbt commands when the `dbt_task` field is present. The dbt task requires both Databricks SQL and the ability to use a serverless or a pro SQL warehouse. +An optional specification for a remote Git repository containing the source code used by tasks. Version-controlled source code is supported by notebook, dbt, Python script, and SQL File tasks. + +If `git_source` is set, these tasks retrieve the file from the remote repository by default. However, this behavior can be overridden by setting `source` to `WORKSPACE` on the task. + +Note: dbt and SQL File tasks support only version-controlled sources. If dbt or SQL File tasks are used, `git_source` must be defined on the job. @@ -3493,43 +3291,42 @@ The task runs one or more dbt commands when the `dbt_task` field is present. The - Type - Description -- - `catalog` +- - `git_branch` - String - - Optional name of the catalog to use. The value is the top level in the 3-level namespace of Unity Catalog (catalog / schema / relation). The catalog value can only be specified if a warehouse_id is specified. Requires dbt-databricks >= 1.1.1. - -- - `commands` - - Sequence - - A list of dbt commands to execute. All commands must start with `dbt`. This parameter must not be empty. A maximum of up to 10 commands can be provided. + - Name of the branch to be checked out and used by this job. This field cannot be specified in conjunction with git_tag or git_commit. -- - `profiles_directory` +- - `git_commit` - String - - Optional (relative) path to the profiles directory. Can only be specified if no warehouse_id is specified. If no warehouse_id is specified and this folder is unset, the root directory is used. + - Commit to be checked out and used by this job. This field cannot be specified in conjunction with git_branch or git_tag. 
-- - `project_directory` +- - `git_provider` - String - - Path to the project directory. Optional for Git sourced tasks, in which case if no value is provided, the root of the Git repository is used. + - Unique identifier of the service used to host the Git repository. The value is case insensitive. -- - `schema` - - String - - Optional schema to write to. This parameter is only used when a warehouse_id is also provided. If not provided, the `default` schema is used. +- - `git_snapshot` + - Map + - Read-only state of the remote repository at the time the job was run. This field is only included on job runs. See [\_](#jobsnamegit_sourcegit_snapshot). -- - `source` +- - `git_tag` - String - - Optional location type of the project directory. When set to `WORKSPACE`, the project will be retrieved from the local Databricks workspace. When set to `GIT`, the project will be retrieved from a Git repository defined in `git_source`. If the value is empty, the task will use `GIT` if `git_source` is defined and `WORKSPACE` otherwise. * `WORKSPACE`: Project is located in Databricks workspace. * `GIT`: Project is located in cloud Git provider. + - Name of the tag to be checked out and used by this job. This field cannot be specified in conjunction with git_branch or git_commit. -- - `warehouse_id` +- - `git_url` - String - - ID of the SQL warehouse to connect to. If provided, we automatically generate and provide the profile and connection details to dbt. It can be overridden on a per-command basis by using the `--profiles-dir` command line argument. + - URL of the repository to be cloned by this job. + +- - `sparse_checkout` + - Map + - See [\_](#jobsnamegit_sourcesparse_checkout). ::: -### jobs._name_.tasks.depends_on +### jobs._name_.git_source.git_snapshot -**`Type: Sequence`** +**`Type: Map`** -An optional array of objects specifying the dependency graph of the task. All tasks specified in this field must complete before executing this task. 
The task will run only if the `run_if` condition is true. -The key is `task_key`, and the value is the name assigned to the dependent task. +Read-only state of the remote repository at the time the job was run. This field is only included on job runs. @@ -3539,22 +3336,18 @@ The key is `task_key`, and the value is the name assigned to the dependent task. - Type - Description -- - `outcome` - - String - - Can only be specified on condition task dependencies. The outcome of the dependent task that must be met for this task to run. - -- - `task_key` +- - `used_commit` - String - - The name of the task this task depends on. + - Commit that was used to execute the run. If git_branch was specified, this points to the HEAD of the branch at the time of the run; if git_tag was specified, this points to the commit the tag points to. ::: -### jobs._name_.tasks.email_notifications +### jobs._name_.git_source.sparse_checkout **`Type: Map`** -An optional set of email addresses that is notified when runs of this task begin or complete as well as when this task is deleted. The default behavior is to not send any emails. + @@ -3564,63 +3357,14 @@ An optional set of email addresses that is notified when runs of this task begin - Type - Description -- - `no_alert_for_skipped_runs` - - Boolean - - This field is deprecated - -- - `on_duration_warning_threshold_exceeded` +- - `patterns` - Sequence - - A list of email addresses to be notified when the duration of a run exceeds the threshold specified for the `RUN_DURATION_SECONDS` metric in the `health` field. If no rule for the `RUN_DURATION_SECONDS` metric is specified in the `health` field for the job, notifications are not sent. + - List of patterns to include for sparse checkout. -- - `on_failure` - - Sequence - - A list of email addresses to be notified when a run unsuccessfully completes. 
A run is considered to have completed unsuccessfully if it ends with an `INTERNAL_ERROR` `life_cycle_state` or a `FAILED`, or `TIMED_OUT` result_state. If this is not specified on job creation, reset, or update the list is empty, and notifications are not sent. +::: -- - `on_start` - - Sequence - - A list of email addresses to be notified when a run begins. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent. -- - `on_streaming_backlog_exceeded` - - Sequence - - A list of email addresses to notify when any streaming backlog thresholds are exceeded for any stream. Streaming backlog thresholds can be set in the `health` field using the following metrics: `STREAMING_BACKLOG_BYTES`, `STREAMING_BACKLOG_RECORDS`, `STREAMING_BACKLOG_SECONDS`, or `STREAMING_BACKLOG_FILES`. Alerting is based on the 10-minute average of these metrics. If the issue persists, notifications are resent every 30 minutes. - -- - `on_success` - - Sequence - - A list of email addresses to be notified when a run successfully completes. A run is considered to have completed successfully if it ends with a `TERMINATED` `life_cycle_state` and a `SUCCESS` result_state. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent. - -::: - - -### jobs._name_.tasks.for_each_task - -**`Type: Map`** - -The task executes a nested task for every input provided when the `for_each_task` field is present. - - - -:::list-table - -- - Key - - Type - - Description - -- - `concurrency` - - Integer - - An optional maximum allowed number of concurrent runs of the task. Set this value if you want to be able to execute multiple runs of the task concurrently. - -- - `inputs` - - String - - Array for task to iterate on. This can be a JSON string or a reference to an array parameter. 
- -- - `task` - - Map - - Configuration for the task that will be run for each element in the array - -::: - - -### jobs._name_.tasks.health +### jobs._name_.health **`Type: Map`** @@ -3636,12 +3380,12 @@ An optional set of health rules that can be defined for this job. - - `rules` - Sequence - - See [\_](#jobsnametaskshealthrules). + - See [\_](#jobsnamehealthrules). ::: -### jobs._name_.tasks.health.rules +### jobs._name_.health.rules **`Type: Sequence`** @@ -3670,12 +3414,11 @@ An optional set of health rules that can be defined for this job. ::: -### jobs._name_.tasks.libraries +### jobs._name_.job_clusters **`Type: Sequence`** -An optional list of libraries to be installed on the cluster. -The default value is an empty list. +A list of job cluster specifications that can be shared and reused by tasks of this job. Libraries cannot be declared in a shared job cluster. You must declare dependent libraries in task settings. @@ -3685,123 +3428,22 @@ The default value is an empty list. - Type - Description -- - `cran` - - Map - - Specification of a CRAN library to be installed as part of the library. See [\_](#jobsnametaskslibrariescran). - -- - `egg` - - String - - This field is deprecated - -- - `jar` +- - `job_cluster_key` - String - - URI of the JAR library to install. Supported URIs include Workspace paths, Unity Catalog Volumes paths, and S3 URIs. For example: `{ "jar": "/Workspace/path/to/library.jar" }`, `{ "jar" : "/Volumes/path/to/library.jar" }` or `{ "jar": "s3://my-bucket/library.jar" }`. If S3 is used, please make sure the cluster has read access on the library. You may need to launch the cluster with an IAM role to access the S3 URI. - -- - `maven` - - Map - - Specification of a maven library to be installed. For example: `{ "coordinates": "org.jsoup:jsoup:1.7.2" }`. See [\_](#jobsnametaskslibrariesmaven). + - A unique name for the job cluster. This field is required and must be unique within the job. 
`JobTaskSettings` may refer to this field to determine which cluster to launch for the task execution. -- - `pypi` +- - `new_cluster` - Map - - Specification of a PyPi library to be installed. For example: `{ "package": "simplejson" }`. See [\_](#jobsnametaskslibrariespypi). - -- - `requirements` - - String - - URI of the requirements.txt file to install. Only Workspace paths and Unity Catalog Volumes paths are supported. For example: `{ "requirements": "/Workspace/path/to/requirements.txt" }` or `{ "requirements" : "/Volumes/path/to/requirements.txt" }` - -- - `whl` - - String - - URI of the wheel library to install. Supported URIs include Workspace paths, Unity Catalog Volumes paths, and S3 URIs. For example: `{ "whl": "/Workspace/path/to/library.whl" }`, `{ "whl" : "/Volumes/path/to/library.whl" }` or `{ "whl": "s3://my-bucket/library.whl" }`. If S3 is used, please make sure the cluster has read access on the library. You may need to launch the cluster with an IAM role to access the S3 URI. - -::: - - -### jobs._name_.tasks.libraries.cran - -**`Type: Map`** - -Specification of a CRAN library to be installed as part of the library - - - -:::list-table - -- - Key - - Type - - Description - -- - `package` - - String - - The name of the CRAN package to install. - -- - `repo` - - String - - The repository where the package can be found. If not specified, the default CRAN repo is used. - -::: - - -### jobs._name_.tasks.libraries.maven - -**`Type: Map`** - -Specification of a maven library to be installed. For example: -`{ "coordinates": "org.jsoup:jsoup:1.7.2" }` - - - -:::list-table - -- - Key - - Type - - Description - -- - `coordinates` - - String - - Gradle-style maven coordinates. For example: "org.jsoup:jsoup:1.7.2". - -- - `exclusions` - - Sequence - - List of dependences to exclude. For example: `["slf4j:slf4j", "*:hadoop-client"]`. 
Maven dependency exclusions: https://maven.apache.org/guides/introduction/introduction-to-optional-and-excludes-dependencies.html. - -- - `repo` - - String - - Maven repo to install the Maven package from. If omitted, both Maven Central Repository and Spark Packages are searched. - -::: - - -### jobs._name_.tasks.libraries.pypi - -**`Type: Map`** - -Specification of a PyPi library to be installed. For example: -`{ "package": "simplejson" }` - - - -:::list-table - -- - Key - - Type - - Description - -- - `package` - - String - - The name of the pypi package to install. An optional exact version specification is also supported. Examples: "simplejson" and "simplejson==3.8.0". - -- - `repo` - - String - - The repository where the package can be found. If not specified, the default pip index is used. + - If new_cluster, a description of a cluster that is created for each task. See [\_](#jobsnamejob_clustersnew_cluster). ::: -### jobs._name_.tasks.new_cluster +### jobs._name_.job_clusters.new_cluster **`Type: Map`** -If new_cluster, a description of a new cluster that is created for each run. +If new_cluster, a description of a cluster that is created for each task. @@ -3817,7 +3459,7 @@ If new_cluster, a description of a new cluster that is created for each run. - - `autoscale` - Map - - Parameters needed in order to automatically scale clusters up and down based on load. Note: autoscaling works best with DB runtime versions 3.0 or later. See [\_](#jobsnametasksnew_clusterautoscale). + - Parameters needed in order to automatically scale clusters up and down based on load. Note: autoscaling works best with DB runtime versions 3.0 or later. See [\_](#jobsnamejob_clustersnew_clusterautoscale). - - `autotermination_minutes` - Integer @@ -3825,15 +3467,15 @@ If new_cluster, a description of a new cluster that is created for each run. - - `aws_attributes` - Map - - Attributes related to clusters running on Amazon Web Services. 
If not specified at cluster creation, a set of default values will be used. See [\_](#jobsnametasksnew_clusteraws_attributes). + - Attributes related to clusters running on Amazon Web Services. If not specified at cluster creation, a set of default values will be used. See [\_](#jobsnamejob_clustersnew_clusteraws_attributes). - - `azure_attributes` - Map - - Attributes related to clusters running on Microsoft Azure. If not specified at cluster creation, a set of default values will be used. See [\_](#jobsnametasksnew_clusterazure_attributes). + - Attributes related to clusters running on Microsoft Azure. If not specified at cluster creation, a set of default values will be used. See [\_](#jobsnamejob_clustersnew_clusterazure_attributes). - - `cluster_log_conf` - Map - - The configuration for delivering spark logs to a long-term storage destination. Three kinds of destinations (DBFS, S3 and Unity Catalog volumes) are supported. Only one destination can be specified for one cluster. If the conf is given, the logs will be delivered to the destination every `5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while the destination of executor logs is `$destination/$clusterId/executor`. See [\_](#jobsnametasksnew_clustercluster_log_conf). + - The configuration for delivering spark logs to a long-term storage destination. Three kinds of destinations (DBFS, S3 and Unity Catalog volumes) are supported. Only one destination can be specified for one cluster. If the conf is given, the logs will be delivered to the destination every `5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while the destination of executor logs is `$destination/$clusterId/executor`. See [\_](#jobsnamejob_clustersnew_clustercluster_log_conf). - - `cluster_name` - String @@ -3849,19 +3491,23 @@ If new_cluster, a description of a new cluster that is created for each run. - - `docker_image` - Map - - See [\_](#jobsnametasksnew_clusterdocker_image). 
+ - See [\_](#jobsnamejob_clustersnew_clusterdocker_image). - - `driver_instance_pool_id` - String - The optional ID of the instance pool for the driver of the cluster belongs. The pool cluster uses the instance pool with id (instance_pool_id) if the driver pool is not assigned. +- - `driver_node_type_flexibility` + - Map + - Flexible node type configuration for the driver node. See [\_](#jobsnamejob_clustersnew_clusterdriver_node_type_flexibility). + - - `driver_node_type_id` - String - The node type of the Spark driver. Note that this field is optional; if unset, the driver node type will be set as the same value as `node_type_id` defined above. This field, along with node_type_id, should not be set if virtual_cluster_size is set. If both driver_node_type_id, node_type_id, and virtual_cluster_size are specified, driver_node_type_id and node_type_id take precedence. - - `enable_elastic_disk` - Boolean - - Autoscaling Local Storage: when enabled, this cluster will dynamically acquire additional disk space when its Spark workers are running low on disk space. This feature requires specific AWS permissions to function correctly - refer to the User Guide for more details. + - Autoscaling Local Storage: when enabled, this cluster will dynamically acquire additional disk space when its Spark workers are running low on disk space. - - `enable_local_disk_encryption` - Boolean @@ -3869,11 +3515,11 @@ If new_cluster, a description of a new cluster that is created for each run. - - `gcp_attributes` - Map - - Attributes related to clusters running on Google Cloud Platform. If not specified at cluster creation, a set of default values will be used. See [\_](#jobsnametasksnew_clustergcp_attributes). + - Attributes related to clusters running on Google Cloud Platform. If not specified at cluster creation, a set of default values will be used. See [\_](#jobsnamejob_clustersnew_clustergcp_attributes). - - `init_scripts` - Sequence - - The configuration for storing init scripts. 
Any number of destinations can be specified. The scripts are executed sequentially in the order provided. If `cluster_log_conf` is specified, init script logs are sent to `//init_scripts`. See [\_](#jobsnametasksnew_clusterinit_scripts). + - The configuration for storing init scripts. Any number of destinations can be specified. The scripts are executed sequentially in the order provided. If `cluster_log_conf` is specified, init script logs are sent to `//init_scripts`. See [\_](#jobsnamejob_clustersnew_clusterinit_scripts). - - `instance_pool_id` - String @@ -3935,14 +3581,18 @@ If new_cluster, a description of a new cluster that is created for each run. - Boolean - This field can only be used when `kind = CLASSIC_PREVIEW`. `effective_spark_version` is determined by `spark_version` (DBR release), this field `use_ml_runtime`, and whether `node_type_id` is gpu node or not. +- - `worker_node_type_flexibility` + - Map + - Flexible node type configuration for worker nodes. See [\_](#jobsnamejob_clustersnew_clusterworker_node_type_flexibility). + - - `workload_type` - Map - - Cluster Attributes showing for clusters workload types. See [\_](#jobsnametasksnew_clusterworkload_type). + - Cluster Attributes showing for clusters workload types. See [\_](#jobsnamejob_clustersnew_clusterworkload_type). ::: -### jobs._name_.tasks.new_cluster.autoscale +### jobs._name_.job_clusters.new_cluster.autoscale **`Type: Map`** @@ -3968,7 +3618,7 @@ Note: autoscaling works best with DB runtime versions 3.0 or later. ::: -### jobs._name_.tasks.new_cluster.aws_attributes +### jobs._name_.job_clusters.new_cluster.aws_attributes **`Type: Map`** @@ -4021,12 +3671,12 @@ If not specified at cluster creation, a set of default values will be used. - - `zone_id` - String - - Identifier for the availability zone/datacenter in which the cluster resides. This string will be of a form like "us-west-2a". The provided availability zone must be in the same region as the Databricks deployment. 
For example, "us-west-2a" is not a valid zone id if the Databricks deployment resides in the "us-east-1" region. This is an optional field at cluster creation, and if not specified, a default zone will be used. If the zone specified is "auto", will try to place cluster in a zone with high availability, and will retry placement in a different AZ if there is not enough capacity. The list of available zones as well as the default value can be found by using the `List Zones` method. + - Identifier for the availability zone/datacenter in which the cluster resides. This string will be of a form like "us-west-2a". The provided availability zone must be in the same region as the Databricks deployment. For example, "us-west-2a" is not a valid zone id if the Databricks deployment resides in the "us-east-1" region. This is an optional field at cluster creation, and if not specified, the zone "auto" will be used. If the zone specified is "auto", will try to place cluster in a zone with high availability, and will retry placement in a different AZ if there is not enough capacity. The list of available zones as well as the default value can be found by using the `List Zones` method. ::: -### jobs._name_.tasks.new_cluster.azure_attributes +### jobs._name_.job_clusters.new_cluster.azure_attributes **`Type: Map`** @@ -4051,7 +3701,7 @@ If not specified at cluster creation, a set of default values will be used. - - `log_analytics_info` - Map - - Defines values necessary to configure and run Azure Log Analytics agent. See [\_](#jobsnametasksnew_clusterazure_attributeslog_analytics_info). + - Defines values necessary to configure and run Azure Log Analytics agent. See [\_](#jobsnamejob_clustersnew_clusterazure_attributeslog_analytics_info). - - `spot_bid_max_price` - Any @@ -4060,7 +3710,7 @@ If not specified at cluster creation, a set of default values will be used. 
::: -### jobs._name_.tasks.new_cluster.azure_attributes.log_analytics_info +### jobs._name_.job_clusters.new_cluster.azure_attributes.log_analytics_info **`Type: Map`** @@ -4085,7 +3735,7 @@ Defines values necessary to configure and run Azure Log Analytics agent ::: -### jobs._name_.tasks.new_cluster.cluster_log_conf +### jobs._name_.job_clusters.new_cluster.cluster_log_conf **`Type: Map`** @@ -4105,20 +3755,20 @@ the destination of executor logs is `$destination/$clusterId/executor`. - - `dbfs` - Map - - destination needs to be provided. e.g. `{ "dbfs" : { "destination" : "dbfs:/home/cluster_log" } }`. See [\_](#jobsnametasksnew_clustercluster_log_confdbfs). + - destination needs to be provided. e.g. `{ "dbfs" : { "destination" : "dbfs:/home/cluster_log" } }`. See [\_](#jobsnamejob_clustersnew_clustercluster_log_confdbfs). - - `s3` - Map - - destination and either the region or endpoint need to be provided. e.g. `{ "s3": { "destination" : "s3://cluster_log_bucket/prefix", "region" : "us-west-2" } }` Cluster iam role is used to access s3, please make sure the cluster iam role in `instance_profile_arn` has permission to write data to the s3 destination. See [\_](#jobsnametasksnew_clustercluster_log_confs3). + - destination and either the region or endpoint need to be provided. e.g. `{ "s3": { "destination" : "s3://cluster_log_bucket/prefix", "region" : "us-west-2" } }` Cluster iam role is used to access s3, please make sure the cluster iam role in `instance_profile_arn` has permission to write data to the s3 destination. See [\_](#jobsnamejob_clustersnew_clustercluster_log_confs3). - - `volumes` - Map - - destination needs to be provided, e.g. `{ "volumes": { "destination": "/Volumes/catalog/schema/volume/cluster_log" } }`. See [\_](#jobsnametasksnew_clustercluster_log_confvolumes). + - destination needs to be provided, e.g. `{ "volumes": { "destination": "/Volumes/catalog/schema/volume/cluster_log" } }`. 
See [\_](#jobsnamejob_clustersnew_clustercluster_log_confvolumes). ::: -### jobs._name_.tasks.new_cluster.cluster_log_conf.dbfs +### jobs._name_.job_clusters.new_cluster.cluster_log_conf.dbfs **`Type: Map`** @@ -4140,7 +3790,7 @@ destination needs to be provided. e.g. ::: -### jobs._name_.tasks.new_cluster.cluster_log_conf.s3 +### jobs._name_.job_clusters.new_cluster.cluster_log_conf.s3 **`Type: Map`** @@ -4188,7 +3838,7 @@ Cluster iam role is used to access s3, please make sure the cluster iam role in ::: -### jobs._name_.tasks.new_cluster.cluster_log_conf.volumes +### jobs._name_.job_clusters.new_cluster.cluster_log_conf.volumes **`Type: Map`** @@ -4210,7 +3860,7 @@ destination needs to be provided, e.g. ::: -### jobs._name_.tasks.new_cluster.docker_image +### jobs._name_.job_clusters.new_cluster.docker_image **`Type: Map`** @@ -4226,7 +3876,7 @@ destination needs to be provided, e.g. - - `basic_auth` - Map - - See [\_](#jobsnametasksnew_clusterdocker_imagebasic_auth). + - See [\_](#jobsnamejob_clustersnew_clusterdocker_imagebasic_auth). - - `url` - String @@ -4235,7 +3885,7 @@ destination needs to be provided, e.g. ::: -### jobs._name_.tasks.new_cluster.docker_image.basic_auth +### jobs._name_.job_clusters.new_cluster.docker_image.basic_auth **`Type: Map`** @@ -4260,7 +3910,28 @@ destination needs to be provided, e.g. ::: -### jobs._name_.tasks.new_cluster.gcp_attributes +### jobs._name_.job_clusters.new_cluster.driver_node_type_flexibility + +**`Type: Map`** + +Flexible node type configuration for the driver node. + + + +:::list-table + +- - Key + - Type + - Description + +- - `alternate_node_type_ids` + - Sequence + - A list of node type IDs to use as fallbacks when the primary node type is unavailable. + +::: + + +### jobs._name_.job_clusters.new_cluster.gcp_attributes **`Type: Map`** @@ -4306,7 +3977,7 @@ If not specified at cluster creation, a set of default values will be used. 
::: -### jobs._name_.tasks.new_cluster.init_scripts +### jobs._name_.job_clusters.new_cluster.init_scripts **`Type: Sequence`** @@ -4324,7 +3995,7 @@ If `cluster_log_conf` is specified, init script logs are sent to `/ - - `abfss` - Map - - Contains the Azure Data Lake Storage destination path. See [\_](#jobsnametasksnew_clusterinit_scriptsabfss). + - Contains the Azure Data Lake Storage destination path. See [\_](#jobsnamejob_clustersnew_clusterinit_scriptsabfss). - - `dbfs` - Map @@ -4332,28 +4003,28 @@ If `cluster_log_conf` is specified, init script logs are sent to `/ - - `file` - Map - - destination needs to be provided, e.g. `{ "file": { "destination": "file:/my/local/file.sh" } }`. See [\_](#jobsnametasksnew_clusterinit_scriptsfile). + - destination needs to be provided, e.g. `{ "file": { "destination": "file:/my/local/file.sh" } }`. See [\_](#jobsnamejob_clustersnew_clusterinit_scriptsfile). - - `gcs` - Map - - destination needs to be provided, e.g. `{ "gcs": { "destination": "gs://my-bucket/file.sh" } }`. See [\_](#jobsnametasksnew_clusterinit_scriptsgcs). + - destination needs to be provided, e.g. `{ "gcs": { "destination": "gs://my-bucket/file.sh" } }`. See [\_](#jobsnamejob_clustersnew_clusterinit_scriptsgcs). - - `s3` - Map - - destination and either the region or endpoint need to be provided. e.g. `{ \"s3\": { \"destination\": \"s3://cluster_log_bucket/prefix\", \"region\": \"us-west-2\" } }` Cluster iam role is used to access s3, please make sure the cluster iam role in `instance_profile_arn` has permission to write data to the s3 destination. See [\_](#jobsnametasksnew_clusterinit_scriptss3). + - destination and either the region or endpoint need to be provided. e.g. `{ \"s3\": { \"destination\": \"s3://cluster_log_bucket/prefix\", \"region\": \"us-west-2\" } }` Cluster iam role is used to access s3, please make sure the cluster iam role in `instance_profile_arn` has permission to write data to the s3 destination. 
See [\_](#jobsnamejob_clustersnew_clusterinit_scriptss3). - - `volumes` - Map - - destination needs to be provided. e.g. `{ \"volumes\" : { \"destination\" : \"/Volumes/my-init.sh\" } }`. See [\_](#jobsnametasksnew_clusterinit_scriptsvolumes). + - destination needs to be provided. e.g. `{ \"volumes\" : { \"destination\" : \"/Volumes/my-init.sh\" } }`. See [\_](#jobsnamejob_clustersnew_clusterinit_scriptsvolumes). - - `workspace` - Map - - destination needs to be provided, e.g. `{ "workspace": { "destination": "/cluster-init-scripts/setup-datadog.sh" } }`. See [\_](#jobsnametasksnew_clusterinit_scriptsworkspace). + - destination needs to be provided, e.g. `{ "workspace": { "destination": "/cluster-init-scripts/setup-datadog.sh" } }`. See [\_](#jobsnamejob_clustersnew_clusterinit_scriptsworkspace). ::: -### jobs._name_.tasks.new_cluster.init_scripts.abfss +### jobs._name_.job_clusters.new_cluster.init_scripts.abfss **`Type: Map`** @@ -4374,7 +4045,7 @@ Contains the Azure Data Lake Storage destination path ::: -### jobs._name_.tasks.new_cluster.init_scripts.file +### jobs._name_.job_clusters.new_cluster.init_scripts.file **`Type: Map`** @@ -4396,7 +4067,7 @@ destination needs to be provided, e.g. ::: -### jobs._name_.tasks.new_cluster.init_scripts.gcs +### jobs._name_.job_clusters.new_cluster.init_scripts.gcs **`Type: Map`** @@ -4418,7 +4089,7 @@ destination needs to be provided, e.g. ::: -### jobs._name_.tasks.new_cluster.init_scripts.s3 +### jobs._name_.job_clusters.new_cluster.init_scripts.s3 **`Type: Map`** @@ -4466,7 +4137,7 @@ Cluster iam role is used to access s3, please make sure the cluster iam role in ::: -### jobs._name_.tasks.new_cluster.init_scripts.volumes +### jobs._name_.job_clusters.new_cluster.init_scripts.volumes **`Type: Map`** @@ -4488,7 +4159,7 @@ destination needs to be provided. e.g. 
::: -### jobs._name_.tasks.new_cluster.init_scripts.workspace +### jobs._name_.job_clusters.new_cluster.init_scripts.workspace **`Type: Map`** @@ -4510,7 +4181,28 @@ destination needs to be provided, e.g. ::: -### jobs._name_.tasks.new_cluster.workload_type +### jobs._name_.job_clusters.new_cluster.worker_node_type_flexibility + +**`Type: Map`** + +Flexible node type configuration for worker nodes. + + + +:::list-table + +- - Key + - Type + - Description + +- - `alternate_node_type_ids` + - Sequence + - A list of node type IDs to use as fallbacks when the primary node type is unavailable. + +::: + + +### jobs._name_.job_clusters.new_cluster.workload_type **`Type: Map`** @@ -4526,12 +4218,12 @@ Cluster Attributes showing for clusters workload types. - - `clients` - Map - - defined what type of clients can use the cluster. E.g. Notebooks, Jobs. See [\_](#jobsnametasksnew_clusterworkload_typeclients). + - Defines what type of clients can use the cluster. E.g. Notebooks, Jobs. See [\_](#jobsnamejob_clustersnew_clusterworkload_typeclients). ::: -### jobs._name_.tasks.new_cluster.workload_type.clients +### jobs._name_.job_clusters.new_cluster.workload_type.clients **`Type: Map`** @@ -4556,11 +4248,11 @@ defined what type of clients can use the cluster. E.g. Notebooks, Jobs ::: -### jobs._name_.tasks.notebook_task +### jobs._name_.lifecycle **`Type: Map`** -The task runs a notebook when the `notebook_task` field is present. +Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. @@ -4570,30 +4262,18 @@ The task runs a notebook when the `notebook_task` field is present. - Type - Description -- - `base_parameters` - - Map - - Base parameters to be used for each run of this job. If the run is initiated by a call to :method:jobs/run Now with parameters specified, the two parameters maps are merged.
If the same key is specified in `base_parameters` and in `run-now`, the value from `run-now` is used. Use [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs. If the notebook takes a parameter that is not specified in the job’s `base_parameters` or the `run-now` override parameters, the default value from the notebook is used. Retrieve these parameters in a notebook using [dbutils.widgets.get](https://docs.databricks.com/dev-tools/databricks-utils.html#dbutils-widgets). The JSON representation of this field cannot exceed 1MB. - -- - `notebook_path` - - String - - The path of the notebook to be run in the Databricks workspace or remote repository. For notebooks stored in the Databricks workspace, the path must be absolute and begin with a slash. For notebooks stored in a remote repository, the path must be relative. This field is required. - -- - `source` - - String - - Optional location type of the notebook. When set to `WORKSPACE`, the notebook will be retrieved from the local Databricks workspace. When set to `GIT`, the notebook will be retrieved from a Git repository defined in `git_source`. If the value is empty, the task will use `GIT` if `git_source` is defined and `WORKSPACE` otherwise. * `WORKSPACE`: Notebook is located in Databricks workspace. * `GIT`: Notebook is located in cloud Git provider. - -- - `warehouse_id` - - String - - Optional `warehouse_id` to run the notebook on a SQL warehouse. Classic SQL warehouses are NOT supported, please use serverless or pro SQL warehouses. Note that SQL warehouses only support SQL cells; if the notebook contains non-SQL cells, the run will fail. +- - `prevent_destroy` + - Boolean + - Lifecycle setting to prevent the resource from being destroyed. 
::: -### jobs._name_.tasks.notification_settings +### jobs._name_.notification_settings **`Type: Map`** -Optional notification settings that are used when sending notifications to each of the `email_notifications` and `webhook_notifications` for this task. +Optional notification settings that are used when sending notifications to each of the `email_notifications` and `webhook_notifications` for this job. @@ -4603,10 +4283,6 @@ Optional notification settings that are used when sending notifications to each - Type - Description -- - `alert_on_last_attempt` - - Boolean - - If true, do not send notifications to recipients specified in `on_start` for the retried runs and do not send notifications to recipients specified in `on_failure` until the last retry of the run. - - - `no_alert_for_canceled_runs` - Boolean - If true, do not send notifications to recipients specified in `on_failure` if the run is canceled. @@ -4618,11 +4294,11 @@ Optional notification settings that are used when sending notifications to each ::: -### jobs._name_.tasks.pipeline_task +### jobs._name_.parameters -**`Type: Map`** +**`Type: Sequence`** -The task triggers a pipeline update when the `pipeline_task` field is present. Only pipelines configured to use triggered more are supported. +Job-level parameter definitions @@ -4632,22 +4308,22 @@ The task triggers a pipeline update when the `pipeline_task` field is present. O - Type - Description -- - `full_refresh` - - Boolean - - If true, triggers a full refresh on the delta live table. +- - `default` + - String + - Default value of the parameter. -- - `pipeline_id` +- - `name` - String - - The full name of the pipeline task to execute. + - The name of the defined parameter. May only contain alphanumeric characters, `_`, `-`, and `.` ::: -### jobs._name_.tasks.power_bi_task +### jobs._name_.permissions -**`Type: Map`** +**`Type: Sequence`** -The task triggers a Power BI semantic model update when the `power_bi_task` field is present. 
+ @@ -4657,34 +4333,30 @@ The task triggers a Power BI semantic model update when the `power_bi_task` fiel - Type - Description -- - `connection_resource_name` +- - `group_name` - String - - The resource name of the UC connection to authenticate from Databricks to Power BI - -- - `power_bi_model` - - Map - - The semantic model to update. See [\_](#jobsnametaskspower_bi_taskpower_bi_model). + - -- - `refresh_after_update` - - Boolean - - Whether the model should be refreshed after the update +- - `level` + - String + - Permission level -- - `tables` - - Sequence - - The tables to be exported to Power BI. See [\_](#jobsnametaskspower_bi_tasktables). +- - `service_principal_name` + - String + - -- - `warehouse_id` +- - `user_name` - String - - The SQL warehouse ID to use as the Power BI data source + - ::: -### jobs._name_.tasks.power_bi_task.power_bi_model +### jobs._name_.queue **`Type: Map`** -The semantic model to update +The queue settings of the job. @@ -4694,34 +4366,20 @@ The semantic model to update - Type - Description -- - `authentication_method` - - String - - How the published Power BI model authenticates to Databricks - -- - `model_name` - - String - - The name of the Power BI model - -- - `overwrite_existing` +- - `enabled` - Boolean - - Whether to overwrite existing Power BI models - -- - `storage_mode` - - String - - The default storage mode of the Power BI model - -- - `workspace_name` - - String - - The name of the Power BI workspace of the model + - If true, enable queueing for the job. This is a required field. ::: -### jobs._name_.tasks.power_bi_task.tables +### jobs._name_.run_as -**`Type: Sequence`** +**`Type: Map`** -The tables to be exported to Power BI +Write-only setting. Specifies the user or service principal that the job runs as. If not specified, the job runs as the user who created the job. + +Either `user_name` or `service_principal_name` should be specified. If not, an error is thrown. 
@@ -4731,30 +4389,22 @@ The tables to be exported to Power BI - Type - Description -- - `catalog` - - String - - The catalog name in Databricks - -- - `name` - - String - - The table name in Databricks - -- - `schema` +- - `service_principal_name` - String - - The schema name in Databricks + - The application ID of an active service principal. Setting this field requires the `servicePrincipal/user` role. -- - `storage_mode` +- - `user_name` - String - - The Power BI storage mode of the table + - The email of an active workspace user. Non-admin users can only set this field to their own email. ::: -### jobs._name_.tasks.python_wheel_task +### jobs._name_.schedule **`Type: Map`** -The task runs a Python wheel when the `python_wheel_task` field is present. +An optional periodic schedule for this job. The default behavior is that the job only runs when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`. @@ -4764,30 +4414,28 @@ The task runs a Python wheel when the `python_wheel_task` field is present. - Type - Description -- - `entry_point` +- - `pause_status` - String - - Named entry point to use, if it does not exist in the metadata of the package it executes the function from the package directly using `$packageName.$entryPoint()` - -- - `named_parameters` - - Map - - Command-line parameters passed to Python wheel task in the form of `["--name=task", "--data=dbfs:/path/to/data.json"]`. Leave it empty if `parameters` is not null. + - Indicate whether this schedule is paused or not. -- - `package_name` +- - `quartz_cron_expression` - String - - Name of the package to execute + - A Cron expression using Quartz syntax that describes the schedule for a job. See [Cron Trigger](http://www.quartz-scheduler.org/documentation/quartz-2.3.0/tutorials/crontrigger.html) for details. This field is required. -- - `parameters` - - Sequence - - Command-line parameters passed to Python wheel task. Leave it empty if `named_parameters` is not null. 
+- - `timezone_id` + - String + - A Java timezone ID. The schedule for a job is resolved with respect to this timezone. See [Java TimeZone](https://docs.oracle.com/javase/7/docs/api/java/util/TimeZone.html) for details. This field is required. ::: -### jobs._name_.tasks.run_job_task +### jobs._name_.tasks -**`Type: Map`** +**`Type: Sequence`** -The task triggers another job when the `run_job_task` field is present. +A list of task specifications to be executed by this job. +It supports up to 1000 elements in write endpoints (:method:jobs/create, :method:jobs/reset, :method:jobs/update, :method:jobs/submit). +Read endpoints return only 100 tasks. If more than 100 tasks are available, you can paginate through them using :method:jobs/get. Use the `next_page_token` field at the object root to determine if more results are available. @@ -4797,80 +4445,150 @@ The task triggers another job when the `run_job_task` field is present. - Type - Description -- - `job_id` - - Integer - - ID of the job to trigger. - -- - `job_parameters` +- - `alert_task` - Map - - Job-level parameters used to trigger the job. + - New alert v2 task. See [\_](#jobsnametasksalert_task). -- - `pipeline_params` +- - `clean_rooms_notebook_task` - Map - - Controls whether the pipeline should perform a full refresh. See [\_](#jobsnametasksrun_job_taskpipeline_params). + - The task runs a [clean rooms](https://docs.databricks.com/clean-rooms/index.html) notebook when the `clean_rooms_notebook_task` field is present. See [\_](#jobsnametasksclean_rooms_notebook_task). -::: +- - `compute` + - Map + - Task level compute configuration. See [\_](#jobsnametaskscompute). +- - `condition_task` + - Map + - The task evaluates a condition that can be used to control the execution of other tasks when the `condition_task` field is present. The condition task does not require a cluster to execute and does not support retries or notifications. See [\_](#jobsnametaskscondition_task). 
-### jobs._name_.tasks.run_job_task.pipeline_params +- - `dashboard_task` + - Map + - The task refreshes a dashboard and sends a snapshot to subscribers. See [\_](#jobsnametasksdashboard_task). -**`Type: Map`** +- - `dbt_task` + - Map + - The task runs one or more dbt commands when the `dbt_task` field is present. The dbt task requires both Databricks SQL and the ability to use a serverless or a pro SQL warehouse. See [\_](#jobsnametasksdbt_task). -Controls whether the pipeline should perform a full refresh +- - `depends_on` + - Sequence + - An optional array of objects specifying the dependency graph of the task. All tasks specified in this field must complete before executing this task. The task will run only if the `run_if` condition is true. The key is `task_key`, and the value is the name assigned to the dependent task. See [\_](#jobsnametasksdepends_on). +- - `description` + - String + - An optional description for this task. +- - `disable_auto_optimization` + - Boolean + - An option to disable auto optimization in serverless -:::list-table +- - `email_notifications` + - Map + - An optional set of email addresses that is notified when runs of this task begin or complete as well as when this task is deleted. The default behavior is to not send any emails. See [\_](#jobsnametasksemail_notifications). -- - Key - - Type - - Description +- - `environment_key` + - String + - The key that references an environment spec in a job. This field is required for Python script, Python wheel and dbt tasks when using serverless compute. -- - `full_refresh` - - Boolean - - If true, triggers a full refresh on the delta live table. +- - `existing_cluster_id` + - String + - If existing_cluster_id, the ID of an existing cluster that is used for all runs. When running jobs or tasks on an existing cluster, you may need to manually restart the cluster if it stops responding. 
We suggest running jobs and tasks on new clusters for greater reliability -::: +- - `for_each_task` + - Map + - The task executes a nested task for every input provided when the `for_each_task` field is present. See [\_](#jobsnametasksfor_each_task). +- - `health` + - Map + - An optional set of health rules that can be defined for this job. See [\_](#jobsnametaskshealth). -### jobs._name_.tasks.spark_jar_task +- - `job_cluster_key` + - String + - If job_cluster_key, this task is executed reusing the cluster specified in `job.settings.job_clusters`. -**`Type: Map`** +- - `libraries` + - Sequence + - An optional list of libraries to be installed on the cluster. The default value is an empty list. See [\_](#jobsnametaskslibraries). -The task runs a JAR when the `spark_jar_task` field is present. +- - `max_retries` + - Integer + - An optional maximum number of times to retry an unsuccessful run. A run is considered to be unsuccessful if it completes with the `FAILED` result_state or `INTERNAL_ERROR` `life_cycle_state`. The value `-1` means to retry indefinitely and the value `0` means to never retry. +- - `min_retry_interval_millis` + - Integer + - An optional minimal interval in milliseconds between the start of the failed run and the subsequent retry run. The default behavior is that unsuccessful runs are immediately retried. +- - `new_cluster` + - Map + - If new_cluster, a description of a new cluster that is created for each run. See [\_](#jobsnametasksnew_cluster). -:::list-table +- - `notebook_task` + - Map + - The task runs a notebook when the `notebook_task` field is present. See [\_](#jobsnametasksnotebook_task). -- - Key - - Type - - Description +- - `notification_settings` + - Map + - Optional notification settings that are used when sending notifications to each of the `email_notifications` and `webhook_notifications` for this task. See [\_](#jobsnametasksnotification_settings). 
-- - `jar_uri` +- - `pipeline_task` + - Map + - The task triggers a pipeline update when the `pipeline_task` field is present. Only pipelines configured to use triggered mode are supported. See [\_](#jobsnametaskspipeline_task). + +- - `power_bi_task` + - Map + - The task triggers a Power BI semantic model update when the `power_bi_task` field is present. See [\_](#jobsnametaskspower_bi_task). + +- - `python_wheel_task` + - Map + - The task runs a Python wheel when the `python_wheel_task` field is present. See [\_](#jobsnametaskspython_wheel_task). + +- - `retry_on_timeout` + - Boolean + - An optional policy to specify whether to retry a job when it times out. The default behavior is to not retry on timeout. + +- - `run_if` - String + - An optional value specifying the condition determining whether the task is run once its dependencies have been completed. * `ALL_SUCCESS`: All dependencies have executed and succeeded * `AT_LEAST_ONE_SUCCESS`: At least one dependency has succeeded * `NONE_FAILED`: None of the dependencies have failed and at least one was executed * `ALL_DONE`: All dependencies have been completed * `AT_LEAST_ONE_FAILED`: At least one dependency failed * `ALL_FAILED`: All dependencies have failed + +- - `run_job_task` + - Map + - The task triggers another job when the `run_job_task` field is present. See [\_](#jobsnametasksrun_job_task). + +- - `spark_jar_task` + - Map + - The task runs a JAR when the `spark_jar_task` field is present. See [\_](#jobsnametasksspark_jar_task). + +- - `spark_python_task` + - Map + - The task runs a Python file when the `spark_python_task` field is present. See [\_](#jobsnametasksspark_python_task). + +- - `spark_submit_task` + - Map - This field is deprecated -- - `main_class_name` +- - `sql_task` + - Map + - The task runs a SQL query or file, or it refreshes a SQL alert or a legacy SQL dashboard when the `sql_task` field is present. See [\_](#jobsnametaskssql_task).
+ +- - `task_key` - String - - The full name of the class containing the main method to be executed. This class must be contained in a JAR provided as a library. The code must use `SparkContext.getOrCreate` to obtain a Spark context; otherwise, runs of the job fail. + - A unique name for the task. This field is used to refer to this task from other tasks. This field is required and must be unique within its parent job. On Update or Reset, this field is used to reference the tasks to be updated or reset. -- - `parameters` - - Sequence - - Parameters passed to the main method. Use [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs. +- - `timeout_seconds` + - Integer + - An optional timeout applied to each run of this job task. A value of `0` means no timeout. -- - `run_as_repl` - - Boolean - - This field is deprecated +- - `webhook_notifications` + - Map + - A collection of system notification IDs to notify when runs of this task begin or complete. The default behavior is to not send any system notifications. See [\_](#jobsnametaskswebhook_notifications). ::: -### jobs._name_.tasks.spark_python_task +### jobs._name_.tasks.alert_task **`Type: Map`** -The task runs a Python file when the `spark_python_task` field is present. +New alert v2 task @@ -4880,34 +4598,31 @@ The task runs a Python file when the `spark_python_task` field is present. - Type - Description -- - `parameters` +- - `alert_id` + - String + - The alert_id is the canonical identifier of the alert. + +- - `subscribers` - Sequence - - Command line parameters passed to the Python file. Use [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs. + - The subscribers receive alert evaluation result notifications after the alert task is completed. The number of subscriptions is limited to 100. 
See [\_](#jobsnametasksalert_tasksubscribers). -- - `python_file` +- - `warehouse_id` - String - - The Python file to be executed. Cloud file URIs (such as dbfs:/, s3:/, adls:/, gcs:/) and workspace paths are supported. For python files stored in the Databricks workspace, the path must be absolute and begin with `/`. For files stored in a remote repository, the path must be relative. This field is required. + - The warehouse_id identifies the warehouse settings used by the alert task. -- - `source` +- - `workspace_path` - String - - Optional location type of the Python file. When set to `WORKSPACE` or not specified, the file will be retrieved from the local Databricks workspace or cloud location (if the `python_file` has a URI format). When set to `GIT`, the Python file will be retrieved from a Git repository defined in `git_source`. * `WORKSPACE`: The Python file is located in a Databricks workspace or at a cloud filesystem URI. * `GIT`: The Python file is located in a remote Git repository. + - The workspace_path is the path to the alert file in the workspace. The path: * must start with "/Workspace" * must be a normalized path. User has to select only one of alert_id or workspace_path to identify the alert. ::: -### jobs._name_.tasks.spark_submit_task +### jobs._name_.tasks.alert_task.subscribers -**`Type: Map`** +**`Type: Sequence`** -(Legacy) The task runs the spark-submit script when the `spark_submit_task` field is present. This task can run only on new clusters and is not compatible with serverless compute. - -In the `new_cluster` specification, `libraries` and `spark_conf` are not supported. Instead, use `--jars` and `--py-files` to add Java and Python libraries and `--conf` to set the Spark configurations. - -`master`, `deploy-mode`, and `executor-cores` are automatically configured by Databricks; you _cannot_ specify them in parameters. - -By default, the Spark submit job uses all available memory (excluding reserved memory for Databricks services). 
You can set `--driver-memory`, and `--executor-memory` to a smaller value to leave some room for off-heap usage. - -The `--jars`, `--py-files`, `--files` arguments support DBFS and S3 paths. +The subscribers receive alert evaluation result notifications after the alert task is completed. +The number of subscriptions is limited to 100. @@ -4917,18 +4632,23 @@ The `--jars`, `--py-files`, `--files` arguments support DBFS and S3 paths. - Type - Description -- - `parameters` - - Sequence - - Command-line parameters passed to spark submit. Use [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs. +- - `destination_id` + - String + - + +- - `user_name` + - String + - A valid workspace email address. ::: -### jobs._name_.tasks.sql_task +### jobs._name_.tasks.clean_rooms_notebook_task **`Type: Map`** -The task runs a SQL query or file, or it refreshes a SQL alert or a legacy SQL dashboard when the `sql_task` field is present. +The task runs a [clean rooms](https://docs.databricks.com/clean-rooms/index.html) notebook +when the `clean_rooms_notebook_task` field is present. @@ -4938,38 +4658,52 @@ The task runs a SQL query or file, or it refreshes a SQL alert or a legacy SQL d - Type - Description -- - `alert` - - Map - - If alert, indicates that this job must refresh a SQL alert. See [\_](#jobsnametaskssql_taskalert). +- - `clean_room_name` + - String + - The clean room that the notebook belongs to. -- - `dashboard` - - Map - - If dashboard, indicates that this job must refresh a SQL dashboard. See [\_](#jobsnametaskssql_taskdashboard). +- - `etag` + - String + - Checksum to validate the freshness of the notebook resource (i.e. the notebook being run is the latest version). It can be fetched by calling the :method:cleanroomassets/get API. -- - `file` +- - `notebook_base_parameters` - Map - - If file, indicates that this job runs a SQL file in a remote Git repository. 
See [\_](#jobsnametaskssql_taskfile). + - Base parameters to be used for the clean room notebook job. -- - `parameters` - - Map - - Parameters to be used for each run of this job. The SQL alert task does not support custom parameters. +- - `notebook_name` + - String + - Name of the notebook being run. -- - `query` - - Map - - If query, indicates that this job must execute a SQL query. See [\_](#jobsnametaskssql_taskquery). +::: -- - `warehouse_id` + +### jobs._name_.tasks.compute + +**`Type: Map`** + +Task level compute configuration. + + + +:::list-table + +- - Key + - Type + - Description + +- - `hardware_accelerator` - String - - The canonical identifier of the SQL warehouse. Recommended to use with serverless or pro SQL warehouses. Classic SQL warehouses are only supported for SQL alert, dashboard and query tasks and are limited to scheduled single-task jobs. + - Hardware accelerator configuration for Serverless GPU workloads. ::: -### jobs._name_.tasks.sql_task.alert +### jobs._name_.tasks.condition_task **`Type: Map`** -If alert, indicates that this job must refresh a SQL alert. +The task evaluates a condition that can be used to control the execution of other tasks when the `condition_task` field is present. +The condition task does not require a cluster to execute and does not support retries or notifications. @@ -4979,26 +4713,26 @@ If alert, indicates that this job must refresh a SQL alert. - Type - Description -- - `alert_id` +- - `left` - String - - The canonical identifier of the SQL alert. + - The left operand of the condition task. Can be either a string value or a job state or parameter reference. -- - `pause_subscriptions` - - Boolean - - If true, the alert notifications are not sent to subscribers. +- - `op` + - String + - * `EQUAL_TO`, `NOT_EQUAL` operators perform string comparison of their operands. This means that `“12.0” == “12”` will evaluate to `false`. 
* `GREATER_THAN`, `GREATER_THAN_OR_EQUAL`, `LESS_THAN`, `LESS_THAN_OR_EQUAL` operators perform numeric comparison of their operands. `“12.0” >= “12”` will evaluate to `true`, `“10.0” >= “12”` will evaluate to `false`. The boolean comparison to task values can be implemented with operators `EQUAL_TO`, `NOT_EQUAL`. If a task value was set to a boolean value, it will be serialized to `“true”` or `“false”` for the comparison. -- - `subscriptions` - - Sequence - - If specified, alert notifications are sent to subscribers. See [\_](#jobsnametaskssql_taskalertsubscriptions). +- - `right` + - String + - The right operand of the condition task. Can be either a string value or a job state or parameter reference. ::: -### jobs._name_.tasks.sql_task.alert.subscriptions +### jobs._name_.tasks.dashboard_task -**`Type: Sequence`** +**`Type: Map`** -If specified, alert notifications are sent to subscribers. +The task refreshes a dashboard and sends a snapshot to subscribers. @@ -5008,22 +4742,26 @@ If specified, alert notifications are sent to subscribers. - Type - Description -- - `destination_id` +- - `dashboard_id` - String - - The canonical identifier of the destination to receive email notification. This parameter is mutually exclusive with user_name. You cannot set both destination_id and user_name for subscription notifications. + - -- - `user_name` +- - `subscription` + - Map + - See [\_](#jobsnametasksdashboard_tasksubscription). + +- - `warehouse_id` - String - - The user name to receive the subscription email. This parameter is mutually exclusive with destination_id. You cannot set both destination_id and user_name for subscription notifications. + - Optional: The warehouse id to execute the dashboard with for the schedule. If not specified, the default warehouse of the dashboard will be used. 
::: -### jobs._name_.tasks.sql_task.dashboard +### jobs._name_.tasks.dashboard_task.subscription **`Type: Map`** -If dashboard, indicates that this job must refresh a SQL dashboard. + @@ -5035,28 +4773,24 @@ If dashboard, indicates that this job must refresh a SQL dashboard. - - `custom_subject` - String - - Subject of the email sent to subscribers of this task. - -- - `dashboard_id` - - String - - The canonical identifier of the SQL dashboard. + - Optional: Allows users to specify a custom subject line on the email sent to subscribers. -- - `pause_subscriptions` +- - `paused` - Boolean - - If true, the dashboard snapshot is not taken, and emails are not sent to subscribers. + - When true, the subscription will not send emails. -- - `subscriptions` +- - `subscribers` - Sequence - - If specified, dashboard snapshots are sent to subscriptions. See [\_](#jobsnametaskssql_taskdashboardsubscriptions). + - See [\_](#jobsnametasksdashboard_tasksubscriptionsubscribers). ::: -### jobs._name_.tasks.sql_task.dashboard.subscriptions +### jobs._name_.tasks.dashboard_task.subscription.subscribers **`Type: Sequence`** -If specified, dashboard snapshots are sent to subscriptions. + @@ -5068,20 +4802,20 @@ If specified, dashboard snapshots are sent to subscriptions. - - `destination_id` - String - - The canonical identifier of the destination to receive email notification. This parameter is mutually exclusive with user_name. You cannot set both destination_id and user_name for subscription notifications. + - - - `user_name` - String - - The user name to receive the subscription email. This parameter is mutually exclusive with destination_id. You cannot set both destination_id and user_name for subscription notifications. + - ::: -### jobs._name_.tasks.sql_task.file +### jobs._name_.tasks.dbt_task **`Type: Map`** -If file, indicates that this job runs a SQL file in a remote Git repository. +The task runs one or more dbt commands when the `dbt_task` field is present. 
The dbt task requires both Databricks SQL and the ability to use a serverless or a pro SQL warehouse. @@ -5091,22 +4825,43 @@ If file, indicates that this job runs a SQL file in a remote Git repository. - Type - Description -- - `path` +- - `catalog` - String - - Path of the SQL file. Must be relative if the source is a remote Git repository and absolute for workspace paths. + - Optional name of the catalog to use. The value is the top level in the 3-level namespace of Unity Catalog (catalog / schema / relation). The catalog value can only be specified if a warehouse_id is specified. Requires dbt-databricks >= 1.1.1. + +- - `commands` + - Sequence + - A list of dbt commands to execute. All commands must start with `dbt`. This parameter must not be empty. A maximum of up to 10 commands can be provided. + +- - `profiles_directory` + - String + - Optional (relative) path to the profiles directory. Can only be specified if no warehouse_id is specified. If no warehouse_id is specified and this folder is unset, the root directory is used. + +- - `project_directory` + - String + - Path to the project directory. Optional for Git sourced tasks, in which case if no value is provided, the root of the Git repository is used. + +- - `schema` + - String + - Optional schema to write to. This parameter is only used when a warehouse_id is also provided. If not provided, the `default` schema is used. - - `source` - String - - Optional location type of the SQL file. When set to `WORKSPACE`, the SQL file will be retrieved from the local Databricks workspace. When set to `GIT`, the SQL file will be retrieved from a Git repository defined in `git_source`. If the value is empty, the task will use `GIT` if `git_source` is defined and `WORKSPACE` otherwise. * `WORKSPACE`: SQL file is located in Databricks workspace. * `GIT`: SQL file is located in cloud Git provider. + - Optional location type of the project directory. 
When set to `WORKSPACE`, the project will be retrieved from the local Databricks workspace. When set to `GIT`, the project will be retrieved from a Git repository defined in `git_source`. If the value is empty, the task will use `GIT` if `git_source` is defined and `WORKSPACE` otherwise. * `WORKSPACE`: Project is located in Databricks workspace. * `GIT`: Project is located in cloud Git provider. + +- - `warehouse_id` + - String + - ID of the SQL warehouse to connect to. If provided, we automatically generate and provide the profile and connection details to dbt. It can be overridden on a per-command basis by using the `--profiles-dir` command line argument. ::: -### jobs._name_.tasks.sql_task.query +### jobs._name_.tasks.depends_on -**`Type: Map`** +**`Type: Sequence`** -If query, indicates that this job must execute a SQL query. +An optional array of objects specifying the dependency graph of the task. All tasks specified in this field must complete before executing this task. The task will run only if the `run_if` condition is true. +The key is `task_key`, and the value is the name assigned to the dependent task. @@ -5116,18 +4871,22 @@ If query, indicates that this job must execute a SQL query. - Type - Description -- - `query_id` +- - `outcome` - String - - The canonical identifier of the SQL query. + - Can only be specified on condition task dependencies. The outcome of the dependent task that must be met for this task to run. + +- - `task_key` + - String + - The name of the task this task depends on. ::: -### jobs._name_.tasks.webhook_notifications +### jobs._name_.tasks.email_notifications **`Type: Map`** -A collection of system notification IDs to notify when runs of this task begin or complete. The default behavior is to not send any system notifications. +An optional set of email addresses that is notified when runs of this task begin or complete as well as when this task is deleted. The default behavior is to not send any emails. 
@@ -5137,34 +4896,38 @@ A collection of system notification IDs to notify when runs of this task begin o - Type - Description +- - `no_alert_for_skipped_runs` + - Boolean + - This field is deprecated + - - `on_duration_warning_threshold_exceeded` - Sequence - - An optional list of system notification IDs to call when the duration of a run exceeds the threshold specified for the `RUN_DURATION_SECONDS` metric in the `health` field. A maximum of 3 destinations can be specified for the `on_duration_warning_threshold_exceeded` property. See [\_](#jobsnametaskswebhook_notificationson_duration_warning_threshold_exceeded). + - A list of email addresses to be notified when the duration of a run exceeds the threshold specified for the `RUN_DURATION_SECONDS` metric in the `health` field. If no rule for the `RUN_DURATION_SECONDS` metric is specified in the `health` field for the job, notifications are not sent. - - `on_failure` - Sequence - - An optional list of system notification IDs to call when the run fails. A maximum of 3 destinations can be specified for the `on_failure` property. See [\_](#jobsnametaskswebhook_notificationson_failure). + - A list of email addresses to be notified when a run unsuccessfully completes. A run is considered to have completed unsuccessfully if it ends with an `INTERNAL_ERROR` `life_cycle_state` or a `FAILED`, or `TIMED_OUT` result_state. If this is not specified on job creation, reset, or update the list is empty, and notifications are not sent. - - `on_start` - Sequence - - An optional list of system notification IDs to call when the run starts. A maximum of 3 destinations can be specified for the `on_start` property. See [\_](#jobsnametaskswebhook_notificationson_start). + - A list of email addresses to be notified when a run begins. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent. 
- - `on_streaming_backlog_exceeded` - Sequence - - An optional list of system notification IDs to call when any streaming backlog thresholds are exceeded for any stream. Streaming backlog thresholds can be set in the `health` field using the following metrics: `STREAMING_BACKLOG_BYTES`, `STREAMING_BACKLOG_RECORDS`, `STREAMING_BACKLOG_SECONDS`, or `STREAMING_BACKLOG_FILES`. Alerting is based on the 10-minute average of these metrics. If the issue persists, notifications are resent every 30 minutes. A maximum of 3 destinations can be specified for the `on_streaming_backlog_exceeded` property. See [\_](#jobsnametaskswebhook_notificationson_streaming_backlog_exceeded). + - A list of email addresses to notify when any streaming backlog thresholds are exceeded for any stream. Streaming backlog thresholds can be set in the `health` field using the following metrics: `STREAMING_BACKLOG_BYTES`, `STREAMING_BACKLOG_RECORDS`, `STREAMING_BACKLOG_SECONDS`, or `STREAMING_BACKLOG_FILES`. Alerting is based on the 10-minute average of these metrics. If the issue persists, notifications are resent every 30 minutes. - - `on_success` - Sequence - - An optional list of system notification IDs to call when the run completes successfully. A maximum of 3 destinations can be specified for the `on_success` property. See [\_](#jobsnametaskswebhook_notificationson_success). + - A list of email addresses to be notified when a run successfully completes. A run is considered to have completed successfully if it ends with a `TERMINATED` `life_cycle_state` and a `SUCCESS` result_state. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent. 
::: -### jobs._name_.tasks.webhook_notifications.on_duration_warning_threshold_exceeded +### jobs._name_.tasks.for_each_task -**`Type: Sequence`** +**`Type: Map`** -An optional list of system notification IDs to call when the duration of a run exceeds the threshold specified for the `RUN_DURATION_SECONDS` metric in the `health` field. A maximum of 3 destinations can be specified for the `on_duration_warning_threshold_exceeded` property. +The task executes a nested task for every input provided when the `for_each_task` field is present. @@ -5174,18 +4937,26 @@ An optional list of system notification IDs to call when the duration of a run e - Type - Description -- - `id` +- - `concurrency` + - Integer + - An optional maximum allowed number of concurrent runs of the task. Set this value if you want to be able to execute multiple runs of the task concurrently. + +- - `inputs` - String - - + - Array for task to iterate on. This can be a JSON string or a reference to an array parameter. + +- - `task` + - Map + - Configuration for the task that will be run for each element in the array ::: -### jobs._name_.tasks.webhook_notifications.on_failure +### jobs._name_.tasks.health -**`Type: Sequence`** +**`Type: Map`** -An optional list of system notification IDs to call when the run fails. A maximum of 3 destinations can be specified for the `on_failure` property. +An optional set of health rules that can be defined for this job. @@ -5195,18 +4966,18 @@ An optional list of system notification IDs to call when the run fails. A maximu - Type - Description -- - `id` - - String - - +- - `rules` + - Sequence + - See [\_](#jobsnametaskshealthrules). ::: -### jobs._name_.tasks.webhook_notifications.on_start +### jobs._name_.tasks.health.rules **`Type: Sequence`** -An optional list of system notification IDs to call when the run starts. A maximum of 3 destinations can be specified for the `on_start` property. 
+ @@ -5216,21 +4987,27 @@ An optional list of system notification IDs to call when the run starts. A maxim - Type - Description -- - `id` +- - `metric` - String - - + - Specifies the health metric that is being evaluated for a particular health rule. * `RUN_DURATION_SECONDS`: Expected total time for a run in seconds. * `STREAMING_BACKLOG_BYTES`: An estimate of the maximum bytes of data waiting to be consumed across all streams. This metric is in Public Preview. * `STREAMING_BACKLOG_RECORDS`: An estimate of the maximum offset lag across all streams. This metric is in Public Preview. * `STREAMING_BACKLOG_SECONDS`: An estimate of the maximum consumer delay across all streams. This metric is in Public Preview. * `STREAMING_BACKLOG_FILES`: An estimate of the maximum number of outstanding files across all streams. This metric is in Public Preview. + +- - `op` + - String + - Specifies the operator used to compare the health metric value with the specified threshold. + +- - `value` + - Integer + - Specifies the threshold value that the health metric should obey to satisfy the health rule. ::: -### jobs._name_.tasks.webhook_notifications.on_streaming_backlog_exceeded +### jobs._name_.tasks.libraries **`Type: Sequence`** -An optional list of system notification IDs to call when any streaming backlog thresholds are exceeded for any stream. -Streaming backlog thresholds can be set in the `health` field using the following metrics: `STREAMING_BACKLOG_BYTES`, `STREAMING_BACKLOG_RECORDS`, `STREAMING_BACKLOG_SECONDS`, or `STREAMING_BACKLOG_FILES`. -Alerting is based on the 10-minute average of these metrics. If the issue persists, notifications are resent every 30 minutes. -A maximum of 3 destinations can be specified for the `on_streaming_backlog_exceeded` property. +An optional list of libraries to be installed on the cluster. +The default value is an empty list. 
@@ -5240,39 +5017,42 @@ A maximum of 3 destinations can be specified for the `on_streaming_backlog_excee - Type - Description -- - `id` - - String - - - -::: - - -### jobs._name_.tasks.webhook_notifications.on_success - -**`Type: Sequence`** +- - `cran` + - Map + - Specification of a CRAN library to be installed as part of the library. See [\_](#jobsnametaskslibrariescran). -An optional list of system notification IDs to call when the run completes successfully. A maximum of 3 destinations can be specified for the `on_success` property. +- - `egg` + - String + - This field is deprecated +- - `jar` + - String + - URI of the JAR library to install. Supported URIs include Workspace paths, Unity Catalog Volumes paths, and S3 URIs. For example: `{ "jar": "/Workspace/path/to/library.jar" }`, `{ "jar" : "/Volumes/path/to/library.jar" }` or `{ "jar": "s3://my-bucket/library.jar" }`. If S3 is used, please make sure the cluster has read access on the library. You may need to launch the cluster with an IAM role to access the S3 URI. +- - `maven` + - Map + - Specification of a maven library to be installed. For example: `{ "coordinates": "org.jsoup:jsoup:1.7.2" }`. See [\_](#jobsnametaskslibrariesmaven). -:::list-table +- - `pypi` + - Map + - Specification of a PyPi library to be installed. For example: `{ "package": "simplejson" }`. See [\_](#jobsnametaskslibrariespypi). -- - Key - - Type - - Description +- - `requirements` + - String + - URI of the requirements.txt file to install. Only Workspace paths and Unity Catalog Volumes paths are supported. For example: `{ "requirements": "/Workspace/path/to/requirements.txt" }` or `{ "requirements" : "/Volumes/path/to/requirements.txt" }` -- - `id` +- - `whl` - String - - + - URI of the wheel library to install. Supported URIs include Workspace paths, Unity Catalog Volumes paths, and S3 URIs. 
For example: `{ "whl": "/Workspace/path/to/library.whl" }`, `{ "whl" : "/Volumes/path/to/library.whl" }` or `{ "whl": "s3://my-bucket/library.whl" }`. If S3 is used, please make sure the cluster has read access on the library. You may need to launch the cluster with an IAM role to access the S3 URI. ::: -### jobs._name_.trigger +### jobs._name_.tasks.libraries.cran **`Type: Map`** -A configuration to trigger a run when certain conditions are met. The default behavior is that the job runs only when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`. +Specification of a CRAN library to be installed as part of the library @@ -5282,26 +5062,23 @@ A configuration to trigger a run when certain conditions are met. The default be - Type - Description -- - `file_arrival` - - Map - - File arrival trigger settings. See [\_](#jobsnametriggerfile_arrival). - -- - `pause_status` +- - `package` - String - - Whether this trigger is paused or not. + - The name of the CRAN package to install. -- - `periodic` - - Map - - Periodic trigger settings. See [\_](#jobsnametriggerperiodic). +- - `repo` + - String + - The repository where the package can be found. If not specified, the default CRAN repo is used. ::: -### jobs._name_.trigger.file_arrival +### jobs._name_.tasks.libraries.maven **`Type: Map`** -File arrival trigger settings. +Specification of a maven library to be installed. For example: +`{ "coordinates": "org.jsoup:jsoup:1.7.2" }` @@ -5311,26 +5088,27 @@ File arrival trigger settings. - Type - Description -- - `min_time_between_triggers_seconds` - - Integer - - If set, the trigger starts a run only after the specified amount of time passed since the last time the trigger fired. The minimum allowed value is 60 seconds - -- - `url` +- - `coordinates` - String - - URL to be monitored for file arrivals. The path must point to the root or a subpath of the external location. + - Gradle-style maven coordinates. For example: "org.jsoup:jsoup:1.7.2". 
-- - `wait_after_last_change_seconds` - - Integer - - If set, the trigger starts a run only after no file activity has occurred for the specified amount of time. This makes it possible to wait for a batch of incoming files to arrive before triggering a run. The minimum allowed value is 60 seconds. +- - `exclusions` + - Sequence + - List of dependencies to exclude. For example: `["slf4j:slf4j", "*:hadoop-client"]`. Maven dependency exclusions: https://maven.apache.org/guides/introduction/introduction-to-optional-and-excludes-dependencies.html. + +- - `repo` + - String + - Maven repo to install the Maven package from. If omitted, both Maven Central Repository and Spark Packages are searched. ::: -### jobs._name_.trigger.periodic +### jobs._name_.tasks.libraries.pypi **`Type: Map`** -Periodic trigger settings. +Specification of a PyPi library to be installed. For example: +`{ "package": "simplejson" }` @@ -5340,22 +5118,22 @@ Periodic trigger settings. - Type - Description -- - `interval` - - Integer - - The interval at which the trigger should run. +- - `package` + - String + - The name of the pypi package to install. An optional exact version specification is also supported. Examples: "simplejson" and "simplejson==3.8.0". -- - `unit` +- - `repo` - String - - The unit of time for the interval. + - The repository where the package can be found. If not specified, the default pip index is used. ::: -### jobs._name_.webhook_notifications +### jobs._name_.tasks.new_cluster **`Type: Map`** -A collection of system notification IDs to notify when runs of this job begin or complete. +If new_cluster, a description of a new cluster that is created for each run.
@@ -5365,121 +5143,151 @@ A collection of system notification IDs to notify when runs of this job begin or - Type - Description -- - `on_duration_warning_threshold_exceeded` - - Sequence - - An optional list of system notification IDs to call when the duration of a run exceeds the threshold specified for the `RUN_DURATION_SECONDS` metric in the `health` field. A maximum of 3 destinations can be specified for the `on_duration_warning_threshold_exceeded` property. See [\_](#jobsnamewebhook_notificationson_duration_warning_threshold_exceeded). - -- - `on_failure` - - Sequence - - An optional list of system notification IDs to call when the run fails. A maximum of 3 destinations can be specified for the `on_failure` property. See [\_](#jobsnamewebhook_notificationson_failure). - -- - `on_start` - - Sequence - - An optional list of system notification IDs to call when the run starts. A maximum of 3 destinations can be specified for the `on_start` property. See [\_](#jobsnamewebhook_notificationson_start). - -- - `on_streaming_backlog_exceeded` - - Sequence - - An optional list of system notification IDs to call when any streaming backlog thresholds are exceeded for any stream. Streaming backlog thresholds can be set in the `health` field using the following metrics: `STREAMING_BACKLOG_BYTES`, `STREAMING_BACKLOG_RECORDS`, `STREAMING_BACKLOG_SECONDS`, or `STREAMING_BACKLOG_FILES`. Alerting is based on the 10-minute average of these metrics. If the issue persists, notifications are resent every 30 minutes. A maximum of 3 destinations can be specified for the `on_streaming_backlog_exceeded` property. See [\_](#jobsnamewebhook_notificationson_streaming_backlog_exceeded). - -- - `on_success` - - Sequence - - An optional list of system notification IDs to call when the run completes successfully. A maximum of 3 destinations can be specified for the `on_success` property. See [\_](#jobsnamewebhook_notificationson_success). 
+- - `apply_policy_default_values` + - Boolean + - When set to true, fixed and default values from the policy will be used for fields that are omitted. When set to false, only fixed values from the policy will be applied. -::: +- - `autoscale` + - Map + - Parameters needed in order to automatically scale clusters up and down based on load. Note: autoscaling works best with DB runtime versions 3.0 or later. See [\_](#jobsnametasksnew_clusterautoscale). +- - `autotermination_minutes` + - Integer + - Automatically terminates the cluster after it is inactive for this time in minutes. If not set, this cluster will not be automatically terminated. If specified, the threshold must be between 10 and 10000 minutes. Users can also set this value to 0 to explicitly disable automatic termination. -### jobs._name_.webhook_notifications.on_duration_warning_threshold_exceeded +- - `aws_attributes` + - Map + - Attributes related to clusters running on Amazon Web Services. If not specified at cluster creation, a set of default values will be used. See [\_](#jobsnametasksnew_clusteraws_attributes). -**`Type: Sequence`** +- - `azure_attributes` + - Map + - Attributes related to clusters running on Microsoft Azure. If not specified at cluster creation, a set of default values will be used. See [\_](#jobsnametasksnew_clusterazure_attributes). -An optional list of system notification IDs to call when the duration of a run exceeds the threshold specified for the `RUN_DURATION_SECONDS` metric in the `health` field. A maximum of 3 destinations can be specified for the `on_duration_warning_threshold_exceeded` property. +- - `cluster_log_conf` + - Map + - The configuration for delivering spark logs to a long-term storage destination. Three kinds of destinations (DBFS, S3 and Unity Catalog volumes) are supported. Only one destination can be specified for one cluster. If the conf is given, the logs will be delivered to the destination every `5 mins`. 
The destination of driver logs is `$destination/$clusterId/driver`, while the destination of executor logs is `$destination/$clusterId/executor`. See [\_](#jobsnametasksnew_clustercluster_log_conf). +- - `cluster_name` + - String + - Cluster name requested by the user. This doesn't have to be unique. If not specified at creation, the cluster name will be an empty string. For job clusters, the cluster name is automatically set based on the job and job run IDs. +- - `custom_tags` + - Map + - Additional tags for cluster resources. Databricks will tag all cluster resources (e.g., AWS instances and EBS volumes) with these tags in addition to `default_tags`. Notes: - Currently, Databricks allows at most 45 custom tags - Clusters can only reuse cloud resources if the resources' tags are a subset of the cluster tags -:::list-table +- - `data_security_mode` + - String + - Data security mode decides what data governance model to use when accessing data from a cluster. The following modes can only be used when `kind = CLASSIC_PREVIEW`. * `DATA_SECURITY_MODE_AUTO`: Databricks will choose the most appropriate access mode depending on your compute configuration. * `DATA_SECURITY_MODE_STANDARD`: Alias for `USER_ISOLATION`. * `DATA_SECURITY_MODE_DEDICATED`: Alias for `SINGLE_USER`. The following modes can be used regardless of `kind`. * `NONE`: No security isolation for multiple users sharing the cluster. Data governance features are not available in this mode. * `SINGLE_USER`: A secure cluster that can only be exclusively used by a single user specified in `single_user_name`. Most programming languages, cluster features and data governance features are available in this mode. * `USER_ISOLATION`: A secure cluster that can be shared by multiple users. Cluster users are fully isolated so that they cannot see each other's data and credentials. Most data governance features are supported in this mode. But programming languages and cluster features might be limited. 
The following modes are deprecated starting with Databricks Runtime 15.0 and will be removed for future Databricks Runtime versions: * `LEGACY_TABLE_ACL`: This mode is for users migrating from legacy Table ACL clusters. * `LEGACY_PASSTHROUGH`: This mode is for users migrating from legacy Passthrough on high concurrency clusters. * `LEGACY_SINGLE_USER`: This mode is for users migrating from legacy Passthrough on standard clusters. * `LEGACY_SINGLE_USER_STANDARD`: This mode provides a way that has neither UC nor passthrough enabled. -- - Key - - Type - - Description +- - `docker_image` + - Map + - See [\_](#jobsnametasksnew_clusterdocker_image). -- - `id` +- - `driver_instance_pool_id` - String - - - -::: + - The optional ID of the instance pool to which the driver of the cluster belongs. The pool cluster uses the instance pool with id (instance_pool_id) if the driver pool is not assigned. +- - `driver_node_type_flexibility` + - Map + - Flexible node type configuration for the driver node. See [\_](#jobsnametasksnew_clusterdriver_node_type_flexibility). -### jobs._name_.webhook_notifications.on_failure +- - `driver_node_type_id` + - String + - The node type of the Spark driver. Note that this field is optional; if unset, the driver node type will be set as the same value as `node_type_id` defined above. This field, along with node_type_id, should not be set if virtual_cluster_size is set. If driver_node_type_id, node_type_id, and virtual_cluster_size are all specified, driver_node_type_id and node_type_id take precedence. -**`Type: Sequence`** +- - `enable_elastic_disk` + - Boolean + - Autoscaling Local Storage: when enabled, this cluster will dynamically acquire additional disk space when its Spark workers are running low on disk space. -An optional list of system notification IDs to call when the run fails. A maximum of 3 destinations can be specified for the `on_failure` property.
+- - `enable_local_disk_encryption` + - Boolean + - Whether to enable LUKS on cluster VMs' local disks +- - `gcp_attributes` + - Map + - Attributes related to clusters running on Google Cloud Platform. If not specified at cluster creation, a set of default values will be used. See [\_](#jobsnametasksnew_clustergcp_attributes). +- - `init_scripts` + - Sequence + - The configuration for storing init scripts. Any number of destinations can be specified. The scripts are executed sequentially in the order provided. If `cluster_log_conf` is specified, init script logs are sent to `//init_scripts`. See [\_](#jobsnametasksnew_clusterinit_scripts). -:::list-table +- - `instance_pool_id` + - String + - The optional ID of the instance pool to which the cluster belongs. -- - Key - - Type - - Description +- - `is_single_node` + - Boolean + - This field can only be used when `kind = CLASSIC_PREVIEW`. When set to true, Databricks will automatically set single node related `custom_tags`, `spark_conf`, and `num_workers` -- - `id` +- - `kind` - String - -::: +- - `node_type_id` + - String + - This field encodes, through a single value, the resources available to each of the Spark nodes in this cluster. For example, the Spark nodes can be provisioned and optimized for memory or compute intensive workloads. A list of available node types can be retrieved by using the :method:clusters/listNodeTypes API call. +- - `num_workers` + - Integer + - Number of worker nodes that this cluster should have. A cluster has one Spark Driver and `num_workers` Executors for a total of `num_workers` + 1 Spark nodes. Note: When reading the properties of a cluster, this field reflects the desired number of workers rather than the actual current number of workers. 
For instance, if a cluster is resized from 5 to 10 workers, this field will immediately be updated to reflect the target size of 10 workers, whereas the workers listed in `spark_info` will gradually increase from 5 to 10 as the new nodes are provisioned. -### jobs._name_.webhook_notifications.on_start - -**`Type: Sequence`** - -An optional list of system notification IDs to call when the run starts. A maximum of 3 destinations can be specified for the `on_start` property. - - - -:::list-table +- - `policy_id` + - String + - The ID of the cluster policy used to create the cluster if applicable. -- - Key - - Type - - Description +- - `remote_disk_throughput` + - Integer + - If set, what the configurable throughput (in Mb/s) for the remote disk is. Currently only supported for GCP HYPERDISK_BALANCED disks. -- - `id` +- - `runtime_engine` - String - -::: - +- - `single_user_name` + - String + - Single user name if data_security_mode is `SINGLE_USER` -### jobs._name_.webhook_notifications.on_streaming_backlog_exceeded +- - `spark_conf` + - Map + - An object containing a set of optional, user-specified Spark configuration key-value pairs. Users can also pass in a string of extra JVM options to the driver and the executors via `spark.driver.extraJavaOptions` and `spark.executor.extraJavaOptions` respectively. -**`Type: Sequence`** +- - `spark_env_vars` + - Map + - An object containing a set of optional, user-specified environment variable key-value pairs. Please note that key-value pair of the form (X,Y) will be exported as is (i.e., `export X='Y'`) while launching the driver and workers. In order to specify an additional set of `SPARK_DAEMON_JAVA_OPTS`, we recommend appending them to `$SPARK_DAEMON_JAVA_OPTS` as shown in the example below. This ensures that all default databricks managed environmental variables are included as well. 
Example Spark environment variables: `{"SPARK_WORKER_MEMORY": "28000m", "SPARK_LOCAL_DIRS": "/local_disk0"}` or `{"SPARK_DAEMON_JAVA_OPTS": "$SPARK_DAEMON_JAVA_OPTS -Dspark.shuffle.service.enabled=true"}` -An optional list of system notification IDs to call when any streaming backlog thresholds are exceeded for any stream. -Streaming backlog thresholds can be set in the `health` field using the following metrics: `STREAMING_BACKLOG_BYTES`, `STREAMING_BACKLOG_RECORDS`, `STREAMING_BACKLOG_SECONDS`, or `STREAMING_BACKLOG_FILES`. -Alerting is based on the 10-minute average of these metrics. If the issue persists, notifications are resent every 30 minutes. -A maximum of 3 destinations can be specified for the `on_streaming_backlog_exceeded` property. +- - `spark_version` + - String + - The Spark version of the cluster, e.g. `3.3.x-scala2.11`. A list of available Spark versions can be retrieved by using the :method:clusters/sparkVersions API call. +- - `ssh_public_keys` + - Sequence + - SSH public key contents that will be added to each Spark node in this cluster. The corresponding private keys can be used to login with the user name `ubuntu` on port `2200`. Up to 10 keys can be specified. +- - `total_initial_remote_disk_size` + - Integer + - If set, what the total initial volume size (in GB) of the remote disks should be. Currently only supported for GCP HYPERDISK_BALANCED disks. -:::list-table +- - `use_ml_runtime` + - Boolean + - This field can only be used when `kind = CLASSIC_PREVIEW`. `effective_spark_version` is determined by `spark_version` (DBR release), this field `use_ml_runtime`, and whether `node_type_id` is gpu node or not. -- - Key - - Type - - Description +- - `worker_node_type_flexibility` + - Map + - Flexible node type configuration for worker nodes. See [\_](#jobsnametasksnew_clusterworker_node_type_flexibility). -- - `id` - - String - - +- - `workload_type` + - Map + - Cluster Attributes showing for clusters workload types. 
See [\_](#jobsnametasksnew_clusterworkload_type). ::: -### jobs._name_.webhook_notifications.on_success +### jobs._name_.tasks.new_cluster.autoscale -**`Type: Sequence`** +**`Type: Map`** -An optional list of system notification IDs to call when the run completes successfully. A maximum of 3 destinations can be specified for the `on_success` property. +Parameters needed in order to automatically scale clusters up and down based on load. +Note: autoscaling works best with DB runtime versions 3.0 or later. @@ -5489,24 +5297,24 @@ An optional list of system notification IDs to call when the run completes succe - Type - Description -- - `id` - - String - - +- - `max_workers` + - Integer + - The maximum number of workers to which the cluster can scale up when overloaded. Note that `max_workers` must be strictly greater than `min_workers`. + +- - `min_workers` + - Integer + - The minimum number of workers to which the cluster can scale down when underutilized. It is also the initial number of workers the cluster will have after creation. ::: -## model_serving_endpoints +### jobs._name_.tasks.new_cluster.aws_attributes **`Type: Map`** -The model_serving_endpoint resource allows you to define [model serving endpoints](/api/workspace/servingendpoints/create). See [_](/machine-learning/model-serving/manage-serving-endpoints.md). +Attributes related to clusters running on Amazon Web Services. +If not specified at cluster creation, a set of default values will be used. -```yaml -model_serving_endpoints: - : - : -``` :::list-table @@ -5515,82 +5323,55 @@ model_serving_endpoints: - Type - Description -- - `ai_gateway` - - Map - - The AI Gateway configuration for the serving endpoint. NOTE: External model, provisioned throughput, and pay-per-token endpoints are fully supported; agent endpoints currently only support inference tables. See [\_](#model_serving_endpointsnameai_gateway). 
- -- - `budget_policy_id` +- - `availability` - String - - The budget policy to be applied to the serving endpoint. + - Availability type used for all subsequent nodes past the `first_on_demand` ones. Note: If `first_on_demand` is zero, this availability type will be used for the entire cluster. -- - `config` - - Map - - The core config of the serving endpoint. See [\_](#model_serving_endpointsnameconfig). +- - `ebs_volume_count` + - Integer + - The number of volumes launched for each instance. Users can choose up to 10 volumes. This feature is only enabled for supported node types. Legacy node types cannot specify custom EBS volumes. For node types with no instance store, at least one EBS volume needs to be specified; otherwise, cluster creation will fail. These EBS volumes will be mounted at `/ebs0`, `/ebs1`, etc. Instance store volumes will be mounted at `/local_disk0`, `/local_disk1`, etc. If EBS volumes are attached, Databricks will configure Spark to use only the EBS volumes for scratch storage because heterogeneously sized scratch devices can lead to inefficient disk utilization. If no EBS volumes are attached, Databricks will configure Spark to use instance store volumes. Please note that if EBS volumes are specified, then the Spark configuration `spark.local.dir` will be overridden. -- - `description` - - String - - +- - `ebs_volume_iops` + - Integer + - If using gp3 volumes, what IOPS to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used. -- - `email_notifications` - - Map - - Email notification settings. See [\_](#model_serving_endpointsnameemail_notifications). +- - `ebs_volume_size` + - Integer + - The size of each EBS volume (in GiB) launched for each instance. For general purpose SSD, this value must be within the range 100 - 4096. For throughput optimized HDD, this value must be within the range 500 - 4096.
-- - `lifecycle` - Map - Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. See [\_](#model_serving_endpointsnamelifecycle). +- - `ebs_volume_throughput` + - Integer + - If using gp3 volumes, what throughput to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used. -- - `name` +- - `ebs_volume_type` - String - - The name of the serving endpoint. This field is required and must be unique across a Databricks workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. + - All EBS volume types that Databricks supports. See https://aws.amazon.com/ebs/details/ for details. -- - `permissions` - - Sequence - - See [\_](#model_serving_endpointsnamepermissions). +- - `first_on_demand` + - Integer + - The first `first_on_demand` nodes of the cluster will be placed on on-demand instances. If this value is greater than 0, the cluster driver node in particular will be placed on an on-demand instance. If this value is greater than or equal to the current cluster size, all nodes will be placed on on-demand instances. If this value is less than the current cluster size, `first_on_demand` nodes will be placed on on-demand instances and the remainder will be placed on `availability` instances. Note that this value does not affect cluster size and cannot currently be mutated over the lifetime of a cluster. -- - `rate_limits` - - Sequence - - This field is deprecated +- - `instance_profile_arn` + - String + - Nodes for this cluster will only be placed on AWS instances with this instance profile. If omitted, nodes will be placed on instances without an IAM instance profile. The instance profile must have previously been added to the Databricks environment by an account administrator. This feature may only be available to certain customer plans.
-- - `route_optimized` - - Boolean - - Enable route optimization for the serving endpoint. +- - `spot_bid_price_percent` + - Integer + - The bid price for AWS spot instances, as a percentage of the corresponding instance type's on-demand price. For example, if this field is set to 50, and the cluster needs a new `r3.xlarge` spot instance, then the bid price is half of the price of on-demand `r3.xlarge` instances. Similarly, if this field is set to 200, the bid price is twice the price of on-demand `r3.xlarge` instances. If not specified, the default value is 100. When spot instances are requested for this cluster, only spot instances whose bid price percentage matches this field will be considered. Note that, for safety, we enforce this field to be no more than 10000. -- - `tags` - - Sequence - - Tags to be attached to the serving endpoint and automatically propagated to billing logs. See [\_](#model_serving_endpointsnametags). +- - `zone_id` + - String + - Identifier for the availability zone/datacenter in which the cluster resides. This string will be of a form like "us-west-2a". The provided availability zone must be in the same region as the Databricks deployment. For example, "us-west-2a" is not a valid zone id if the Databricks deployment resides in the "us-east-1" region. This is an optional field at cluster creation, and if not specified, the zone "auto" will be used. If the zone specified is "auto", will try to place cluster in a zone with high availability, and will retry placement in a different AZ if there is not enough capacity. The list of available zones as well as the default value can be found by using the `List Zones` method. 
::: -**Example** - -The following example defines a Unity Catalog model serving endpoint: - -```yaml -resources: - model_serving_endpoints: - uc_model_serving_endpoint: - name: "uc-model-endpoint" - config: - served_entities: - - entity_name: "myCatalog.mySchema.my-ads-model" - entity_version: "10" - workload_size: "Small" - scale_to_zero_enabled: "true" - traffic_config: - routes: - - served_model_name: "my-ads-model-10" - traffic_percentage: "100" - tags: - - key: "team" - value: "data science" -``` - -### model_serving_endpoints._name_.ai_gateway +### jobs._name_.tasks.new_cluster.azure_attributes **`Type: Map`** -The AI Gateway configuration for the serving endpoint. NOTE: External model, provisioned throughput, and pay-per-token endpoints are fully supported; agent endpoints currently only support inference tables. +Attributes related to clusters running on Microsoft Azure. +If not specified at cluster creation, a set of default values will be used. @@ -5600,35 +5381,30 @@ The AI Gateway configuration for the serving endpoint. NOTE: External model, pro - Type - Description -- - `fallback_config` - - Map - - Configuration for traffic fallback which auto fallbacks to other served entities if the request to a served entity fails with certain error codes, to increase availability. See [\_](#model_serving_endpointsnameai_gatewayfallback_config). +- - `availability` + - String + - Availability type used for all subsequent nodes past the `first_on_demand` ones. Note: If `first_on_demand` is zero, this availability type will be used for the entire cluster. -- - `guardrails` - - Map - - Configuration for AI Guardrails to prevent unwanted data and unsafe data in requests and responses. See [\_](#model_serving_endpointsnameai_gatewayguardrails). +- - `first_on_demand` + - Integer + - The first `first_on_demand` nodes of the cluster will be placed on on-demand instances. 
This value should be greater than 0, to make sure the cluster driver node is placed on an on-demand instance. If this value is greater than or equal to the current cluster size, all nodes will be placed on on-demand instances. If this value is less than the current cluster size, `first_on_demand` nodes will be placed on on-demand instances and the remainder will be placed on `availability` instances. Note that this value does not affect cluster size and cannot currently be mutated over the lifetime of a cluster. -- - `inference_table_config` +- - `log_analytics_info` - Map - - Configuration for payload logging using inference tables. Use these tables to monitor and audit data being sent to and received from model APIs and to improve model quality. See [\_](#model_serving_endpointsnameai_gatewayinference_table_config). - -- - `rate_limits` - - Sequence - - Configuration for rate limits which can be set to limit endpoint traffic. See [\_](#model_serving_endpointsnameai_gatewayrate_limits). + - Defines values necessary to configure and run Azure Log Analytics agent. See [\_](#jobsnametasksnew_clusterazure_attributeslog_analytics_info). -- - `usage_tracking_config` - - Map - - Configuration to enable usage tracking using system tables. These tables allow you to monitor operational usage on endpoints and their associated costs. See [\_](#model_serving_endpointsnameai_gatewayusage_tracking_config). +- - `spot_bid_max_price` + - Any + - The max bid price to be used for Azure spot instances. The Max price for the bid cannot be higher than the on-demand price of the instance. If not specified, the default value is -1, which specifies that the instance cannot be evicted on the basis of price, and only on the basis of availability. Further, the value should be > 0 or -1.
::: -### model_serving_endpoints._name_.ai_gateway.fallback_config +### jobs._name_.tasks.new_cluster.azure_attributes.log_analytics_info **`Type: Map`** -Configuration for traffic fallback which auto fallbacks to other served entities if the request to a served -entity fails with certain error codes, to increase availability. +Defines values necessary to configure and run Azure Log Analytics agent @@ -5638,18 +5414,26 @@ entity fails with certain error codes, to increase availability. - Type - Description -- - `enabled` - - Boolean - - Whether to enable traffic fallback. When a served entity in the serving endpoint returns specific error codes (e.g. 500), the request will automatically be round-robin attempted with other served entities in the same endpoint, following the order of served entity list, until a successful response is returned. If all attempts fail, return the last response with the error code. +- - `log_analytics_primary_key` + - String + - The primary key for the Azure Log Analytics agent configuration + +- - `log_analytics_workspace_id` + - String + - The workspace ID for the Azure Log Analytics agent configuration ::: -### model_serving_endpoints._name_.ai_gateway.guardrails +### jobs._name_.tasks.new_cluster.cluster_log_conf **`Type: Map`** -Configuration for AI Guardrails to prevent unwanted data and unsafe data in requests and responses. +The configuration for delivering spark logs to a long-term storage destination. +Three kinds of destinations (DBFS, S3 and Unity Catalog volumes) are supported. Only one destination can be specified +for one cluster. If the conf is given, the logs will be delivered to the destination every +`5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while +the destination of executor logs is `$destination/$clusterId/executor`. 
@@ -5659,22 +5443,27 @@ Configuration for AI Guardrails to prevent unwanted data and unsafe data in requ - Type - Description -- - `input` +- - `dbfs` - Map - - Configuration for input guardrail filters. See [\_](#model_serving_endpointsnameai_gatewayguardrailsinput). + - destination needs to be provided. e.g. `{ "dbfs" : { "destination" : "dbfs:/home/cluster_log" } }`. See [\_](#jobsnametasksnew_clustercluster_log_confdbfs). -- - `output` +- - `s3` - Map - - Configuration for output guardrail filters. See [\_](#model_serving_endpointsnameai_gatewayguardrailsoutput). + - destination and either the region or endpoint need to be provided. e.g. `{ "s3": { "destination" : "s3://cluster_log_bucket/prefix", "region" : "us-west-2" } }` Cluster iam role is used to access s3, please make sure the cluster iam role in `instance_profile_arn` has permission to write data to the s3 destination. See [\_](#jobsnametasksnew_clustercluster_log_confs3). + +- - `volumes` + - Map + - destination needs to be provided, e.g. `{ "volumes": { "destination": "/Volumes/catalog/schema/volume/cluster_log" } }`. See [\_](#jobsnametasksnew_clustercluster_log_confvolumes). ::: -### model_serving_endpoints._name_.ai_gateway.guardrails.input +### jobs._name_.tasks.new_cluster.cluster_log_conf.dbfs **`Type: Map`** -Configuration for input guardrail filters. +destination needs to be provided. e.g. +`{ "dbfs" : { "destination" : "dbfs:/home/cluster_log" } }` @@ -5684,30 +5473,67 @@ Configuration for input guardrail filters. - Type - Description -- - `invalid_keywords` - - Sequence - - This field is deprecated +- - `destination` + - String + - dbfs destination, e.g. `dbfs:/my/path` -- - `pii` - - Map - - Configuration for guardrail PII filter. See [\_](#model_serving_endpointsnameai_gatewayguardrailsinputpii). +::: -- - `safety` - - Boolean - - Indicates whether the safety filter is enabled. 
-- - `valid_topics` - - Sequence - - This field is deprecated +### jobs._name_.tasks.new_cluster.cluster_log_conf.s3 + +**`Type: Map`** + +destination and either the region or endpoint need to be provided. e.g. +`{ "s3": { "destination" : "s3://cluster_log_bucket/prefix", "region" : "us-west-2" } }` +Cluster iam role is used to access s3, please make sure the cluster iam role in +`instance_profile_arn` has permission to write data to the s3 destination. + + + +:::list-table + +- - Key + - Type + - Description + +- - `canned_acl` + - String + - (Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`. If `canned_acl` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on the destination bucket and prefix. The full list of possible canned acl can be found at http://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl. Please also note that by default only the object owner gets full controls. If you are using cross account role for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to read the logs. + +- - `destination` + - String + - S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using cluster iam role, please make sure you set cluster iam role and the role has write access to the destination. Please also note that you cannot use AWS keys to deliver logs. + +- - `enable_encryption` + - Boolean + - (Optional) Flag to enable server side encryption, `false` by default. + +- - `encryption_type` + - String + - (Optional) The encryption type, it could be `sse-s3` or `sse-kms`. It will be used only when encryption is enabled and the default type is `sse-s3`. + +- - `endpoint` + - String + - S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`. Either region or endpoint needs to be set. If both are set, endpoint will be used.
+ +- - `kms_key` + - String + - (Optional) Kms key which will be used if encryption is enabled and encryption type is set to `sse-kms`. + +- - `region` + - String + - S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set, endpoint will be used. ::: -### model_serving_endpoints._name_.ai_gateway.guardrails.input.pii +### jobs._name_.tasks.new_cluster.cluster_log_conf.volumes **`Type: Map`** -Configuration for guardrail PII filter. +destination needs to be provided, e.g. +`{ "volumes": { "destination": "/Volumes/catalog/schema/volume/cluster_log" } }` @@ -5717,18 +5543,18 @@ Configuration for guardrail PII filter. - Type - Description -- - `behavior` +- - `destination` - String - - Configuration for input guardrail filters. + - UC Volumes destination, e.g. `/Volumes/catalog/schema/vol1/init-scripts/setup-datadog.sh` or `dbfs:/Volumes/catalog/schema/vol1/init-scripts/setup-datadog.sh` ::: -### model_serving_endpoints._name_.ai_gateway.guardrails.output +### jobs._name_.tasks.new_cluster.docker_image **`Type: Map`** -Configuration for output guardrail filters. + @@ -5738,30 +5564,22 @@ Configuration for output guardrail filters. - Type - Description -- - `invalid_keywords` - - Sequence - - This field is deprecated - -- - `pii` +- - `basic_auth` - Map - - Configuration for guardrail PII filter. See [\_](#model_serving_endpointsnameai_gatewayguardrailsoutputpii). - -- - `safety` - - Boolean - - Indicates whether the safety filter is enabled. + - See [\_](#jobsnametasksnew_clusterdocker_imagebasic_auth). -- - `valid_topics` - - Sequence - - This field is deprecated +- - `url` + - String + - URL of the docker image. ::: -### model_serving_endpoints._name_.ai_gateway.guardrails.output.pii +### jobs._name_.tasks.new_cluster.docker_image.basic_auth **`Type: Map`** -Configuration for guardrail PII filter. + @@ -5771,19 +5589,22 @@ Configuration for guardrail PII filter. 
- Type - Description -- - `behavior` +- - `password` - String - - Configuration for input guardrail filters. + - Password of the user + +- - `username` + - String + - Name of the user ::: -### model_serving_endpoints._name_.ai_gateway.inference_table_config +### jobs._name_.tasks.new_cluster.driver_node_type_flexibility **`Type: Map`** -Configuration for payload logging using inference tables. -Use these tables to monitor and audit data being sent to and received from model APIs and to improve model quality. +Flexible node type configuration for the driver node. @@ -5793,30 +5614,66 @@ Use these tables to monitor and audit data being sent to and received from model - Type - Description -- - `catalog_name` +- - `alternate_node_type_ids` + - Sequence + - A list of node type IDs to use as fallbacks when the primary node type is unavailable. + +::: + + +### jobs._name_.tasks.new_cluster.gcp_attributes + +**`Type: Map`** + +Attributes related to clusters running on Google Cloud Platform. +If not specified at cluster creation, a set of default values will be used. + + + +:::list-table + +- - Key + - Type + - Description + +- - `availability` - String - - The name of the catalog in Unity Catalog. Required when enabling inference tables. NOTE: On update, you have to disable inference table first in order to change the catalog name. + - This field determines whether the instance pool will contain preemptible VMs, on-demand VMs, or preemptible VMs with a fallback to on-demand VMs if the former is unavailable. -- - `enabled` - - Boolean - - Indicates whether the inference table is enabled. +- - `boot_disk_size` + - Integer + - Boot disk size in GB -- - `schema_name` +- - `first_on_demand` + - Integer + - The first `first_on_demand` nodes of the cluster will be placed on on-demand instances. This value should be greater than 0, to make sure the cluster driver node is placed on an on-demand instance. 
If this value is greater than or equal to the current cluster size, all nodes will be placed on on-demand instances. If this value is less than the current cluster size, `first_on_demand` nodes will be placed on on-demand instances and the remainder will be placed on `availability` instances. Note that this value does not affect cluster size and cannot currently be mutated over the lifetime of a cluster. + +- - `google_service_account` - String - - The name of the schema in Unity Catalog. Required when enabling inference tables. NOTE: On update, you have to disable inference table first in order to change the schema name. + - If provided, the cluster will impersonate the google service account when accessing gcloud services (like GCS). The google service account must have previously been added to the Databricks environment by an account administrator. -- - `table_name_prefix` +- - `local_ssd_count` + - Integer + - If provided, each node (workers and driver) in the cluster will have this number of local SSDs attached. Each local SSD is 375GB in size. Refer to [GCP documentation](https://cloud.google.com/compute/docs/disks/local-ssd#choose_number_local_ssds) for the supported number of local SSDs for each instance type. + +- - `use_preemptible_executors` + - Boolean + - This field is deprecated + +- - `zone_id` - String - - The prefix of the table in Unity Catalog. NOTE: On update, you have to disable inference table first in order to change the prefix name. + - Identifier for the availability zone in which the cluster resides. This can be one of the following: - "HA" => High availability, spread nodes across availability zones for a Databricks deployment region [default]. - "AUTO" => Databricks picks an availability zone to schedule the cluster on. - A GCP availability zone => Pick One of the available zones for (machine type + region) from https://cloud.google.com/compute/docs/regions-zones. 
::: -### model_serving_endpoints._name_.ai_gateway.rate_limits +### jobs._name_.tasks.new_cluster.init_scripts **`Type: Sequence`** -Configuration for rate limits which can be set to limit endpoint traffic. +The configuration for storing init scripts. Any number of destinations can be specified. +The scripts are executed sequentially in the order provided. +If `cluster_log_conf` is specified, init script logs are sent to `//init_scripts`. @@ -5826,35 +5683,42 @@ Configuration for rate limits which can be set to limit endpoint traffic. - Type - Description -- - `calls` - - Integer - - Used to specify how many calls are allowed for a key within the renewal_period. +- - `abfss` + - Map + - Contains the Azure Data Lake Storage destination path. See [\_](#jobsnametasksnew_clusterinit_scriptsabfss). -- - `key` - - String - - Key field for a rate limit. Currently, 'user', 'user_group, 'service_principal', and 'endpoint' are supported, with 'endpoint' being the default if not specified. +- - `dbfs` + - Map + - This field is deprecated -- - `principal` - - String - - Principal field for a user, user group, or service principal to apply rate limiting to. Accepts a user email, group name, or service principal application ID. +- - `file` + - Map + - destination needs to be provided, e.g. `{ "file": { "destination": "file:/my/local/file.sh" } }`. See [\_](#jobsnametasksnew_clusterinit_scriptsfile). -- - `renewal_period` - - String - - Renewal period field for a rate limit. Currently, only 'minute' is supported. +- - `gcs` + - Map + - destination needs to be provided, e.g. `{ "gcs": { "destination": "gs://my-bucket/file.sh" } }`. See [\_](#jobsnametasksnew_clusterinit_scriptsgcs). -- - `tokens` - - Integer - - Used to specify how many tokens are allowed for a key within the renewal_period. +- - `s3` + - Map + - destination and either the region or endpoint need to be provided. e.g. 
`{ \"s3\": { \"destination\": \"s3://cluster_log_bucket/prefix\", \"region\": \"us-west-2\" } }` Cluster iam role is used to access s3, please make sure the cluster iam role in `instance_profile_arn` has permission to write data to the s3 destination. See [\_](#jobsnametasksnew_clusterinit_scriptss3). + +- - `volumes` + - Map + - destination needs to be provided. e.g. `{ \"volumes\" : { \"destination\" : \"/Volumes/my-init.sh\" } }`. See [\_](#jobsnametasksnew_clusterinit_scriptsvolumes). + +- - `workspace` + - Map + - destination needs to be provided, e.g. `{ "workspace": { "destination": "/cluster-init-scripts/setup-datadog.sh" } }`. See [\_](#jobsnametasksnew_clusterinit_scriptsworkspace). ::: -### model_serving_endpoints._name_.ai_gateway.usage_tracking_config +### jobs._name_.tasks.new_cluster.init_scripts.abfss **`Type: Map`** -Configuration to enable usage tracking using system tables. -These tables allow you to monitor operational usage on endpoints and their associated costs. +Contains the Azure Data Lake Storage destination path @@ -5864,18 +5728,19 @@ These tables allow you to monitor operational usage on endpoints and their assoc - Type - Description -- - `enabled` - - Boolean - - Whether to enable usage tracking. +- - `destination` + - String + - abfss destination, e.g. `abfss://@.dfs.core.windows.net/`. ::: -### model_serving_endpoints._name_.config +### jobs._name_.tasks.new_cluster.init_scripts.file **`Type: Map`** -The core config of the serving endpoint. +destination needs to be provided, e.g. +`{ "file": { "destination": "file:/my/local/file.sh" } }` @@ -5885,33 +5750,19 @@ The core config of the serving endpoint. - Type - Description -- - `auto_capture_config` - - Map - - Configuration for Inference Tables which automatically logs requests and responses to Unity Catalog. 
Note: this field is deprecated for creating new provisioned throughput endpoints, or updating existing provisioned throughput endpoints that never have inference table configured; in these cases please use AI Gateway to manage inference tables. See [\_](#model_serving_endpointsnameconfigauto_capture_config). - -- - `served_entities` - - Sequence - - The list of served entities under the serving endpoint config. See [\_](#model_serving_endpointsnameconfigserved_entities). - -- - `served_models` - - Sequence - - (Deprecated, use served_entities instead) The list of served models under the serving endpoint config. See [\_](#model_serving_endpointsnameconfigserved_models). - -- - `traffic_config` - - Map - - The traffic configuration associated with the serving endpoint config. See [\_](#model_serving_endpointsnameconfigtraffic_config). +- - `destination` + - String + - local file destination, e.g. `file:/my/local/file.sh` ::: -### model_serving_endpoints._name_.config.auto_capture_config +### jobs._name_.tasks.new_cluster.init_scripts.gcs **`Type: Map`** -Configuration for Inference Tables which automatically logs requests and responses to Unity Catalog. -Note: this field is deprecated for creating new provisioned throughput endpoints, -or updating existing provisioned throughput endpoints that never have inference table configured; -in these cases please use AI Gateway to manage inference tables. +destination needs to be provided, e.g. +`{ "gcs": { "destination": "gs://my-bucket/file.sh" } }` @@ -5921,30 +5772,21 @@ in these cases please use AI Gateway to manage inference tables. - Type - Description -- - `catalog_name` - - String - - The name of the catalog in Unity Catalog. NOTE: On update, you cannot change the catalog name if the inference table is already enabled. - -- - `enabled` - - Boolean - - Indicates whether the inference table is enabled. - -- - `schema_name` - - String - - The name of the schema in Unity Catalog. 
NOTE: On update, you cannot change the schema name if the inference table is already enabled. - -- - `table_name_prefix` +- - `destination` - String - - The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if the inference table is already enabled. + - GCS destination/URI, e.g. `gs://my-bucket/some-prefix` ::: -### model_serving_endpoints._name_.config.served_entities +### jobs._name_.tasks.new_cluster.init_scripts.s3 -**`Type: Sequence`** +**`Type: Map`** -The list of served entities under the serving endpoint config. +destination and either the region or endpoint need to be provided. e.g. +`{ \"s3\": { \"destination\": \"s3://cluster_log_bucket/prefix\", \"region\": \"us-west-2\" } }` +Cluster iam role is used to access s3, please make sure the cluster iam role in +`instance_profile_arn` has permission to write data to the s3 destination. @@ -5954,70 +5796,43 @@ The list of served entities under the serving endpoint config. - Type - Description -- - `entity_name` +- - `canned_acl` - String - - The name of the entity to be served. The entity may be a model in the Databricks Model Registry, a model in the Unity Catalog (UC), or a function of type FEATURE_SPEC in the UC. If it is a UC object, the full name of the object should be given in the form of **catalog_name.schema_name.model_name**. + - (Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`. If `canned_acl` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on the destination bucket and prefix. The full list of possible canned acl can be found at http://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl. Please also note that by default only the object owner gets full controls. If you are using cross account role for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to read the logs.
-- - `entity_version` +- - `destination` - String - - - -- - `environment_vars` - - Map - - An object containing a set of optional, user-specified environment variable key-value pairs used for serving this entity. Note: this is an experimental feature and subject to change. Example entity environment variables that refer to Databricks secrets: `{"OPENAI_API_KEY": "{{secrets/my_scope/my_key}}", "DATABRICKS_TOKEN": "{{secrets/my_scope2/my_key2}}"}` + - S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using cluster iam role, please make sure you set cluster iam role and the role has write access to the destination. Please also note that you cannot use AWS keys to deliver logs. -- - `external_model` - - Map - - The external model to be served. NOTE: Only one of external_model and (entity_name, entity_version, workload_size, workload_type, and scale_to_zero_enabled) can be specified with the latter set being used for custom model serving for a Databricks registered model. For an existing endpoint with external_model, it cannot be updated to an endpoint without external_model. If the endpoint is created without external_model, users cannot update it to add external_model later. The task type of all external models within an endpoint must be the same. See [\_](#model_serving_endpointsnameconfigserved_entitiesexternal_model). +- - `enable_encryption` + - Boolean + - (Optional) Flag to enable server side encryption, `false` by default. -- - `instance_profile_arn` +- - `encryption_type` - String - - ARN of the instance profile that the served entity uses to access AWS resources. - -- - `max_provisioned_concurrency` - - Integer - - The maximum provisioned concurrency that the endpoint can scale up to. Do not use if workload_size is specified. - -- - `max_provisioned_throughput` - - Integer - - The maximum tokens per second that the endpoint can scale up to. 
- -- - `min_provisioned_concurrency` - - Integer - - The minimum provisioned concurrency that the endpoint can scale down to. Do not use if workload_size is specified. - -- - `min_provisioned_throughput` - - Integer - - The minimum tokens per second that the endpoint can scale down to. + - (Optional) The encryption type, it could be `sse-s3` or `sse-kms`. It will be used only when encryption is enabled and the default type is `sse-s3`. -- - `name` +- - `endpoint` - String - - The name of a served entity. It must be unique across an endpoint. A served entity name can consist of alphanumeric characters, dashes, and underscores. If not specified for an external model, this field defaults to external_model.name, with '.' and ':' replaced with '-', and if not specified for other entities, it defaults to entity_name-entity_version. - -- - `provisioned_model_units` - - Integer - - The number of model units provisioned. - -- - `scale_to_zero_enabled` - - Boolean - - Whether the compute resources for the served entity should scale down to zero. + - S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`. Either region or endpoint needs to be set. If both are set, endpoint will be used. -- - `workload_size` +- - `kms_key` - String - - The workload size of the served entity. The workload size corresponds to a range of provisioned concurrency that the compute autoscales between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are "Small" (4 - 4 provisioned concurrency), "Medium" (8 - 16 provisioned concurrency), and "Large" (16 - 64 provisioned concurrency). Additional custom workload sizes can also be used when available in the workspace. If scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size is 0. Do not use if min_provisioned_concurrency and max_provisioned_concurrency are specified. 
+ - (Optional) Kms key which will be used if encryption is enabled and encryption type is set to `sse-kms`. -- - `workload_type` +- - `region` - String - - The workload type of the served entity. The workload type selects which type of compute to use in the endpoint. The default value for this parameter is "CPU". For deep learning workloads, GPU acceleration is available by selecting workload types like GPU_SMALL and others. See the available [GPU types](https://docs.databricks.com/en/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types). + - S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set, endpoint will be used. ::: -### model_serving_endpoints._name_.config.served_entities.external_model +### jobs._name_.tasks.new_cluster.init_scripts.volumes **`Type: Map`** -The external model to be served. NOTE: Only one of external_model and (entity_name, entity_version, workload_size, workload_type, and scale_to_zero_enabled) can be specified with the latter set being used for custom model serving for a Databricks registered model. For an existing endpoint with external_model, it cannot be updated to an endpoint without external_model. If the endpoint is created without external_model, users cannot update it to add external_model later. The task type of all external models within an endpoint must be the same. +destination needs to be provided. e.g. +`{ \"volumes\" : { \"destination\" : \"/Volumes/my-init.sh\" } }` @@ -6027,62 +5842,40 @@ The external model to be served. NOTE: Only one of external_model and (entity_na - Type - Description -- - `ai21labs_config` - - Map - - AI21Labs Config. Only required if the provider is 'ai21labs'. See [\_](#model_serving_endpointsnameconfigserved_entitiesexternal_modelai21labs_config). - -- - `amazon_bedrock_config` - - Map - - Amazon Bedrock Config. Only required if the provider is 'amazon-bedrock'. 
See [\_](#model_serving_endpointsnameconfigserved_entitiesexternal_modelamazon_bedrock_config). +- - `destination` + - String + - UC Volumes destination, e.g. `/Volumes/catalog/schema/vol1/init-scripts/setup-datadog.sh` or `dbfs:/Volumes/catalog/schema/vol1/init-scripts/setup-datadog.sh` -- - `anthropic_config` - - Map - - Anthropic Config. Only required if the provider is 'anthropic'. See [\_](#model_serving_endpointsnameconfigserved_entitiesexternal_modelanthropic_config). +::: -- - `cohere_config` - - Map - - Cohere Config. Only required if the provider is 'cohere'. See [\_](#model_serving_endpointsnameconfigserved_entitiesexternal_modelcohere_config). -- - `custom_provider_config` - - Map - - Custom Provider Config. Only required if the provider is 'custom'. See [\_](#model_serving_endpointsnameconfigserved_entitiesexternal_modelcustom_provider_config). +### jobs._name_.tasks.new_cluster.init_scripts.workspace -- - `databricks_model_serving_config` - - Map - - Databricks Model Serving Config. Only required if the provider is 'databricks-model-serving'. See [\_](#model_serving_endpointsnameconfigserved_entitiesexternal_modeldatabricks_model_serving_config). +**`Type: Map`** -- - `google_cloud_vertex_ai_config` - - Map - - Google Cloud Vertex AI Config. Only required if the provider is 'google-cloud-vertex-ai'. See [\_](#model_serving_endpointsnameconfigserved_entitiesexternal_modelgoogle_cloud_vertex_ai_config). +destination needs to be provided, e.g. +`{ "workspace": { "destination": "/cluster-init-scripts/setup-datadog.sh" } }` -- - `name` - - String - - The name of the external model. -- - `openai_config` - - Map - - OpenAI Config. Only required if the provider is 'openai'. See [\_](#model_serving_endpointsnameconfigserved_entitiesexternal_modelopenai_config). -- - `palm_config` - - Map - - PaLM Config. Only required if the provider is 'palm'. See [\_](#model_serving_endpointsnameconfigserved_entitiesexternal_modelpalm_config). 
+:::list-table -- - `provider` - - String - - The name of the provider for the external model. Currently, the supported providers are 'ai21labs', 'anthropic', 'amazon-bedrock', 'cohere', 'databricks-model-serving', 'google-cloud-vertex-ai', 'openai', 'palm', and 'custom'. +- - Key + - Type + - Description -- - `task` +- - `destination` - String - - The task type of the external model. + - wsfs destination, e.g. `workspace:/cluster-init-scripts/setup-datadog.sh` ::: -### model_serving_endpoints._name_.config.served_entities.external_model.ai21labs_config +### jobs._name_.tasks.new_cluster.worker_node_type_flexibility **`Type: Map`** -AI21Labs Config. Only required if the provider is 'ai21labs'. +Flexible node type configuration for worker nodes. @@ -6092,22 +5885,18 @@ AI21Labs Config. Only required if the provider is 'ai21labs'. - Type - Description -- - `ai21labs_api_key` - - String - - The Databricks secret key reference for an AI21 Labs API key. If you prefer to paste your API key directly, see `ai21labs_api_key_plaintext`. You must provide an API key using one of the following fields: `ai21labs_api_key` or `ai21labs_api_key_plaintext`. - -- - `ai21labs_api_key_plaintext` - - String - - An AI21 Labs API key provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `ai21labs_api_key`. You must provide an API key using one of the following fields: `ai21labs_api_key` or `ai21labs_api_key_plaintext`. +- - `alternate_node_type_ids` + - Sequence + - A list of node type IDs to use as fallbacks when the primary node type is unavailable. ::: -### model_serving_endpoints._name_.config.served_entities.external_model.amazon_bedrock_config +### jobs._name_.tasks.new_cluster.workload_type **`Type: Map`** -Amazon Bedrock Config. Only required if the provider is 'amazon-bedrock'. +Cluster Attributes showing for clusters workload types. @@ -6117,42 +5906,18 @@ Amazon Bedrock Config. Only required if the provider is 'amazon-bedrock'. 
- Type - Description -- - `aws_access_key_id` - - String - - The Databricks secret key reference for an AWS access key ID with permissions to interact with Bedrock services. If you prefer to paste your API key directly, see `aws_access_key_id_plaintext`. You must provide an API key using one of the following fields: `aws_access_key_id` or `aws_access_key_id_plaintext`. - -- - `aws_access_key_id_plaintext` - - String - - An AWS access key ID with permissions to interact with Bedrock services provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `aws_access_key_id`. You must provide an API key using one of the following fields: `aws_access_key_id` or `aws_access_key_id_plaintext`. - -- - `aws_region` - - String - - The AWS region to use. Bedrock has to be enabled there. - -- - `aws_secret_access_key` - - String - - The Databricks secret key reference for an AWS secret access key paired with the access key ID, with permissions to interact with Bedrock services. If you prefer to paste your API key directly, see `aws_secret_access_key_plaintext`. You must provide an API key using one of the following fields: `aws_secret_access_key` or `aws_secret_access_key_plaintext`. - -- - `aws_secret_access_key_plaintext` - - String - - An AWS secret access key paired with the access key ID, with permissions to interact with Bedrock services provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `aws_secret_access_key`. You must provide an API key using one of the following fields: `aws_secret_access_key` or `aws_secret_access_key_plaintext`. - -- - `bedrock_provider` - - String - - The underlying provider in Amazon Bedrock. Supported values (case insensitive) include: Anthropic, Cohere, AI21Labs, Amazon. - -- - `instance_profile_arn` - - String - - ARN of the instance profile that the external model will use to access AWS resources. You must authenticate using an instance profile or access keys. 
If you prefer to authenticate using access keys, see `aws_access_key_id`, `aws_access_key_id_plaintext`, `aws_secret_access_key` and `aws_secret_access_key_plaintext`. +- - `clients` + - Map + - defined what type of clients can use the cluster. E.g. Notebooks, Jobs. See [\_](#jobsnametasksnew_clusterworkload_typeclients). ::: -### model_serving_endpoints._name_.config.served_entities.external_model.anthropic_config +### jobs._name_.tasks.new_cluster.workload_type.clients **`Type: Map`** -Anthropic Config. Only required if the provider is 'anthropic'. +defined what type of clients can use the cluster. E.g. Notebooks, Jobs @@ -6162,22 +5927,22 @@ Anthropic Config. Only required if the provider is 'anthropic'. - Type - Description -- - `anthropic_api_key` - - String - - The Databricks secret key reference for an Anthropic API key. If you prefer to paste your API key directly, see `anthropic_api_key_plaintext`. You must provide an API key using one of the following fields: `anthropic_api_key` or `anthropic_api_key_plaintext`. +- - `jobs` + - Boolean + - With jobs set, the cluster can be used for jobs -- - `anthropic_api_key_plaintext` - - String - - The Anthropic API key provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `anthropic_api_key`. You must provide an API key using one of the following fields: `anthropic_api_key` or `anthropic_api_key_plaintext`. +- - `notebooks` + - Boolean + - With notebooks set, this cluster can be used for notebooks ::: -### model_serving_endpoints._name_.config.served_entities.external_model.cohere_config +### jobs._name_.tasks.notebook_task **`Type: Map`** -Cohere Config. Only required if the provider is 'cohere'. +The task runs a notebook when the `notebook_task` field is present. @@ -6187,26 +5952,30 @@ Cohere Config. Only required if the provider is 'cohere'. 
- Type - Description -- - `cohere_api_base` +- - `base_parameters` + - Map + - Base parameters to be used for each run of this job. If the run is initiated by a call to :method:jobs/run Now with parameters specified, the two parameters maps are merged. If the same key is specified in `base_parameters` and in `run-now`, the value from `run-now` is used. Use [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs. If the notebook takes a parameter that is not specified in the job’s `base_parameters` or the `run-now` override parameters, the default value from the notebook is used. Retrieve these parameters in a notebook using [dbutils.widgets.get](https://docs.databricks.com/dev-tools/databricks-utils.html#dbutils-widgets). The JSON representation of this field cannot exceed 1MB. + +- - `notebook_path` - String - - This is an optional field to provide a customized base URL for the Cohere API. If left unspecified, the standard Cohere base URL is used. + - The path of the notebook to be run in the Databricks workspace or remote repository. For notebooks stored in the Databricks workspace, the path must be absolute and begin with a slash. For notebooks stored in a remote repository, the path must be relative. This field is required. -- - `cohere_api_key` +- - `source` - String - - The Databricks secret key reference for a Cohere API key. If you prefer to paste your API key directly, see `cohere_api_key_plaintext`. You must provide an API key using one of the following fields: `cohere_api_key` or `cohere_api_key_plaintext`. + - Optional location type of the notebook. When set to `WORKSPACE`, the notebook will be retrieved from the local Databricks workspace. When set to `GIT`, the notebook will be retrieved from a Git repository defined in `git_source`. If the value is empty, the task will use `GIT` if `git_source` is defined and `WORKSPACE` otherwise. 
* `WORKSPACE`: Notebook is located in Databricks workspace. * `GIT`: Notebook is located in cloud Git provider. -- - `cohere_api_key_plaintext` +- - `warehouse_id` - String - - The Cohere API key provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `cohere_api_key`. You must provide an API key using one of the following fields: `cohere_api_key` or `cohere_api_key_plaintext`. + - Optional `warehouse_id` to run the notebook on a SQL warehouse. Classic SQL warehouses are NOT supported, please use serverless or pro SQL warehouses. Note that SQL warehouses only support SQL cells; if the notebook contains non-SQL cells, the run will fail. ::: -### model_serving_endpoints._name_.config.served_entities.external_model.custom_provider_config +### jobs._name_.tasks.notification_settings **`Type: Map`** -Custom Provider Config. Only required if the provider is 'custom'. +Optional notification settings that are used when sending notifications to each of the `email_notifications` and `webhook_notifications` for this task. @@ -6216,27 +5985,26 @@ Custom Provider Config. Only required if the provider is 'custom'. - Type - Description -- - `api_key_auth` - - Map - - This is a field to provide API key authentication for the custom provider API. You can only specify one authentication method. See [\_](#model_serving_endpointsnameconfigserved_entitiesexternal_modelcustom_provider_configapi_key_auth). +- - `alert_on_last_attempt` + - Boolean + - If true, do not send notifications to recipients specified in `on_start` for the retried runs and do not send notifications to recipients specified in `on_failure` until the last retry of the run. -- - `bearer_token_auth` - - Map - - This is a field to provide bearer token authentication for the custom provider API. You can only specify one authentication method. See [\_](#model_serving_endpointsnameconfigserved_entitiesexternal_modelcustom_provider_configbearer_token_auth). 
+- - `no_alert_for_canceled_runs` + - Boolean + - If true, do not send notifications to recipients specified in `on_failure` if the run is canceled. -- - `custom_provider_url` - - String - - This is a field to provide the URL of the custom provider API. +- - `no_alert_for_skipped_runs` + - Boolean + - If true, do not send notifications to recipients specified in `on_failure` if the run is skipped. ::: -### model_serving_endpoints._name_.config.served_entities.external_model.custom_provider_config.api_key_auth +### jobs._name_.tasks.pipeline_task **`Type: Map`** -This is a field to provide API key authentication for the custom provider API. -You can only specify one authentication method. +The task triggers a pipeline update when the `pipeline_task` field is present. Only pipelines configured to use triggered mode are supported. @@ -6246,27 +6014,22 @@ You can only specify one authentication method. - Type - Description -- - `key` - - String - - The name of the API key parameter used for authentication. - -- - `value` - - String - - The Databricks secret key reference for an API Key. If you prefer to paste your token directly, see `value_plaintext`. +- - `full_refresh` + - Boolean + - If true, triggers a full refresh on the delta live table. -- - `value_plaintext` +- - `pipeline_id` - String - - The API Key provided as a plaintext string. If you prefer to reference your token using Databricks Secrets, see `value`. + - The full name of the pipeline task to execute. ::: -### model_serving_endpoints._name_.config.served_entities.external_model.custom_provider_config.bearer_token_auth +### jobs._name_.tasks.power_bi_task **`Type: Map`** -This is a field to provide bearer token authentication for the custom provider API. -You can only specify one authentication method. +The task triggers a Power BI semantic model update when the `power_bi_task` field is present. @@ -6276,22 +6039,34 @@ You can only specify one authentication method.
- Type - Description -- - `token` +- - `connection_resource_name` - String - - The Databricks secret key reference for a token. If you prefer to paste your token directly, see `token_plaintext`. + - The resource name of the UC connection to authenticate from Databricks to Power BI -- - `token_plaintext` +- - `power_bi_model` + - Map + - The semantic model to update. See [\_](#jobsnametaskspower_bi_taskpower_bi_model). + +- - `refresh_after_update` + - Boolean + - Whether the model should be refreshed after the update + +- - `tables` + - Sequence + - The tables to be exported to Power BI. See [\_](#jobsnametaskspower_bi_tasktables). + +- - `warehouse_id` - String - - The token provided as a plaintext string. If you prefer to reference your token using Databricks Secrets, see `token`. + - The SQL warehouse ID to use as the Power BI data source ::: -### model_serving_endpoints._name_.config.served_entities.external_model.databricks_model_serving_config +### jobs._name_.tasks.power_bi_task.power_bi_model **`Type: Map`** -Databricks Model Serving Config. Only required if the provider is 'databricks-model-serving'. +The semantic model to update @@ -6301,26 +6076,67 @@ Databricks Model Serving Config. Only required if the provider is 'databricks-mo - Type - Description -- - `databricks_api_token` +- - `authentication_method` - String - - The Databricks secret key reference for a Databricks API token that corresponds to a user or service principal with Can Query access to the model serving endpoint pointed to by this external model. If you prefer to paste your API key directly, see `databricks_api_token_plaintext`. You must provide an API key using one of the following fields: `databricks_api_token` or `databricks_api_token_plaintext`. 
+ - How the published Power BI model authenticates to Databricks -- - `databricks_api_token_plaintext` +- - `model_name` - String - - The Databricks API token that corresponds to a user or service principal with Can Query access to the model serving endpoint pointed to by this external model provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `databricks_api_token`. You must provide an API key using one of the following fields: `databricks_api_token` or `databricks_api_token_plaintext`. + - The name of the Power BI model -- - `databricks_workspace_url` +- - `overwrite_existing` + - Boolean + - Whether to overwrite existing Power BI models + +- - `storage_mode` - String - - The URL of the Databricks workspace containing the model serving endpoint pointed to by this external model. + - The default storage mode of the Power BI model + +- - `workspace_name` + - String + - The name of the Power BI workspace of the model ::: -### model_serving_endpoints._name_.config.served_entities.external_model.google_cloud_vertex_ai_config +### jobs._name_.tasks.power_bi_task.tables + +**`Type: Sequence`** + +The tables to be exported to Power BI + + + +:::list-table + +- - Key + - Type + - Description + +- - `catalog` + - String + - The catalog name in Databricks + +- - `name` + - String + - The table name in Databricks + +- - `schema` + - String + - The schema name in Databricks + +- - `storage_mode` + - String + - The Power BI storage mode of the table + +::: + + +### jobs._name_.tasks.python_wheel_task **`Type: Map`** -Google Cloud Vertex AI Config. Only required if the provider is 'google-cloud-vertex-ai'. +The task runs a Python wheel when the `python_wheel_task` field is present. @@ -6330,30 +6146,2330 @@ Google Cloud Vertex AI Config. 
Only required if the provider is 'google-cloud-ve - Type - Description -- - `private_key` +- - `entry_point` - String - - The Databricks secret key reference for a private key for the service account which has access to the Google Cloud Vertex AI Service. See [Best practices for managing service account keys]. If you prefer to paste your API key directly, see `private_key_plaintext`. You must provide an API key using one of the following fields: `private_key` or `private_key_plaintext` [Best practices for managing service account keys]: https://cloud.google.com/iam/docs/best-practices-for-managing-service-account-keys + - Named entry point to use, if it does not exist in the metadata of the package it executes the function from the package directly using `$packageName.$entryPoint()` -- - `private_key_plaintext` +- - `named_parameters` + - Map + - Command-line parameters passed to Python wheel task in the form of `["--name=task", "--data=dbfs:/path/to/data.json"]`. Leave it empty if `parameters` is not null. + +- - `package_name` - String - - The private key for the service account which has access to the Google Cloud Vertex AI Service provided as a plaintext secret. See [Best practices for managing service account keys]. If you prefer to reference your key using Databricks Secrets, see `private_key`. You must provide an API key using one of the following fields: `private_key` or `private_key_plaintext`. [Best practices for managing service account keys]: https://cloud.google.com/iam/docs/best-practices-for-managing-service-account-keys + - Name of the package to execute -- - `project_id` +- - `parameters` + - Sequence + - Command-line parameters passed to Python wheel task. Leave it empty if `named_parameters` is not null. + +::: + + +### jobs._name_.tasks.run_job_task + +**`Type: Map`** + +The task triggers another job when the `run_job_task` field is present. 
+ + + +:::list-table + +- - Key + - Type + - Description + +- - `job_id` + - Integer + - ID of the job to trigger. + +- - `job_parameters` + - Map + - Job-level parameters used to trigger the job. + +- - `pipeline_params` + - Map + - Controls whether the pipeline should perform a full refresh. See [\_](#jobsnametasksrun_job_taskpipeline_params). + +::: + + +### jobs._name_.tasks.run_job_task.pipeline_params + +**`Type: Map`** + +Controls whether the pipeline should perform a full refresh + + + +:::list-table + +- - Key + - Type + - Description + +- - `full_refresh` + - Boolean + - If true, triggers a full refresh on the delta live table. + +::: + + +### jobs._name_.tasks.spark_jar_task + +**`Type: Map`** + +The task runs a JAR when the `spark_jar_task` field is present. + + + +:::list-table + +- - Key + - Type + - Description + +- - `jar_uri` - String - - This is the Google Cloud project id that the service account is associated with. + - This field is deprecated -- - `region` +- - `main_class_name` + - String + - The full name of the class containing the main method to be executed. This class must be contained in a JAR provided as a library. The code must use `SparkContext.getOrCreate` to obtain a Spark context; otherwise, runs of the job fail. + +- - `parameters` + - Sequence + - Parameters passed to the main method. Use [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs. + +- - `run_as_repl` + - Boolean + - This field is deprecated + +::: + + +### jobs._name_.tasks.spark_python_task + +**`Type: Map`** + +The task runs a Python file when the `spark_python_task` field is present. + + + +:::list-table + +- - Key + - Type + - Description + +- - `parameters` + - Sequence + - Command line parameters passed to the Python file. Use [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs. 
+ +- - `python_file` + - String + - The Python file to be executed. Cloud file URIs (such as dbfs:/, s3:/, adls:/, gcs:/) and workspace paths are supported. For python files stored in the Databricks workspace, the path must be absolute and begin with `/`. For files stored in a remote repository, the path must be relative. This field is required. + +- - `source` + - String + - Optional location type of the Python file. When set to `WORKSPACE` or not specified, the file will be retrieved from the local Databricks workspace or cloud location (if the `python_file` has a URI format). When set to `GIT`, the Python file will be retrieved from a Git repository defined in `git_source`. * `WORKSPACE`: The Python file is located in a Databricks workspace or at a cloud filesystem URI. * `GIT`: The Python file is located in a remote Git repository. + +::: + + +### jobs._name_.tasks.sql_task + +**`Type: Map`** + +The task runs a SQL query or file, or it refreshes a SQL alert or a legacy SQL dashboard when the `sql_task` field is present. + + + +:::list-table + +- - Key + - Type + - Description + +- - `alert` + - Map + - If alert, indicates that this job must refresh a SQL alert. See [\_](#jobsnametaskssql_taskalert). + +- - `dashboard` + - Map + - If dashboard, indicates that this job must refresh a SQL dashboard. See [\_](#jobsnametaskssql_taskdashboard). + +- - `file` + - Map + - If file, indicates that this job runs a SQL file in a remote Git repository. See [\_](#jobsnametaskssql_taskfile). + +- - `parameters` + - Map + - Parameters to be used for each run of this job. The SQL alert task does not support custom parameters. + +- - `query` + - Map + - If query, indicates that this job must execute a SQL query. See [\_](#jobsnametaskssql_taskquery). + +- - `warehouse_id` + - String + - The canonical identifier of the SQL warehouse. Recommended to use with serverless or pro SQL warehouses. 
Classic SQL warehouses are only supported for SQL alert, dashboard and query tasks and are limited to scheduled single-task jobs. + +::: + + +### jobs._name_.tasks.sql_task.alert + +**`Type: Map`** + +If alert, indicates that this job must refresh a SQL alert. + + + +:::list-table + +- - Key + - Type + - Description + +- - `alert_id` + - String + - The canonical identifier of the SQL alert. + +- - `pause_subscriptions` + - Boolean + - If true, the alert notifications are not sent to subscribers. + +- - `subscriptions` + - Sequence + - If specified, alert notifications are sent to subscribers. See [\_](#jobsnametaskssql_taskalertsubscriptions). + +::: + + +### jobs._name_.tasks.sql_task.alert.subscriptions + +**`Type: Sequence`** + +If specified, alert notifications are sent to subscribers. + + + +:::list-table + +- - Key + - Type + - Description + +- - `destination_id` + - String + - The canonical identifier of the destination to receive email notification. This parameter is mutually exclusive with user_name. You cannot set both destination_id and user_name for subscription notifications. + +- - `user_name` + - String + - The user name to receive the subscription email. This parameter is mutually exclusive with destination_id. You cannot set both destination_id and user_name for subscription notifications. + +::: + + +### jobs._name_.tasks.sql_task.dashboard + +**`Type: Map`** + +If dashboard, indicates that this job must refresh a SQL dashboard. + + + +:::list-table + +- - Key + - Type + - Description + +- - `custom_subject` + - String + - Subject of the email sent to subscribers of this task. + +- - `dashboard_id` + - String + - The canonical identifier of the SQL dashboard. + +- - `pause_subscriptions` + - Boolean + - If true, the dashboard snapshot is not taken, and emails are not sent to subscribers. + +- - `subscriptions` + - Sequence + - If specified, dashboard snapshots are sent to subscriptions. See [\_](#jobsnametaskssql_taskdashboardsubscriptions). 
+ +::: + + +### jobs._name_.tasks.sql_task.dashboard.subscriptions + +**`Type: Sequence`** + +If specified, dashboard snapshots are sent to subscriptions. + + + +:::list-table + +- - Key + - Type + - Description + +- - `destination_id` + - String + - The canonical identifier of the destination to receive email notification. This parameter is mutually exclusive with user_name. You cannot set both destination_id and user_name for subscription notifications. + +- - `user_name` + - String + - The user name to receive the subscription email. This parameter is mutually exclusive with destination_id. You cannot set both destination_id and user_name for subscription notifications. + +::: + + +### jobs._name_.tasks.sql_task.file + +**`Type: Map`** + +If file, indicates that this job runs a SQL file in a remote Git repository. + + + +:::list-table + +- - Key + - Type + - Description + +- - `path` + - String + - Path of the SQL file. Must be relative if the source is a remote Git repository and absolute for workspace paths. + +- - `source` + - String + - Optional location type of the SQL file. When set to `WORKSPACE`, the SQL file will be retrieved from the local Databricks workspace. When set to `GIT`, the SQL file will be retrieved from a Git repository defined in `git_source`. If the value is empty, the task will use `GIT` if `git_source` is defined and `WORKSPACE` otherwise. * `WORKSPACE`: SQL file is located in Databricks workspace. * `GIT`: SQL file is located in cloud Git provider. + +::: + + +### jobs._name_.tasks.sql_task.query + +**`Type: Map`** + +If query, indicates that this job must execute a SQL query. + + + +:::list-table + +- - Key + - Type + - Description + +- - `query_id` + - String + - The canonical identifier of the SQL query. + +::: + + +### jobs._name_.tasks.webhook_notifications + +**`Type: Map`** + +A collection of system notification IDs to notify when runs of this task begin or complete. The default behavior is to not send any system notifications. 
+ + + +:::list-table + +- - Key + - Type + - Description + +- - `on_duration_warning_threshold_exceeded` + - Sequence + - An optional list of system notification IDs to call when the duration of a run exceeds the threshold specified for the `RUN_DURATION_SECONDS` metric in the `health` field. A maximum of 3 destinations can be specified for the `on_duration_warning_threshold_exceeded` property. See [\_](#jobsnametaskswebhook_notificationson_duration_warning_threshold_exceeded). + +- - `on_failure` + - Sequence + - An optional list of system notification IDs to call when the run fails. A maximum of 3 destinations can be specified for the `on_failure` property. See [\_](#jobsnametaskswebhook_notificationson_failure). + +- - `on_start` + - Sequence + - An optional list of system notification IDs to call when the run starts. A maximum of 3 destinations can be specified for the `on_start` property. See [\_](#jobsnametaskswebhook_notificationson_start). + +- - `on_streaming_backlog_exceeded` + - Sequence + - An optional list of system notification IDs to call when any streaming backlog thresholds are exceeded for any stream. Streaming backlog thresholds can be set in the `health` field using the following metrics: `STREAMING_BACKLOG_BYTES`, `STREAMING_BACKLOG_RECORDS`, `STREAMING_BACKLOG_SECONDS`, or `STREAMING_BACKLOG_FILES`. Alerting is based on the 10-minute average of these metrics. If the issue persists, notifications are resent every 30 minutes. A maximum of 3 destinations can be specified for the `on_streaming_backlog_exceeded` property. See [\_](#jobsnametaskswebhook_notificationson_streaming_backlog_exceeded). + +- - `on_success` + - Sequence + - An optional list of system notification IDs to call when the run completes successfully. A maximum of 3 destinations can be specified for the `on_success` property. See [\_](#jobsnametaskswebhook_notificationson_success). 
+ +::: + + +### jobs._name_.tasks.webhook_notifications.on_duration_warning_threshold_exceeded + +**`Type: Sequence`** + +An optional list of system notification IDs to call when the duration of a run exceeds the threshold specified for the `RUN_DURATION_SECONDS` metric in the `health` field. A maximum of 3 destinations can be specified for the `on_duration_warning_threshold_exceeded` property. + + + +:::list-table + +- - Key + - Type + - Description + +- - `id` + - String + - + +::: + + +### jobs._name_.tasks.webhook_notifications.on_failure + +**`Type: Sequence`** + +An optional list of system notification IDs to call when the run fails. A maximum of 3 destinations can be specified for the `on_failure` property. + + + +:::list-table + +- - Key + - Type + - Description + +- - `id` + - String + - + +::: + + +### jobs._name_.tasks.webhook_notifications.on_start + +**`Type: Sequence`** + +An optional list of system notification IDs to call when the run starts. A maximum of 3 destinations can be specified for the `on_start` property. + + + +:::list-table + +- - Key + - Type + - Description + +- - `id` + - String + - + +::: + + +### jobs._name_.tasks.webhook_notifications.on_streaming_backlog_exceeded + +**`Type: Sequence`** + +An optional list of system notification IDs to call when any streaming backlog thresholds are exceeded for any stream. +Streaming backlog thresholds can be set in the `health` field using the following metrics: `STREAMING_BACKLOG_BYTES`, `STREAMING_BACKLOG_RECORDS`, `STREAMING_BACKLOG_SECONDS`, or `STREAMING_BACKLOG_FILES`. +Alerting is based on the 10-minute average of these metrics. If the issue persists, notifications are resent every 30 minutes. +A maximum of 3 destinations can be specified for the `on_streaming_backlog_exceeded` property. 
+ + + +:::list-table + +- - Key + - Type + - Description + +- - `id` + - String + - + +::: + + +### jobs._name_.tasks.webhook_notifications.on_success + +**`Type: Sequence`** + +An optional list of system notification IDs to call when the run completes successfully. A maximum of 3 destinations can be specified for the `on_success` property. + + + +:::list-table + +- - Key + - Type + - Description + +- - `id` + - String + - + +::: + + +### jobs._name_.trigger + +**`Type: Map`** + +A configuration to trigger a run when certain conditions are met. The default behavior is that the job runs only when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`. + + + +:::list-table + +- - Key + - Type + - Description + +- - `file_arrival` + - Map + - File arrival trigger settings. See [\_](#jobsnametriggerfile_arrival). + +- - `pause_status` + - String + - Whether this trigger is paused or not. + +- - `periodic` + - Map + - Periodic trigger settings. See [\_](#jobsnametriggerperiodic). + +- - `table_update` + - Map + - See [\_](#jobsnametriggertable_update). + +::: + + +### jobs._name_.trigger.file_arrival + +**`Type: Map`** + +File arrival trigger settings. + + + +:::list-table + +- - Key + - Type + - Description + +- - `min_time_between_triggers_seconds` + - Integer + - If set, the trigger starts a run only after the specified amount of time has passed since the last time the trigger fired. The minimum allowed value is 60 seconds. + +- - `url` + - String + - URL to be monitored for file arrivals. The path must point to the root or a subpath of the external location. + +- - `wait_after_last_change_seconds` + - Integer + - If set, the trigger starts a run only after no file activity has occurred for the specified amount of time. This makes it possible to wait for a batch of incoming files to arrive before triggering a run. The minimum allowed value is 60 seconds.
+ +::: + + +### jobs._name_.trigger.periodic + +**`Type: Map`** + +Periodic trigger settings. + + + +:::list-table + +- - Key + - Type + - Description + +- - `interval` + - Integer + - The interval at which the trigger should run. + +- - `unit` + - String + - The unit of time for the interval. + +::: + + +### jobs._name_.trigger.table_update + +**`Type: Map`** + + + + + +:::list-table + +- - Key + - Type + - Description + +- - `condition` + - String + - The table(s) condition based on which to trigger a job run. + +- - `min_time_between_triggers_seconds` + - Integer + - If set, the trigger starts a run only after the specified amount of time has passed since the last time the trigger fired. The minimum allowed value is 60 seconds. + +- - `table_names` + - Sequence + - A list of tables to monitor for changes. The table name must be in the format `catalog_name.schema_name.table_name`. + +- - `wait_after_last_change_seconds` + - Integer + - If set, the trigger starts a run only after no table updates have occurred for the specified time and can be used to wait for a series of table updates before triggering a run. The minimum allowed value is 60 seconds. + +::: + + +### jobs._name_.webhook_notifications + +**`Type: Map`** + +A collection of system notification IDs to notify when runs of this job begin or complete. + + + +:::list-table + +- - Key + - Type + - Description + +- - `on_duration_warning_threshold_exceeded` + - Sequence + - An optional list of system notification IDs to call when the duration of a run exceeds the threshold specified for the `RUN_DURATION_SECONDS` metric in the `health` field. A maximum of 3 destinations can be specified for the `on_duration_warning_threshold_exceeded` property. See [\_](#jobsnamewebhook_notificationson_duration_warning_threshold_exceeded). + +- - `on_failure` + - Sequence + - An optional list of system notification IDs to call when the run fails. A maximum of 3 destinations can be specified for the `on_failure` property. 
See [\_](#jobsnamewebhook_notificationson_failure). + +- - `on_start` + - Sequence + - An optional list of system notification IDs to call when the run starts. A maximum of 3 destinations can be specified for the `on_start` property. See [\_](#jobsnamewebhook_notificationson_start). + +- - `on_streaming_backlog_exceeded` + - Sequence + - An optional list of system notification IDs to call when any streaming backlog thresholds are exceeded for any stream. Streaming backlog thresholds can be set in the `health` field using the following metrics: `STREAMING_BACKLOG_BYTES`, `STREAMING_BACKLOG_RECORDS`, `STREAMING_BACKLOG_SECONDS`, or `STREAMING_BACKLOG_FILES`. Alerting is based on the 10-minute average of these metrics. If the issue persists, notifications are resent every 30 minutes. A maximum of 3 destinations can be specified for the `on_streaming_backlog_exceeded` property. See [\_](#jobsnamewebhook_notificationson_streaming_backlog_exceeded). + +- - `on_success` + - Sequence + - An optional list of system notification IDs to call when the run completes successfully. A maximum of 3 destinations can be specified for the `on_success` property. See [\_](#jobsnamewebhook_notificationson_success). + +::: + + +### jobs._name_.webhook_notifications.on_duration_warning_threshold_exceeded + +**`Type: Sequence`** + +An optional list of system notification IDs to call when the duration of a run exceeds the threshold specified for the `RUN_DURATION_SECONDS` metric in the `health` field. A maximum of 3 destinations can be specified for the `on_duration_warning_threshold_exceeded` property. + + + +:::list-table + +- - Key + - Type + - Description + +- - `id` + - String + - + +::: + + +### jobs._name_.webhook_notifications.on_failure + +**`Type: Sequence`** + +An optional list of system notification IDs to call when the run fails. A maximum of 3 destinations can be specified for the `on_failure` property. 
+ + + +:::list-table + +- - Key + - Type + - Description + +- - `id` + - String + - + +::: + + +### jobs._name_.webhook_notifications.on_start + +**`Type: Sequence`** + +An optional list of system notification IDs to call when the run starts. A maximum of 3 destinations can be specified for the `on_start` property. + + + +:::list-table + +- - Key + - Type + - Description + +- - `id` + - String + - + +::: + + +### jobs._name_.webhook_notifications.on_streaming_backlog_exceeded + +**`Type: Sequence`** + +An optional list of system notification IDs to call when any streaming backlog thresholds are exceeded for any stream. +Streaming backlog thresholds can be set in the `health` field using the following metrics: `STREAMING_BACKLOG_BYTES`, `STREAMING_BACKLOG_RECORDS`, `STREAMING_BACKLOG_SECONDS`, or `STREAMING_BACKLOG_FILES`. +Alerting is based on the 10-minute average of these metrics. If the issue persists, notifications are resent every 30 minutes. +A maximum of 3 destinations can be specified for the `on_streaming_backlog_exceeded` property. + + + +:::list-table + +- - Key + - Type + - Description + +- - `id` + - String + - + +::: + + +### jobs._name_.webhook_notifications.on_success + +**`Type: Sequence`** + +An optional list of system notification IDs to call when the run completes successfully. A maximum of 3 destinations can be specified for the `on_success` property. + + + +:::list-table + +- - Key + - Type + - Description + +- - `id` + - String + - + +::: + + +## model_serving_endpoints + +**`Type: Map`** + +The model_serving_endpoint resource allows you to define [model serving endpoints](/api/workspace/servingendpoints/create). See [_](/machine-learning/model-serving/manage-serving-endpoints.md). + +```yaml +model_serving_endpoints: + : + : +``` + + +:::list-table + +- - Key + - Type + - Description + +- - `ai_gateway` + - Map + - The AI Gateway configuration for the serving endpoint. 
NOTE: External model, provisioned throughput, and pay-per-token endpoints are fully supported; agent endpoints currently only support inference tables. See [\_](#model_serving_endpointsnameai_gateway). + +- - `budget_policy_id` + - String + - The budget policy to be applied to the serving endpoint. + +- - `config` + - Map + - The core config of the serving endpoint. See [\_](#model_serving_endpointsnameconfig). + +- - `description` + - String + - + +- - `email_notifications` + - Map + - Email notification settings. See [\_](#model_serving_endpointsnameemail_notifications). + +- - `lifecycle` + - Map + - Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. See [\_](#model_serving_endpointsnamelifecycle). + +- - `name` + - String + - The name of the serving endpoint. This field is required and must be unique across a Databricks workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. + +- - `permissions` + - Sequence + - See [\_](#model_serving_endpointsnamepermissions). + +- - `rate_limits` + - Sequence + - This field is deprecated + +- - `route_optimized` + - Boolean + - Enable route optimization for the serving endpoint. + +- - `tags` + - Sequence + - Tags to be attached to the serving endpoint and automatically propagated to billing logs. See [\_](#model_serving_endpointsnametags). 
+ +::: + + +**Example** + +The following example defines a Unity Catalog model serving endpoint: + +```yaml +resources: + model_serving_endpoints: + uc_model_serving_endpoint: + name: "uc-model-endpoint" + config: + served_entities: + - entity_name: "myCatalog.mySchema.my-ads-model" + entity_version: "10" + workload_size: "Small" + scale_to_zero_enabled: "true" + traffic_config: + routes: + - served_model_name: "my-ads-model-10" + traffic_percentage: "100" + tags: + - key: "team" + value: "data science" +``` + +### model_serving_endpoints._name_.ai_gateway + +**`Type: Map`** + +The AI Gateway configuration for the serving endpoint. NOTE: External model, provisioned throughput, and pay-per-token endpoints are fully supported; agent endpoints currently only support inference tables. + + + +:::list-table + +- - Key + - Type + - Description + +- - `fallback_config` + - Map + - Configuration for traffic fallback which auto fallbacks to other served entities if the request to a served entity fails with certain error codes, to increase availability. See [\_](#model_serving_endpointsnameai_gatewayfallback_config). + +- - `guardrails` + - Map + - Configuration for AI Guardrails to prevent unwanted data and unsafe data in requests and responses. See [\_](#model_serving_endpointsnameai_gatewayguardrails). + +- - `inference_table_config` + - Map + - Configuration for payload logging using inference tables. Use these tables to monitor and audit data being sent to and received from model APIs and to improve model quality. See [\_](#model_serving_endpointsnameai_gatewayinference_table_config). + +- - `rate_limits` + - Sequence + - Configuration for rate limits which can be set to limit endpoint traffic. See [\_](#model_serving_endpointsnameai_gatewayrate_limits). + +- - `usage_tracking_config` + - Map + - Configuration to enable usage tracking using system tables. These tables allow you to monitor operational usage on endpoints and their associated costs. 
See [\_](#model_serving_endpointsnameai_gatewayusage_tracking_config). + +::: + + +### model_serving_endpoints._name_.ai_gateway.fallback_config + +**`Type: Map`** + +Configuration for traffic fallback which auto fallbacks to other served entities if the request to a served +entity fails with certain error codes, to increase availability. + + + +:::list-table + +- - Key + - Type + - Description + +- - `enabled` + - Boolean + - Whether to enable traffic fallback. When a served entity in the serving endpoint returns specific error codes (e.g. 500), the request will automatically be round-robin attempted with other served entities in the same endpoint, following the order of served entity list, until a successful response is returned. If all attempts fail, return the last response with the error code. + +::: + + +### model_serving_endpoints._name_.ai_gateway.guardrails + +**`Type: Map`** + +Configuration for AI Guardrails to prevent unwanted data and unsafe data in requests and responses. + + + +:::list-table + +- - Key + - Type + - Description + +- - `input` + - Map + - Configuration for input guardrail filters. See [\_](#model_serving_endpointsnameai_gatewayguardrailsinput). + +- - `output` + - Map + - Configuration for output guardrail filters. See [\_](#model_serving_endpointsnameai_gatewayguardrailsoutput). + +::: + + +### model_serving_endpoints._name_.ai_gateway.guardrails.input + +**`Type: Map`** + +Configuration for input guardrail filters. + + + +:::list-table + +- - Key + - Type + - Description + +- - `invalid_keywords` + - Sequence + - This field is deprecated + +- - `pii` + - Map + - Configuration for guardrail PII filter. See [\_](#model_serving_endpointsnameai_gatewayguardrailsinputpii). + +- - `safety` + - Boolean + - Indicates whether the safety filter is enabled. 
+ +- - `valid_topics` + - Sequence + - This field is deprecated + +::: + + +### model_serving_endpoints._name_.ai_gateway.guardrails.input.pii + +**`Type: Map`** + +Configuration for guardrail PII filter. + + + +:::list-table + +- - Key + - Type + - Description + +- - `behavior` + - String + - Configuration for input guardrail filters. + +::: + + +### model_serving_endpoints._name_.ai_gateway.guardrails.output + +**`Type: Map`** + +Configuration for output guardrail filters. + + + +:::list-table + +- - Key + - Type + - Description + +- - `invalid_keywords` + - Sequence + - This field is deprecated + +- - `pii` + - Map + - Configuration for guardrail PII filter. See [\_](#model_serving_endpointsnameai_gatewayguardrailsoutputpii). + +- - `safety` + - Boolean + - Indicates whether the safety filter is enabled. + +- - `valid_topics` + - Sequence + - This field is deprecated + +::: + + +### model_serving_endpoints._name_.ai_gateway.guardrails.output.pii + +**`Type: Map`** + +Configuration for guardrail PII filter. + + + +:::list-table + +- - Key + - Type + - Description + +- - `behavior` + - String + - Configuration for input guardrail filters. + +::: + + +### model_serving_endpoints._name_.ai_gateway.inference_table_config + +**`Type: Map`** + +Configuration for payload logging using inference tables. +Use these tables to monitor and audit data being sent to and received from model APIs and to improve model quality. + + + +:::list-table + +- - Key + - Type + - Description + +- - `catalog_name` + - String + - The name of the catalog in Unity Catalog. Required when enabling inference tables. NOTE: On update, you have to disable inference table first in order to change the catalog name. + +- - `enabled` + - Boolean + - Indicates whether the inference table is enabled. + +- - `schema_name` + - String + - The name of the schema in Unity Catalog. Required when enabling inference tables. 
NOTE: On update, you have to disable inference table first in order to change the schema name. + +- - `table_name_prefix` + - String + - The prefix of the table in Unity Catalog. NOTE: On update, you have to disable inference table first in order to change the prefix name. + +::: + + +### model_serving_endpoints._name_.ai_gateway.rate_limits + +**`Type: Sequence`** + +Configuration for rate limits which can be set to limit endpoint traffic. + + + +:::list-table + +- - Key + - Type + - Description + +- - `calls` + - Integer + - Used to specify how many calls are allowed for a key within the renewal_period. + +- - `key` + - String + - Key field for a rate limit. Currently, 'user', 'user_group', 'service_principal', and 'endpoint' are supported, with 'endpoint' being the default if not specified. + +- - `principal` + - String + - Principal field for a user, user group, or service principal to apply rate limiting to. Accepts a user email, group name, or service principal application ID. + +- - `renewal_period` + - String + - Renewal period field for a rate limit. Currently, only 'minute' is supported. + +- - `tokens` + - Integer + - Used to specify how many tokens are allowed for a key within the renewal_period. + +::: + + +### model_serving_endpoints._name_.ai_gateway.usage_tracking_config + +**`Type: Map`** + +Configuration to enable usage tracking using system tables. +These tables allow you to monitor operational usage on endpoints and their associated costs. + + + +:::list-table + +- - Key + - Type + - Description + +- - `enabled` + - Boolean + - Whether to enable usage tracking. + +::: + + +### model_serving_endpoints._name_.config + +**`Type: Map`** + +The core config of the serving endpoint. + + + +:::list-table + +- - Key + - Type + - Description + +- - `auto_capture_config` + - Map + - Configuration for Inference Tables which automatically logs requests and responses to Unity Catalog.
Note: this field is deprecated for creating new provisioned throughput endpoints, or updating existing provisioned throughput endpoints that have never had an inference table configured; in these cases please use AI Gateway to manage inference tables. See [\_](#model_serving_endpointsnameconfigauto_capture_config). + +- - `served_entities` + - Sequence + - The list of served entities under the serving endpoint config. See [\_](#model_serving_endpointsnameconfigserved_entities). + +- - `served_models` + - Sequence + - (Deprecated, use served_entities instead) The list of served models under the serving endpoint config. See [\_](#model_serving_endpointsnameconfigserved_models). + +- - `traffic_config` + - Map + - The traffic configuration associated with the serving endpoint config. See [\_](#model_serving_endpointsnameconfigtraffic_config). + +::: + + +### model_serving_endpoints._name_.config.auto_capture_config + +**`Type: Map`** + +Configuration for Inference Tables which automatically logs requests and responses to Unity Catalog. +Note: this field is deprecated for creating new provisioned throughput endpoints, +or updating existing provisioned throughput endpoints that have never had an inference table configured; +in these cases please use AI Gateway to manage inference tables. + + + +:::list-table + +- - Key + - Type + - Description + +- - `catalog_name` + - String + - The name of the catalog in Unity Catalog. NOTE: On update, you cannot change the catalog name if the inference table is already enabled. + +- - `enabled` + - Boolean + - Indicates whether the inference table is enabled. + +- - `schema_name` + - String + - The name of the schema in Unity Catalog. NOTE: On update, you cannot change the schema name if the inference table is already enabled. + +- - `table_name_prefix` + - String + - The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if the inference table is already enabled.
+ +::: + + +### model_serving_endpoints._name_.config.served_entities + +**`Type: Sequence`** + +The list of served entities under the serving endpoint config. + + + +:::list-table + +- - Key + - Type + - Description + +- - `burst_scaling_enabled` + - Boolean + - Whether burst scaling is enabled. When enabled (default), the endpoint can automatically scale up beyond provisioned capacity to handle traffic spikes. When disabled, the endpoint maintains fixed capacity at provisioned_model_units. + +- - `entity_name` + - String + - The name of the entity to be served. The entity may be a model in the Databricks Model Registry, a model in the Unity Catalog (UC), or a function of type FEATURE_SPEC in the UC. If it is a UC object, the full name of the object should be given in the form of **catalog_name.schema_name.model_name**. + +- - `entity_version` + - String + - + +- - `environment_vars` + - Map + - An object containing a set of optional, user-specified environment variable key-value pairs used for serving this entity. Note: this is an experimental feature and subject to change. Example entity environment variables that refer to Databricks secrets: `{"OPENAI_API_KEY": "{{secrets/my_scope/my_key}}", "DATABRICKS_TOKEN": "{{secrets/my_scope2/my_key2}}"}` + +- - `external_model` + - Map + - The external model to be served. NOTE: Only one of external_model and (entity_name, entity_version, workload_size, workload_type, and scale_to_zero_enabled) can be specified with the latter set being used for custom model serving for a Databricks registered model. For an existing endpoint with external_model, it cannot be updated to an endpoint without external_model. If the endpoint is created without external_model, users cannot update it to add external_model later. The task type of all external models within an endpoint must be the same. See [\_](#model_serving_endpointsnameconfigserved_entitiesexternal_model). 
+ +- - `instance_profile_arn` + - String + - ARN of the instance profile that the served entity uses to access AWS resources. + +- - `max_provisioned_concurrency` + - Integer + - The maximum provisioned concurrency that the endpoint can scale up to. Do not use if workload_size is specified. + +- - `max_provisioned_throughput` + - Integer + - The maximum tokens per second that the endpoint can scale up to. + +- - `min_provisioned_concurrency` + - Integer + - The minimum provisioned concurrency that the endpoint can scale down to. Do not use if workload_size is specified. + +- - `min_provisioned_throughput` + - Integer + - The minimum tokens per second that the endpoint can scale down to. + +- - `name` + - String + - The name of a served entity. It must be unique across an endpoint. A served entity name can consist of alphanumeric characters, dashes, and underscores. If not specified for an external model, this field defaults to external_model.name, with '.' and ':' replaced with '-', and if not specified for other entities, it defaults to entity_name-entity_version. + +- - `provisioned_model_units` + - Integer + - The number of model units provisioned. + +- - `scale_to_zero_enabled` + - Boolean + - Whether the compute resources for the served entity should scale down to zero. + +- - `workload_size` + - String + - The workload size of the served entity. The workload size corresponds to a range of provisioned concurrency that the compute autoscales between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are "Small" (4 - 4 provisioned concurrency), "Medium" (8 - 16 provisioned concurrency), and "Large" (16 - 64 provisioned concurrency). Additional custom workload sizes can also be used when available in the workspace. If scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size is 0. Do not use if min_provisioned_concurrency and max_provisioned_concurrency are specified. 
+ +- - `workload_type` + - String + - The workload type of the served entity. The workload type selects which type of compute to use in the endpoint. The default value for this parameter is "CPU". For deep learning workloads, GPU acceleration is available by selecting workload types like GPU_SMALL and others. See the available [GPU types](https://docs.databricks.com/en/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types). + +::: + + +### model_serving_endpoints._name_.config.served_entities.external_model + +**`Type: Map`** + +The external model to be served. NOTE: Only one of external_model and (entity_name, entity_version, workload_size, workload_type, and scale_to_zero_enabled) can be specified with the latter set being used for custom model serving for a Databricks registered model. For an existing endpoint with external_model, it cannot be updated to an endpoint without external_model. If the endpoint is created without external_model, users cannot update it to add external_model later. The task type of all external models within an endpoint must be the same. + + + +:::list-table + +- - Key + - Type + - Description + +- - `ai21labs_config` + - Map + - AI21Labs Config. Only required if the provider is 'ai21labs'. See [\_](#model_serving_endpointsnameconfigserved_entitiesexternal_modelai21labs_config). + +- - `amazon_bedrock_config` + - Map + - Amazon Bedrock Config. Only required if the provider is 'amazon-bedrock'. See [\_](#model_serving_endpointsnameconfigserved_entitiesexternal_modelamazon_bedrock_config). + +- - `anthropic_config` + - Map + - Anthropic Config. Only required if the provider is 'anthropic'. See [\_](#model_serving_endpointsnameconfigserved_entitiesexternal_modelanthropic_config). + +- - `cohere_config` + - Map + - Cohere Config. Only required if the provider is 'cohere'. See [\_](#model_serving_endpointsnameconfigserved_entitiesexternal_modelcohere_config). 
+ +- - `custom_provider_config` + - Map + - Custom Provider Config. Only required if the provider is 'custom'. See [\_](#model_serving_endpointsnameconfigserved_entitiesexternal_modelcustom_provider_config). + +- - `databricks_model_serving_config` + - Map + - Databricks Model Serving Config. Only required if the provider is 'databricks-model-serving'. See [\_](#model_serving_endpointsnameconfigserved_entitiesexternal_modeldatabricks_model_serving_config). + +- - `google_cloud_vertex_ai_config` + - Map + - Google Cloud Vertex AI Config. Only required if the provider is 'google-cloud-vertex-ai'. See [\_](#model_serving_endpointsnameconfigserved_entitiesexternal_modelgoogle_cloud_vertex_ai_config). + +- - `name` + - String + - The name of the external model. + +- - `openai_config` + - Map + - OpenAI Config. Only required if the provider is 'openai'. See [\_](#model_serving_endpointsnameconfigserved_entitiesexternal_modelopenai_config). + +- - `palm_config` + - Map + - PaLM Config. Only required if the provider is 'palm'. See [\_](#model_serving_endpointsnameconfigserved_entitiesexternal_modelpalm_config). + +- - `provider` + - String + - The name of the provider for the external model. Currently, the supported providers are 'ai21labs', 'anthropic', 'amazon-bedrock', 'cohere', 'databricks-model-serving', 'google-cloud-vertex-ai', 'openai', 'palm', and 'custom'. + +- - `task` + - String + - The task type of the external model. + +::: + + +### model_serving_endpoints._name_.config.served_entities.external_model.ai21labs_config + +**`Type: Map`** + +AI21Labs Config. Only required if the provider is 'ai21labs'. + + + +:::list-table + +- - Key + - Type + - Description + +- - `ai21labs_api_key` + - String + - The Databricks secret key reference for an AI21 Labs API key. If you prefer to paste your API key directly, see `ai21labs_api_key_plaintext`. You must provide an API key using one of the following fields: `ai21labs_api_key` or `ai21labs_api_key_plaintext`. 
+ +- - `ai21labs_api_key_plaintext` + - String + - An AI21 Labs API key provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `ai21labs_api_key`. You must provide an API key using one of the following fields: `ai21labs_api_key` or `ai21labs_api_key_plaintext`. + +::: + + +### model_serving_endpoints._name_.config.served_entities.external_model.amazon_bedrock_config + +**`Type: Map`** + +Amazon Bedrock Config. Only required if the provider is 'amazon-bedrock'. + + + +:::list-table + +- - Key + - Type + - Description + +- - `aws_access_key_id` + - String + - The Databricks secret key reference for an AWS access key ID with permissions to interact with Bedrock services. If you prefer to paste your API key directly, see `aws_access_key_id_plaintext`. You must provide an API key using one of the following fields: `aws_access_key_id` or `aws_access_key_id_plaintext`. + +- - `aws_access_key_id_plaintext` + - String + - An AWS access key ID with permissions to interact with Bedrock services provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `aws_access_key_id`. You must provide an API key using one of the following fields: `aws_access_key_id` or `aws_access_key_id_plaintext`. + +- - `aws_region` + - String + - The AWS region to use. Bedrock has to be enabled there. + +- - `aws_secret_access_key` + - String + - The Databricks secret key reference for an AWS secret access key paired with the access key ID, with permissions to interact with Bedrock services. If you prefer to paste your API key directly, see `aws_secret_access_key_plaintext`. You must provide an API key using one of the following fields: `aws_secret_access_key` or `aws_secret_access_key_plaintext`. + +- - `aws_secret_access_key_plaintext` + - String + - An AWS secret access key paired with the access key ID, with permissions to interact with Bedrock services provided as a plaintext string. 
If you prefer to reference your key using Databricks Secrets, see `aws_secret_access_key`. You must provide an API key using one of the following fields: `aws_secret_access_key` or `aws_secret_access_key_plaintext`. + +- - `bedrock_provider` + - String + - The underlying provider in Amazon Bedrock. Supported values (case insensitive) include: Anthropic, Cohere, AI21Labs, Amazon. + +- - `instance_profile_arn` + - String + - ARN of the instance profile that the external model will use to access AWS resources. You must authenticate using an instance profile or access keys. If you prefer to authenticate using access keys, see `aws_access_key_id`, `aws_access_key_id_plaintext`, `aws_secret_access_key` and `aws_secret_access_key_plaintext`. + +::: + + +### model_serving_endpoints._name_.config.served_entities.external_model.anthropic_config + +**`Type: Map`** + +Anthropic Config. Only required if the provider is 'anthropic'. + + + +:::list-table + +- - Key + - Type + - Description + +- - `anthropic_api_key` + - String + - The Databricks secret key reference for an Anthropic API key. If you prefer to paste your API key directly, see `anthropic_api_key_plaintext`. You must provide an API key using one of the following fields: `anthropic_api_key` or `anthropic_api_key_plaintext`. + +- - `anthropic_api_key_plaintext` + - String + - The Anthropic API key provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `anthropic_api_key`. You must provide an API key using one of the following fields: `anthropic_api_key` or `anthropic_api_key_plaintext`. + +::: + + +### model_serving_endpoints._name_.config.served_entities.external_model.cohere_config + +**`Type: Map`** + +Cohere Config. Only required if the provider is 'cohere'. + + + +:::list-table + +- - Key + - Type + - Description + +- - `cohere_api_base` + - String + - This is an optional field to provide a customized base URL for the Cohere API. 
If left unspecified, the standard Cohere base URL is used. + +- - `cohere_api_key` + - String + - The Databricks secret key reference for a Cohere API key. If you prefer to paste your API key directly, see `cohere_api_key_plaintext`. You must provide an API key using one of the following fields: `cohere_api_key` or `cohere_api_key_plaintext`. + +- - `cohere_api_key_plaintext` + - String + - The Cohere API key provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `cohere_api_key`. You must provide an API key using one of the following fields: `cohere_api_key` or `cohere_api_key_plaintext`. + +::: + + +### model_serving_endpoints._name_.config.served_entities.external_model.custom_provider_config + +**`Type: Map`** + +Custom Provider Config. Only required if the provider is 'custom'. + + + +:::list-table + +- - Key + - Type + - Description + +- - `api_key_auth` + - Map + - This is a field to provide API key authentication for the custom provider API. You can only specify one authentication method. See [\_](#model_serving_endpointsnameconfigserved_entitiesexternal_modelcustom_provider_configapi_key_auth). + +- - `bearer_token_auth` + - Map + - This is a field to provide bearer token authentication for the custom provider API. You can only specify one authentication method. See [\_](#model_serving_endpointsnameconfigserved_entitiesexternal_modelcustom_provider_configbearer_token_auth). + +- - `custom_provider_url` + - String + - This is a field to provide the URL of the custom provider API. + +::: + + +### model_serving_endpoints._name_.config.served_entities.external_model.custom_provider_config.api_key_auth + +**`Type: Map`** + +This is a field to provide API key authentication for the custom provider API. +You can only specify one authentication method. + + + +:::list-table + +- - Key + - Type + - Description + +- - `key` + - String + - The name of the API key parameter used for authentication. 
+ +- - `value` + - String + - The Databricks secret key reference for an API Key. If you prefer to paste your token directly, see `value_plaintext`. + +- - `value_plaintext` + - String + - The API Key provided as a plaintext string. If you prefer to reference your token using Databricks Secrets, see `value`. + +::: + + +### model_serving_endpoints._name_.config.served_entities.external_model.custom_provider_config.bearer_token_auth + +**`Type: Map`** + +This is a field to provide bearer token authentication for the custom provider API. +You can only specify one authentication method. + + + +:::list-table + +- - Key + - Type + - Description + +- - `token` + - String + - The Databricks secret key reference for a token. If you prefer to paste your token directly, see `token_plaintext`. + +- - `token_plaintext` + - String + - The token provided as a plaintext string. If you prefer to reference your token using Databricks Secrets, see `token`. + +::: + + +### model_serving_endpoints._name_.config.served_entities.external_model.databricks_model_serving_config + +**`Type: Map`** + +Databricks Model Serving Config. Only required if the provider is 'databricks-model-serving'. + + + +:::list-table + +- - Key + - Type + - Description + +- - `databricks_api_token` + - String + - The Databricks secret key reference for a Databricks API token that corresponds to a user or service principal with Can Query access to the model serving endpoint pointed to by this external model. If you prefer to paste your API key directly, see `databricks_api_token_plaintext`. You must provide an API key using one of the following fields: `databricks_api_token` or `databricks_api_token_plaintext`. + +- - `databricks_api_token_plaintext` + - String + - The Databricks API token that corresponds to a user or service principal with Can Query access to the model serving endpoint pointed to by this external model provided as a plaintext string. 
If you prefer to reference your key using Databricks Secrets, see `databricks_api_token`. You must provide an API key using one of the following fields: `databricks_api_token` or `databricks_api_token_plaintext`. + +- - `databricks_workspace_url` + - String + - The URL of the Databricks workspace containing the model serving endpoint pointed to by this external model. + +::: + + +### model_serving_endpoints._name_.config.served_entities.external_model.google_cloud_vertex_ai_config + +**`Type: Map`** + +Google Cloud Vertex AI Config. Only required if the provider is 'google-cloud-vertex-ai'. + + + +:::list-table + +- - Key + - Type + - Description + +- - `private_key` + - String + - The Databricks secret key reference for a private key for the service account which has access to the Google Cloud Vertex AI Service. See [Best practices for managing service account keys]. If you prefer to paste your API key directly, see `private_key_plaintext`. You must provide an API key using one of the following fields: `private_key` or `private_key_plaintext` [Best practices for managing service account keys]: https://cloud.google.com/iam/docs/best-practices-for-managing-service-account-keys + +- - `private_key_plaintext` + - String + - The private key for the service account which has access to the Google Cloud Vertex AI Service provided as a plaintext secret. See [Best practices for managing service account keys]. If you prefer to reference your key using Databricks Secrets, see `private_key`. You must provide an API key using one of the following fields: `private_key` or `private_key_plaintext`. [Best practices for managing service account keys]: https://cloud.google.com/iam/docs/best-practices-for-managing-service-account-keys + +- - `project_id` + - String + - This is the Google Cloud project id that the service account is associated with. + +- - `region` + - String + - This is the region for the Google Cloud Vertex AI Service. See [supported regions] for more details. 
Some models are only available in specific regions. [supported regions]: https://cloud.google.com/vertex-ai/docs/general/locations + +::: + + +### model_serving_endpoints._name_.config.served_entities.external_model.openai_config + +**`Type: Map`** + +OpenAI Config. Only required if the provider is 'openai'. + + + +:::list-table + +- - Key + - Type + - Description + +- - `microsoft_entra_client_id` + - String + - This field is only required for Azure AD OpenAI and is the Microsoft Entra Client ID. + +- - `microsoft_entra_client_secret` + - String + - The Databricks secret key reference for a client secret used for Microsoft Entra ID authentication. If you prefer to paste your client secret directly, see `microsoft_entra_client_secret_plaintext`. You must provide an API key using one of the following fields: `microsoft_entra_client_secret` or `microsoft_entra_client_secret_plaintext`. + +- - `microsoft_entra_client_secret_plaintext` + - String + - The client secret used for Microsoft Entra ID authentication provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `microsoft_entra_client_secret`. You must provide an API key using one of the following fields: `microsoft_entra_client_secret` or `microsoft_entra_client_secret_plaintext`. + +- - `microsoft_entra_tenant_id` + - String + - This field is only required for Azure AD OpenAI and is the Microsoft Entra Tenant ID. + +- - `openai_api_base` + - String + - This is a field to provide a customized base URL for the OpenAI API. For Azure OpenAI, this field is required, and is the base URL for the Azure OpenAI API service provided by Azure. For other OpenAI API types, this field is optional, and if left unspecified, the standard OpenAI base URL is used. + +- - `openai_api_key` + - String + - The Databricks secret key reference for an OpenAI API key using the OpenAI or Azure service. If you prefer to paste your API key directly, see `openai_api_key_plaintext`.
You must provide an API key using one of the following fields: `openai_api_key` or `openai_api_key_plaintext`. + +- - `openai_api_key_plaintext` + - String + - The OpenAI API key using the OpenAI or Azure service provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `openai_api_key`. You must provide an API key using one of the following fields: `openai_api_key` or `openai_api_key_plaintext`. + +- - `openai_api_type` + - String + - This is an optional field to specify the type of OpenAI API to use. For Azure OpenAI, this field is required, and adjust this parameter to represent the preferred security access validation protocol. For access token validation, use azure. For authentication using Azure Active Directory (Azure AD), use azuread. + +- - `openai_api_version` + - String + - This is an optional field to specify the OpenAI API version. For Azure OpenAI, this field is required, and is the version of the Azure OpenAI service to utilize, specified by a date. + +- - `openai_deployment_name` + - String + - This field is only required for Azure OpenAI and is the name of the deployment resource for the Azure OpenAI service. + +- - `openai_organization` + - String + - This is an optional field to specify the organization in OpenAI or Azure OpenAI. + +::: + + +### model_serving_endpoints._name_.config.served_entities.external_model.palm_config + +**`Type: Map`** + +PaLM Config. Only required if the provider is 'palm'. + + + +:::list-table + +- - Key + - Type + - Description + +- - `palm_api_key` + - String + - The Databricks secret key reference for a PaLM API key. If you prefer to paste your API key directly, see `palm_api_key_plaintext`. You must provide an API key using one of the following fields: `palm_api_key` or `palm_api_key_plaintext`. + +- - `palm_api_key_plaintext` + - String + - The PaLM API key provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `palm_api_key`.
You must provide an API key using one of the following fields: `palm_api_key` or `palm_api_key_plaintext`. + +::: + + +### model_serving_endpoints._name_.config.served_models + +**`Type: Sequence`** + +(Deprecated, use served_entities instead) The list of served models under the serving endpoint config. + + + +:::list-table + +- - Key + - Type + - Description + +- - `burst_scaling_enabled` + - Boolean + - Whether burst scaling is enabled. When enabled (default), the endpoint can automatically scale up beyond provisioned capacity to handle traffic spikes. When disabled, the endpoint maintains fixed capacity at provisioned_model_units. + +- - `environment_vars` + - Map + - An object containing a set of optional, user-specified environment variable key-value pairs used for serving this entity. Note: this is an experimental feature and subject to change. Example entity environment variables that refer to Databricks secrets: `{"OPENAI_API_KEY": "{{secrets/my_scope/my_key}}", "DATABRICKS_TOKEN": "{{secrets/my_scope2/my_key2}}"}` + +- - `instance_profile_arn` + - String + - ARN of the instance profile that the served entity uses to access AWS resources. + +- - `max_provisioned_concurrency` + - Integer + - The maximum provisioned concurrency that the endpoint can scale up to. Do not use if workload_size is specified. + +- - `max_provisioned_throughput` + - Integer + - The maximum tokens per second that the endpoint can scale up to. + +- - `min_provisioned_concurrency` + - Integer + - The minimum provisioned concurrency that the endpoint can scale down to. Do not use if workload_size is specified. + +- - `min_provisioned_throughput` + - Integer + - The minimum tokens per second that the endpoint can scale down to. + +- - `model_name` + - String + - + +- - `model_version` + - String + - + +- - `name` + - String + - The name of a served entity. It must be unique across an endpoint. A served entity name can consist of alphanumeric characters, dashes, and underscores. 
If not specified for an external model, this field defaults to external_model.name, with '.' and ':' replaced with '-', and if not specified for other entities, it defaults to entity_name-entity_version. + +- - `provisioned_model_units` + - Integer + - The number of model units provisioned. + +- - `scale_to_zero_enabled` + - Boolean + - Whether the compute resources for the served entity should scale down to zero. + +- - `workload_size` + - String + - The workload size of the served entity. The workload size corresponds to a range of provisioned concurrency that the compute autoscales between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are "Small" (4 - 4 provisioned concurrency), "Medium" (8 - 16 provisioned concurrency), and "Large" (16 - 64 provisioned concurrency). Additional custom workload sizes can also be used when available in the workspace. If scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size is 0. Do not use if min_provisioned_concurrency and max_provisioned_concurrency are specified. + +- - `workload_type` + - String + - The workload type of the served entity. The workload type selects which type of compute to use in the endpoint. The default value for this parameter is "CPU". For deep learning workloads, GPU acceleration is available by selecting workload types like GPU_SMALL and others. See the available [GPU types](https://docs.databricks.com/en/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types). + +::: + + +### model_serving_endpoints._name_.config.traffic_config + +**`Type: Map`** + +The traffic configuration associated with the serving endpoint config. + + + +:::list-table + +- - Key + - Type + - Description + +- - `routes` + - Sequence + - The list of routes that define traffic to each served entity. See [\_](#model_serving_endpointsnameconfigtraffic_configroutes). 
+ +::: + + +### model_serving_endpoints._name_.config.traffic_config.routes + +**`Type: Sequence`** + +The list of routes that define traffic to each served entity. + + + +:::list-table + +- - Key + - Type + - Description + +- - `served_entity_name` + - String + - + +- - `served_model_name` + - String + - The name of the served model this route configures traffic for. + +- - `traffic_percentage` + - Integer + - The percentage of endpoint traffic to send to this route. It must be an integer between 0 and 100 inclusive. + +::: + + +### model_serving_endpoints._name_.email_notifications + +**`Type: Map`** + +Email notification settings. + + + +:::list-table + +- - Key + - Type + - Description + +- - `on_update_failure` + - Sequence + - A list of email addresses to be notified when an endpoint fails to update its configuration or state. + +- - `on_update_success` + - Sequence + - A list of email addresses to be notified when an endpoint successfully updates its configuration or state. + +::: + + +### model_serving_endpoints._name_.lifecycle + +**`Type: Map`** + +Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. + + + +:::list-table + +- - Key + - Type + - Description + +- - `prevent_destroy` + - Boolean + - Lifecycle setting to prevent the resource from being destroyed. + +::: + + +### model_serving_endpoints._name_.permissions + +**`Type: Sequence`** + + + + + +:::list-table + +- - Key + - Type + - Description + +- - `group_name` + - String + - + +- - `level` + - String + - Permission level + +- - `service_principal_name` + - String + - + +- - `user_name` + - String + - + +::: + + +### model_serving_endpoints._name_.tags + +**`Type: Sequence`** + +Tags to be attached to the serving endpoint and automatically propagated to billing logs. + + + +:::list-table + +- - Key + - Type + - Description + +- - `key` + - String + - Key field for a serving endpoint tag. 
+ +- - `value` + - String + - Optional value field for a serving endpoint tag. + +::: + + +## models + +**`Type: Map`** + +The model resource allows you to define [legacy models](/api/workspace/modelregistry/createmodel) in bundles. Databricks recommends you use Unity Catalog [registered models](#registered-model) instead. + +```yaml +models: + : + : +``` + + +:::list-table + +- - Key + - Type + - Description + +- - `description` + - String + - Optional description for registered model. + +- - `lifecycle` + - Map + - Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. See [\_](#modelsnamelifecycle). + +- - `name` + - String + - Register models under this name + +- - `permissions` + - Sequence + - See [\_](#modelsnamepermissions). + +- - `tags` + - Sequence + - Additional metadata for registered model. See [\_](#modelsnametags). + +::: + + +### models._name_.lifecycle + +**`Type: Map`** + +Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. + + + +:::list-table + +- - Key + - Type + - Description + +- - `prevent_destroy` + - Boolean + - Lifecycle setting to prevent the resource from being destroyed. + +::: + + +### models._name_.permissions + +**`Type: Sequence`** + + + + + +:::list-table + +- - Key + - Type + - Description + +- - `group_name` + - String + - + +- - `level` + - String + - Permission level + +- - `service_principal_name` + - String + - + +- - `user_name` + - String + - + +::: + + +### models._name_.tags + +**`Type: Sequence`** + +Additional metadata for registered model. + + + +:::list-table + +- - Key + - Type + - Description + +- - `key` + - String + - The tag key. + +- - `value` + - String + - The tag value. 
+ +::: + + +## pipelines + +**`Type: Map`** + +The pipeline resource allows you to create Spark Declarative [Pipelines](/api/workspace/pipelines/create). For information about pipelines, see [_](/dlt/index.md). For a tutorial that uses the Declarative Automation Bundles template to create a pipeline, see [_](/dev-tools/bundles/pipelines-tutorial.md). + +```yaml +pipelines: + : + : +``` + + +:::list-table + +- - Key + - Type + - Description + +- - `allow_duplicate_names` + - Boolean + - If false, deployment will fail if name conflicts with that of another pipeline. + +- - `budget_policy_id` + - String + - Budget policy of this pipeline. + +- - `catalog` + - String + - A catalog in Unity Catalog to publish data from this pipeline to. If `target` is specified, tables in this pipeline are published to a `target` schema inside `catalog` (for example, `catalog`.`target`.`table`). If `target` is not specified, no data is published to Unity Catalog. + +- - `channel` + - String + - DLT Release Channel that specifies which version to use. + +- - `clusters` + - Sequence + - Cluster settings for this pipeline deployment. See [\_](#pipelinesnameclusters). + +- - `configuration` + - Map + - String-String configuration for this pipeline execution. + +- - `continuous` + - Boolean + - Whether the pipeline is continuous or triggered. This replaces `trigger`. + +- - `deployment` + - Map + - Deployment type of this pipeline. See [\_](#pipelinesnamedeployment). + +- - `development` + - Boolean + - Whether the pipeline is in Development mode. Defaults to false. + +- - `dry_run` + - Boolean + - + +- - `edition` + - String + - Pipeline product edition. + +- - `environment` + - Map + - Environment specification for this pipeline used to install dependencies. See [\_](#pipelinesnameenvironment). + +- - `event_log` + - Map + - Event log configuration for this pipeline. See [\_](#pipelinesnameevent_log). 
+ +- - `filters` + - Map + - Filters on which Pipeline packages to include in the deployed graph. See [\_](#pipelinesnamefilters). + +- - `id` + - String + - Unique identifier for this pipeline. + +- - `ingestion_definition` + - Map + - The configuration for a managed ingestion pipeline. These settings cannot be used with the 'libraries', 'schema', 'target', or 'catalog' settings. See [\_](#pipelinesnameingestion_definition). + +- - `libraries` + - Sequence + - Libraries or code needed by this deployment. See [\_](#pipelinesnamelibraries). + +- - `lifecycle` + - Map + - Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. See [\_](#pipelinesnamelifecycle). + +- - `name` + - String + - Friendly identifier for this pipeline. + +- - `notifications` + - Sequence + - List of notification settings for this pipeline. See [\_](#pipelinesnamenotifications). + +- - `permissions` + - Sequence + - See [\_](#pipelinesnamepermissions). + +- - `photon` + - Boolean + - Whether Photon is enabled for this pipeline. + +- - `root_path` + - String + - Root path for this pipeline. This is used as the root directory when editing the pipeline in the Databricks user interface and it is added to sys.path when executing Python sources during pipeline execution. + +- - `run_as` + - Map + - Write-only setting, available only in Create/Update calls. Specifies the user or service principal that the pipeline runs as. If not specified, the pipeline runs as the user who created the pipeline. Only `user_name` or `service_principal_name` can be specified. If both are specified, an error is thrown. See [\_](#pipelinesnamerun_as). + +- - `schema` + - String + - The default schema (database) where tables are read from or published to. + +- - `serverless` + - Boolean + - Whether serverless compute is enabled for this pipeline. 
+ +- - `storage` + - String + - DBFS root directory for storing checkpoints and tables. + +- - `tags` + - Map + - A map of tags associated with the pipeline. These are forwarded to the cluster as cluster tags, and are therefore subject to the same limitations. A maximum of 25 tags can be added to the pipeline. + +- - `target` + - String + - This field is deprecated + +- - `trigger` + - Map + - Use continuous instead + +::: + + +**Example** + +The following example defines a pipeline with the resource key `hello-pipeline`: + +```yaml +resources: + pipelines: + hello-pipeline: + name: hello-pipeline + clusters: + - label: default + num_workers: 1 + development: true + continuous: false + channel: CURRENT + edition: CORE + photon: false + libraries: + - notebook: + path: ./pipeline.py +``` + +### pipelines._name_.clusters + +**`Type: Sequence`** + +Cluster settings for this pipeline deployment. + + + +:::list-table + +- - Key + - Type + - Description + +- - `apply_policy_default_values` + - Boolean + - Note: This field won't be persisted. Only API users will check this field. + +- - `autoscale` + - Map + - Parameters needed in order to automatically scale clusters up and down based on load. Note: autoscaling works best with DB runtime versions 3.0 or later. See [\_](#pipelinesnameclustersautoscale). + +- - `aws_attributes` + - Map + - Attributes related to clusters running on Amazon Web Services. If not specified at cluster creation, a set of default values will be used. See [\_](#pipelinesnameclustersaws_attributes). + +- - `azure_attributes` + - Map + - Attributes related to clusters running on Microsoft Azure. If not specified at cluster creation, a set of default values will be used. See [\_](#pipelinesnameclustersazure_attributes). + +- - `cluster_log_conf` + - Map + - The configuration for delivering spark logs to a long-term storage destination. Only dbfs destinations are supported. Only one destination can be specified for one cluster. 
If the conf is given, the logs will be delivered to the destination every `5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while the destination of executor logs is `$destination/$clusterId/executor`. See [\_](#pipelinesnameclusterscluster_log_conf). + +- - `custom_tags` + - Map + - Additional tags for cluster resources. Databricks will tag all cluster resources (e.g., AWS instances and EBS volumes) with these tags in addition to `default_tags`. Notes: - Currently, Databricks allows at most 45 custom tags - Clusters can only reuse cloud resources if the resources' tags are a subset of the cluster tags + +- - `driver_instance_pool_id` + - String + - The optional ID of the instance pool for the driver of the cluster belongs. The pool cluster uses the instance pool with id (instance_pool_id) if the driver pool is not assigned. + +- - `driver_node_type_id` + - String + - The node type of the Spark driver. Note that this field is optional; if unset, the driver node type will be set as the same value as `node_type_id` defined above. + +- - `enable_local_disk_encryption` + - Boolean + - Whether to enable local disk encryption for the cluster. + +- - `gcp_attributes` + - Map + - Attributes related to clusters running on Google Cloud Platform. If not specified at cluster creation, a set of default values will be used. See [\_](#pipelinesnameclustersgcp_attributes). + +- - `init_scripts` + - Sequence + - The configuration for storing init scripts. Any number of destinations can be specified. The scripts are executed sequentially in the order provided. If `cluster_log_conf` is specified, init script logs are sent to `//init_scripts`. See [\_](#pipelinesnameclustersinit_scripts). + +- - `instance_pool_id` + - String + - The optional ID of the instance pool to which the cluster belongs. 
+ +- - `label` + - String + - A label for the cluster specification, either `default` to configure the default cluster, or `maintenance` to configure the maintenance cluster. This field is optional. The default value is `default`. + +- - `node_type_id` + - String + - This field encodes, through a single value, the resources available to each of the Spark nodes in this cluster. For example, the Spark nodes can be provisioned and optimized for memory or compute intensive workloads. A list of available node types can be retrieved by using the :method:clusters/listNodeTypes API call. + +- - `num_workers` + - Integer + - Number of worker nodes that this cluster should have. A cluster has one Spark Driver and `num_workers` Executors for a total of `num_workers` + 1 Spark nodes. Note: When reading the properties of a cluster, this field reflects the desired number of workers rather than the actual current number of workers. For instance, if a cluster is resized from 5 to 10 workers, this field will immediately be updated to reflect the target size of 10 workers, whereas the workers listed in `spark_info` will gradually increase from 5 to 10 as the new nodes are provisioned. + +- - `policy_id` + - String + - The ID of the cluster policy used to create the cluster if applicable. + +- - `spark_conf` + - Map + - An object containing a set of optional, user-specified Spark configuration key-value pairs. See :method:clusters/create for more details. + +- - `spark_env_vars` + - Map + - An object containing a set of optional, user-specified environment variable key-value pairs. Please note that key-value pair of the form (X,Y) will be exported as is (i.e., `export X='Y'`) while launching the driver and workers. In order to specify an additional set of `SPARK_DAEMON_JAVA_OPTS`, we recommend appending them to `$SPARK_DAEMON_JAVA_OPTS` as shown in the example below. This ensures that all default databricks managed environmental variables are included as well. 
Example Spark environment variables: `{"SPARK_WORKER_MEMORY": "28000m", "SPARK_LOCAL_DIRS": "/local_disk0"}` or `{"SPARK_DAEMON_JAVA_OPTS": "$SPARK_DAEMON_JAVA_OPTS -Dspark.shuffle.service.enabled=true"}` + +- - `ssh_public_keys` + - Sequence + - SSH public key contents that will be added to each Spark node in this cluster. The corresponding private keys can be used to login with the user name `ubuntu` on port `2200`. Up to 10 keys can be specified. + +::: + + +### pipelines._name_.clusters.autoscale + +**`Type: Map`** + +Parameters needed in order to automatically scale clusters up and down based on load. +Note: autoscaling works best with DB runtime versions 3.0 or later. + + + +:::list-table + +- - Key + - Type + - Description + +- - `max_workers` + - Integer + - The maximum number of workers to which the cluster can scale up when overloaded. `max_workers` must be strictly greater than `min_workers`. + +- - `min_workers` + - Integer + - The minimum number of workers the cluster can scale down to when underutilized. It is also the initial number of workers the cluster will have after creation. + +- - `mode` - String - - This is the region for the Google Cloud Vertex AI Service. See [supported regions] for more details. Some models are only available in specific regions. [supported regions]: https://cloud.google.com/vertex-ai/docs/general/locations + - Databricks Enhanced Autoscaling optimizes cluster utilization by automatically allocating cluster resources based on workload volume, with minimal impact to the data processing latency of your pipelines. Enhanced Autoscaling is available for `updates` clusters only. The legacy autoscaling feature is used for `maintenance` clusters. ::: -### model_serving_endpoints._name_.config.served_entities.external_model.openai_config +### pipelines._name_.clusters.aws_attributes **`Type: Map`** -OpenAI Config. Only required if the provider is 'openai'. +Attributes related to clusters running on Amazon Web Services. 
+If not specified at cluster creation, a set of default values will be used. @@ -6363,58 +8479,55 @@ OpenAI Config. Only required if the provider is 'openai'. - Type - Description -- - `microsoft_entra_client_id` - - String - - This field is only required for Azure AD OpenAI and is the Microsoft Entra Client ID. - -- - `microsoft_entra_client_secret` +- - `availability` - String - - The Databricks secret key reference for a client secret used for Microsoft Entra ID authentication. If you prefer to paste your client secret directly, see `microsoft_entra_client_secret_plaintext`. You must provide an API key using one of the following fields: `microsoft_entra_client_secret` or `microsoft_entra_client_secret_plaintext`. + - Availability type used for all subsequent nodes past the `first_on_demand` ones. Note: If `first_on_demand` is zero, this availability type will be used for the entire cluster. -- - `microsoft_entra_client_secret_plaintext` - - String - - The client secret used for Microsoft Entra ID authentication provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `microsoft_entra_client_secret`. You must provide an API key using one of the following fields: `microsoft_entra_client_secret` or `microsoft_entra_client_secret_plaintext`. +- - `ebs_volume_count` + - Integer + - The number of volumes launched for each instance. Users can choose up to 10 volumes. This feature is only enabled for supported node types. Legacy node types cannot specify custom EBS volumes. For node types with no instance store, at least one EBS volume needs to be specified; otherwise, cluster creation will fail. These EBS volumes will be mounted at `/ebs0`, `/ebs1`, and etc. Instance store volumes will be mounted at `/local_disk0`, `/local_disk1`, and etc. 
If EBS volumes are attached, Databricks will configure Spark to use only the EBS volumes for scratch storage because heterogeneously sized scratch devices can lead to inefficient disk utilization. If no EBS volumes are attached, Databricks will configure Spark to use instance store volumes. Please note that if EBS volumes are specified, then the Spark configuration `spark.local.dir` will be overridden. -- - `microsoft_entra_tenant_id` - - String - - This field is only required for Azure AD OpenAI and is the Microsoft Entra Tenant ID. +- - `ebs_volume_iops` + - Integer + - If using gp3 volumes, what IOPS to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used. -- - `openai_api_base` - - String - - This is a field to provide a customized base URl for the OpenAI API. For Azure OpenAI, this field is required, and is the base URL for the Azure OpenAI API service provided by Azure. For other OpenAI API types, this field is optional, and if left unspecified, the standard OpenAI base URL is used. +- - `ebs_volume_size` + - Integer + - The size of each EBS volume (in GiB) launched for each instance. For general purpose SSD, this value must be within the range 100 - 4096. For throughput optimized HDD, this value must be within the range 500 - 4096. -- - `openai_api_key` - - String - - The Databricks secret key reference for an OpenAI API key using the OpenAI or Azure service. If you prefer to paste your API key directly, see `openai_api_key_plaintext`. You must provide an API key using one of the following fields: `openai_api_key` or `openai_api_key_plaintext`. +- - `ebs_volume_throughput` + - Integer + - If using gp3 volumes, what throughput to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used. -- - `openai_api_key_plaintext` +- - `ebs_volume_type` - String - - The OpenAI API key using the OpenAI or Azure service provided as a plaintext string.
If you prefer to reference your key using Databricks Secrets, see `openai_api_key`. You must provide an API key using one of the following fields: `openai_api_key` or `openai_api_key_plaintext`. + - All EBS volume types that Databricks supports. See https://aws.amazon.com/ebs/details/ for details. -- - `openai_api_type` - - String - - This is an optional field to specify the type of OpenAI API to use. For Azure OpenAI, this field is required, and adjust this parameter to represent the preferred security access validation protocol. For access token validation, use azure. For authentication using Azure Active Directory (Azure AD) use, azuread. +- - `first_on_demand` + - Integer + - The first `first_on_demand` nodes of the cluster will be placed on on-demand instances. If this value is greater than 0, the cluster driver node in particular will be placed on an on-demand instance. If this value is greater than or equal to the current cluster size, all nodes will be placed on on-demand instances. If this value is less than the current cluster size, `first_on_demand` nodes will be placed on on-demand instances and the remainder will be placed on `availability` instances. Note that this value does not affect cluster size and cannot currently be mutated over the lifetime of a cluster. -- - `openai_api_version` +- - `instance_profile_arn` - String - - This is an optional field to specify the OpenAI API version. For Azure OpenAI, this field is required, and is the version of the Azure OpenAI service to utilize, specified by a date. + - Nodes for this cluster will only be placed on AWS instances with this instance profile. If omitted, nodes will be placed on instances without an IAM instance profile. The instance profile must have previously been added to the Databricks environment by an account administrator. This feature may only be available to certain customer plans.
-- - `openai_deployment_name` - - String - - This field is only required for Azure OpenAI and is the name of the deployment resource for the Azure OpenAI service. +- - `spot_bid_price_percent` + - Integer + - The bid price for AWS spot instances, as a percentage of the corresponding instance type's on-demand price. For example, if this field is set to 50, and the cluster needs a new `r3.xlarge` spot instance, then the bid price is half of the price of on-demand `r3.xlarge` instances. Similarly, if this field is set to 200, the bid price is twice the price of on-demand `r3.xlarge` instances. If not specified, the default value is 100. When spot instances are requested for this cluster, only spot instances whose bid price percentage matches this field will be considered. Note that, for safety, we enforce this field to be no more than 10000. -- - `openai_organization` +- - `zone_id` - String - - This is an optional field to specify the organization in OpenAI or Azure OpenAI. + - Identifier for the availability zone/datacenter in which the cluster resides. This string will be of a form like "us-west-2a". The provided availability zone must be in the same region as the Databricks deployment. For example, "us-west-2a" is not a valid zone id if the Databricks deployment resides in the "us-east-1" region. This is an optional field at cluster creation, and if not specified, the zone "auto" will be used. If the zone specified is "auto", will try to place cluster in a zone with high availability, and will retry placement in a different AZ if there is not enough capacity. The list of available zones as well as the default value can be found by using the `List Zones` method. ::: -### model_serving_endpoints._name_.config.served_entities.external_model.palm_config +### pipelines._name_.clusters.azure_attributes **`Type: Map`** -PaLM Config. Only required if the provider is 'palm'. +Attributes related to clusters running on Microsoft Azure. 
+If not specified at cluster creation, a set of default values will be used. @@ -6424,22 +8537,30 @@ PaLM Config. Only required if the provider is 'palm'. - Type - Description -- - `palm_api_key` +- - `availability` - String - - The Databricks secret key reference for a PaLM API key. If you prefer to paste your API key directly, see `palm_api_key_plaintext`. You must provide an API key using one of the following fields: `palm_api_key` or `palm_api_key_plaintext`. + - Availability type used for all subsequent nodes past the `first_on_demand` ones. Note: If `first_on_demand` is zero, this availability type will be used for the entire cluster. -- - `palm_api_key_plaintext` - - String - - The PaLM API key provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `palm_api_key`. You must provide an API key using one of the following fields: `palm_api_key` or `palm_api_key_plaintext`. +- - `first_on_demand` + - Integer + - The first `first_on_demand` nodes of the cluster will be placed on on-demand instances. This value should be greater than 0, to make sure the cluster driver node is placed on an on-demand instance. If this value is greater than or equal to the current cluster size, all nodes will be placed on on-demand instances. If this value is less than the current cluster size, `first_on_demand` nodes will be placed on on-demand instances and the remainder will be placed on `availability` instances. Note that this value does not affect cluster size and cannot currently be mutated over the lifetime of a cluster. + +- - `log_analytics_info` + - Map + - Defines values necessary to configure and run Azure Log Analytics agent. See [\_](#pipelinesnameclustersazure_attributeslog_analytics_info). + +- - `spot_bid_max_price` + - Any + - The max bid price to be used for Azure spot instances. The Max price for the bid cannot be higher than the on-demand price of the instance. 
If not specified, the default value is -1, which specifies that the instance cannot be evicted on the basis of price, and only on the basis of availability. Further, the value should be > 0 or -1. ::: -### model_serving_endpoints._name_.config.served_models +### pipelines._name_.clusters.azure_attributes.log_analytics_info -**`Type: Sequence`** +**`Type: Map`** -(Deprecated, use served_entities instead) The list of served models under the serving endpoint config. +Defines values necessary to configure and run Azure Log Analytics agent @@ -6449,66 +8570,56 @@ PaLM Config. Only required if the provider is 'palm'. - Type - Description -- - `environment_vars` - - Map - - An object containing a set of optional, user-specified environment variable key-value pairs used for serving this entity. Note: this is an experimental feature and subject to change. Example entity environment variables that refer to Databricks secrets: `{"OPENAI_API_KEY": "{{secrets/my_scope/my_key}}", "DATABRICKS_TOKEN": "{{secrets/my_scope2/my_key2}}"}` +- - `log_analytics_primary_key` + - String + - The primary key for the Azure Log Analytics agent configuration -- - `instance_profile_arn` +- - `log_analytics_workspace_id` - String - - ARN of the instance profile that the served entity uses to access AWS resources. + - The workspace ID for the Azure Log Analytics agent configuration -- - `max_provisioned_concurrency` - - Integer - - The maximum provisioned concurrency that the endpoint can scale up to. Do not use if workload_size is specified. +::: -- - `max_provisioned_throughput` - - Integer - - The maximum tokens per second that the endpoint can scale up to. -- - `min_provisioned_concurrency` - - Integer - - The minimum provisioned concurrency that the endpoint can scale down to. Do not use if workload_size is specified. +### pipelines._name_.clusters.cluster_log_conf -- - `min_provisioned_throughput` - - Integer - - The minimum tokens per second that the endpoint can scale down to.
+**`Type: Map`** -- - `model_name` - - String - - +The configuration for delivering spark logs to a long-term storage destination. +Only dbfs destinations are supported. Only one destination can be specified +for one cluster. If the conf is given, the logs will be delivered to the destination every +`5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while +the destination of executor logs is `$destination/$clusterId/executor`. -- - `model_version` - - String - - -- - `name` - - String - - The name of a served entity. It must be unique across an endpoint. A served entity name can consist of alphanumeric characters, dashes, and underscores. If not specified for an external model, this field defaults to external_model.name, with '.' and ':' replaced with '-', and if not specified for other entities, it defaults to entity_name-entity_version. -- - `provisioned_model_units` - - Integer - - The number of model units provisioned. +:::list-table -- - `scale_to_zero_enabled` - - Boolean - - Whether the compute resources for the served entity should scale down to zero. +- - Key + - Type + - Description -- - `workload_size` - - String - - The workload size of the served entity. The workload size corresponds to a range of provisioned concurrency that the compute autoscales between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are "Small" (4 - 4 provisioned concurrency), "Medium" (8 - 16 provisioned concurrency), and "Large" (16 - 64 provisioned concurrency). Additional custom workload sizes can also be used when available in the workspace. If scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size is 0. Do not use if min_provisioned_concurrency and max_provisioned_concurrency are specified. +- - `dbfs` + - Map + - destination needs to be provided. e.g. `{ "dbfs" : { "destination" : "dbfs:/home/cluster_log" } }`. 
See [\_](#pipelinesnameclusterscluster_log_confdbfs). -- - `workload_type` - - String - - The workload type of the served entity. The workload type selects which type of compute to use in the endpoint. The default value for this parameter is "CPU". For deep learning workloads, GPU acceleration is available by selecting workload types like GPU_SMALL and others. See the available [GPU types](https://docs.databricks.com/en/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types). +- - `s3` + - Map + - destination and either the region or endpoint need to be provided. e.g. `{ "s3": { "destination" : "s3://cluster_log_bucket/prefix", "region" : "us-west-2" } }` Cluster iam role is used to access s3, please make sure the cluster iam role in `instance_profile_arn` has permission to write data to the s3 destination. See [\_](#pipelinesnameclusterscluster_log_confs3). + +- - `volumes` + - Map + - destination needs to be provided, e.g. `{ "volumes": { "destination": "/Volumes/catalog/schema/volume/cluster_log" } }`. See [\_](#pipelinesnameclusterscluster_log_confvolumes). ::: -### model_serving_endpoints._name_.config.traffic_config +### pipelines._name_.clusters.cluster_log_conf.dbfs **`Type: Map`** -The traffic configuration associated with the serving endpoint config. +destination needs to be provided. e.g. +`{ "dbfs" : { "destination" : "dbfs:/home/cluster_log" } }` @@ -6518,18 +8629,21 @@ The traffic configuration associated with the serving endpoint config. - Type - Description -- - `routes` - - Sequence - - The list of routes that define traffic to each served entity. See [\_](#model_serving_endpointsnameconfigtraffic_configroutes). +- - `destination` + - String + - dbfs destination, e.g. `dbfs:/my/path` ::: -### model_serving_endpoints._name_.config.traffic_config.routes +### pipelines._name_.clusters.cluster_log_conf.s3 -**`Type: Sequence`** +**`Type: Map`** -The list of routes that define traffic to each served entity. 
+destination and either the region or endpoint need to be provided. e.g. +`{ "s3": { "destination" : "s3://cluster_log_bucket/prefix", "region" : "us-west-2" } }` +Cluster iam role is used to access s3, please make sure the cluster iam role in +`instance_profile_arn` has permission to write data to the s3 destination. @@ -6539,26 +8653,43 @@ The list of routes that define traffic to each served entity. - Type - Description -- - `served_entity_name` +- - `canned_acl` - String - - + - (Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`. If `canned_acl` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on the destination bucket and prefix. The full list of possible canned acl can be found at http://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl. Please also note that by default only the object owner gets full controls. If you are using cross account role for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to read the logs. -- - `served_model_name` +- - `destination` - String - - The name of the served model this route configures traffic for. + - S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using cluster iam role, please make sure you set cluster iam role and the role has write access to the destination. Please also note that you cannot use AWS keys to deliver logs. -- - `traffic_percentage` - - Integer - - The percentage of endpoint traffic to send to this route. It must be an integer between 0 and 100 inclusive. +- - `enable_encryption` + - Boolean + - (Optional) Flag to enable server side encryption, `false` by default. + +- - `encryption_type` + - String + - (Optional) The encryption type, it could be `sse-s3` or `sse-kms`. It will be used only when encryption is enabled and the default type is `sse-s3`. + +- - `endpoint` + - String + - S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`.
Either region or endpoint needs to be set. If both are set, endpoint will be used. + +- - `kms_key` + - String + - (Optional) Kms key which will be used if encryption is enabled and encryption type is set to `sse-kms`. + +- - `region` + - String + - S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set, endpoint will be used. ::: -### model_serving_endpoints._name_.email_notifications +### pipelines._name_.clusters.cluster_log_conf.volumes **`Type: Map`** -Email notification settings. +destination needs to be provided, e.g. +`{ "volumes": { "destination": "/Volumes/catalog/schema/volume/cluster_log" } }` @@ -6568,22 +8699,19 @@ Email notification settings. - Type - Description -- - `on_update_failure` - - Sequence - - A list of email addresses to be notified when an endpoint fails to update its configuration or state. - -- - `on_update_success` - - Sequence - - A list of email addresses to be notified when an endpoint successfully updates its configuration or state. +- - `destination` + - String + - UC Volumes destination, e.g. `/Volumes/catalog/schema/vol1/init-scripts/setup-datadog.sh` or `dbfs:/Volumes/catalog/schema/vol1/init-scripts/setup-datadog.sh` ::: -### model_serving_endpoints._name_.lifecycle +### pipelines._name_.clusters.gcp_attributes **`Type: Map`** -Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. +Attributes related to clusters running on Google Cloud Platform. +If not specified at cluster creation, a set of default values will be used. @@ -6593,18 +8721,42 @@ Lifecycle is a struct that contains the lifecycle settings for a resource. It co - Type - Description -- - `prevent_destroy` +- - `availability` + - String + - This field determines whether the instance pool will contain preemptible VMs, on-demand VMs, or preemptible VMs with a fallback to on-demand VMs if the former is unavailable. 
+ +- - `boot_disk_size` + - Integer + - Boot disk size in GB + +- - `first_on_demand` + - Integer + - The first `first_on_demand` nodes of the cluster will be placed on on-demand instances. This value should be greater than 0, to make sure the cluster driver node is placed on an on-demand instance. If this value is greater than or equal to the current cluster size, all nodes will be placed on on-demand instances. If this value is less than the current cluster size, `first_on_demand` nodes will be placed on on-demand instances and the remainder will be placed on `availability` instances. Note that this value does not affect cluster size and cannot currently be mutated over the lifetime of a cluster. + +- - `google_service_account` + - String + - If provided, the cluster will impersonate the google service account when accessing gcloud services (like GCS). The google service account must have previously been added to the Databricks environment by an account administrator. + +- - `local_ssd_count` + - Integer + - If provided, each node (workers and driver) in the cluster will have this number of local SSDs attached. Each local SSD is 375GB in size. Refer to [GCP documentation](https://cloud.google.com/compute/docs/disks/local-ssd#choose_number_local_ssds) for the supported number of local SSDs for each instance type. + +- - `use_preemptible_executors` - Boolean - - Lifecycle setting to prevent the resource from being destroyed. + - This field is deprecated + +- - `zone_id` + - String + - Identifier for the availability zone in which the cluster resides. This can be one of the following: - "HA" => High availability, spread nodes across availability zones for a Databricks deployment region [default]. - "AUTO" => Databricks picks an availability zone to schedule the cluster on. - A GCP availability zone => Pick One of the available zones for (machine type + region) from https://cloud.google.com/compute/docs/regions-zones. 
::: -### model_serving_endpoints._name_.permissions +### pipelines._name_.clusters.init_scripts **`Type: Sequence`** - +The configuration for storing init scripts. Any number of destinations can be specified. The scripts are executed sequentially in the order provided. If `cluster_log_conf` is specified, init script logs are sent to `//init_scripts`. @@ -6614,30 +8766,42 @@ Lifecycle is a struct that contains the lifecycle settings for a resource. It co - Type - Description -- - `group_name` - - String - - +- - `abfss` + - Map + - Contains the Azure Data Lake Storage destination path. See [\_](#pipelinesnameclustersinit_scriptsabfss). -- - `level` - - String - - +- - `dbfs` + - Map + - This field is deprecated -- - `service_principal_name` - - String - - +- - `file` + - Map + - destination needs to be provided, e.g. `{ "file": { "destination": "file:/my/local/file.sh" } }`. See [\_](#pipelinesnameclustersinit_scriptsfile). -- - `user_name` - - String - - +- - `gcs` + - Map + - destination needs to be provided, e.g. `{ "gcs": { "destination": "gs://my-bucket/file.sh" } }`. See [\_](#pipelinesnameclustersinit_scriptsgcs). + +- - `s3` + - Map + - destination and either the region or endpoint need to be provided. e.g. `{ \"s3\": { \"destination\": \"s3://cluster_log_bucket/prefix\", \"region\": \"us-west-2\" } }` Cluster iam role is used to access s3, please make sure the cluster iam role in `instance_profile_arn` has permission to write data to the s3 destination. See [\_](#pipelinesnameclustersinit_scriptss3). + +- - `volumes` + - Map + - destination needs to be provided. e.g. `{ \"volumes\" : { \"destination\" : \"/Volumes/my-init.sh\" } }`. See [\_](#pipelinesnameclustersinit_scriptsvolumes). + +- - `workspace` + - Map + - destination needs to be provided, e.g. `{ "workspace": { "destination": "/cluster-init-scripts/setup-datadog.sh" } }`. See [\_](#pipelinesnameclustersinit_scriptsworkspace). 
::: -### model_serving_endpoints._name_.tags +### pipelines._name_.clusters.init_scripts.abfss -**`Type: Sequence`** +**`Type: Map`** -Tags to be attached to the serving endpoint and automatically propagated to billing logs. +Contains the Azure Data Lake Storage destination path @@ -6647,28 +8811,20 @@ Tags to be attached to the serving endpoint and automatically propagated to bill - Type - Description -- - `key` - - String - - Key field for a serving endpoint tag. - -- - `value` +- - `destination` - String - - Optional value field for a serving endpoint tag. + - abfss destination, e.g. `abfss://@.dfs.core.windows.net/`. ::: -## models +### pipelines._name_.clusters.init_scripts.file **`Type: Map`** -The model resource allows you to define [legacy models](/api/workspace/modelregistry/createmodel) in bundles. Databricks recommends you use Unity Catalog [registered models](#registered-model) instead. +destination needs to be provided, e.g. +`{ "file": { "destination": "file:/my/local/file.sh" } }` -```yaml -models: - : - : -``` :::list-table @@ -6677,34 +8833,19 @@ models: - Type - Description -- - `description` - - String - - Optional description for registered model. - -- - `lifecycle` - - Map - - Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. See [\_](#modelsnamelifecycle). - -- - `name` +- - `destination` - String - - Register models under this name - -- - `permissions` - - Sequence - - See [\_](#modelsnamepermissions). - -- - `tags` - - Sequence - - Additional metadata for registered model. See [\_](#modelsnametags). + - local file destination, e.g. `file:/my/local/file.sh` ::: -### models._name_.lifecycle +### pipelines._name_.clusters.init_scripts.gcs **`Type: Map`** -Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. +destination needs to be provided, e.g. 
+`{ "gcs": { "destination": "gs://my-bucket/file.sh" } }` @@ -6714,18 +8855,21 @@ Lifecycle is a struct that contains the lifecycle settings for a resource. It co - Type - Description -- - `prevent_destroy` - - Boolean - - Lifecycle setting to prevent the resource from being destroyed. +- - `destination` + - String + - GCS destination/URI, e.g. `gs://my-bucket/some-prefix` ::: -### models._name_.permissions +### pipelines._name_.clusters.init_scripts.s3 -**`Type: Sequence`** +**`Type: Map`** - +destination and either the region or endpoint need to be provided. e.g. +`{ \"s3\": { \"destination\": \"s3://cluster_log_bucket/prefix\", \"region\": \"us-west-2\" } }` +Cluster iam role is used to access s3, please make sure the cluster iam role in +`instance_profile_arn` has permission to write data to the s3 destination. @@ -6735,30 +8879,43 @@ Lifecycle is a struct that contains the lifecycle settings for a resource. It co - Type - Description -- - `group_name` +- - `canned_acl` - String - - + - (Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`. If `canned_acl` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on the destination bucket and prefix. The full list of possible canned acl can be found at http://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl. Please also note that by default only the object owner gets full controls. If you are using cross account role for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to read the logs. -- - `level` +- - `destination` - String - - + - S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using cluster iam role, please make sure you set cluster iam role and the role has write access to the destination. Please also note that you cannot use AWS keys to deliver logs.
-- - `service_principal_name` +- - `enable_encryption` + - Boolean + - (Optional) Flag to enable server side encryption, `false` by default. + +- - `encryption_type` - String - - + - (Optional) The encryption type, it could be `sse-s3` or `sse-kms`. It will be used only when encryption is enabled and the default type is `sse-s3`. -- - `user_name` +- - `endpoint` - String - - + - S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`. Either region or endpoint needs to be set. If both are set, endpoint will be used. + +- - `kms_key` + - String + - (Optional) Kms key which will be used if encryption is enabled and encryption type is set to `sse-kms`. + +- - `region` + - String + - S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set, endpoint will be used. ::: -### models._name_.tags +### pipelines._name_.clusters.init_scripts.volumes -**`Type: Sequence`** +**`Type: Map`** -Additional metadata for registered model. +destination needs to be provided. e.g. +`{ \"volumes\" : { \"destination\" : \"/Volumes/my-init.sh\" } }` @@ -6768,28 +8925,20 @@ Additional metadata for registered model. - Type - Description -- - `key` - - String - - The tag key. - -- - `value` +- - `destination` - String - - The tag value. + - UC Volumes destination, e.g. `/Volumes/catalog/schema/vol1/init-scripts/setup-datadog.sh` or `dbfs:/Volumes/catalog/schema/vol1/init-scripts/setup-datadog.sh` ::: -## pipelines +### pipelines._name_.clusters.init_scripts.workspace **`Type: Map`** -The pipeline resource allows you to create Delta Live Tables [pipelines](/api/workspace/pipelines/create). For information about pipelines, see [_](/dlt/index.md). For a tutorial that uses the Declarative Automation Bundles template to create a pipeline, see [_](/dev-tools/bundles/pipelines-tutorial.md). +destination needs to be provided, e.g. 
+`{ "workspace": { "destination": "/cluster-init-scripts/setup-datadog.sh" } }` -```yaml -pipelines: - : - : -``` :::list-table @@ -6798,148 +8947,43 @@ pipelines: - Type - Description -- - `allow_duplicate_names` - - Boolean - - If false, deployment will fail if name conflicts with that of another pipeline. - -- - `catalog` - - String - - A catalog in Unity Catalog to publish data from this pipeline to. If `target` is specified, tables in this pipeline are published to a `target` schema inside `catalog` (for example, `catalog`.`target`.`table`). If `target` is not specified, no data is published to Unity Catalog. - -- - `channel` - - String - - DLT Release Channel that specifies which version to use. - -- - `clusters` - - Sequence - - Cluster settings for this pipeline deployment. See [\_](#pipelinesnameclusters). - -- - `configuration` - - Map - - String-String configuration for this pipeline execution. - -- - `continuous` - - Boolean - - Whether the pipeline is continuous or triggered. This replaces `trigger`. - -- - `deployment` - - Map - - Deployment type of this pipeline. See [\_](#pipelinesnamedeployment). - -- - `development` - - Boolean - - Whether the pipeline is in Development mode. Defaults to false. - -- - `dry_run` - - Boolean - - - -- - `edition` - - String - - Pipeline product edition. - -- - `environment` - - Map - - Environment specification for this pipeline used to install dependencies. See [\_](#pipelinesnameenvironment). - -- - `event_log` - - Map - - Event log configuration for this pipeline. See [\_](#pipelinesnameevent_log). - -- - `filters` - - Map - - Filters on which Pipeline packages to include in the deployed graph. See [\_](#pipelinesnamefilters). - -- - `id` +- - `destination` - String - - Unique identifier for this pipeline. - -- - `ingestion_definition` - - Map - - The configuration for a managed ingestion pipeline. These settings cannot be used with the 'libraries', 'schema', 'target', or 'catalog' settings. 
See [\_](#pipelinesnameingestion_definition). - -- - `libraries` - - Sequence - - Libraries or code needed by this deployment. See [\_](#pipelinesnamelibraries). - -- - `lifecycle` - - Map - - Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. See [\_](#pipelinesnamelifecycle). + - wsfs destination, e.g. `workspace:/cluster-init-scripts/setup-datadog.sh` -- - `name` - - String - - Friendly identifier for this pipeline. +::: -- - `notifications` - - Sequence - - List of notification settings for this pipeline. See [\_](#pipelinesnamenotifications). -- - `permissions` - - Sequence - - See [\_](#pipelinesnamepermissions). +### pipelines._name_.deployment -- - `photon` - - Boolean - - Whether Photon is enabled for this pipeline. +**`Type: Map`** -- - `root_path` - - String - - Root path for this pipeline. This is used as the root directory when editing the pipeline in the Databricks user interface and it is added to sys.path when executing Python sources during pipeline execution. +Deployment type of this pipeline. -- - `schema` - - String - - The default schema (database) where tables are read from or published to. -- - `serverless` - - Boolean - - Whether serverless compute is enabled for this pipeline. -- - `storage` - - String - - DBFS root directory for storing checkpoints and tables. +:::list-table -- - `tags` - - Map - - A map of tags associated with the pipeline. These are forwarded to the cluster as cluster tags, and are therefore subject to the same limitations. A maximum of 25 tags can be added to the pipeline. +- - Key + - Type + - Description -- - `target` +- - `kind` - String - - This field is deprecated + - The deployment method that manages the pipeline. -- - `trigger` - - Map - - Use continuous instead +- - `metadata_file_path` + - String + - The path to the file containing metadata about the deployment. 
::: -**Example** - -The following example defines a pipeline with the resource key `hello-pipeline`: - -```yaml -resources: - pipelines: - hello-pipeline: - name: hello-pipeline - clusters: - - label: default - num_workers: 1 - development: true - continuous: false - channel: CURRENT - edition: CORE - photon: false - libraries: - - notebook: - path: ./pipeline.py -``` - -### pipelines._name_.clusters +### pipelines._name_.environment -**`Type: Sequence`** +**`Type: Map`** -Cluster settings for this pipeline deployment. +Environment specification for this pipeline used to install dependencies. @@ -6949,91 +8993,72 @@ Cluster settings for this pipeline deployment. - Type - Description -- - `apply_policy_default_values` - - Boolean - - Note: This field won't be persisted. Only API users will check this field. - -- - `autoscale` - - Map - - Parameters needed in order to automatically scale clusters up and down based on load. Note: autoscaling works best with DB runtime versions 3.0 or later. See [\_](#pipelinesnameclustersautoscale). +- - `dependencies` + - Sequence + - List of pip dependencies, as supported by the version of pip in this environment. Each dependency is a pip requirement file line https://pip.pypa.io/en/stable/reference/requirements-file-format/ Allowed dependency could be , , (WSFS or Volumes in Databricks), -- - `aws_attributes` - - Map - - Attributes related to clusters running on Amazon Web Services. If not specified at cluster creation, a set of default values will be used. See [\_](#pipelinesnameclustersaws_attributes). +::: -- - `azure_attributes` - - Map - - Attributes related to clusters running on Microsoft Azure. If not specified at cluster creation, a set of default values will be used. See [\_](#pipelinesnameclustersazure_attributes). -- - `cluster_log_conf` - - Map - - The configuration for delivering spark logs to a long-term storage destination. Only dbfs destinations are supported. Only one destination can be specified for one cluster. 
If the conf is given, the logs will be delivered to the destination every `5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while the destination of executor logs is `$destination/$clusterId/executor`. See [\_](#pipelinesnameclusterscluster_log_conf). +### pipelines._name_.event_log -- - `custom_tags` - - Map - - Additional tags for cluster resources. Databricks will tag all cluster resources (e.g., AWS instances and EBS volumes) with these tags in addition to `default_tags`. Notes: - Currently, Databricks allows at most 45 custom tags - Clusters can only reuse cloud resources if the resources' tags are a subset of the cluster tags +**`Type: Map`** -- - `driver_instance_pool_id` - - String - - The optional ID of the instance pool for the driver of the cluster belongs. The pool cluster uses the instance pool with id (instance_pool_id) if the driver pool is not assigned. +Event log configuration for this pipeline -- - `driver_node_type_id` - - String - - The node type of the Spark driver. Note that this field is optional; if unset, the driver node type will be set as the same value as `node_type_id` defined above. -- - `enable_local_disk_encryption` - - Boolean - - Whether to enable local disk encryption for the cluster. -- - `gcp_attributes` - - Map - - Attributes related to clusters running on Google Cloud Platform. If not specified at cluster creation, a set of default values will be used. See [\_](#pipelinesnameclustersgcp_attributes). +:::list-table -- - `init_scripts` - - Sequence - - The configuration for storing init scripts. Any number of destinations can be specified. The scripts are executed sequentially in the order provided. If `cluster_log_conf` is specified, init script logs are sent to `//init_scripts`. See [\_](#pipelinesnameclustersinit_scripts). +- - Key + - Type + - Description -- - `instance_pool_id` +- - `catalog` - String - - The optional ID of the instance pool to which the cluster belongs. 
+ - The UC catalog the event log is published under. -- - `label` +- - `name` - String - - A label for the cluster specification, either `default` to configure the default cluster, or `maintenance` to configure the maintenance cluster. This field is optional. The default value is `default`. + - The name the event log is published to in UC. -- - `node_type_id` +- - `schema` - String - - This field encodes, through a single value, the resources available to each of the Spark nodes in this cluster. For example, the Spark nodes can be provisioned and optimized for memory or compute intensive workloads. A list of available node types can be retrieved by using the :method:clusters/listNodeTypes API call. + - The UC schema the event log is published under. -- - `num_workers` - - Integer - - Number of worker nodes that this cluster should have. A cluster has one Spark Driver and `num_workers` Executors for a total of `num_workers` + 1 Spark nodes. Note: When reading the properties of a cluster, this field reflects the desired number of workers rather than the actual current number of workers. For instance, if a cluster is resized from 5 to 10 workers, this field will immediately be updated to reflect the target size of 10 workers, whereas the workers listed in `spark_info` will gradually increase from 5 to 10 as the new nodes are provisioned. +::: -- - `policy_id` - - String - - The ID of the cluster policy used to create the cluster if applicable. -- - `spark_conf` - - Map - - An object containing a set of optional, user-specified Spark configuration key-value pairs. See :method:clusters/create for more details. +### pipelines._name_.filters -- - `spark_env_vars` - - Map - - An object containing a set of optional, user-specified environment variable key-value pairs. Please note that key-value pair of the form (X,Y) will be exported as is (i.e., `export X='Y'`) while launching the driver and workers. 
In order to specify an additional set of `SPARK_DAEMON_JAVA_OPTS`, we recommend appending them to `$SPARK_DAEMON_JAVA_OPTS` as shown in the example below. This ensures that all default databricks managed environmental variables are included as well. Example Spark environment variables: `{"SPARK_WORKER_MEMORY": "28000m", "SPARK_LOCAL_DIRS": "/local_disk0"}` or `{"SPARK_DAEMON_JAVA_OPTS": "$SPARK_DAEMON_JAVA_OPTS -Dspark.shuffle.service.enabled=true"}` +**`Type: Map`** -- - `ssh_public_keys` +Filters on which Pipeline packages to include in the deployed graph. + + + +:::list-table + +- - Key + - Type + - Description + +- - `exclude` - Sequence - - SSH public key contents that will be added to each Spark node in this cluster. The corresponding private keys can be used to login with the user name `ubuntu` on port `2200`. Up to 10 keys can be specified. + - Paths to exclude. + +- - `include` + - Sequence + - Paths to include. ::: -### pipelines._name_.clusters.autoscale +### pipelines._name_.ingestion_definition **`Type: Map`** -Parameters needed in order to automatically scale clusters up and down based on load. -Note: autoscaling works best with DB runtime versions 3.0 or later. +The configuration for a managed ingestion pipeline. These settings cannot be used with the 'libraries', 'schema', 'target', or 'catalog' settings. @@ -7043,27 +9068,38 @@ Note: autoscaling works best with DB runtime versions 3.0 or later. - Type - Description -- - `max_workers` - - Integer - - The maximum number of workers to which the cluster can scale up when overloaded. `max_workers` must be strictly greater than `min_workers`. +- - `connection_name` + - String + - The Unity Catalog connection that this ingestion pipeline uses to communicate with the source. This is used with both connectors for applications like Salesforce, Workday, and so on, and also database connectors like Oracle, (connector_type = QUERY_BASED OR connector_type = CDC). 
If connection name corresponds to database connectors like Oracle, and connector_type is not provided then connector_type defaults to QUERY_BASED. If connector_type is passed as CDC we use Combined Cdc Managed Ingestion pipeline. Under certain conditions, this can be replaced with ingestion_gateway_id to change the connector to Cdc Managed Ingestion Pipeline with Gateway pipeline. -- - `min_workers` - - Integer - - The minimum number of workers the cluster can scale down to when underutilized. It is also the initial number of workers the cluster will have after creation. +- - `full_refresh_window` + - Map + - (Optional) A window that specifies a set of time ranges for snapshot queries in CDC. See [\_](#pipelinesnameingestion_definitionfull_refresh_window). -- - `mode` +- - `ingestion_gateway_id` - String - - Databricks Enhanced Autoscaling optimizes cluster utilization by automatically allocating cluster resources based on workload volume, with minimal impact to the data processing latency of your pipelines. Enhanced Autoscaling is available for `updates` clusters only. The legacy autoscaling feature is used for `maintenance` clusters. + - Identifier for the gateway that is used by this ingestion pipeline to communicate with the source database. This is used with CDC connectors to databases like SQL Server using a gateway pipeline (connector_type = CDC). Under certain conditions, this can be replaced with connection_name to change the connector to Combined Cdc Managed Ingestion Pipeline. + +- - `objects` + - Sequence + - Required. Settings specifying tables to replicate and the destination for the replicated tables. See [\_](#pipelinesnameingestion_definitionobjects). + +- - `source_configurations` + - Sequence + - Top-level source configurations. See [\_](#pipelinesnameingestion_definitionsource_configurations). + +- - `table_configuration` + - Map + - Configuration settings to control the ingestion of tables. 
These settings are applied to all tables in the pipeline. See [\_](#pipelinesnameingestion_definitiontable_configuration). ::: -### pipelines._name_.clusters.aws_attributes +### pipelines._name_.ingestion_definition.full_refresh_window **`Type: Map`** -Attributes related to clusters running on Amazon Web Services. -If not specified at cluster creation, a set of default values will be used. +(Optional) A window that specifies a set of time ranges for snapshot queries in CDC. @@ -7073,55 +9109,63 @@ If not specified at cluster creation, a set of default values will be used. - Type - Description -- - `availability` - - String - - Availability type used for all subsequent nodes past the `first_on_demand` ones. Note: If `first_on_demand` is zero, this availability type will be used for the entire cluster. +- - `days_of_week` + - Sequence + - Days of week in which the window is allowed to happen If not specified all days of the week will be used. -- - `ebs_volume_count` +- - `start_hour` - Integer - - The number of volumes launched for each instance. Users can choose up to 10 volumes. This feature is only enabled for supported node types. Legacy node types cannot specify custom EBS volumes. For node types with no instance store, at least one EBS volume needs to be specified; otherwise, cluster creation will fail. These EBS volumes will be mounted at `/ebs0`, `/ebs1`, and etc. Instance store volumes will be mounted at `/local_disk0`, `/local_disk1`, and etc. If EBS volumes are attached, Databricks will configure Spark to use only the EBS volumes for scratch storage because heterogenously sized scratch devices can lead to inefficient disk utilization. If no EBS volumes are attached, Databricks will configure Spark to use instance store volumes. Please note that if EBS volumes are specified, then the Spark configuration `spark.local.dir` will be overridden. + - An integer between 0 and 23 denoting the start hour for the window in the 24-hour day. 
-- - `ebs_volume_iops` - - Integer - - If using gp3 volumes, what IOPS to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used. +- - `time_zone_id` + - String + - Time zone id of window. See https://docs.databricks.com/sql/language-manual/sql-ref-syntax-aux-conf-mgmt-set-timezone.html for details. If not specified, UTC will be used. -- - `ebs_volume_size` - - Integer - - The size of each EBS volume (in GiB) launched for each instance. For general purpose SSD, this value must be within the range 100 - 4096. For throughput optimized HDD, this value must be within the range 500 - 4096. +::: -- - `ebs_volume_throughput` - - Integer - - If using gp3 volumes, what throughput to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used. -- - `ebs_volume_type` - - String - - All EBS volume types that Databricks supports. See https://aws.amazon.com/ebs/details/ for details. +### pipelines._name_.ingestion_definition.full_refresh_window.days_of_week -- - `first_on_demand` - - Integer - - The first `first_on_demand` nodes of the cluster will be placed on on-demand instances. If this value is greater than 0, the cluster driver node in particular will be placed on an on-demand instance. If this value is greater than or equal to the current cluster size, all nodes will be placed on on-demand instances. If this value is less than the current cluster size, `first_on_demand` nodes will be placed on on-demand instances and the remainder will be placed on `availability` instances. Note that this value does not affect cluster size and cannot currently be mutated over the lifetime of a cluster. +**`Type: Sequence`** -- - `instance_profile_arn` - - String - - Nodes for this cluster will only be placed on AWS instances with this instance profile. If ommitted, nodes will be placed on instances without an IAM instance profile. 
The instance profile must have previously been added to the Databricks environment by an account administrator. This feature may only be available to certain customer plans. +Days of week in which the window is allowed to happen +If not specified all days of the week will be used. -- - `spot_bid_price_percent` - - Integer - - The bid price for AWS spot instances, as a percentage of the corresponding instance type's on-demand price. For example, if this field is set to 50, and the cluster needs a new `r3.xlarge` spot instance, then the bid price is half of the price of on-demand `r3.xlarge` instances. Similarly, if this field is set to 200, the bid price is twice the price of on-demand `r3.xlarge` instances. If not specified, the default value is 100. When spot instances are requested for this cluster, only spot instances whose bid price percentage matches this field will be considered. Note that, for safety, we enforce this field to be no more than 10000. -- - `zone_id` - - String - - Identifier for the availability zone/datacenter in which the cluster resides. This string will be of a form like "us-west-2a". The provided availability zone must be in the same region as the Databricks deployment. For example, "us-west-2a" is not a valid zone id if the Databricks deployment resides in the "us-east-1" region. This is an optional field at cluster creation, and if not specified, a default zone will be used. If the zone specified is "auto", will try to place cluster in a zone with high availability, and will retry placement in a different AZ if there is not enough capacity. The list of available zones as well as the default value can be found by using the `List Zones` method. +### pipelines._name_.ingestion_definition.objects + +**`Type: Sequence`** + +Required. Settings specifying tables to replicate and the destination for the replicated tables. + + + +:::list-table + +- - Key + - Type + - Description + +- - `report` + - Map + - Select a specific source report. 
See [\_](#pipelinesnameingestion_definitionobjectsreport). + +- - `schema` + - Map + - Select all tables from a specific source schema. See [\_](#pipelinesnameingestion_definitionobjectsschema). + +- - `table` + - Map + - Select a specific source table. See [\_](#pipelinesnameingestion_definitionobjectstable). ::: -### pipelines._name_.clusters.azure_attributes +### pipelines._name_.ingestion_definition.objects.report **`Type: Map`** -Attributes related to clusters running on Microsoft Azure. -If not specified at cluster creation, a set of default values will be used. +Select a specific source report. @@ -7131,30 +9175,34 @@ If not specified at cluster creation, a set of default values will be used. - Type - Description -- - `availability` +- - `destination_catalog` - String - - Availability type used for all subsequent nodes past the `first_on_demand` ones. Note: If `first_on_demand` is zero, this availability type will be used for the entire cluster. + - Required. Destination catalog to store table. -- - `first_on_demand` - - Integer - - The first `first_on_demand` nodes of the cluster will be placed on on-demand instances. This value should be greater than 0, to make sure the cluster driver node is placed on an on-demand instance. If this value is greater than or equal to the current cluster size, all nodes will be placed on on-demand instances. If this value is less than the current cluster size, `first_on_demand` nodes will be placed on on-demand instances and the remainder will be placed on `availability` instances. Note that this value does not affect cluster size and cannot currently be mutated over the lifetime of a cluster. +- - `destination_schema` + - String + - Required. Destination schema to store table. -- - `log_analytics_info` - - Map - - Defines values necessary to configure and run Azure Log Analytics agent. See [\_](#pipelinesnameclustersazure_attributeslog_analytics_info). +- - `destination_table` + - String + - Required. 
Destination table name. The pipeline fails if a table with that name already exists. -- - `spot_bid_max_price` - - Any - - The max bid price to be used for Azure spot instances. The Max price for the bid cannot be higher than the on-demand price of the instance. If not specified, the default value is -1, which specifies that the instance cannot be evicted on the basis of price, and only on the basis of availability. Further, the value should > 0 or -1. +- - `source_url` + - String + - Required. Report URL in the source system. + +- - `table_configuration` + - Map + - Configuration settings to control the ingestion of tables. These settings override the table_configuration defined in the IngestionPipelineDefinition object. See [\_](#pipelinesnameingestion_definitionobjectsreporttable_configuration). ::: -### pipelines._name_.clusters.azure_attributes.log_analytics_info +### pipelines._name_.ingestion_definition.objects.report.table_configuration **`Type: Map`** -Defines values necessary to configure and run Azure Log Analytics agent +Configuration settings to control the ingestion of tables. These settings override the table_configuration defined in the IngestionPipelineDefinition object. @@ -7164,26 +9212,44 @@ Defines values necessary to configure and run Azure Log Analytics agent - Type - Description -- - `log_analytics_primary_key` - - String - - The primary key for the Azure Log Analytics agent configuration +- - `auto_full_refresh_policy` + - Map + - (Optional, Mutable) Policy for auto full refresh, if enabled pipeline will automatically try to fix issues by doing a full refresh on the table in the retry run. auto_full_refresh_policy in table configuration will override the above level auto_full_refresh_policy. For example, { "auto_full_refresh_policy": { "enabled": true, "min_interval_hours": 23, } } If unspecified, auto full refresh is disabled. See [\_](#pipelinesnameingestion_definitionobjectsreporttable_configurationauto_full_refresh_policy). 
+ +- - `exclude_columns` + - Sequence + - A list of column names to be excluded for the ingestion. When not specified, include_columns fully controls what columns to be ingested. When specified, all other columns including future ones will be automatically included for ingestion. This field is mutually exclusive with `include_columns`. + +- - `include_columns` + - Sequence + - A list of column names to be included for the ingestion. When not specified, all columns except ones in exclude_columns will be included. Future columns will be automatically included. When specified, all other future columns will be automatically excluded from ingestion. This field is mutually exclusive with `exclude_columns`. -- - `log_analytics_workspace_id` - - String - - The workspace ID for the Azure Log Analytics agent configuration +- - `primary_keys` + - Sequence + - The primary key of the table used to apply changes. + +- - `sequence_by` + - Sequence + - The column names specifying the logical order of events in the source data. Spark Declarative Pipelines uses this sequencing to handle change events that arrive out of order. ::: -### pipelines._name_.clusters.cluster_log_conf +### pipelines._name_.ingestion_definition.objects.report.table_configuration.auto_full_refresh_policy **`Type: Map`** -The configuration for delivering spark logs to a long-term storage destination. -Only dbfs destinations are supported. Only one destination can be specified -for one cluster. If the conf is given, the logs will be delivered to the destination every -`5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while -the destination of executor logs is `$destination/$clusterId/executor`. +(Optional, Mutable) Policy for auto full refresh, if enabled pipeline will automatically try +to fix issues by doing a full refresh on the table in the retry run. auto_full_refresh_policy +in table configuration will override the above level auto_full_refresh_policy.
+For example, +{ +"auto_full_refresh_policy": { +"enabled": true, +"min_interval_hours": 23, +} +} +If unspecified, auto full refresh is disabled. @@ -7193,27 +9259,22 @@ the destination of executor logs is `$destination/$clusterId/executor`. - Type - Description -- - `dbfs` - - Map - - destination needs to be provided. e.g. `{ "dbfs" : { "destination" : "dbfs:/home/cluster_log" } }`. See [\_](#pipelinesnameclusterscluster_log_confdbfs). - -- - `s3` - - Map - - destination and either the region or endpoint need to be provided. e.g. `{ "s3": { "destination" : "s3://cluster_log_bucket/prefix", "region" : "us-west-2" } }` Cluster iam role is used to access s3, please make sure the cluster iam role in `instance_profile_arn` has permission to write data to the s3 destination. See [\_](#pipelinesnameclusterscluster_log_confs3). +- - `enabled` + - Boolean + - (Required, Mutable) Whether to enable auto full refresh or not. -- - `volumes` - - Map - - destination needs to be provided, e.g. `{ "volumes": { "destination": "/Volumes/catalog/schema/volume/cluster_log" } }`. See [\_](#pipelinesnameclusterscluster_log_confvolumes). +- - `min_interval_hours` + - Integer + - (Optional, Mutable) Specify the minimum interval in hours between the timestamp at which a table was last full refreshed and the current timestamp for triggering auto full refresh. If unspecified and autoFullRefresh is enabled then by default min_interval_hours is 24 hours. ::: -### pipelines._name_.clusters.cluster_log_conf.dbfs +### pipelines._name_.ingestion_definition.objects.schema **`Type: Map`** -destination needs to be provided. e.g. -`{ "dbfs" : { "destination" : "dbfs:/home/cluster_log" } }` +Select all tables from a specific source schema. @@ -7223,21 +9284,34 @@ destination needs to be provided. e.g. - Type - Description -- - `destination` +- - `destination_catalog` - String - - dbfs destination, e.g. `dbfs:/my/path` + - Required. Destination catalog to store tables.
+ +- - `destination_schema` + - String + - Required. Destination schema to store tables in. Tables with the same name as the source tables are created in this destination schema. The pipeline fails if a table with the same name already exists. + +- - `source_catalog` + - String + - The source catalog name. Might be optional depending on the type of source. + +- - `source_schema` + - String + - Required. Schema name in the source database. + +- - `table_configuration` + - Map + - Configuration settings to control the ingestion of tables. These settings are applied to all tables in this schema and override the table_configuration defined in the IngestionPipelineDefinition object. See [\_](#pipelinesnameingestion_definitionobjectsschematable_configuration). ::: -### pipelines._name_.clusters.cluster_log_conf.s3 +### pipelines._name_.ingestion_definition.objects.schema.table_configuration **`Type: Map`** -destination and either the region or endpoint need to be provided. e.g. -`{ "s3": { "destination" : "s3://cluster_log_bucket/prefix", "region" : "us-west-2" } }` -Cluster iam role is used to access s3, please make sure the cluster iam role in -`instance_profile_arn` has permission to write data to the s3 destination. +Configuration settings to control the ingestion of tables. These settings are applied to all tables in this schema and override the table_configuration defined in the IngestionPipelineDefinition object. @@ -7247,43 +9321,44 @@ Cluster iam role is used to access s3, please make sure the cluster iam role in - Type - Description -- - `canned_acl` - - String - - (Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`. If `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on the destination bucket and prefix. The full list of possible canned acl can be found at http://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl.
Please also note that by default only the object owner gets full controls. If you are using cross account role for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to read the logs. - -- - `destination` - - String - - S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using cluster iam role, please make sure you set cluster iam role and the role has write access to the destination. Please also note that you cannot use AWS keys to deliver logs. - -- - `enable_encryption` - - Boolean - - (Optional) Flag to enable server side encryption, `false` by default. +- - `auto_full_refresh_policy` + - Map + - (Optional, Mutable) Policy for auto full refresh, if enabled pipeline will automatically try to fix issues by doing a full refresh on the table in the retry run. auto_full_refresh_policy in table configuration will override the above level auto_full_refresh_policy. For example, { "auto_full_refresh_policy": { "enabled": true, "min_interval_hours": 23, } } If unspecified, auto full refresh is disabled. See [\_](#pipelinesnameingestion_definitionobjectsschematable_configurationauto_full_refresh_policy). -- - `encryption_type` - - String - - (Optional) The encryption type, it could be `sse-s3` or `sse-kms`. It will be used only when encryption is enabled and the default type is `sse-s3`. +- - `exclude_columns` + - Sequence + - A list of column names to be excluded for the ingestion. When not specified, include_columns fully controls what columns to be ingested. When specified, all other columns including future ones will be automatically included for ingestion. This field in mutually exclusive with `include_columns`. -- - `endpoint` - - String - - S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`. Either region or endpoint needs to be set. If both are set, endpoint will be used. +- - `include_columns` + - Sequence + - A list of column names to be included for the ingestion. 
When not specified, all columns except ones in exclude_columns will be included. Future columns will be automatically included. When specified, all other future columns will be automatically excluded from ingestion. This field in mutually exclusive with `exclude_columns`. -- - `kms_key` - - String - - (Optional) Kms key which will be used if encryption is enabled and encryption type is set to `sse-kms`. +- - `primary_keys` + - Sequence + - The primary key of the table used to apply changes. -- - `region` - - String - - S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set, endpoint will be used. +- - `sequence_by` + - Sequence + - The column names specifying the logical order of events in the source data. Spark Declarative Pipelines uses this sequencing to handle change events that arrive out of order. ::: -### pipelines._name_.clusters.cluster_log_conf.volumes +### pipelines._name_.ingestion_definition.objects.schema.table_configuration.auto_full_refresh_policy **`Type: Map`** -destination needs to be provided, e.g. -`{ "volumes": { "destination": "/Volumes/catalog/schema/volume/cluster_log" } }` +(Optional, Mutable) Policy for auto full refresh, if enabled pipeline will automatically try +to fix issues by doing a full refresh on the table in the retry run. auto_full_refresh_policy +in table configuration will override the above level auto_full_refresh_policy. +For example, +{ +"auto_full_refresh_policy": { +"enabled": true, +"min_interval_hours": 23, +} +} +If unspecified, auto full refresh is disabled. @@ -7293,19 +9368,22 @@ destination needs to be provided, e.g. - Type - Description -- - `destination` - - String - - UC Volumes destination, e.g. `/Volumes/catalog/schema/vol1/init-scripts/setup-datadog.sh` or `dbfs:/Volumes/catalog/schema/vol1/init-scripts/setup-datadog.sh` +- - `enabled` + - Boolean + - (Required, Mutable) Whether to enable auto full refresh or not. 
+ +- - `min_interval_hours` + - Integer + - (Optional, Mutable) Specify the minimum interval in hours between the timestamp at which a table was last full refreshed and the current timestamp for triggering auto full refresh. If unspecified and autoFullRefresh is enabled then by default min_interval_hours is 24 hours. ::: -### pipelines._name_.clusters.gcp_attributes +### pipelines._name_.ingestion_definition.objects.table **`Type: Map`** -Attributes related to clusters running on Google Cloud Platform. -If not specified at cluster creation, a set of default values will be used. +Select a specific source table. @@ -7315,42 +9393,42 @@ If not specified at cluster creation, a set of default values will be used. - Type - Description -- - `availability` +- - `destination_catalog` - String - - This field determines whether the instance pool will contain preemptible VMs, on-demand VMs, or preemptible VMs with a fallback to on-demand VMs if the former is unavailable. - -- - `boot_disk_size` - - Integer - - Boot disk size in GB + - Required. Destination catalog to store table. -- - `first_on_demand` - - Integer - - The first `first_on_demand` nodes of the cluster will be placed on on-demand instances. This value should be greater than 0, to make sure the cluster driver node is placed on an on-demand instance. If this value is greater than or equal to the current cluster size, all nodes will be placed on on-demand instances. If this value is less than the current cluster size, `first_on_demand` nodes will be placed on on-demand instances and the remainder will be placed on `availability` instances. Note that this value does not affect cluster size and cannot currently be mutated over the lifetime of a cluster. +- - `destination_schema` + - String + - Required. Destination schema to store table. -- - `google_service_account` +- - `destination_table` - String - - If provided, the cluster will impersonate the google service account when accessing gcloud services (like GCS).
The google service account must have previously been added to the Databricks environment by an account administrator. + - Optional. Destination table name. The pipeline fails if a table with that name already exists. If not set, the source table name is used. -- - `local_ssd_count` - - Integer - - If provided, each node (workers and driver) in the cluster will have this number of local SSDs attached. Each local SSD is 375GB in size. Refer to [GCP documentation](https://cloud.google.com/compute/docs/disks/local-ssd#choose_number_local_ssds) for the supported number of local SSDs for each instance type. +- - `source_catalog` + - String + - Source catalog name. Might be optional depending on the type of source. -- - `use_preemptible_executors` - - Boolean - - This field is deprecated +- - `source_schema` + - String + - Schema name in the source database. Might be optional depending on the type of source. -- - `zone_id` +- - `source_table` - String - - Identifier for the availability zone in which the cluster resides. This can be one of the following: - "HA" => High availability, spread nodes across availability zones for a Databricks deployment region [default]. - "AUTO" => Databricks picks an availability zone to schedule the cluster on. - A GCP availability zone => Pick One of the available zones for (machine type + region) from https://cloud.google.com/compute/docs/regions-zones. + - Required. Table name in the source database. + +- - `table_configuration` + - Map + - Configuration settings to control the ingestion of tables. These settings override the table_configuration defined in the IngestionPipelineDefinition object and the SchemaSpec. See [\_](#pipelinesnameingestion_definitionobjectstabletable_configuration). ::: -### pipelines._name_.clusters.init_scripts +### pipelines._name_.ingestion_definition.objects.table.table_configuration -**`Type: Sequence`** +**`Type: Map`** -The configuration for storing init scripts. 
Any number of destinations can be specified. The scripts are executed sequentially in the order provided. If `cluster_log_conf` is specified, init script logs are sent to `//init_scripts`. +Configuration settings to control the ingestion of tables. These settings override the table_configuration defined in the IngestionPipelineDefinition object and the SchemaSpec. @@ -7360,42 +9438,44 @@ The configuration for storing init scripts. Any number of destinations can be sp - Type - Description -- - `abfss` - - Map - - Contains the Azure Data Lake Storage destination path. See [\_](#pipelinesnameclustersinit_scriptsabfss). - -- - `dbfs` - - Map - - This field is deprecated - -- - `file` +- - `auto_full_refresh_policy` - Map - - destination needs to be provided, e.g. `{ "file": { "destination": "file:/my/local/file.sh" } }`. See [\_](#pipelinesnameclustersinit_scriptsfile). + - (Optional, Mutable) Policy for auto full refresh, if enabled pipeline will automatically try to fix issues by doing a full refresh on the table in the retry run. auto_full_refresh_policy in table configuration will override the above level auto_full_refresh_policy. For example, { "auto_full_refresh_policy": { "enabled": true, "min_interval_hours": 23, } } If unspecified, auto full refresh is disabled. See [\_](#pipelinesnameingestion_definitionobjectstabletable_configurationauto_full_refresh_policy). -- - `gcs` - - Map - - destination needs to be provided, e.g. `{ "gcs": { "destination": "gs://my-bucket/file.sh" } }`. See [\_](#pipelinesnameclustersinit_scriptsgcs). +- - `exclude_columns` + - Sequence + - A list of column names to be excluded for the ingestion. When not specified, include_columns fully controls what columns to be ingested. When specified, all other columns including future ones will be automatically included for ingestion. This field in mutually exclusive with `include_columns`. -- - `s3` - - Map - - destination and either the region or endpoint need to be provided. e.g. 
`{ \"s3\": { \"destination\": \"s3://cluster_log_bucket/prefix\", \"region\": \"us-west-2\" } }` Cluster iam role is used to access s3, please make sure the cluster iam role in `instance_profile_arn` has permission to write data to the s3 destination. See [\_](#pipelinesnameclustersinit_scriptss3). +- - `include_columns` + - Sequence + - A list of column names to be included for the ingestion. When not specified, all columns except ones in exclude_columns will be included. Future columns will be automatically included. When specified, all other future columns will be automatically excluded from ingestion. This field is mutually exclusive with `exclude_columns`. -- - `volumes` - - Map - - destination needs to be provided. e.g. `{ \"volumes\" : { \"destination\" : \"/Volumes/my-init.sh\" } }`. See [\_](#pipelinesnameclustersinit_scriptsvolumes). +- - `primary_keys` + - Sequence + - The primary key of the table used to apply changes. -- - `workspace` - - Map - - destination needs to be provided, e.g. `{ "workspace": { "destination": "/cluster-init-scripts/setup-datadog.sh" } }`. See [\_](#pipelinesnameclustersinit_scriptsworkspace). +- - `sequence_by` + - Sequence + - The column names specifying the logical order of events in the source data. Spark Declarative Pipelines uses this sequencing to handle change events that arrive out of order. ::: -### pipelines._name_.clusters.init_scripts.abfss +### pipelines._name_.ingestion_definition.objects.table.table_configuration.auto_full_refresh_policy **`Type: Map`** -Contains the Azure Data Lake Storage destination path +(Optional, Mutable) Policy for auto full refresh, if enabled pipeline will automatically try +to fix issues by doing a full refresh on the table in the retry run. auto_full_refresh_policy +in table configuration will override the above level auto_full_refresh_policy.
+For example, +{ +"auto_full_refresh_policy": { +"enabled": true, +"min_interval_hours": 23, +} +} +If unspecified, auto full refresh is disabled. @@ -7405,19 +9485,22 @@ Contains the Azure Data Lake Storage destination path - Type - Description -- - `destination` - - String - - abfss destination, e.g. `abfss://@.dfs.core.windows.net/`. +- - `enabled` + - Boolean + - (Required, Mutable) Whether to enable auto full refresh or not. + +- - `min_interval_hours` + - Integer + - (Optional, Mutable) Specify the minimum interval in hours between the timestamp at which a table was last full refreshed and the current timestamp for triggering auto full refresh. If unspecified and autoFullRefresh is enabled then by default min_interval_hours is 24 hours. ::: -### pipelines._name_.clusters.init_scripts.file +### pipelines._name_.ingestion_definition.source_configurations -**`Type: Map`** +**`Type: Sequence`** -destination needs to be provided, e.g. -`{ "file": { "destination": "file:/my/local/file.sh" } }` +Top-level source configurations @@ -7427,19 +9510,18 @@ destination needs to be provided, e.g. - Type - Description -- - `destination` - - String - - local file destination, e.g. `file:/my/local/file.sh` +- - `catalog` + - Map + - Catalog-level source configuration parameters. See [\_](#pipelinesnameingestion_definitionsource_configurationscatalog). ::: -### pipelines._name_.clusters.init_scripts.gcs +### pipelines._name_.ingestion_definition.source_configurations.catalog **`Type: Map`** -destination needs to be provided, e.g. -`{ "gcs": { "destination": "gs://my-bucket/file.sh" } }` +Catalog-level source configuration parameters @@ -7449,21 +9531,22 @@ destination needs to be provided, e.g. - Type - Description -- - `destination` +- - `postgres` + - Map + - Postgres-specific catalog-level configuration parameters. See [\_](#pipelinesnameingestion_definitionsource_configurationscatalogpostgres). + +- - `source_catalog` - String - - GCS destination/URI, e.g.
`gs://my-bucket/some-prefix` + - Source catalog name ::: -### pipelines._name_.clusters.init_scripts.s3 +### pipelines._name_.ingestion_definition.source_configurations.catalog.postgres **`Type: Map`** -destination and either the region or endpoint need to be provided. e.g. -`{ \"s3\": { \"destination\": \"s3://cluster_log_bucket/prefix\", \"region\": \"us-west-2\" } }` -Cluster iam role is used to access s3, please make sure the cluster iam role in -`instance_profile_arn` has permission to write data to the s3 destination. +Postgres-specific catalog-level configuration parameters @@ -7473,43 +9556,43 @@ Cluster iam role is used to access s3, please make sure the cluster iam role in - Type - Description -- - `canned_acl` - - String - - (Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`. If `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on the destination bucket and prefix. The full list of possible canned acl can be found at http://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl. Please also note that by default only the object owner gets full controls. If you are using cross account role for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to read the logs. +- - `slot_config` + - Map + - Optional. The Postgres slot configuration to use for logical replication. See [\_](#pipelinesnameingestion_definitionsource_configurationscatalogpostgresslot_config). -- - `destination` - - String - - S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using cluster iam role, please make sure you set cluster iam role and the role has write access to the destination. Please also note that you cannot use AWS keys to deliver logs. +::: -- - `enable_encryption` - - Boolean - - (Optional) Flag to enable server side encryption, `false` by default. 
-- - `encryption_type` - - String - - (Optional) The encryption type, it could be `sse-s3` or `sse-kms`. It will be used only when encryption is enabled and the default type is `sse-s3`. +### pipelines._name_.ingestion_definition.source_configurations.catalog.postgres.slot_config -- - `endpoint` - - String - - S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`. Either region or endpoint needs to be set. If both are set, endpoint will be used. +**`Type: Map`** -- - `kms_key` +Optional. The Postgres slot configuration to use for logical replication + + + +:::list-table + +- - Key + - Type + - Description + +- - `publication_name` - String - - (Optional) Kms key which will be used if encryption is enabled and encryption type is set to `sse-kms`. + - The name of the publication to use for the Postgres source -- - `region` +- - `slot_name` - String - - S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set, endpoint will be used. + - The name of the logical replication slot to use for the Postgres source ::: -### pipelines._name_.clusters.init_scripts.volumes +### pipelines._name_.ingestion_definition.table_configuration **`Type: Map`** -destination needs to be provided. e.g. -`{ \"volumes\" : { \"destination\" : \"/Volumes/my-init.sh\" } }` +Configuration settings to control the ingestion of tables. These settings are applied to all tables in the pipeline. @@ -7519,19 +9602,44 @@ destination needs to be provided. e.g. - Type - Description -- - `destination` - - String - - UC Volumes destination, e.g. `/Volumes/catalog/schema/vol1/init-scripts/setup-datadog.sh` or `dbfs:/Volumes/catalog/schema/vol1/init-scripts/setup-datadog.sh` +- - `auto_full_refresh_policy` + - Map + - (Optional, Mutable) Policy for auto full refresh, if enabled pipeline will automatically try to fix issues by doing a full refresh on the table in the retry run. auto_full_refresh_policy in table configuration will override the above level auto_full_refresh_policy. 
For example, { "auto_full_refresh_policy": { "enabled": true, "min_interval_hours": 23, } } If unspecified, auto full refresh is disabled. See [\_](#pipelinesnameingestion_definitiontable_configurationauto_full_refresh_policy). + +- - `exclude_columns` + - Sequence + - A list of column names to be excluded for the ingestion. When not specified, include_columns fully controls what columns to be ingested. When specified, all other columns including future ones will be automatically included for ingestion. This field is mutually exclusive with `include_columns`. + +- - `include_columns` + - Sequence + - A list of column names to be included for the ingestion. When not specified, all columns except ones in exclude_columns will be included. Future columns will be automatically included. When specified, all other future columns will be automatically excluded from ingestion. This field is mutually exclusive with `exclude_columns`. + +- - `primary_keys` + - Sequence + - The primary key of the table used to apply changes. + +- - `sequence_by` + - Sequence + - The column names specifying the logical order of events in the source data. Spark Declarative Pipelines uses this sequencing to handle change events that arrive out of order. ::: -### pipelines._name_.clusters.init_scripts.workspace +### pipelines._name_.ingestion_definition.table_configuration.auto_full_refresh_policy **`Type: Map`** -destination needs to be provided, e.g. -`{ "workspace": { "destination": "/cluster-init-scripts/setup-datadog.sh" } }` +(Optional, Mutable) Policy for auto full refresh, if enabled pipeline will automatically try +to fix issues by doing a full refresh on the table in the retry run. auto_full_refresh_policy +in table configuration will override the above level auto_full_refresh_policy. +For example, +{ +"auto_full_refresh_policy": { +"enabled": true, +"min_interval_hours": 23, +} +} +If unspecified, auto full refresh is disabled.
@@ -7541,18 +9649,22 @@ destination needs to be provided, e.g. - Type - Description -- - `destination` - - String - - wsfs destination, e.g. `workspace:/cluster-init-scripts/setup-datadog.sh` +- - `enabled` + - Boolean + - (Required, Mutable) Whether to enable auto full refresh or not. + +- - `min_interval_hours` + - Integer + - (Optional, Mutable) Specify the minimum interval in hours between the timestamp at which a table was last full refreshed and the current timestamp for triggering auto full refresh. If unspecified and autoFullRefresh is enabled then by default min_interval_hours is 24 hours. ::: -### pipelines._name_.deployment +### pipelines._name_.libraries -**`Type: Map`** +**`Type: Sequence`** -Deployment type of this pipeline. +Libraries or code needed by this deployment. @@ -7562,22 +9674,30 @@ Deployment type of this pipeline. - Type - Description -- - `kind` - - String - - The deployment method that manages the pipeline. +- - `file` + - Map + - The path to a file that defines a pipeline and is stored in the Databricks Repos. See [\_](#pipelinesnamelibrariesfile). -- - `metadata_file_path` +- - `glob` + - Map + - The unified field to include source codes. Each entry can be a notebook path, a file path, or a folder path that ends `/**`. This field cannot be used together with `notebook` or `file`. See [\_](#pipelinesnamelibrariesglob). + +- - `notebook` + - Map + - The path to a notebook that defines a pipeline and is stored in the Databricks workspace. See [\_](#pipelinesnamelibrariesnotebook). + +- - `whl` - String - - The path to the file containing metadata about the deployment. + - This field is deprecated ::: -### pipelines._name_.environment +### pipelines._name_.libraries.file **`Type: Map`** -Environment specification for this pipeline used to install dependencies. +The path to a file that defines a pipeline and is stored in the Databricks Repos. @@ -7587,18 +9707,20 @@ Environment specification for this pipeline used to install dependencies.
- Type - Description -- - `dependencies` - - Sequence - - List of pip dependencies, as supported by the version of pip in this environment. Each dependency is a pip requirement file line https://pip.pypa.io/en/stable/reference/requirements-file-format/ Allowed dependency could be , , (WSFS or Volumes in Databricks), +- - `path` + - String + - The absolute path of the source code. ::: -### pipelines._name_.event_log +### pipelines._name_.libraries.glob **`Type: Map`** -Event log configuration for this pipeline +The unified field to include source codes. +Each entry can be a notebook path, a file path, or a folder path that ends `/**`. +This field cannot be used together with `notebook` or `file`. @@ -7608,26 +9730,18 @@ Event log configuration for this pipeline - Type - Description -- - `catalog` - - String - - The UC catalog the event log is published under. - -- - `name` - - String - - The name the event log is published to in UC. - -- - `schema` +- - `include` - String - - The UC schema the event log is published under. + - The source code to include for pipelines ::: -### pipelines._name_.filters +### pipelines._name_.libraries.notebook **`Type: Map`** -Filters on which Pipeline packages to include in the deployed graph. +The path to a notebook that defines a pipeline and is stored in the Databricks workspace. @@ -7637,22 +9751,18 @@ Filters on which Pipeline packages to include in the deployed graph. - Type - Description -- - `exclude` - - Sequence - - Paths to exclude. - -- - `include` - - Sequence - - Paths to include. +- - `path` + - String + - The absolute path of the source code. ::: -### pipelines._name_.ingestion_definition +### pipelines._name_.lifecycle **`Type: Map`** -The configuration for a managed ingestion pipeline. These settings cannot be used with the 'libraries', 'schema', 'target', or 'catalog' settings. +Lifecycle is a struct that contains the lifecycle settings for a resource. 
It controls the behavior of the resource when it is deployed or destroyed. @@ -7662,30 +9772,18 @@ The configuration for a managed ingestion pipeline. These settings cannot be use - Type - Description -- - `connection_name` - - String - - Immutable. The Unity Catalog connection that this ingestion pipeline uses to communicate with the source. This is used with connectors for applications like Salesforce, Workday, and so on. - -- - `ingestion_gateway_id` - - String - - Immutable. Identifier for the gateway that is used by this ingestion pipeline to communicate with the source database. This is used with connectors to databases like SQL Server. - -- - `objects` - - Sequence - - Required. Settings specifying tables to replicate and the destination for the replicated tables. See [\_](#pipelinesnameingestion_definitionobjects). - -- - `table_configuration` - - Map - - Configuration settings to control the ingestion of tables. These settings are applied to all tables in the pipeline. See [\_](#pipelinesnameingestion_definitiontable_configuration). +- - `prevent_destroy` + - Boolean + - Lifecycle setting to prevent the resource from being destroyed. ::: -### pipelines._name_.ingestion_definition.objects +### pipelines._name_.notifications **`Type: Sequence`** -Required. Settings specifying tables to replicate and the destination for the replicated tables. +List of notification settings for this pipeline. @@ -7695,26 +9793,22 @@ Required. Settings specifying tables to replicate and the destination for the re - Type - Description -- - `report` - - Map - - Select a specific source report. See [\_](#pipelinesnameingestion_definitionobjectsreport). - -- - `schema` - - Map - - Select all tables from a specific source schema. See [\_](#pipelinesnameingestion_definitionobjectsschema). +- - `alerts` + - Sequence + - A list of alerts that trigger the sending of notifications to the configured destinations. 
The supported alerts are: * `on-update-success`: A pipeline update completes successfully. * `on-update-failure`: Each time a pipeline update fails. * `on-update-fatal-failure`: A pipeline update fails with a non-retryable (fatal) error. * `on-flow-failure`: A single data flow fails. -- - `table` - - Map - - Select a specific source table. See [\_](#pipelinesnameingestion_definitionobjectstable). +- - `email_recipients` + - Sequence + - A list of email addresses notified when a configured alert is triggered. ::: -### pipelines._name_.ingestion_definition.objects.report +### pipelines._name_.permissions -**`Type: Map`** +**`Type: Sequence`** -Select a specific source report. + @@ -7724,34 +9818,32 @@ Select a specific source report. - Type - Description -- - `destination_catalog` +- - `group_name` - String - - Required. Destination catalog to store table. + - -- - `destination_schema` +- - `level` - String - - Required. Destination schema to store table. + - Permission level -- - `destination_table` +- - `service_principal_name` - String - - Required. Destination table name. The pipeline fails if a table with that name already exists. + - -- - `source_url` +- - `user_name` - String - - Required. Report URL in the source system. - -- - `table_configuration` - - Map - - Configuration settings to control the ingestion of tables. These settings override the table_configuration defined in the IngestionPipelineDefinition object. See [\_](#pipelinesnameingestion_definitionobjectsreporttable_configuration). + - ::: -### pipelines._name_.ingestion_definition.objects.report.table_configuration +### pipelines._name_.run_as **`Type: Map`** -Configuration settings to control the ingestion of tables. These settings override the table_configuration defined in the IngestionPipelineDefinition object. +Write-only setting, available only in Create/Update calls. Specifies the user or service principal that the pipeline runs as. 
If not specified, the pipeline runs as the user who created the pipeline. + +Only `user_name` or `service_principal_name` can be specified. If both are specified, an error is thrown. @@ -7761,31 +9853,28 @@ Configuration settings to control the ingestion of tables. These settings overri - Type - Description -- - `exclude_columns` - - Sequence - - A list of column names to be excluded for the ingestion. When not specified, include_columns fully controls what columns to be ingested. When specified, all other columns including future ones will be automatically included for ingestion. This field in mutually exclusive with `include_columns`. - -- - `include_columns` - - Sequence - - A list of column names to be included for the ingestion. When not specified, all columns except ones in exclude_columns will be included. Future columns will be automatically included. When specified, all other future columns will be automatically excluded from ingestion. This field in mutually exclusive with `exclude_columns`. - -- - `primary_keys` - - Sequence - - The primary key of the table used to apply changes. +- - `service_principal_name` + - String + - Application ID of an active service principal. Setting this field requires the `servicePrincipal/user` role. -- - `sequence_by` - - Sequence - - The column names specifying the logical order of events in the source data. Delta Live Tables uses this sequencing to handle change events that arrive out of order. +- - `user_name` + - String + - The email of an active workspace user. Users can only set this field to their own email. ::: -### pipelines._name_.ingestion_definition.objects.schema +## postgres_branches **`Type: Map`** -Select all tables from a specific source schema. + +```yaml +postgres_branches: + : + : +``` :::list-table @@ -7794,34 +9883,54 @@ Select all tables from a specific source schema. - Type - Description -- - `destination_catalog` +- - `branch_id` - String - - Required. Destination catalog to store tables. 
+ - -- - `destination_schema` +- - `expire_time` + - Map + - + +- - `is_protected` + - Boolean + - + +- - `lifecycle` + - Map + - See [\_](#postgres_branchesnamelifecycle). + +- - `no_expiry` + - Boolean + - + +- - `parent` - String - - Required. Destination schema to store tables in. Tables with the same name as the source tables are created in this destination schema. The pipeline fails If a table with the same name already exists. + - -- - `source_catalog` +- - `source_branch` - String - - The source catalog name. Might be optional depending on the type of source. + - -- - `source_schema` +- - `source_branch_lsn` - String - - Required. Schema name in the source database. + - -- - `table_configuration` +- - `source_branch_time` - Map - - Configuration settings to control the ingestion of tables. These settings are applied to all tables in this schema and override the table_configuration defined in the IngestionPipelineDefinition object. See [\_](#pipelinesnameingestion_definitionobjectsschematable_configuration). + - + +- - `ttl` + - String + - ::: -### pipelines._name_.ingestion_definition.objects.schema.table_configuration +### postgres_branches._name_.lifecycle **`Type: Map`** -Configuration settings to control the ingestion of tables. These settings are applied to all tables in this schema and override the table_configuration defined in the IngestionPipelineDefinition object. + @@ -7831,75 +9940,84 @@ Configuration settings to control the ingestion of tables. These settings are ap - Type - Description -- - `exclude_columns` - - Sequence - - A list of column names to be excluded for the ingestion. When not specified, include_columns fully controls what columns to be ingested. When specified, all other columns including future ones will be automatically included for ingestion. This field in mutually exclusive with `include_columns`. - -- - `include_columns` - - Sequence - - A list of column names to be included for the ingestion. 
When not specified, all columns except ones in exclude_columns will be included. Future columns will be automatically included. When specified, all other future columns will be automatically excluded from ingestion. This field in mutually exclusive with `exclude_columns`. - -- - `primary_keys` - - Sequence - - The primary key of the table used to apply changes. - -- - `sequence_by` - - Sequence - - The column names specifying the logical order of events in the source data. Delta Live Tables uses this sequencing to handle change events that arrive out of order. +- - `prevent_destroy` + - Boolean + - Lifecycle setting to prevent the resource from being destroyed. ::: -### pipelines._name_.ingestion_definition.objects.table +## postgres_endpoints **`Type: Map`** -Select a specific source table. + +```yaml +postgres_endpoints: + : + : +``` :::list-table -- - Key - - Type - - Description +- - Key + - Type + - Description + +- - `autoscaling_limit_max_cu` + - Any + - + +- - `autoscaling_limit_min_cu` + - Any + - + +- - `disabled` + - Boolean + - -- - `destination_catalog` +- - `endpoint_id` - String - - Required. Destination catalog to store table. + - -- - `destination_schema` +- - `endpoint_type` - String - - Required. Destination schema to store table. + - The compute endpoint type. Either `read_write` or `read_only`. -- - `destination_table` - - String - - Optional. Destination table name. The pipeline fails if a table with that name already exists. If not set, the source table name is used. +- - `group` + - Map + - See [\_](#postgres_endpointsnamegroup). -- - `source_catalog` - - String - - Source catalog name. Might be optional depending on the type of source. +- - `lifecycle` + - Map + - See [\_](#postgres_endpointsnamelifecycle). -- - `source_schema` - - String - - Schema name in the source database. Might be optional depending on the type of source. +- - `no_suspension` + - Boolean + - -- - `source_table` +- - `parent` - String - - Required. 
Table name in the source database. + - -- - `table_configuration` +- - `settings` - Map - - Configuration settings to control the ingestion of tables. These settings override the table_configuration defined in the IngestionPipelineDefinition object and the SchemaSpec. See [\_](#pipelinesnameingestion_definitionobjectstabletable_configuration). + - A collection of settings for a compute endpoint. See [\_](#postgres_endpointsnamesettings). + +- - `suspend_timeout_duration` + - String + - ::: -### pipelines._name_.ingestion_definition.objects.table.table_configuration +### postgres_endpoints._name_.group **`Type: Map`** -Configuration settings to control the ingestion of tables. These settings override the table_configuration defined in the IngestionPipelineDefinition object and the SchemaSpec. + @@ -7909,30 +10027,26 @@ Configuration settings to control the ingestion of tables. These settings overri - Type - Description -- - `exclude_columns` - - Sequence - - A list of column names to be excluded for the ingestion. When not specified, include_columns fully controls what columns to be ingested. When specified, all other columns including future ones will be automatically included for ingestion. This field in mutually exclusive with `include_columns`. - -- - `include_columns` - - Sequence - - A list of column names to be included for the ingestion. When not specified, all columns except ones in exclude_columns will be included. Future columns will be automatically included. When specified, all other future columns will be automatically excluded from ingestion. This field in mutually exclusive with `exclude_columns`. +- - `enable_readable_secondaries` + - Boolean + - Whether to allow read-only connections to read-write endpoints. Only relevant for read-write endpoints where size.max > 1. -- - `primary_keys` - - Sequence - - The primary key of the table used to apply changes. +- - `max` + - Integer + - The maximum number of computes in the endpoint group. 
Currently, this must be equal to min. Set to 1 for single compute endpoints, to disable HA. To manually suspend all computes in an endpoint group, set disabled to true on the EndpointSpec. -- - `sequence_by` - - Sequence - - The column names specifying the logical order of events in the source data. Delta Live Tables uses this sequencing to handle change events that arrive out of order. +- - `min` + - Integer + - The minimum number of computes in the endpoint group. Currently, this must be equal to max. This must be greater than or equal to 1. ::: -### pipelines._name_.ingestion_definition.table_configuration +### postgres_endpoints._name_.lifecycle **`Type: Map`** -Configuration settings to control the ingestion of tables. These settings are applied to all tables in the pipeline. + @@ -7942,30 +10056,18 @@ Configuration settings to control the ingestion of tables. These settings are ap - Type - Description -- - `exclude_columns` - - Sequence - - A list of column names to be excluded for the ingestion. When not specified, include_columns fully controls what columns to be ingested. When specified, all other columns including future ones will be automatically included for ingestion. This field in mutually exclusive with `include_columns`. - -- - `include_columns` - - Sequence - - A list of column names to be included for the ingestion. When not specified, all columns except ones in exclude_columns will be included. Future columns will be automatically included. When specified, all other future columns will be automatically excluded from ingestion. This field in mutually exclusive with `exclude_columns`. - -- - `primary_keys` - - Sequence - - The primary key of the table used to apply changes. - -- - `sequence_by` - - Sequence - - The column names specifying the logical order of events in the source data. Delta Live Tables uses this sequencing to handle change events that arrive out of order. 
+- - `prevent_destroy` + - Boolean + - Lifecycle setting to prevent the resource from being destroyed. ::: -### pipelines._name_.libraries +### postgres_endpoints._name_.settings -**`Type: Sequence`** +**`Type: Map`** -Libraries or code needed by this deployment. +A collection of settings for a compute endpoint. @@ -7975,31 +10077,24 @@ Libraries or code needed by this deployment. - Type - Description -- - `file` - - Map - - The path to a file that defines a pipeline and is stored in the Databricks Repos. See [\_](#pipelinesnamelibrariesfile). - -- - `glob` - - Map - - The unified field to include source codes. Each entry can be a notebook path, a file path, or a folder path that ends `/**`. This field cannot be used together with `notebook` or `file`. See [\_](#pipelinesnamelibrariesglob). - -- - `notebook` +- - `pg_settings` - Map - - The path to a notebook that defines a pipeline and is stored in the Databricks workspace. See [\_](#pipelinesnamelibrariesnotebook). - -- - `whl` - - String - - This field is deprecated + - A raw representation of Postgres settings. ::: -### pipelines._name_.libraries.file +## postgres_projects **`Type: Map`** -The path to a file that defines a pipeline and is stored in the Databricks Repos. + +```yaml +postgres_projects: + : + : +``` :::list-table @@ -8008,41 +10103,54 @@ The path to a file that defines a pipeline and is stored in the Databricks Repos - Type - Description -- - `path` +- - `budget_policy_id` - String - - The absolute path of the source code. - -::: + - +- - `custom_tags` + - Sequence + - See [\_](#postgres_projectsnamecustom_tags). -### pipelines._name_.libraries.glob +- - `default_endpoint_settings` + - Map + - A collection of settings for a compute endpoint. See [\_](#postgres_projectsnamedefault_endpoint_settings). -**`Type: Map`** +- - `display_name` + - String + - -The unified field to include source codes. -Each entry can be a notebook path, a file path, or a folder path that ends `/**`. 
-This field cannot be used together with `notebook` or `file`. +- - `enable_pg_native_login` + - Boolean + - +- - `history_retention_duration` + - String + - +- - `lifecycle` + - Map + - See [\_](#postgres_projectsnamelifecycle). -:::list-table +- - `permissions` + - Sequence + - See [\_](#postgres_projectsnamepermissions). -- - Key - - Type - - Description +- - `pg_version` + - Integer + - -- - `include` +- - `project_id` - String - - The source code to include for pipelines + - ::: -### pipelines._name_.libraries.notebook +### postgres_projects._name_.custom_tags -**`Type: Map`** +**`Type: Sequence`** -The path to a notebook that defines a pipeline and is stored in the Databricks workspace. + @@ -8052,18 +10160,22 @@ The path to a notebook that defines a pipeline and is stored in the Databricks w - Type - Description -- - `path` +- - `key` - String - - The absolute path of the source code. + - The key of the custom tag. + +- - `value` + - String + - The value of the custom tag. ::: -### pipelines._name_.lifecycle +### postgres_projects._name_.default_endpoint_settings **`Type: Map`** -Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. +A collection of settings for a compute endpoint. @@ -8073,18 +10185,34 @@ Lifecycle is a struct that contains the lifecycle settings for a resource. It co - Type - Description -- - `prevent_destroy` +- - `autoscaling_limit_max_cu` + - Any + - The maximum number of Compute Units. Minimum value is 0.5. + +- - `autoscaling_limit_min_cu` + - Any + - The minimum number of Compute Units. Minimum value is 0.5. + +- - `no_suspension` - Boolean - - Lifecycle setting to prevent the resource from being destroyed. + - When set to true, explicitly disables automatic suspension (never suspend). Should be set to true when provided. + +- - `pg_settings` + - Map + - A raw representation of Postgres settings. 
+ +- - `suspend_timeout_duration` + - String + - Duration of inactivity after which the compute endpoint is automatically suspended. If specified should be between 60s and 604800s (1 minute to 1 week). ::: -### pipelines._name_.notifications +### postgres_projects._name_.lifecycle -**`Type: Sequence`** +**`Type: Map`** -List of notification settings for this pipeline. + @@ -8094,18 +10222,14 @@ List of notification settings for this pipeline. - Type - Description -- - `alerts` - - Sequence - - A list of alerts that trigger the sending of notifications to the configured destinations. The supported alerts are: * `on-update-success`: A pipeline update completes successfully. * `on-update-failure`: Each time a pipeline update fails. * `on-update-fatal-failure`: A pipeline update fails with a non-retryable (fatal) error. * `on-flow-failure`: A single data flow fails. - -- - `email_recipients` - - Sequence - - A list of email addresses notified when a configured alert is triggered. +- - `prevent_destroy` + - Boolean + - Lifecycle setting to prevent the resource from being destroyed. ::: -### pipelines._name_.permissions +### postgres_projects._name_.permissions **`Type: Sequence`** @@ -8121,19 +10245,19 @@ List of notification settings for this pipeline. - - `group_name` - String - - + - The name of the group that has the permission set in level. - - `level` - String - - + - The allowed permission for user, group, service principal defined for this permission. - - `service_principal_name` - String - - + - The name of the service principal that has the permission set in level. - - `user_name` - String - - + - The name of the user that has the permission set in level. ::: @@ -8468,6 +10592,14 @@ registered_models: - Type - Description +- - `aliases` + - Sequence + - See [\_](#registered_modelsnamealiases). 
+ +- - `browse_only` + - Boolean + - + - - `catalog_name` - String - The name of the catalog where the schema and the registered model reside @@ -8476,6 +10608,18 @@ registered_models: - String - The comment attached to the registered model +- - `created_at` + - Integer + - + +- - `created_by` + - String + - + +- - `full_name` + - String + - + - - `grants` - Sequence - See [\_](#registered_modelsnamegrants). @@ -8484,10 +10628,18 @@ registered_models: - Map - Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. See [\_](#registered_modelsnamelifecycle). +- - `metastore_id` + - String + - + - - `name` - String - The name of the registered model +- - `owner` + - String + - + - - `schema_name` - String - The name of the schema where the registered model resides @@ -8496,6 +10648,14 @@ registered_models: - String - The storage location on the cloud under which model version data files are stored +- - `updated_at` + - Integer + - + +- - `updated_by` + - String + - + ::: @@ -8517,6 +10677,47 @@ resources: principal: account users ``` +### registered_models._name_.aliases + +**`Type: Sequence`** + + + + + +:::list-table + +- - Key + - Type + - Description + +- - `alias_name` + - String + - Name of the alias, e.g. 'champion' or 'latest_stable' + +- - `catalog_name` + - String + - + +- - `id` + - String + - + +- - `model_name` + - String + - + +- - `schema_name` + - String + - + +- - `version_num` + - Integer + - Integer version number of the model version to which this alias points. + +::: + + ### registered_models._name_.grants **`Type: Sequence`** @@ -8533,15 +10734,22 @@ resources: - - `principal` - String - - The name of the principal that will be granted privileges + - The principal (user email address or group name). For deleted principals, `principal` is empty while `principal_id` is populated. 
- - `privileges` - Sequence - - The privileges to grant to the specified entity + - The privileges assigned to the principal. ::: +### registered_models._name_.grants.privileges + +**`Type: Sequence`** + +The privileges assigned to the principal. + + ### registered_models._name_.lifecycle **`Type: Map`** @@ -8567,7 +10775,7 @@ Lifecycle is a struct that contains the lifecycle settings for a resource. It co **`Type: Map`** -The schema resource type allows you to define Unity Catalog [schemas](/api/workspace/schemas/create) for tables and other assets in your workflows and pipelines created as part of a bundle. A schema, different from other resource types, has the following limitations: +The schema resource type allows you to define Unity Catalog [schemas](/api/workspace/schemas/create) for tables and other assets in your jobs and pipelines created as part of a bundle. A schema, different from other resource types, has the following limitations: - The owner of a schema resource is always the deployment user, and cannot be changed. If `run_as` is specified in the bundle, it will be ignored by operations on the schema. - Only fields supported by the corresponding [Schemas object create API](/api/workspace/schemas/create) are available for the schema resource. For example, `enable_predictive_optimization` is not supported as it is only available on the [update API](/api/workspace/schemas/update). @@ -8674,11 +10882,11 @@ resources: - - `principal` - String - - + - The principal (user email address or group name). For deleted principals, `principal` is empty while `principal_id` is populated. - - `privileges` - Sequence - - + - The privileges assigned to the principal. ::: @@ -8687,7 +10895,7 @@ resources: **`Type: Sequence`** - +The privileges assigned to the principal. ### schemas._name_.lifecycle @@ -8836,7 +11044,7 @@ The permissions to apply to the secret scope. 
Permissions are managed via secret **`Type: Map`** -The SQL warehouse definitions for the bundle, where each key is the name of the warehouse. See [\_](/dev-tools/bundles/resources.md#sql_warehouses). +Creates a new SQL warehouse. ```yaml sql_warehouses: @@ -8853,7 +11061,7 @@ sql_warehouses: - - `auto_stop_mins` - Integer - - The amount of time in minutes that a SQL warehouse must be idle (i.e., no RUNNING queries) before it is automatically stopped. Supported values: - Must be >= 0 mins for serverless warehouses - Must be == 0 or >= 10 mins for non-serverless warehouses - 0 indicates no autostop. Defaults to 120 mins + - The amount of time in minutes that a SQL warehouse must be idle (i.e., no RUNNING queries) before it is automatically stopped. Supported values: - Must be == 0 or >= 10 mins - 0 indicates no autostop. Defaults to 120 mins - - `channel` - Map @@ -8861,7 +11069,7 @@ sql_warehouses: - - `cluster_size` - String - - Size of the clusters allocated for this warehouse. Increasing the size of a spark cluster allows you to run larger queries on it. If you want to increase the number of concurrent queries, please tune max_num_clusters. Supported values: - 2X-Small - X-Small - Small - Medium - Large - X-Large - 2X-Large - 3X-Large - 4X-Large + - Size of the clusters allocated for this warehouse. Increasing the size of a spark cluster allows you to run larger queries on it. If you want to increase the number of concurrent queries, please tune max_num_clusters. Supported values: - 2X-Small - X-Small - Small - Medium - Large - X-Large - 2X-Large - 3X-Large - 4X-Large - 5X-Large - - `creator_name` - String @@ -8885,15 +11093,15 @@ sql_warehouses: - - `max_num_clusters` - Integer - - Maximum number of clusters that the autoscaler will create to handle concurrent queries. Supported values: - Must be >= min_num_clusters - Must be <= 30. Defaults to min_clusters if unset. + - Maximum number of clusters that the autoscaler will create to handle concurrent queries. 
Supported values: - Must be >= min_num_clusters - Must be <= 40. Defaults to min_clusters if unset. - - `min_num_clusters` - Integer - - Minimum number of available clusters that will be maintained for this SQL warehouse. Increasing this will ensure that a larger number of clusters are always running and therefore may reduce the cold start time for new queries. This is similar to reserved vs. revocable cores in a resource manager. Supported values: - Must be > 0 - Must be <= min(max_num_clusters, 30) Defaults to 1 + - Minimum number of available clusters that will be maintained for this SQL warehouse. Increasing this will ensure that a larger number of clusters are always running and therefore may reduce the cold start time for new queries. This is similar to reserved vs. revocable cores in a resource manager. Supported values: - Must be > 0 - Must be <= min(max_num_clusters, 30) Defaults to 1 - - `name` - String - - Logical name for the cluster. Supported values: - Must be unique within an org. - Must be less than 100 characters. + - Logical name for the cluster. Supported values: - Must be unique within an org. - Must be less than 100 characters. - - `permissions` - Sequence @@ -8901,15 +11109,15 @@ sql_warehouses: - - `spot_instance_policy` - String - - Configurations whether the warehouse should use spot instances. + - EndpointSpotInstancePolicy configures whether the endpoint should use spot instances. 
The breakdown of how the EndpointSpotInstancePolicy converts to per cloud configurations is: +-------+--------------------------------------+--------------------------------+ | Cloud | COST_OPTIMIZED | RELIABILITY_OPTIMIZED | +-------+--------------------------------------+--------------------------------+ | AWS | On Demand Driver with Spot Executors | On Demand Driver and Executors | | AZURE | On Demand Driver and Executors | On Demand Driver and Executors | +-------+--------------------------------------+--------------------------------+ While including "spot" in the enum name may limit the future extensibility of this field because it limits this enum to denoting "spot or not", this is the field that PM recommends after discussion with customers per SC-48783. - - `tags` - Map - - A set of key-value pairs that will be tagged on all resources (e.g., AWS instances and EBS volumes) associated with this SQL warehouse. Supported values: - Number of tags < 45. See [\_](#sql_warehousesnametags). + - A set of key-value pairs that will be tagged on all resources (e.g., AWS instances and EBS volumes) associated with this SQL warehouse. Supported values: - Number of tags < 45. See [\_](#sql_warehousesnametags). - - `warehouse_type` - String - - Warehouse type: `PRO` or `CLASSIC`. If you want to use serverless compute, you must set to `PRO` and also set the field `enable_serverless_compute` to `true`. + - ::: @@ -8980,7 +11188,7 @@ Lifecycle is a struct that contains the lifecycle settings for a resource. It co - - `level` - String - - + - Permission level - - `service_principal_name` - String @@ -9001,7 +11209,7 @@ A set of key-value pairs that will be tagged on all resources (e.g., AWS instanc with this SQL warehouse. Supported values: - - Number of tags < 45. +- Number of tags < 45.
@@ -9047,7 +11255,7 @@ Supported values: **`Type: Map`** -Next field marker: 14 + ```yaml synced_database_tables: @@ -9068,7 +11276,7 @@ synced_database_tables: - - `lifecycle` - Map - - Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. See [\_](#synced_database_tablesnamelifecycle). + - See [\_](#synced_database_tablesnamelifecycle). - - `logical_database_name` - String @@ -9089,7 +11297,7 @@ synced_database_tables: **`Type: Map`** -Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed. + @@ -9170,6 +11378,10 @@ only requires read permissions. - Type - Description +- - `budget_policy_id` + - String + - Budget policy to set on the newly created pipeline. + - - `storage_catalog` - String - This field needs to be specified if the destination catalog is a managed postgres catalog. UC catalog for the pipeline to store intermediate files (checkpoints, event logs etc). This needs to be a standard catalog where the user has permissions to create Delta tables. @@ -9234,7 +11446,7 @@ volumes: - - `volume_type` - String - - The type of the volume. An external volume is located in the specified external location. A managed volume is located in the default location which is specified by the parent schema, or the parent catalog, or the Metastore. [Learn more](https://docs.databricks.com/aws/en/volumes/managed-vs-external) + - ::: @@ -9270,11 +11482,11 @@ For an example bundle that runs a job that writes to a file in Unity Catalog vol - - `principal` - String - - + - The principal (user email address or group name). For deleted principals, `principal` is empty while `principal_id` is populated. - - `privileges` - Sequence - - + - The privileges assigned to the principal. 
::: @@ -9283,7 +11495,7 @@ For an example bundle that runs a job that writes to a file in Unity Catalog vol **`Type: Sequence`** - +The privileges assigned to the principal. ### volumes._name_.lifecycle diff --git a/bundle/schema/jsonschema_for_docs.json b/bundle/schema/jsonschema_for_docs.json index bcb6866296..194fa1697e 100644 --- a/bundle/schema/jsonschema_for_docs.json +++ b/bundle/schema/jsonschema_for_docs.json @@ -119,7 +119,7 @@ }, "lifecycle": { "description": "Lifecycle is a struct that contains the lifecycle settings for a resource. It controls the behavior of the resource when it is deployed or destroyed.", - "$ref": "#/$defs/github.com/databricks/cli/bundle/config/resources.Lifecycle", + "$ref": "#/$defs/github.com/databricks/cli/bundle/config/resources.LifecycleWithStarted", "x-since-version": "v0.268.0" }, "name": { @@ -890,6 +890,20 @@ }, "additionalProperties": false }, + "resources.LifecycleWithStarted": { + "type": "object", + "properties": { + "prevent_destroy": { + "description": "Lifecycle setting to prevent the resource from being destroyed.", + "$ref": "#/$defs/bool" + }, + "started": { + "description": "Lifecycle setting to deploy the resource in started mode. Only supported for apps, clusters, and sql_warehouses in direct deployment mode.", + "$ref": "#/$defs/bool" + } + }, + "additionalProperties": false + }, "resources.MlflowExperiment": { "type": "object", "properties": { @@ -1295,7 +1309,7 @@ } }, "additionalProperties": false, - "markdownDescription": "The pipeline resource allows you to create Delta Live Tables [pipelines](https://docs.databricks.com/api/workspace/pipelines/create). For information about pipelines, see [link](https://docs.databricks.com/dlt/index.html). For a tutorial that uses the Declarative Automation Bundles template to create a pipeline, see [link](https://docs.databricks.com/dev-tools/bundles/pipelines-tutorial.html)." 
+ "markdownDescription": "The pipeline resource allows you to create Spark Declarative [Pipelines](https://docs.databricks.com/api/workspace/pipelines/create). For information about pipelines, see [link](https://docs.databricks.com/dlt/index.html). For a tutorial that uses the Declarative Automation Bundles template to create a pipeline, see [link](https://docs.databricks.com/dev-tools/bundles/pipelines-tutorial.html)." }, "resources.PipelinePermission": { "type": "object", @@ -1687,7 +1701,7 @@ "catalog_name", "name" ], - "markdownDescription": "The schema resource type allows you to define Unity Catalog [schemas](https://docs.databricks.com/api/workspace/schemas/create) for tables and other assets in your workflows and pipelines created as part of a bundle. A schema, different from other resource types, has the following limitations:\n\n- The owner of a schema resource is always the deployment user, and cannot be changed. If `run_as` is specified in the bundle, it will be ignored by operations on the schema.\n- Only fields supported by the corresponding [Schemas object create API](https://docs.databricks.com/api/workspace/schemas/create) are available for the schema resource. For example, `enable_predictive_optimization` is not supported as it is only available on the [update API](https://docs.databricks.com/api/workspace/schemas/update)." + "markdownDescription": "The schema resource type allows you to define Unity Catalog [schemas](https://docs.databricks.com/api/workspace/schemas/create) for tables and other assets in your jobs and pipelines created as part of a bundle. A schema, different from other resource types, has the following limitations:\n\n- The owner of a schema resource is always the deployment user, and cannot be changed. 
If `run_as` is specified in the bundle, it will be ignored by operations on the schema.\n- Only fields supported by the corresponding [Schemas object create API](https://docs.databricks.com/api/workspace/schemas/create) are available for the schema resource. For example, `enable_predictive_optimization` is not supported as it is only available on the [update API](https://docs.databricks.com/api/workspace/schemas/update)." }, "resources.SecretScope": { "type": "object", @@ -2592,8 +2606,13 @@ "config.Workspace": { "type": "object", "properties": { + "account_id": { + "description": "The Databricks account ID.", + "$ref": "#/$defs/string", + "x-since-version": "v0.296.0" + }, "artifact_path": { - "description": "The artifact path to use within the workspace for both deployments and workflow runs", + "description": "The artifact path to use within the workspace for both deployments and job runs", "$ref": "#/$defs/string", "x-since-version": "v0.229.0" }, @@ -2643,7 +2662,7 @@ "x-since-version": "v0.285.0" }, "file_path": { - "description": "The file path to use within the workspace for both deployments and workflow runs", + "description": "The file path to use within the workspace for both deployments and job runs", "$ref": "#/$defs/string", "x-since-version": "v0.229.0" }, @@ -4753,19 +4772,23 @@ "properties": { "alert_id": { "description": "The alert_id is the canonical identifier of the alert.", - "$ref": "#/$defs/string" + "$ref": "#/$defs/string", + "x-since-version": "v0.296.0" }, "subscribers": { "description": "The subscribers receive alert evaluation result notifications after the alert task is completed.\nThe number of subscriptions is limited to 100.", - "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/jobs.AlertTaskSubscriber" + "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/jobs.AlertTaskSubscriber", + "x-since-version": "v0.296.0" }, "warehouse_id": { "description": "The warehouse_id identifies the warehouse 
settings used by the alert task.", - "$ref": "#/$defs/string" + "$ref": "#/$defs/string", + "x-since-version": "v0.296.0" }, "workspace_path": { "description": "The workspace_path is the path to the alert file in the workspace. The path:\n* must start with \"/Workspace\"\n* must be a normalized path.\nUser has to select only one of alert_id or workspace_path to identify the alert.", - "$ref": "#/$defs/string" + "$ref": "#/$defs/string", + "x-since-version": "v0.296.0" } }, "additionalProperties": false @@ -4775,11 +4798,13 @@ "description": "Represents a subscriber that will receive alert notifications.\nA subscriber can be either a user (via email) or a notification destination (via destination_id).", "properties": { "destination_id": { - "$ref": "#/$defs/string" + "$ref": "#/$defs/string", + "x-since-version": "v0.296.0" }, "user_name": { "description": "A valid workspace email address.", - "$ref": "#/$defs/string" + "$ref": "#/$defs/string", + "x-since-version": "v0.296.0" } }, "additionalProperties": false @@ -6159,7 +6184,8 @@ "properties": { "alert_task": { "description": "New alert v2 task", - "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.AlertTask" + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.AlertTask", + "x-since-version": "v0.296.0" }, "clean_rooms_notebook_task": { "description": "The task runs a [clean rooms](https://docs.databricks.com/clean-rooms/index.html) notebook\nwhen the `clean_rooms_notebook_task` field is present.", @@ -6637,15 +6663,18 @@ "properties": { "catalog_name": { "description": "(Required, Immutable) The name of the catalog for the connector's staging storage location.", - "$ref": "#/$defs/string" + "$ref": "#/$defs/string", + "x-since-version": "v0.296.0" }, "schema_name": { "description": "(Required, Immutable) The name of the schema for the connector's staging storage location.", - "$ref": "#/$defs/string" + "$ref": "#/$defs/string", + "x-since-version": "v0.296.0" }, 
"volume_name": { "description": "(Optional) The Unity Catalog-compatible name for the storage location.\nThis is the volume to use for the data that is extracted by the connector.\nSpark Declarative Pipelines system will automatically create the volume under the catalog and schema.\nFor Combined Cdc Managed Ingestion pipelines default name for the volume would be :\n__databricks_ingestion_gateway_staging_data-$pipelineId", - "$ref": "#/$defs/string" + "$ref": "#/$defs/string", + "x-since-version": "v0.296.0" } }, "additionalProperties": false, @@ -6801,13 +6830,15 @@ "description": "(Optional) Connector Type for sources. Ex: CDC, Query Based.", "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/pipelines.ConnectorType", "x-databricks-preview": "PRIVATE", - "doNotSuggest": true + "doNotSuggest": true, + "x-since-version": "v0.296.0" }, "data_staging_options": { "description": "(Optional) Location of staged data storage. This is required for migration from Cdc Managed Ingestion Pipeline\nwith Gateway pipeline to Combined Cdc Managed Ingestion Pipeline.\nIf not specified, the volume for staged data will be created in catalog and schema/target specified in the\ntop level pipeline definition.", "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/pipelines.DataStagingOptions", "x-databricks-preview": "PRIVATE", - "doNotSuggest": true + "doNotSuggest": true, + "x-since-version": "v0.296.0" }, "full_refresh_window": { "description": "(Optional) A window that specifies a set of time ranges for snapshot queries in CDC.", @@ -9474,9 +9505,9 @@ "x-since-version": "v0.229.0" }, "run_as": { - "description": "The identity to use when running Declarative Automation Bundles workflows.", + "description": "The identity to use when running Declarative Automation Bundles resources.", "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.JobRunAs", - "markdownDescription": "The identity to use when running Declarative Automation Bundles 
workflows. See [link](https://docs.databricks.com/dev-tools/bundles/run-as.html).", + "markdownDescription": "The identity to use when running Declarative Automation Bundles resources. See [link](https://docs.databricks.com/dev-tools/bundles/run-as.html).", "x-since-version": "v0.229.0" }, "scripts": { From ff8a2dc4c57673ed9ba426789b1991047d196dda Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Fri, 17 Apr 2026 12:22:32 +0200 Subject: [PATCH 09/16] Address juliacrawf-db review: fix product naming per style guide MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Use "Jobs & Pipelines" for UI navigation references in template READMEs - Remove singular "Spark Declarative Pipeline" — feature name is always plural - Use just "pipeline" when referring to the resource, not the product - Regenerate template outputs and help text Co-authored-by: Isaac --- acceptance/bundle/help/bundle-generate-pipeline/output.txt | 6 +++--- .../bundle/templates/dbt-sql/output/my_dbt_sql/README.md | 2 +- .../default-scala/output/my_default_scala/README.md | 2 +- .../templates/default-sql/output/my_default_sql/README.md | 2 +- .../output/my_jobs_as_code/README.md | 2 +- .../output/my_jobs_as_code/src/sdp_pipeline.ipynb | 2 +- cmd/bundle/generate/pipeline.go | 6 +++--- cmd/workspace/permissions/overrides.go | 5 ++--- .../dbt-sql/template/{{.project_name}}/README.md.tmpl | 2 +- .../default-scala/template/{{.project_name}}/README.md.tmpl | 2 +- .../default-sql/template/{{.project_name}}/README.md.tmpl | 2 +- .../databricks_template_schema.json | 2 +- .../template/{{.project_name}}/README.md.tmpl | 2 +- .../template/{{.project_name}}/src/sdp_pipeline.ipynb.tmpl | 2 +- 14 files changed, 19 insertions(+), 20 deletions(-) diff --git a/acceptance/bundle/help/bundle-generate-pipeline/output.txt b/acceptance/bundle/help/bundle-generate-pipeline/output.txt index c24f552ed4..884cd8614e 100644 --- a/acceptance/bundle/help/bundle-generate-pipeline/output.txt +++ 
b/acceptance/bundle/help/bundle-generate-pipeline/output.txt @@ -1,13 +1,13 @@ >>> [CLI] bundle generate pipeline --help -Generate bundle configuration for an existing Spark Declarative Pipeline. +Generate bundle configuration for an existing pipeline. -This command downloads an existing Lakeflow Spark Declarative Pipeline's configuration and any associated +This command downloads an existing pipeline's configuration and any associated notebooks, creating bundle files that you can use to deploy the pipeline to other environments or manage it as code. Examples: - # Import a production Lakeflow Spark Declarative Pipeline + # Import a production pipeline databricks bundle generate pipeline --existing-pipeline-id abc123 --key etl_pipeline # Organize files in custom directories diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/README.md b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/README.md index 1793627340..00a91e430c 100644 --- a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/README.md +++ b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/README.md @@ -117,7 +117,7 @@ is optional here.) This deploys everything that's defined for this project. For example, the default template would deploy a job called `[dev yourname] my_dbt_sql_job` to your workspace. -You can find that job by opening your workpace and clicking on **Jobs**. +You can find that job by opening your workpace and clicking on **Jobs & Pipelines**. You can also deploy to your production target directly from the command-line. The warehouse, catalog, and schema for that target are configured in `dbt_profiles/profiles.yml`. 
diff --git a/acceptance/bundle/templates/default-scala/output/my_default_scala/README.md b/acceptance/bundle/templates/default-scala/output/my_default_scala/README.md index 7a02014993..1e5f08854e 100644 --- a/acceptance/bundle/templates/default-scala/output/my_default_scala/README.md +++ b/acceptance/bundle/templates/default-scala/output/my_default_scala/README.md @@ -21,7 +21,7 @@ The 'my_default_scala' project was generated by using the default-scala template This deploys everything that's defined for this project. For example, the default template would deploy a job called `[dev yourname] my_default_scala_job` to your workspace. - You can find that job by opening your workspace and clicking on **Jobs**. + You can find that job by opening your workspace and clicking on **Jobs & Pipelines**. 4. Similarly, to deploy a production copy, type: ``` diff --git a/acceptance/bundle/templates/default-sql/output/my_default_sql/README.md b/acceptance/bundle/templates/default-sql/output/my_default_sql/README.md index 5d55c2cf74..551aae1ccf 100644 --- a/acceptance/bundle/templates/default-sql/output/my_default_sql/README.md +++ b/acceptance/bundle/templates/default-sql/output/my_default_sql/README.md @@ -21,7 +21,7 @@ The 'my_default_sql' project was generated by using the default-sql template. This deploys everything that's defined for this project. For example, the default template would deploy a job called `[dev yourname] my_default_sql_job` to your workspace. - You can find that job by opening your workpace and clicking on **Jobs**. + You can find that job by opening your workpace and clicking on **Jobs & Pipelines**. 4. 
Similarly, to deploy a production copy, type: ``` diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/README.md b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/README.md index 1eddbfdc8e..0c16441e51 100644 --- a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/README.md +++ b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/README.md @@ -40,7 +40,7 @@ The 'my_jobs_as_code' project was generated by using the "Jobs as code" template This deploys everything that's defined for this project. For example, the default template would deploy a job called `[dev yourname] my_jobs_as_code_job` to your workspace. - You can find that job by opening your workspace and clicking on **Jobs**. + You can find that job by opening your workspace and clicking on **Jobs & Pipelines**. 3. Similarly, to deploy a production copy, type: ``` diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/sdp_pipeline.ipynb b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/sdp_pipeline.ipynb index eec7c3f2da..7a3350c1f9 100644 --- a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/sdp_pipeline.ipynb +++ b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/sdp_pipeline.ipynb @@ -14,7 +14,7 @@ "source": [ "# SDP pipeline\n", "\n", - "This Lakeflow Spark Declarative Pipeline definition is executed using a pipeline defined in resources/my_jobs_as_code.pipeline.yml." + "This pipeline definition is executed using a pipeline defined in resources/my_jobs_as_code.pipeline.yml." 
] }, { diff --git a/cmd/bundle/generate/pipeline.go b/cmd/bundle/generate/pipeline.go index 51d780014b..3eda7dda8e 100644 --- a/cmd/bundle/generate/pipeline.go +++ b/cmd/bundle/generate/pipeline.go @@ -30,14 +30,14 @@ func NewGeneratePipelineCommand() *cobra.Command { cmd := &cobra.Command{ Use: "pipeline", Short: "Generate bundle configuration for a pipeline", - Long: `Generate bundle configuration for an existing Spark Declarative Pipeline. + Long: `Generate bundle configuration for an existing pipeline. -This command downloads an existing Lakeflow Spark Declarative Pipeline's configuration and any associated +This command downloads an existing pipeline's configuration and any associated notebooks, creating bundle files that you can use to deploy the pipeline to other environments or manage it as code. Examples: - # Import a production Lakeflow Spark Declarative Pipeline + # Import a production pipeline databricks bundle generate pipeline --existing-pipeline-id abc123 --key etl_pipeline # Organize files in custom directories diff --git a/cmd/workspace/permissions/overrides.go b/cmd/workspace/permissions/overrides.go index b8f15e6276..a09f1a745a 100644 --- a/cmd/workspace/permissions/overrides.go +++ b/cmd/workspace/permissions/overrides.go @@ -15,9 +15,8 @@ func cmdOverride(cmd *cobra.Command) { * **[Cluster policy permissions](:service:clusterpolicies)** — Manage which users can use cluster policies. - * **[Spark Declarative Pipeline permissions](:service:pipelines)** — Manage - which users can view, manage, run, cancel, or own a Spark Declarative - Pipeline. + * **[Spark Declarative Pipelines permissions](:service:pipelines)** — Manage + which users can view, manage, run, cancel, or own a pipeline. * **[Job permissions](:service:jobs)** — Manage which users can view, manage, trigger, cancel, or own a job. 
diff --git a/libs/template/templates/dbt-sql/template/{{.project_name}}/README.md.tmpl b/libs/template/templates/dbt-sql/template/{{.project_name}}/README.md.tmpl index 1a98bc1917..efd17bdfb0 100644 --- a/libs/template/templates/dbt-sql/template/{{.project_name}}/README.md.tmpl +++ b/libs/template/templates/dbt-sql/template/{{.project_name}}/README.md.tmpl @@ -117,7 +117,7 @@ is optional here.) This deploys everything that's defined for this project. For example, the default template would deploy a job called `[dev yourname] {{.project_name}}_job` to your workspace. -You can find that job by opening your workpace and clicking on **Jobs**. +You can find that job by opening your workpace and clicking on **Jobs & Pipelines**. You can also deploy to your production target directly from the command-line. The warehouse, catalog, and schema for that target are configured in `dbt_profiles/profiles.yml`. diff --git a/libs/template/templates/default-scala/template/{{.project_name}}/README.md.tmpl b/libs/template/templates/default-scala/template/{{.project_name}}/README.md.tmpl index b0d5520713..80115834b1 100644 --- a/libs/template/templates/default-scala/template/{{.project_name}}/README.md.tmpl +++ b/libs/template/templates/default-scala/template/{{.project_name}}/README.md.tmpl @@ -21,7 +21,7 @@ The '{{.project_name}}' project was generated by using the default-scala templat This deploys everything that's defined for this project. For example, the default template would deploy a job called `[dev yourname] {{.project_name}}_job` to your workspace. - You can find that job by opening your workspace and clicking on **Jobs**. + You can find that job by opening your workspace and clicking on **Jobs & Pipelines**. 4. 
Similarly, to deploy a production copy, type: ``` diff --git a/libs/template/templates/default-sql/template/{{.project_name}}/README.md.tmpl b/libs/template/templates/default-sql/template/{{.project_name}}/README.md.tmpl index 8cb1308aab..28a39f07f1 100644 --- a/libs/template/templates/default-sql/template/{{.project_name}}/README.md.tmpl +++ b/libs/template/templates/default-sql/template/{{.project_name}}/README.md.tmpl @@ -21,7 +21,7 @@ The '{{.project_name}}' project was generated by using the default-sql template. This deploys everything that's defined for this project. For example, the default template would deploy a job called `[dev yourname] {{.project_name}}_job` to your workspace. - You can find that job by opening your workpace and clicking on **Jobs**. + You can find that job by opening your workpace and clicking on **Jobs & Pipelines**. 4. Similarly, to deploy a production copy, type: ``` diff --git a/libs/template/templates/experimental-jobs-as-code/databricks_template_schema.json b/libs/template/templates/experimental-jobs-as-code/databricks_template_schema.json index c523f61397..4c78ec0635 100644 --- a/libs/template/templates/experimental-jobs-as-code/databricks_template_schema.json +++ b/libs/template/templates/experimental-jobs-as-code/databricks_template_schema.json @@ -20,7 +20,7 @@ "type": "string", "default": "yes", "enum": ["yes", "no"], - "description": "Include a stub (sample) Spark Declarative Pipeline in '{{.project_name}}{{path_separator}}src'", + "description": "Include a stub (sample) pipeline in '{{.project_name}}{{path_separator}}src'", "order": 3 }, "include_python": { diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/README.md.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/README.md.tmpl index 6e399d0162..94d6d79539 100644 --- a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/README.md.tmpl +++ 
b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/README.md.tmpl @@ -42,7 +42,7 @@ The '{{.project_name}}' project was generated by using the "Jobs as code" templa This deploys everything that's defined for this project. For example, the default template would deploy a job called `[dev yourname] {{.project_name}}_job` to your workspace. - You can find that job by opening your workspace and clicking on **Jobs**. + You can find that job by opening your workspace and clicking on **Jobs & Pipelines**. 3. Similarly, to deploy a production copy, type: ``` diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/sdp_pipeline.ipynb.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/sdp_pipeline.ipynb.tmpl index 5e70f5549c..ebccec33ae 100644 --- a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/sdp_pipeline.ipynb.tmpl +++ b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/sdp_pipeline.ipynb.tmpl @@ -14,7 +14,7 @@ "source": [ "# SDP pipeline\n", "\n", - "This Lakeflow Spark Declarative Pipeline definition is executed using a pipeline defined in resources/{{.project_name}}.pipeline.yml." + "This pipeline definition is executed using a pipeline defined in resources/{{.project_name}}.pipeline.yml." 
] }, { From 41d318bdf6a28ef077c90a7c4749dcfc0862b3ab Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Fri, 17 Apr 2026 13:32:41 +0200 Subject: [PATCH 10/16] Keep full product name inside link: "create [Spark Declarative Pipelines](...)" Co-authored-by: Isaac --- bundle/docsgen/output/reference.md | 2 +- bundle/docsgen/output/resources.md | 4 ++-- bundle/internal/schema/annotations_openapi_overrides.yml | 2 +- bundle/schema/jsonschema.json | 2 +- bundle/schema/jsonschema_for_docs.json | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/bundle/docsgen/output/reference.md b/bundle/docsgen/output/reference.md index 3ace4ef309..cb48644fd4 100644 --- a/bundle/docsgen/output/reference.md +++ b/bundle/docsgen/output/reference.md @@ -1,7 +1,7 @@ --- description: 'Configuration reference for databricks.yml' last_update: - date: 2026-04-16 + date: 2026-04-17 --- diff --git a/bundle/docsgen/output/resources.md b/bundle/docsgen/output/resources.md index 464d3bc92b..5ebb3c0d03 100644 --- a/bundle/docsgen/output/resources.md +++ b/bundle/docsgen/output/resources.md @@ -1,7 +1,7 @@ --- description: 'Learn about resources supported by Declarative Automation Bundles and how to configure them.' last_update: - date: 2026-04-16 + date: 2026-04-17 --- @@ -8181,7 +8181,7 @@ Additional metadata for registered model. **`Type: Map`** -The pipeline resource allows you to create Spark Declarative [Pipelines](/api/workspace/pipelines/create). For information about pipelines, see [_](/dlt/index.md). For a tutorial that uses the Declarative Automation Bundles template to create a pipeline, see [_](/dev-tools/bundles/pipelines-tutorial.md). +The pipeline resource allows you to create [Spark Declarative Pipelines](/api/workspace/pipelines/create). For information about pipelines, see [_](/dlt/index.md). For a tutorial that uses the Declarative Automation Bundles template to create a pipeline, see [_](/dev-tools/bundles/pipelines-tutorial.md). 
```yaml pipelines: diff --git a/bundle/internal/schema/annotations_openapi_overrides.yml b/bundle/internal/schema/annotations_openapi_overrides.yml index 921c35e55a..9ab06345ed 100644 --- a/bundle/internal/schema/annotations_openapi_overrides.yml +++ b/bundle/internal/schema/annotations_openapi_overrides.yml @@ -328,7 +328,7 @@ github.com/databricks/cli/bundle/config/resources.ModelServingEndpoint: github.com/databricks/cli/bundle/config/resources.Pipeline: "_": "markdown_description": |- - The pipeline resource allows you to create Spark Declarative [Pipelines](/api/workspace/pipelines/create). For information about pipelines, see [_](/dlt/index.md). For a tutorial that uses the Declarative Automation Bundles template to create a pipeline, see [_](/dev-tools/bundles/pipelines-tutorial.md). + The pipeline resource allows you to create [Spark Declarative Pipelines](/api/workspace/pipelines/create). For information about pipelines, see [_](/dlt/index.md). For a tutorial that uses the Declarative Automation Bundles template to create a pipeline, see [_](/dev-tools/bundles/pipelines-tutorial.md). "markdown_examples": |- The following example defines a pipeline with the resource key `hello-pipeline`: diff --git a/bundle/schema/jsonschema.json b/bundle/schema/jsonschema.json index 9284467c8f..24f098581c 100644 --- a/bundle/schema/jsonschema.json +++ b/bundle/schema/jsonschema.json @@ -1322,7 +1322,7 @@ } }, "additionalProperties": false, - "markdownDescription": "The pipeline resource allows you to create Spark Declarative [Pipelines](https://docs.databricks.com/api/workspace/pipelines/create). For information about pipelines, see [link](https://docs.databricks.com/dlt/index.html). For a tutorial that uses the Declarative Automation Bundles template to create a pipeline, see [link](https://docs.databricks.com/dev-tools/bundles/pipelines-tutorial.html)." 
+ "markdownDescription": "The pipeline resource allows you to create [Spark Declarative Pipelines](https://docs.databricks.com/api/workspace/pipelines/create). For information about pipelines, see [link](https://docs.databricks.com/dlt/index.html). For a tutorial that uses the Declarative Automation Bundles template to create a pipeline, see [link](https://docs.databricks.com/dev-tools/bundles/pipelines-tutorial.html)." }, { "type": "string", diff --git a/bundle/schema/jsonschema_for_docs.json b/bundle/schema/jsonschema_for_docs.json index 194fa1697e..aefc835c3b 100644 --- a/bundle/schema/jsonschema_for_docs.json +++ b/bundle/schema/jsonschema_for_docs.json @@ -1309,7 +1309,7 @@ } }, "additionalProperties": false, - "markdownDescription": "The pipeline resource allows you to create Spark Declarative [Pipelines](https://docs.databricks.com/api/workspace/pipelines/create). For information about pipelines, see [link](https://docs.databricks.com/dlt/index.html). For a tutorial that uses the Declarative Automation Bundles template to create a pipeline, see [link](https://docs.databricks.com/dev-tools/bundles/pipelines-tutorial.html)." + "markdownDescription": "The pipeline resource allows you to create [Spark Declarative Pipelines](https://docs.databricks.com/api/workspace/pipelines/create). For information about pipelines, see [link](https://docs.databricks.com/dlt/index.html). For a tutorial that uses the Declarative Automation Bundles template to create a pipeline, see [link](https://docs.databricks.com/dev-tools/bundles/pipelines-tutorial.html)." 
}, "resources.PipelinePermission": { "type": "object", From f6b016ef17682df73781b752fe757077bc783171 Mon Sep 17 00:00:00 2001 From: "Lennart Kats (databricks)" Date: Fri, 17 Apr 2026 13:38:24 +0200 Subject: [PATCH 11/16] Apply suggestions from code review Co-authored-by: Julia Crawford (Databricks) --- acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/README.md | 2 +- .../templates/default-scala/output/my_default_scala/README.md | 2 +- .../templates/default-sql/output/my_default_sql/README.md | 2 +- .../experimental-jobs-as-code/output/my_jobs_as_code/README.md | 2 +- .../templates/dbt-sql/template/{{.project_name}}/README.md.tmpl | 2 +- .../default-scala/template/{{.project_name}}/README.md.tmpl | 2 +- .../default-sql/template/{{.project_name}}/README.md.tmpl | 2 +- .../template/{{.project_name}}/README.md.tmpl | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/README.md b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/README.md index 1793627340..00a91e430c 100644 --- a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/README.md +++ b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/README.md @@ -117,7 +117,7 @@ is optional here.) This deploys everything that's defined for this project. For example, the default template would deploy a job called `[dev yourname] my_dbt_sql_job` to your workspace. -You can find that job by opening your workpace and clicking on **Jobs**. +You can find that job by opening your workpace and clicking on **Jobs & Pipelines**. You can also deploy to your production target directly from the command-line. The warehouse, catalog, and schema for that target are configured in `dbt_profiles/profiles.yml`. 
diff --git a/acceptance/bundle/templates/default-scala/output/my_default_scala/README.md b/acceptance/bundle/templates/default-scala/output/my_default_scala/README.md index 7a02014993..1e5f08854e 100644 --- a/acceptance/bundle/templates/default-scala/output/my_default_scala/README.md +++ b/acceptance/bundle/templates/default-scala/output/my_default_scala/README.md @@ -21,7 +21,7 @@ The 'my_default_scala' project was generated by using the default-scala template This deploys everything that's defined for this project. For example, the default template would deploy a job called `[dev yourname] my_default_scala_job` to your workspace. - You can find that job by opening your workspace and clicking on **Jobs**. + You can find that job by opening your workspace and clicking on **Jobs & Pipelines**. 4. Similarly, to deploy a production copy, type: ``` diff --git a/acceptance/bundle/templates/default-sql/output/my_default_sql/README.md b/acceptance/bundle/templates/default-sql/output/my_default_sql/README.md index 5d55c2cf74..551aae1ccf 100644 --- a/acceptance/bundle/templates/default-sql/output/my_default_sql/README.md +++ b/acceptance/bundle/templates/default-sql/output/my_default_sql/README.md @@ -21,7 +21,7 @@ The 'my_default_sql' project was generated by using the default-sql template. This deploys everything that's defined for this project. For example, the default template would deploy a job called `[dev yourname] my_default_sql_job` to your workspace. - You can find that job by opening your workpace and clicking on **Jobs**. + You can find that job by opening your workpace and clicking on **Jobs & Pipelines**. 4. 
Similarly, to deploy a production copy, type: ``` diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/README.md b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/README.md index 1eddbfdc8e..0c16441e51 100644 --- a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/README.md +++ b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/README.md @@ -40,7 +40,7 @@ The 'my_jobs_as_code' project was generated by using the "Jobs as code" template This deploys everything that's defined for this project. For example, the default template would deploy a job called `[dev yourname] my_jobs_as_code_job` to your workspace. - You can find that job by opening your workspace and clicking on **Jobs**. + You can find that job by opening your workspace and clicking on **Jobs & Pipelines**. 3. Similarly, to deploy a production copy, type: ``` diff --git a/libs/template/templates/dbt-sql/template/{{.project_name}}/README.md.tmpl b/libs/template/templates/dbt-sql/template/{{.project_name}}/README.md.tmpl index 1a98bc1917..efd17bdfb0 100644 --- a/libs/template/templates/dbt-sql/template/{{.project_name}}/README.md.tmpl +++ b/libs/template/templates/dbt-sql/template/{{.project_name}}/README.md.tmpl @@ -117,7 +117,7 @@ is optional here.) This deploys everything that's defined for this project. For example, the default template would deploy a job called `[dev yourname] {{.project_name}}_job` to your workspace. -You can find that job by opening your workpace and clicking on **Jobs**. +You can find that job by opening your workpace and clicking on **Jobs & Pipelines**. You can also deploy to your production target directly from the command-line. The warehouse, catalog, and schema for that target are configured in `dbt_profiles/profiles.yml`. 
diff --git a/libs/template/templates/default-scala/template/{{.project_name}}/README.md.tmpl b/libs/template/templates/default-scala/template/{{.project_name}}/README.md.tmpl index b0d5520713..80115834b1 100644 --- a/libs/template/templates/default-scala/template/{{.project_name}}/README.md.tmpl +++ b/libs/template/templates/default-scala/template/{{.project_name}}/README.md.tmpl @@ -21,7 +21,7 @@ The '{{.project_name}}' project was generated by using the default-scala templat This deploys everything that's defined for this project. For example, the default template would deploy a job called `[dev yourname] {{.project_name}}_job` to your workspace. - You can find that job by opening your workspace and clicking on **Jobs**. + You can find that job by opening your workspace and clicking on **Jobs & Pipelines**. 4. Similarly, to deploy a production copy, type: ``` diff --git a/libs/template/templates/default-sql/template/{{.project_name}}/README.md.tmpl b/libs/template/templates/default-sql/template/{{.project_name}}/README.md.tmpl index 8cb1308aab..28a39f07f1 100644 --- a/libs/template/templates/default-sql/template/{{.project_name}}/README.md.tmpl +++ b/libs/template/templates/default-sql/template/{{.project_name}}/README.md.tmpl @@ -21,7 +21,7 @@ The '{{.project_name}}' project was generated by using the default-sql template. This deploys everything that's defined for this project. For example, the default template would deploy a job called `[dev yourname] {{.project_name}}_job` to your workspace. - You can find that job by opening your workpace and clicking on **Jobs**. + You can find that job by opening your workpace and clicking on **Jobs & Pipelines**. 4. 
Similarly, to deploy a production copy, type: ``` diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/README.md.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/README.md.tmpl index 6e399d0162..94d6d79539 100644 --- a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/README.md.tmpl +++ b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/README.md.tmpl @@ -42,7 +42,7 @@ The '{{.project_name}}' project was generated by using the "Jobs as code" templa This deploys everything that's defined for this project. For example, the default template would deploy a job called `[dev yourname] {{.project_name}}_job` to your workspace. - You can find that job by opening your workspace and clicking on **Jobs**. + You can find that job by opening your workspace and clicking on **Jobs & Pipelines**. 3. Similarly, to deploy a production copy, type: ``` From 3891f3f0146d7d68ba146b6cc4b96449c910296e Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Fri, 17 Apr 2026 13:40:50 +0200 Subject: [PATCH 12/16] Add Lakeflow prefix to remaining Spark Declarative Pipelines references Per Julia's review: prefer "Lakeflow Spark Declarative Pipelines" over bare "Spark Declarative Pipelines" to avoid confusion. 
Co-authored-by: Isaac --- .../output/my_jobs_as_code/pyproject.toml | 2 +- bundle/config/mutator/resourcemutator/run_as.go | 2 +- bundle/docsgen/output/resources.md | 2 +- bundle/internal/schema/annotations_openapi_overrides.yml | 2 +- bundle/schema/jsonschema.json | 2 +- bundle/schema/jsonschema_for_docs.json | 2 +- cmd/workspace/permissions/overrides.go | 2 +- .../template/{{.project_name}}/pyproject.toml.tmpl | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/pyproject.toml b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/pyproject.toml index 06b7cde899..393b59b1c3 100644 --- a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/pyproject.toml +++ b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/pyproject.toml @@ -32,7 +32,7 @@ where = ["src"] dev-dependencies = [ "databricks-bundles==x.y.z", - ## Add code completion support for Spark Declarative Pipelines + ## Add code completion support for Lakeflow Spark Declarative Pipelines # "databricks-dlt", ## databricks-connect can be used to run parts of this project locally. diff --git a/bundle/config/mutator/resourcemutator/run_as.go b/bundle/config/mutator/resourcemutator/run_as.go index 074b6d07db..312c672a5b 100644 --- a/bundle/config/mutator/resourcemutator/run_as.go +++ b/bundle/config/mutator/resourcemutator/run_as.go @@ -228,7 +228,7 @@ func (m *setRunAs) Apply(_ context.Context, b *bundle.Bundle) diag.Diagnostics { return diag.Diagnostics{ { Severity: diag.Warning, - Summary: "You are using the legacy mode of run_as. The support for this mode is experimental and might be removed in a future release of the CLI. 
In order to run the Spark Declarative Pipelines in your DAB as the run_as user this mode changes the owners of the pipelines to the run_as identity, which requires the user deploying the bundle to be a workspace admin, and also a Metastore admin if the pipeline target is in UC.", + Summary: "You are using the legacy mode of run_as. The support for this mode is experimental and might be removed in a future release of the CLI. In order to run the Lakeflow Spark Declarative Pipelines in your DAB as the run_as user this mode changes the owners of the pipelines to the run_as identity, which requires the user deploying the bundle to be a workspace admin, and also a Metastore admin if the pipeline target is in UC.", Paths: []dyn.Path{dyn.MustPathFromString("experimental.use_legacy_run_as")}, Locations: b.Config.GetLocations("experimental.use_legacy_run_as"), }, diff --git a/bundle/docsgen/output/resources.md b/bundle/docsgen/output/resources.md index 5ebb3c0d03..5df9dce757 100644 --- a/bundle/docsgen/output/resources.md +++ b/bundle/docsgen/output/resources.md @@ -8181,7 +8181,7 @@ Additional metadata for registered model. **`Type: Map`** -The pipeline resource allows you to create [Spark Declarative Pipelines](/api/workspace/pipelines/create). For information about pipelines, see [_](/dlt/index.md). For a tutorial that uses the Declarative Automation Bundles template to create a pipeline, see [_](/dev-tools/bundles/pipelines-tutorial.md). +The pipeline resource allows you to create [Lakeflow Spark Declarative Pipelines](/api/workspace/pipelines/create). For information about pipelines, see [_](/dlt/index.md). For a tutorial that uses the Declarative Automation Bundles template to create a pipeline, see [_](/dev-tools/bundles/pipelines-tutorial.md). 
```yaml pipelines: diff --git a/bundle/internal/schema/annotations_openapi_overrides.yml b/bundle/internal/schema/annotations_openapi_overrides.yml index 9ab06345ed..d125bef6e8 100644 --- a/bundle/internal/schema/annotations_openapi_overrides.yml +++ b/bundle/internal/schema/annotations_openapi_overrides.yml @@ -328,7 +328,7 @@ github.com/databricks/cli/bundle/config/resources.ModelServingEndpoint: github.com/databricks/cli/bundle/config/resources.Pipeline: "_": "markdown_description": |- - The pipeline resource allows you to create [Spark Declarative Pipelines](/api/workspace/pipelines/create). For information about pipelines, see [_](/dlt/index.md). For a tutorial that uses the Declarative Automation Bundles template to create a pipeline, see [_](/dev-tools/bundles/pipelines-tutorial.md). + The pipeline resource allows you to create [Lakeflow Spark Declarative Pipelines](/api/workspace/pipelines/create). For information about pipelines, see [_](/dlt/index.md). For a tutorial that uses the Declarative Automation Bundles template to create a pipeline, see [_](/dev-tools/bundles/pipelines-tutorial.md). "markdown_examples": |- The following example defines a pipeline with the resource key `hello-pipeline`: diff --git a/bundle/schema/jsonschema.json b/bundle/schema/jsonschema.json index 24f098581c..edf654f2ac 100644 --- a/bundle/schema/jsonschema.json +++ b/bundle/schema/jsonschema.json @@ -1322,7 +1322,7 @@ } }, "additionalProperties": false, - "markdownDescription": "The pipeline resource allows you to create [Spark Declarative Pipelines](https://docs.databricks.com/api/workspace/pipelines/create). For information about pipelines, see [link](https://docs.databricks.com/dlt/index.html). For a tutorial that uses the Declarative Automation Bundles template to create a pipeline, see [link](https://docs.databricks.com/dev-tools/bundles/pipelines-tutorial.html)." 
+ "markdownDescription": "The pipeline resource allows you to create [Lakeflow Spark Declarative Pipelines](https://docs.databricks.com/api/workspace/pipelines/create). For information about pipelines, see [link](https://docs.databricks.com/dlt/index.html). For a tutorial that uses the Declarative Automation Bundles template to create a pipeline, see [link](https://docs.databricks.com/dev-tools/bundles/pipelines-tutorial.html)." }, { "type": "string", diff --git a/bundle/schema/jsonschema_for_docs.json b/bundle/schema/jsonschema_for_docs.json index aefc835c3b..99247e4fb6 100644 --- a/bundle/schema/jsonschema_for_docs.json +++ b/bundle/schema/jsonschema_for_docs.json @@ -1309,7 +1309,7 @@ } }, "additionalProperties": false, - "markdownDescription": "The pipeline resource allows you to create [Spark Declarative Pipelines](https://docs.databricks.com/api/workspace/pipelines/create). For information about pipelines, see [link](https://docs.databricks.com/dlt/index.html). For a tutorial that uses the Declarative Automation Bundles template to create a pipeline, see [link](https://docs.databricks.com/dev-tools/bundles/pipelines-tutorial.html)." + "markdownDescription": "The pipeline resource allows you to create [Lakeflow Spark Declarative Pipelines](https://docs.databricks.com/api/workspace/pipelines/create). For information about pipelines, see [link](https://docs.databricks.com/dlt/index.html). For a tutorial that uses the Declarative Automation Bundles template to create a pipeline, see [link](https://docs.databricks.com/dev-tools/bundles/pipelines-tutorial.html)." 
}, "resources.PipelinePermission": { "type": "object", diff --git a/cmd/workspace/permissions/overrides.go b/cmd/workspace/permissions/overrides.go index a09f1a745a..5afdbe0c2d 100644 --- a/cmd/workspace/permissions/overrides.go +++ b/cmd/workspace/permissions/overrides.go @@ -15,7 +15,7 @@ func cmdOverride(cmd *cobra.Command) { * **[Cluster policy permissions](:service:clusterpolicies)** — Manage which users can use cluster policies. - * **[Spark Declarative Pipelines permissions](:service:pipelines)** — Manage + * **[Lakeflow Spark Declarative Pipelines permissions](:service:pipelines)** — Manage which users can view, manage, run, cancel, or own a pipeline. * **[Job permissions](:service:jobs)** — Manage which users can view, diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/pyproject.toml.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/pyproject.toml.tmpl index 63b28b0994..b1898234d9 100644 --- a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/pyproject.toml.tmpl +++ b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/pyproject.toml.tmpl @@ -41,7 +41,7 @@ py-modules = [] dev-dependencies = [ "databricks-bundles=={{template "latest_databricks_bundles_version"}}", - ## Add code completion support for Spark Declarative Pipelines + ## Add code completion support for Lakeflow Spark Declarative Pipelines # "databricks-dlt", ## databricks-connect can be used to run parts of this project locally. 
From d699554c1815cfce01c49192537b1ac81b2fe3e5 Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Fri, 17 Apr 2026 13:45:12 +0200 Subject: [PATCH 13/16] Fix template description and regenerate acceptance test outputs - Change "stub (sample) pipeline" to "stub (sample) ETL pipeline" - Regenerate acceptance tests for Lakeflow prefix in run_as warning Co-authored-by: Isaac --- .../bundle/run_as/pipelines_legacy/output.txt | 2 +- .../telemetry/deploy-experimental/output.txt | 63 ++++--------------- .../databricks_template_schema.json | 2 +- 3 files changed, 15 insertions(+), 52 deletions(-) diff --git a/acceptance/bundle/run_as/pipelines_legacy/output.txt b/acceptance/bundle/run_as/pipelines_legacy/output.txt index 1462caf861..0915177bc3 100644 --- a/acceptance/bundle/run_as/pipelines_legacy/output.txt +++ b/acceptance/bundle/run_as/pipelines_legacy/output.txt @@ -1,6 +1,6 @@ >>> [CLI] bundle validate -o json -Warning: You are using the legacy mode of run_as. The support for this mode is experimental and might be removed in a future release of the CLI. In order to run the Spark Declarative Pipelines in your DAB as the run_as user this mode changes the owners of the pipelines to the run_as identity, which requires the user deploying the bundle to be a workspace admin, and also a Metastore admin if the pipeline target is in UC. +Warning: You are using the legacy mode of run_as. The support for this mode is experimental and might be removed in a future release of the CLI. In order to run the Lakeflow Spark Declarative Pipelines in your DAB as the run_as user this mode changes the owners of the pipelines to the run_as identity, which requires the user deploying the bundle to be a workspace admin, and also a Metastore admin if the pipeline target is in UC. 
at experimental.use_legacy_run_as in databricks.yml:8:22 diff --git a/acceptance/bundle/telemetry/deploy-experimental/output.txt b/acceptance/bundle/telemetry/deploy-experimental/output.txt index a65a9b2e8a..49b5722473 100644 --- a/acceptance/bundle/telemetry/deploy-experimental/output.txt +++ b/acceptance/bundle/telemetry/deploy-experimental/output.txt @@ -1,56 +1,19 @@ >>> [CLI] bundle deploy -Warning: You are using the legacy mode of run_as. The support for this mode is experimental and might be removed in a future release of the CLI. In order to run the Spark Declarative Pipelines in your DAB as the run_as user this mode changes the owners of the pipelines to the run_as identity, which requires the user deploying the bundle to be a workspace admin, and also a Metastore admin if the pipeline target is in UC. +Warning: You are using the legacy mode of run_as. The support for this mode is experimental and might be removed in a future release of the CLI. In order to run the Lakeflow Spark Declarative Pipelines in your DAB as the run_as user this mode changes the owners of the pipelines to the run_as identity, which requires the user deploying the bundle to be a workspace admin, and also a Metastore admin if the pipeline target is in UC. at experimental.use_legacy_run_as in databricks.yml:5:22 Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/test-bundle/default/files... -Deploying resources... -Updating deployment state... -Deployment complete! 
- ->>> cat out.requests.txt -{ - "bool_values": [ - { - "key": "local.cache.attempt", - "value": true - }, - { - "key": "local.cache.miss", - "value": true - }, - { - "key": "experimental.use_legacy_run_as", - "value": true - }, - { - "key": "run_as_set", - "value": true - }, - { - "key": "presets_name_prefix_is_set", - "value": false - }, - { - "key": "python_wheel_wrapper_is_set", - "value": false - }, - { - "key": "skip_artifact_cleanup", - "value": false - }, - { - "key": "has_serverless_compute", - "value": false - }, - { - "key": "has_classic_job_compute", - "value": false - }, - { - "key": "has_classic_interactive_compute", - "value": true - } - ] -} +Error: terraform init: exit status 1 + +Error: Failed to install provider + +Error while installing databricks/databricks v1.111.0: provider binary not +found: could not find executable file starting with +terraform-provider-databricks + + + + +Exit code: 1 diff --git a/libs/template/templates/experimental-jobs-as-code/databricks_template_schema.json b/libs/template/templates/experimental-jobs-as-code/databricks_template_schema.json index 4c78ec0635..ace1cafdc5 100644 --- a/libs/template/templates/experimental-jobs-as-code/databricks_template_schema.json +++ b/libs/template/templates/experimental-jobs-as-code/databricks_template_schema.json @@ -20,7 +20,7 @@ "type": "string", "default": "yes", "enum": ["yes", "no"], - "description": "Include a stub (sample) pipeline in '{{.project_name}}{{path_separator}}src'", + "description": "Include a stub (sample) ETL pipeline in '{{.project_name}}{{path_separator}}src'", "order": 3 }, "include_python": { From 220b42774c841d74df3f45b8654334314547f273 Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Fri, 17 Apr 2026 13:50:02 +0200 Subject: [PATCH 14/16] Revert all changes to hidden experimental-jobs-as-code template MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This template is hidden/unlisted — renaming properties would break 
existing users without benefit. Co-authored-by: Isaac --- .../templates/experimental-jobs-as-code/input.json | 2 +- .../templates/experimental-jobs-as-code/output.txt | 2 +- .../output/my_jobs_as_code/README.md | 2 +- .../output/my_jobs_as_code/pyproject.toml | 2 +- .../resources/my_jobs_as_code_pipeline.py | 2 +- .../src/{sdp_pipeline.ipynb => dlt_pipeline.ipynb} | 6 +++--- .../output/my_jobs_as_code/src/notebook.ipynb | 2 +- .../databricks_template_schema.json | 4 ++-- .../experimental-jobs-as-code/template/__preamble.tmpl | 8 ++++---- .../template/{{.project_name}}/README.md.tmpl | 2 +- .../template/{{.project_name}}/pyproject.toml.tmpl | 2 +- .../resources/{{.project_name}}_job.py.tmpl | 10 +++++----- .../resources/{{.project_name}}_pipeline.py.tmpl | 2 +- ...sdp_pipeline.ipynb.tmpl => dlt_pipeline.ipynb.tmpl} | 6 +++--- .../template/{{.project_name}}/src/notebook.ipynb.tmpl | 2 +- 15 files changed, 27 insertions(+), 27 deletions(-) rename acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/{sdp_pipeline.ipynb => dlt_pipeline.ipynb} (89%) rename libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/{sdp_pipeline.ipynb.tmpl => dlt_pipeline.ipynb.tmpl} (91%) diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/input.json b/acceptance/bundle/templates/experimental-jobs-as-code/input.json index b67fd93769..5c5fcfc385 100644 --- a/acceptance/bundle/templates/experimental-jobs-as-code/input.json +++ b/acceptance/bundle/templates/experimental-jobs-as-code/input.json @@ -2,5 +2,5 @@ "project_name": "my_jobs_as_code", "include_notebook": "yes", "include_python": "yes", - "include_sdp": "yes" + "include_dlt": "yes" } diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output.txt b/acceptance/bundle/templates/experimental-jobs-as-code/output.txt index 19931a2f73..089a5c53a4 100644 --- a/acceptance/bundle/templates/experimental-jobs-as-code/output.txt +++ 
b/acceptance/bundle/templates/experimental-jobs-as-code/output.txt @@ -95,7 +95,7 @@ Warning: Ignoring Databricks CLI version constraint for development build. Requi "libraries": [ { "notebook": { - "path": "/Workspace/Users/[USERNAME]/.bundle/my_jobs_as_code/dev/files/src/sdp_pipeline" + "path": "/Workspace/Users/[USERNAME]/.bundle/my_jobs_as_code/dev/files/src/dlt_pipeline" } } ], diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/README.md b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/README.md index 0c16441e51..6bfac07da0 100644 --- a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/README.md +++ b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/README.md @@ -40,7 +40,7 @@ The 'my_jobs_as_code' project was generated by using the "Jobs as code" template This deploys everything that's defined for this project. For example, the default template would deploy a job called `[dev yourname] my_jobs_as_code_job` to your workspace. - You can find that job by opening your workspace and clicking on **Jobs & Pipelines**. + You can find that job by opening your workspace and clicking on **Workflows**. 3. 
Similarly, to deploy a production copy, type: ``` diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/pyproject.toml b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/pyproject.toml index 393b59b1c3..4478dace35 100644 --- a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/pyproject.toml +++ b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/pyproject.toml @@ -32,7 +32,7 @@ where = ["src"] dev-dependencies = [ "databricks-bundles==x.y.z", - ## Add code completion support for Lakeflow Spark Declarative Pipelines + ## Add code completion support for DLT # "databricks-dlt", ## databricks-connect can be used to run parts of this project locally. diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/resources/my_jobs_as_code_pipeline.py b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/resources/my_jobs_as_code_pipeline.py index 5e86c5c232..9d83e573a9 100644 --- a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/resources/my_jobs_as_code_pipeline.py +++ b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/resources/my_jobs_as_code_pipeline.py @@ -9,7 +9,7 @@ "libraries": [ { "notebook": { - "path": "src/sdp_pipeline.ipynb", + "path": "src/dlt_pipeline.ipynb", }, }, ], diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/sdp_pipeline.ipynb b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/dlt_pipeline.ipynb similarity index 89% rename from acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/sdp_pipeline.ipynb rename to acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/dlt_pipeline.ipynb index 7a3350c1f9..d651c00422 100644 --- 
a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/sdp_pipeline.ipynb +++ b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/dlt_pipeline.ipynb @@ -12,9 +12,9 @@ } }, "source": [ - "# SDP pipeline\n", + "# DLT pipeline\n", "\n", - "This pipeline definition is executed using a pipeline defined in resources/my_jobs_as_code.pipeline.yml." + "This Lakeflow Spark Declarative Pipeline definition is executed using a pipeline defined in resources/my_jobs_as_code.pipeline.yml." ] }, { @@ -72,7 +72,7 @@ "notebookMetadata": { "pythonIndentUnit": 2 }, - "notebookName": "sdp_pipeline", + "notebookName": "dlt_pipeline", "widgets": {} }, "kernelspec": { diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/notebook.ipynb b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/notebook.ipynb index 247706b44f..227c7cc558 100644 --- a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/notebook.ipynb +++ b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/notebook.ipynb @@ -14,7 +14,7 @@ "source": [ "# Default notebook\n", "\n", - "This default notebook is executed using Databricks Jobs as defined in resources/my_jobs_as_code.job.yml." + "This default notebook is executed using Databricks Workflows as defined in resources/my_jobs_as_code.job.yml." 
] }, { diff --git a/libs/template/templates/experimental-jobs-as-code/databricks_template_schema.json b/libs/template/templates/experimental-jobs-as-code/databricks_template_schema.json index ace1cafdc5..574ce59259 100644 --- a/libs/template/templates/experimental-jobs-as-code/databricks_template_schema.json +++ b/libs/template/templates/experimental-jobs-as-code/databricks_template_schema.json @@ -16,11 +16,11 @@ "description": "Include a stub (sample) notebook in '{{.project_name}}{{path_separator}}src'", "order": 2 }, - "include_sdp": { + "include_dlt": { "type": "string", "default": "yes", "enum": ["yes", "no"], - "description": "Include a stub (sample) ETL pipeline in '{{.project_name}}{{path_separator}}src'", + "description": "Include a stub (sample) Delta Live Tables pipeline in '{{.project_name}}{{path_separator}}src'", "order": 3 }, "include_python": { diff --git a/libs/template/templates/experimental-jobs-as-code/template/__preamble.tmpl b/libs/template/templates/experimental-jobs-as-code/template/__preamble.tmpl index d2bbe23e2d..bd284b0252 100644 --- a/libs/template/templates/experimental-jobs-as-code/template/__preamble.tmpl +++ b/libs/template/templates/experimental-jobs-as-code/template/__preamble.tmpl @@ -4,7 +4,7 @@ This file only contains template directives; it is skipped for the actual output {{skip "__preamble"}} -{{$notSDP := not (eq .include_sdp "yes")}} +{{$notDLT := not (eq .include_dlt "yes")}} {{$notNotebook := not (eq .include_notebook "yes")}} {{$notPython := not (eq .include_python "yes")}} @@ -13,8 +13,8 @@ This file only contains template directives; it is skipped for the actual output {{skip "{{.project_name}}/tests/main_test.py"}} {{end}} -{{if $notSDP}} - {{skip "{{.project_name}}/src/sdp_pipeline.ipynb"}} +{{if $notDLT}} + {{skip "{{.project_name}}/src/dlt_pipeline.ipynb"}} {{skip "{{.project_name}}/resources/{{.project_name}}_pipeline.py"}} {{end}} @@ -22,7 +22,7 @@ This file only contains template directives; it is skipped for 
the actual output {{skip "{{.project_name}}/src/notebook.ipynb"}} {{end}} -{{if (and $notSDP $notNotebook $notPython)}} +{{if (and $notDLT $notNotebook $notPython)}} {{skip "{{.project_name}}/resources/{{.project_name}}_job.py"}} {{else}} {{skip "{{.project_name}}/resources/.gitkeep"}} diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/README.md.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/README.md.tmpl index 94d6d79539..37e7040846 100644 --- a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/README.md.tmpl +++ b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/README.md.tmpl @@ -42,7 +42,7 @@ The '{{.project_name}}' project was generated by using the "Jobs as code" templa This deploys everything that's defined for this project. For example, the default template would deploy a job called `[dev yourname] {{.project_name}}_job` to your workspace. - You can find that job by opening your workspace and clicking on **Jobs & Pipelines**. + You can find that job by opening your workspace and clicking on **Workflows**. 3. Similarly, to deploy a production copy, type: ``` diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/pyproject.toml.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/pyproject.toml.tmpl index b1898234d9..4cb0e6d9ee 100644 --- a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/pyproject.toml.tmpl +++ b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/pyproject.toml.tmpl @@ -41,7 +41,7 @@ py-modules = [] dev-dependencies = [ "databricks-bundles=={{template "latest_databricks_bundles_version"}}", - ## Add code completion support for Lakeflow Spark Declarative Pipelines + ## Add code completion support for DLT # "databricks-dlt", ## databricks-connect can be used to run parts of this project locally. 
diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/resources/{{.project_name}}_job.py.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/resources/{{.project_name}}_job.py.tmpl index e0f3322fcf..ff554c45c5 100644 --- a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/resources/{{.project_name}}_job.py.tmpl +++ b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/resources/{{.project_name}}_job.py.tmpl @@ -1,11 +1,11 @@ -{{$include_sdp := "no" -}} +{{$include_dlt := "no" -}} from databricks.bundles.jobs import Job """ The main job for {{.project_name}}. -{{- /* Clarify what this job is for for SDP-only users. */}} -{{if and (eq $include_sdp "yes") (and (eq .include_notebook "no") (eq .include_python "no")) -}} +{{- /* Clarify what this job is for for DLT-only users. */}} +{{if and (eq $include_dlt "yes") (and (eq .include_notebook "no") (eq .include_python "no")) -}} This job runs {{.project_name}}_pipeline on a schedule. {{end -}} """ @@ -37,7 +37,7 @@ This job runs {{.project_name}}_pipeline on a schedule. }, }, {{- end -}} - {{- if (eq $include_sdp "yes") -}} + {{- if (eq $include_dlt "yes") -}} {{- "\n " -}} { "task_key": "refresh_pipeline", @@ -58,7 +58,7 @@ This job runs {{.project_name}}_pipeline on a schedule. 
{{- "\n " -}} { "task_key": "main_task", - {{- if (eq $include_sdp "yes") }} + {{- if (eq $include_dlt "yes") }} "depends_on": [ { "task_key": "refresh_pipeline", diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/resources/{{.project_name}}_pipeline.py.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/resources/{{.project_name}}_pipeline.py.tmpl index 73c30ab543..c8579ae659 100644 --- a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/resources/{{.project_name}}_pipeline.py.tmpl +++ b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/resources/{{.project_name}}_pipeline.py.tmpl @@ -13,7 +13,7 @@ from databricks.bundles.pipelines import Pipeline "libraries": [ { "notebook": { - "path": "src/sdp_pipeline.ipynb", + "path": "src/dlt_pipeline.ipynb", }, }, ], diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/sdp_pipeline.ipynb.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/dlt_pipeline.ipynb.tmpl similarity index 91% rename from libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/sdp_pipeline.ipynb.tmpl rename to libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/dlt_pipeline.ipynb.tmpl index ebccec33ae..62c4fb1f12 100644 --- a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/sdp_pipeline.ipynb.tmpl +++ b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/dlt_pipeline.ipynb.tmpl @@ -12,9 +12,9 @@ } }, "source": [ - "# SDP pipeline\n", + "# DLT pipeline\n", "\n", - "This pipeline definition is executed using a pipeline defined in resources/{{.project_name}}.pipeline.yml." + "This Lakeflow Spark Declarative Pipeline definition is executed using a pipeline defined in resources/{{.project_name}}.pipeline.yml." 
] }, { @@ -86,7 +86,7 @@ "notebookMetadata": { "pythonIndentUnit": 2 }, - "notebookName": "sdp_pipeline", + "notebookName": "dlt_pipeline", "widgets": {} }, "kernelspec": { diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/notebook.ipynb.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/notebook.ipynb.tmpl index fbc12f872e..6782a053ba 100644 --- a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/notebook.ipynb.tmpl +++ b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/notebook.ipynb.tmpl @@ -14,7 +14,7 @@ "source": [ "# Default notebook\n", "\n", - "This default notebook is executed using Databricks Jobs as defined in resources/{{.project_name}}.job.yml." + "This default notebook is executed using Databricks Workflows as defined in resources/{{.project_name}}.job.yml." ] }, { From 1aa1a318d301d2991e0c3ca2e6cf2389666c4696 Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Fri, 17 Apr 2026 13:55:24 +0200 Subject: [PATCH 15/16] Simplify pipeline references: use generic 'pipeline' instead of product name - run_as warning: "pipelines in your DABs project" instead of product name - Pipeline resource description: "create pipelines" instead of product name - Regenerate schema, docs, and acceptance test outputs Co-authored-by: Isaac --- acceptance/bundle/run_as/pipelines_legacy/output.txt | 2 +- acceptance/bundle/telemetry/deploy-experimental/output.txt | 2 +- bundle/config/mutator/resourcemutator/run_as.go | 2 +- bundle/docsgen/output/resources.md | 2 +- bundle/internal/schema/annotations_openapi_overrides.yml | 2 +- bundle/schema/jsonschema.json | 2 +- bundle/schema/jsonschema_for_docs.json | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/acceptance/bundle/run_as/pipelines_legacy/output.txt b/acceptance/bundle/run_as/pipelines_legacy/output.txt index 0915177bc3..cfd58c5e86 100644 --- 
a/acceptance/bundle/run_as/pipelines_legacy/output.txt +++ b/acceptance/bundle/run_as/pipelines_legacy/output.txt @@ -1,6 +1,6 @@ >>> [CLI] bundle validate -o json -Warning: You are using the legacy mode of run_as. The support for this mode is experimental and might be removed in a future release of the CLI. In order to run the Lakeflow Spark Declarative Pipelines in your DAB as the run_as user this mode changes the owners of the pipelines to the run_as identity, which requires the user deploying the bundle to be a workspace admin, and also a Metastore admin if the pipeline target is in UC. +Warning: You are using the legacy mode of run_as. The support for this mode is experimental and might be removed in a future release of the CLI. In order to run the pipelines in your DABs project as the run_as user this mode changes the owners of the pipelines to the run_as identity, which requires the user deploying the bundle to be a workspace admin, and also a Metastore admin if the pipeline target is in UC. at experimental.use_legacy_run_as in databricks.yml:8:22 diff --git a/acceptance/bundle/telemetry/deploy-experimental/output.txt b/acceptance/bundle/telemetry/deploy-experimental/output.txt index 49b5722473..44dfd68da7 100644 --- a/acceptance/bundle/telemetry/deploy-experimental/output.txt +++ b/acceptance/bundle/telemetry/deploy-experimental/output.txt @@ -1,6 +1,6 @@ >>> [CLI] bundle deploy -Warning: You are using the legacy mode of run_as. The support for this mode is experimental and might be removed in a future release of the CLI. In order to run the Lakeflow Spark Declarative Pipelines in your DAB as the run_as user this mode changes the owners of the pipelines to the run_as identity, which requires the user deploying the bundle to be a workspace admin, and also a Metastore admin if the pipeline target is in UC. +Warning: You are using the legacy mode of run_as. The support for this mode is experimental and might be removed in a future release of the CLI. 
In order to run the pipelines in your DABs project as the run_as user this mode changes the owners of the pipelines to the run_as identity, which requires the user deploying the bundle to be a workspace admin, and also a Metastore admin if the pipeline target is in UC. at experimental.use_legacy_run_as in databricks.yml:5:22 diff --git a/bundle/config/mutator/resourcemutator/run_as.go b/bundle/config/mutator/resourcemutator/run_as.go index 312c672a5b..1bed0b9be1 100644 --- a/bundle/config/mutator/resourcemutator/run_as.go +++ b/bundle/config/mutator/resourcemutator/run_as.go @@ -228,7 +228,7 @@ func (m *setRunAs) Apply(_ context.Context, b *bundle.Bundle) diag.Diagnostics { return diag.Diagnostics{ { Severity: diag.Warning, - Summary: "You are using the legacy mode of run_as. The support for this mode is experimental and might be removed in a future release of the CLI. In order to run the Lakeflow Spark Declarative Pipelines in your DAB as the run_as user this mode changes the owners of the pipelines to the run_as identity, which requires the user deploying the bundle to be a workspace admin, and also a Metastore admin if the pipeline target is in UC.", + Summary: "You are using the legacy mode of run_as. The support for this mode is experimental and might be removed in a future release of the CLI. 
In order to run the pipelines in your DABs project as the run_as user this mode changes the owners of the pipelines to the run_as identity, which requires the user deploying the bundle to be a workspace admin, and also a Metastore admin if the pipeline target is in UC.", Paths: []dyn.Path{dyn.MustPathFromString("experimental.use_legacy_run_as")}, Locations: b.Config.GetLocations("experimental.use_legacy_run_as"), }, diff --git a/bundle/docsgen/output/resources.md b/bundle/docsgen/output/resources.md index 5df9dce757..8277b4c30d 100644 --- a/bundle/docsgen/output/resources.md +++ b/bundle/docsgen/output/resources.md @@ -8181,7 +8181,7 @@ Additional metadata for registered model. **`Type: Map`** -The pipeline resource allows you to create [Lakeflow Spark Declarative Pipelines](/api/workspace/pipelines/create). For information about pipelines, see [_](/dlt/index.md). For a tutorial that uses the Declarative Automation Bundles template to create a pipeline, see [_](/dev-tools/bundles/pipelines-tutorial.md). +This resource allows you to create [pipelines](/api/workspace/pipelines/create). For information about pipelines, see [_](/dlt/index.md). For a tutorial that uses the Declarative Automation Bundles template to create a pipeline, see [_](/dev-tools/bundles/pipelines-tutorial.md). ```yaml pipelines: diff --git a/bundle/internal/schema/annotations_openapi_overrides.yml b/bundle/internal/schema/annotations_openapi_overrides.yml index d125bef6e8..68df739096 100644 --- a/bundle/internal/schema/annotations_openapi_overrides.yml +++ b/bundle/internal/schema/annotations_openapi_overrides.yml @@ -328,7 +328,7 @@ github.com/databricks/cli/bundle/config/resources.ModelServingEndpoint: github.com/databricks/cli/bundle/config/resources.Pipeline: "_": "markdown_description": |- - The pipeline resource allows you to create [Lakeflow Spark Declarative Pipelines](/api/workspace/pipelines/create). For information about pipelines, see [_](/dlt/index.md). 
For a tutorial that uses the Declarative Automation Bundles template to create a pipeline, see [_](/dev-tools/bundles/pipelines-tutorial.md). + This resource allows you to create [pipelines](/api/workspace/pipelines/create). For information about pipelines, see [_](/dlt/index.md). For a tutorial that uses the Declarative Automation Bundles template to create a pipeline, see [_](/dev-tools/bundles/pipelines-tutorial.md). "markdown_examples": |- The following example defines a pipeline with the resource key `hello-pipeline`: diff --git a/bundle/schema/jsonschema.json b/bundle/schema/jsonschema.json index edf654f2ac..c930e30d62 100644 --- a/bundle/schema/jsonschema.json +++ b/bundle/schema/jsonschema.json @@ -1322,7 +1322,7 @@ } }, "additionalProperties": false, - "markdownDescription": "The pipeline resource allows you to create [Lakeflow Spark Declarative Pipelines](https://docs.databricks.com/api/workspace/pipelines/create). For information about pipelines, see [link](https://docs.databricks.com/dlt/index.html). For a tutorial that uses the Declarative Automation Bundles template to create a pipeline, see [link](https://docs.databricks.com/dev-tools/bundles/pipelines-tutorial.html)." + "markdownDescription": "This resource allows you to create [pipelines](https://docs.databricks.com/api/workspace/pipelines/create). For information about pipelines, see [link](https://docs.databricks.com/dlt/index.html). For a tutorial that uses the Declarative Automation Bundles template to create a pipeline, see [link](https://docs.databricks.com/dev-tools/bundles/pipelines-tutorial.html)." 
}, { "type": "string", diff --git a/bundle/schema/jsonschema_for_docs.json b/bundle/schema/jsonschema_for_docs.json index 99247e4fb6..6e6e3116e5 100644 --- a/bundle/schema/jsonschema_for_docs.json +++ b/bundle/schema/jsonschema_for_docs.json @@ -1309,7 +1309,7 @@ } }, "additionalProperties": false, - "markdownDescription": "The pipeline resource allows you to create [Lakeflow Spark Declarative Pipelines](https://docs.databricks.com/api/workspace/pipelines/create). For information about pipelines, see [link](https://docs.databricks.com/dlt/index.html). For a tutorial that uses the Declarative Automation Bundles template to create a pipeline, see [link](https://docs.databricks.com/dev-tools/bundles/pipelines-tutorial.html)." + "markdownDescription": "This resource allows you to create [pipelines](https://docs.databricks.com/api/workspace/pipelines/create). For information about pipelines, see [link](https://docs.databricks.com/dlt/index.html). For a tutorial that uses the Declarative Automation Bundles template to create a pipeline, see [link](https://docs.databricks.com/dev-tools/bundles/pipelines-tutorial.html)." }, "resources.PipelinePermission": { "type": "object", From 8eb6e9de1f298dc07b540339fbaeca61669e3710 Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Fri, 17 Apr 2026 14:10:35 +0200 Subject: [PATCH 16/16] Fix deploy-experimental acceptance test output for direct engine variant Co-authored-by: Isaac --- .../telemetry/deploy-experimental/output.txt | 61 +++++++++++++++---- 1 file changed, 49 insertions(+), 12 deletions(-) diff --git a/acceptance/bundle/telemetry/deploy-experimental/output.txt b/acceptance/bundle/telemetry/deploy-experimental/output.txt index 44dfd68da7..cf7a2358da 100644 --- a/acceptance/bundle/telemetry/deploy-experimental/output.txt +++ b/acceptance/bundle/telemetry/deploy-experimental/output.txt @@ -5,15 +5,52 @@ Warning: You are using the legacy mode of run_as. 
The support for this mode is e in databricks.yml:5:22 Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/test-bundle/default/files... -Error: terraform init: exit status 1 - -Error: Failed to install provider - -Error while installing databricks/databricks v1.111.0: provider binary not -found: could not find executable file starting with -terraform-provider-databricks - - - - -Exit code: 1 +Deploying resources... +Updating deployment state... +Deployment complete! + +>>> cat out.requests.txt +{ + "bool_values": [ + { + "key": "local.cache.attempt", + "value": true + }, + { + "key": "local.cache.miss", + "value": true + }, + { + "key": "experimental.use_legacy_run_as", + "value": true + }, + { + "key": "run_as_set", + "value": true + }, + { + "key": "presets_name_prefix_is_set", + "value": false + }, + { + "key": "python_wheel_wrapper_is_set", + "value": false + }, + { + "key": "skip_artifact_cleanup", + "value": false + }, + { + "key": "has_serverless_compute", + "value": false + }, + { + "key": "has_classic_job_compute", + "value": false + }, + { + "key": "has_classic_interactive_compute", + "value": true + } + ] +}