diff --git a/.codegen.json b/.codegen.json index 175688cecc..b8bb2b3ec5 100644 --- a/.codegen.json +++ b/.codegen.json @@ -7,7 +7,6 @@ "python/databricks/bundles/version.py": "__version__ = \"$VERSION\"", "python/pyproject.toml": "version = \"$VERSION\"", "python/uv.lock": "name = \"databricks-bundles\"\nversion = \"$VERSION\"", - "libs/template/templates/experimental-jobs-as-code/library/versions.tmpl": "{{define \"latest_databricks_bundles_version\" -}}$VERSION{{- end}}", "libs/template/templates/default/library/versions.tmpl": "{{define \"latest_databricks_bundles_version\" -}}$VERSION{{- end}}" }, "toolchain": { diff --git a/NEXT_CHANGELOG.md b/NEXT_CHANGELOG.md index 0e4d6de08f..52d6128834 100644 --- a/NEXT_CHANGELOG.md +++ b/NEXT_CHANGELOG.md @@ -10,6 +10,8 @@ ### Bundles +* Remove `experimental-jobs-as-code` template, superseded by `pydabs` ([#4999](https://github.com/databricks/cli/pull/4999)). + ### Dependency updates * Bump `github.com/databricks/databricks-sdk-go` from v0.126.0 to v0.127.0 ([#4984](https://github.com/databricks/cli/pull/4984)). diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/input.json b/acceptance/bundle/templates/experimental-jobs-as-code/input.json deleted file mode 100644 index 5c5fcfc385..0000000000 --- a/acceptance/bundle/templates/experimental-jobs-as-code/input.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "project_name": "my_jobs_as_code", - "include_notebook": "yes", - "include_python": "yes", - "include_dlt": "yes" -} diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/out.test.toml b/acceptance/bundle/templates/experimental-jobs-as-code/out.test.toml deleted file mode 100644 index d560f1de04..0000000000 --- a/acceptance/bundle/templates/experimental-jobs-as-code/out.test.toml +++ /dev/null @@ -1,5 +0,0 @@ -Local = true -Cloud = false - -[EnvMatrix] - DATABRICKS_BUNDLE_ENGINE = ["terraform", "direct"] diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output.txt b/acceptance/bundle/templates/experimental-jobs-as-code/output.txt deleted file mode 100644 index 089a5c53a4..0000000000 --- a/acceptance/bundle/templates/experimental-jobs-as-code/output.txt +++ /dev/null @@ -1,118 +0,0 @@ - ->>> [CLI] bundle init experimental-jobs-as-code --config-file ./input.json --output-dir output - -Welcome to (EXPERIMENTAL) "Jobs as code" template for Declarative Automation Bundles! -Workspace to use (auto-detected, edit in 'my_jobs_as_code/databricks.yml'): [DATABRICKS_URL] - -✨ Your new project has been created in the 'my_jobs_as_code' directory! - -Please refer to the README.md file for "getting started" instructions. -See also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html. - ->>> [CLI] bundle validate -t dev --output json -Warning: Ignoring Databricks CLI version constraint for development build. Required: >= 0.248.0, current: [DEV_VERSION] - -{ - "jobs": { - "my_jobs_as_code_job": { - "deployment": { - "kind": "BUNDLE", - "metadata_file_path": "/Workspace/Users/[USERNAME]/.bundle/my_jobs_as_code/dev/state/metadata.json" - }, - "edit_mode": "UI_LOCKED", - "format": "MULTI_TASK", - "job_clusters": [ - { - "job_cluster_key": "job_cluster", - "new_cluster": { - "autoscale": { - "max_workers": 4, - "min_workers": 1 - }, - "data_security_mode": "SINGLE_USER", - "node_type_id": "[NODE_TYPE_ID]", - "spark_version": "15.4.x-scala2.12" - } - } - ], - "max_concurrent_runs": 4, - "name": "[dev [USERNAME]] my_jobs_as_code_job", - "queue": { - "enabled": true - }, - "tags": { - "dev": "[USERNAME]" - }, - "tasks": [ - { - "depends_on": [ - { - "task_key": "notebook_task" - } - ], - "job_cluster_key": "job_cluster", - "libraries": [ - { - "whl": "dist/*.whl" - } - ], - "python_wheel_task": { - "entry_point": "main", - "package_name": "my_jobs_as_code" - }, - "task_key": "main_task" - }, - { - "job_cluster_key": "job_cluster", - "notebook_task": { - "notebook_path": "/Workspace/Users/[USERNAME]/.bundle/my_jobs_as_code/dev/files/src/notebook" - }, - "task_key": "notebook_task" - } - ], - "trigger": { - "pause_status": "PAUSED", - "periodic": { - "interval": 1, - "unit": "DAYS" - } - } - } - }, - "pipelines": { - "my_jobs_as_code_pipeline": { - "catalog": "catalog_name", - "channel": "CURRENT", - "configuration": { - "bundle.sourcePath": "/Workspace/Users/[USERNAME]/.bundle/my_jobs_as_code/dev/files/src" - }, - "deployment": { - "kind": "BUNDLE", - "metadata_file_path": "/Workspace/Users/[USERNAME]/.bundle/my_jobs_as_code/dev/state/metadata.json" - }, - "development": true, - "edition": "ADVANCED", - "libraries": [ - { - "notebook": { - "path": "/Workspace/Users/[USERNAME]/.bundle/my_jobs_as_code/dev/files/src/dlt_pipeline" - } - } - ], - "name": "[dev [USERNAME]] my_jobs_as_code_pipeline", - "tags": { - "dev": "[USERNAME]" - }, - "target": "my_jobs_as_code_dev" - } - } -} - ->>> unzip -Z1 dist/my_jobs_as_code-0.0.1-py3-none-any.whl -my_jobs_as_code/__init__.py -my_jobs_as_code/main.py -my_jobs_as_code-0.0.1.dist-info/METADATA -my_jobs_as_code-0.0.1.dist-info/WHEEL -my_jobs_as_code-0.0.1.dist-info/entry_points.txt -my_jobs_as_code-0.0.1.dist-info/top_level.txt -my_jobs_as_code-0.0.1.dist-info/RECORD diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/README.md b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/README.md deleted file mode 100644 index 6bfac07da0..0000000000 --- a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/README.md +++ /dev/null @@ -1,58 +0,0 @@ -# my_jobs_as_code - -The 'my_jobs_as_code' project was generated by using the "Jobs as code" template. - -## Prerequisites - -1. Install Databricks CLI 0.238 or later. - See [Install or update the Databricks CLI](https://docs.databricks.com/en/dev-tools/cli/install.html). - -2. Install uv. See [Installing uv](https://docs.astral.sh/uv/getting-started/installation/). - We use uv to create a virtual environment and install the required dependencies. - -3. Authenticate to your Databricks workspace if you have not done so already: - ``` - $ databricks configure - ``` - -4. Optionally, install developer tools such as the Databricks extension for Visual Studio Code from - https://docs.databricks.com/dev-tools/vscode-ext.html. Or read the "getting started" documentation for - **Databricks Connect** for instructions on running the included Python code from a different IDE. - -5. For documentation on the Declarative Automation Bundles format used - for this project, and for CI/CD configuration, see - https://docs.databricks.com/dev-tools/bundles/index.html. - -## Deploy and run jobs - -1. Create a new virtual environment and install the required dependencies: - ``` - $ uv sync - ``` - -2. To deploy the bundle to the development target: - ``` - $ databricks bundle deploy --target dev - ``` - - *(Note that "dev" is the default target, so the `--target` parameter is optional here.)* - - This deploys everything that's defined for this project. - For example, the default template would deploy a job called - `[dev yourname] my_jobs_as_code_job` to your workspace. - You can find that job by opening your workspace and clicking on **Workflows**. - -3. Similarly, to deploy a production copy, type: - ``` - $ databricks bundle deploy --target prod - ``` - - Note that the default job from the template has a schedule that runs every day - (defined in resources/my_jobs_as_code_job.py). The schedule - is paused when deploying in development mode (see [Databricks Asset Bundle deployment modes]( - https://docs.databricks.com/dev-tools/bundles/deployment-modes.html)). - -4. To run a job: - ``` - $ databricks bundle run - ``` diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/databricks.yml b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/databricks.yml deleted file mode 100644 index b910ecd913..0000000000 --- a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/databricks.yml +++ /dev/null @@ -1,48 +0,0 @@ -# This is a Databricks asset bundle definition for my_jobs_as_code. -# See https://docs.databricks.com/dev-tools/bundles/index.html for documentation. -bundle: - name: my_jobs_as_code - uuid: [UUID] - databricks_cli_version: ">= 0.248.0" - -python: - # Activate virtual environment before loading resources defined in Python. - # If disabled, defaults to using the Python interpreter available in the current shell. - venv_path: .venv - # Functions called to load resources defined in Python. See resources/__init__.py - resources: - - "resources:load_resources" - -artifacts: - default: - type: whl - path: . - # We use timestamp as Local version identifier (https://peps.python.org/pep-0440/#local-version-identifiers.) - # to ensure that changes to wheel package are picked up when used on all-purpose clusters - build: LOCAL_VERSION=$(date +%Y%m%d.%H%M%S) uv build - -include: - - resources/*.yml - -targets: - dev: - # The default target uses 'mode: development' to create a development copy. - # - Deployed resources get prefixed with '[dev my_user_name]' - # - Any job schedules and triggers are paused by default. - # See also https://docs.databricks.com/dev-tools/bundles/deployment-modes.html. - mode: development - default: true - workspace: - host: [DATABRICKS_URL] - - prod: - mode: production - workspace: - host: [DATABRICKS_URL] - # We explicitly specify /Workspace/Users/[USERNAME] to make sure we only have a single copy. - root_path: /Workspace/Users/[USERNAME]/.bundle/${bundle.name}/${bundle.target} - permissions: - - user_name: [USERNAME] - level: CAN_MANAGE - run_as: - user_name: [USERNAME] diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/fixtures/.gitkeep b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/fixtures/.gitkeep deleted file mode 100644 index fa25d2745e..0000000000 --- a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/fixtures/.gitkeep +++ /dev/null @@ -1,22 +0,0 @@ -# Fixtures - -This folder is reserved for fixtures, such as CSV files. - -Below is an example of how to load fixtures as a data frame: - -``` -import pandas as pd -import os - -def get_absolute_path(*relative_parts): - if 'dbutils' in globals(): - base_dir = os.path.dirname(dbutils.notebook.entry_point.getDbutils().notebook().getContext().notebookPath().get()) # type: ignore - path = os.path.normpath(os.path.join(base_dir, *relative_parts)) - return path if path.startswith("/Workspace") else "/Workspace" + path - else: - return os.path.join(*relative_parts) - -csv_file = get_absolute_path("..", "fixtures", "mycsv.csv") -df = pd.read_csv(csv_file) -display(df) -``` diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/out.gitignore b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/out.gitignore deleted file mode 100644 index 0dab7f4995..0000000000 --- a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/out.gitignore +++ /dev/null @@ -1,8 +0,0 @@ -.databricks/ -build/ -dist/ -__pycache__/ -*.egg-info -.venv/ -scratch/** -!scratch/README.md diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/pyproject.toml b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/pyproject.toml deleted file mode 100644 index 4478dace35..0000000000 --- a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/pyproject.toml +++ /dev/null @@ -1,49 +0,0 @@ -[build-system] -requires = ["setuptools>=61.0"] -build-backend = "setuptools.build_meta" - -[project] -name = "my_jobs_as_code" -requires-python = ">=3.10" -description = "wheel file based on my_jobs_as_code" - -# Dependencies in case the output wheel file is used as a library dependency. -# For defining dependencies, when this package is used in Databricks, see: -# https://docs.databricks.com/dev-tools/bundles/library-dependencies.html -# -# Example: -# dependencies = [ -# "requests==x.y.z", -# ] -dependencies = [ -] - -# see setup.py -dynamic = ["version"] - -[project.entry-points.packages] -main = "my_jobs_as_code.main:main" - -[tool.setuptools.packages.find] -where = ["src"] - -[tool.uv] -## Dependencies for local development -dev-dependencies = [ - "databricks-bundles==x.y.z", - - ## Add code completion support for DLT - # "databricks-dlt", - - ## databricks-connect can be used to run parts of this project locally. - ## See https://docs.databricks.com/dev-tools/databricks-connect.html. - ## - ## Uncomment line below to install a version of db-connect that corresponds to - ## the Databricks Runtime version used for this project. - # "databricks-connect>=15.4,<15.5", -] - -override-dependencies = [ - # pyspark package conflicts with 'databricks-connect' - "pyspark; sys_platform == 'never'", -] diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/resources/__init__.py b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/resources/__init__.py deleted file mode 100644 index fbcb9dc5f0..0000000000 --- a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/resources/__init__.py +++ /dev/null @@ -1,16 +0,0 @@ -from databricks.bundles.core import ( - Bundle, - Resources, - load_resources_from_current_package_module, -) - - -def load_resources(bundle: Bundle) -> Resources: - """ - 'load_resources' function is referenced in databricks.yml and is responsible for loading - bundle resources defined in Python code. This function is called by Databricks CLI during - bundle deployment. After deployment, this function is not used. - """ - - # the default implementation loads all Python files in 'resources' directory - return load_resources_from_current_package_module() diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/resources/my_jobs_as_code_job.py b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/resources/my_jobs_as_code_job.py deleted file mode 100644 index 2407a95462..0000000000 --- a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/resources/my_jobs_as_code_job.py +++ /dev/null @@ -1,68 +0,0 @@ -from databricks.bundles.jobs import Job - -""" -The main job for my_jobs_as_code. -""" - - -my_jobs_as_code_job = Job.from_dict( - { - "name": "my_jobs_as_code_job", - "trigger": { - # Run this job every day, exactly one day from the last run; see https://docs.databricks.com/api/workspace/jobs/create#trigger - "periodic": { - "interval": 1, - "unit": "DAYS", - }, - }, - # "email_notifications": { - # "on_failure": [ - # "[USERNAME]", - # ], - # }, - "tasks": [ - { - "task_key": "notebook_task", - "job_cluster_key": "job_cluster", - "notebook_task": { - "notebook_path": "src/notebook.ipynb", - }, - }, - { - "task_key": "main_task", - "depends_on": [ - { - "task_key": "notebook_task", - }, - ], - "job_cluster_key": "job_cluster", - "python_wheel_task": { - "package_name": "my_jobs_as_code", - "entry_point": "main", - }, - "libraries": [ - # By default we just include the .whl file generated for the my_jobs_as_code package. - # See https://docs.databricks.com/dev-tools/bundles/library-dependencies.html - # for more information on how to add other libraries. - { - "whl": "dist/*.whl", - }, - ], - }, - ], - "job_clusters": [ - { - "job_cluster_key": "job_cluster", - "new_cluster": { - "spark_version": "15.4.x-scala2.12", - "node_type_id": "[NODE_TYPE_ID]", - "data_security_mode": "SINGLE_USER", - "autoscale": { - "min_workers": 1, - "max_workers": 4, - }, - }, - }, - ], - } -) diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/resources/my_jobs_as_code_pipeline.py b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/resources/my_jobs_as_code_pipeline.py deleted file mode 100644 index 9d83e573a9..0000000000 --- a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/resources/my_jobs_as_code_pipeline.py +++ /dev/null @@ -1,20 +0,0 @@ -from databricks.bundles.pipelines import Pipeline - -my_jobs_as_code_pipeline = Pipeline.from_dict( - { - "name": "my_jobs_as_code_pipeline", - "target": "my_jobs_as_code_${bundle.target}", - ## Specify the 'catalog' field to configure this pipeline to make use of Unity Catalog: - "catalog": "catalog_name", - "libraries": [ - { - "notebook": { - "path": "src/dlt_pipeline.ipynb", - }, - }, - ], - "configuration": { - "bundle.sourcePath": "${workspace.file_path}/src", - }, - } -) diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/scratch/README.md b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/scratch/README.md deleted file mode 100644 index e6cfb81b46..0000000000 --- a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/scratch/README.md +++ /dev/null @@ -1,4 +0,0 @@ -# scratch - -This folder is reserved for personal, exploratory notebooks. -By default these are not committed to Git, as 'scratch' is listed in .gitignore. diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/setup.py b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/setup.py deleted file mode 100644 index ba284ba828..0000000000 --- a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/setup.py +++ /dev/null @@ -1,18 +0,0 @@ -""" -setup.py configuration script describing how to build and package this project. - -This file is primarily used by the setuptools library and typically should not -be executed directly. See README.md for how to deploy, test, and run -the my_jobs_as_code project. -""" - -import os - -from setuptools import setup - -local_version = os.getenv("LOCAL_VERSION") -version = "0.0.1" - -setup( - version=f"{version}+{local_version}" if local_version else version, -) diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/dlt_pipeline.ipynb b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/dlt_pipeline.ipynb deleted file mode 100644 index d651c00422..0000000000 --- a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/dlt_pipeline.ipynb +++ /dev/null @@ -1,90 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "application/vnd.databricks.v1+cell": { - "cellMetadata": {}, - "inputWidgets": {}, - "nuid": "[UUID]", - "showTitle": false, - "title": "" - } - }, - "source": [ - "# DLT pipeline\n", - "\n", - "This Lakeflow Spark Declarative Pipeline definition is executed using a pipeline defined in resources/my_jobs_as_code.pipeline.yml." - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "application/vnd.databricks.v1+cell": { - "cellMetadata": {}, - "inputWidgets": {}, - "nuid": "[UUID]", - "showTitle": false, - "title": "" - } - }, - "outputs": [], - "source": [ - "# Import DLT and src/my_jobs_as_code\n", - "import dlt\n", - "import sys\n", - "\n", - "sys.path.append(spark.conf.get(\"bundle.sourcePath\", \".\"))\n", - "from pyspark.sql.functions import expr\n", - "from my_jobs_as_code import main" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "application/vnd.databricks.v1+cell": { - "cellMetadata": {}, - "inputWidgets": {}, - "nuid": "[UUID]", - "showTitle": false, - "title": "" - } - }, - "outputs": [], - "source": [ - "@dlt.view\n", - "def taxi_raw():\n", - " return main.get_taxis(spark)\n", - "\n", - "\n", - "@dlt.table\n", - "def filtered_taxis():\n", - " return dlt.read(\"taxi_raw\").filter(expr(\"fare_amount < 30\"))" - ] - } - ], - "metadata": { - "application/vnd.databricks.v1+notebook": { - "dashboards": [], - "language": "python", - "notebookMetadata": { - "pythonIndentUnit": 2 - }, - "notebookName": "dlt_pipeline", - "widgets": {} - }, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "name": "python", - "version": "3.11.4" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/my_jobs_as_code/__init__.py b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/my_jobs_as_code/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/my_jobs_as_code/main.py b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/my_jobs_as_code/main.py deleted file mode 100644 index 5ae344c7e2..0000000000 --- a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/my_jobs_as_code/main.py +++ /dev/null @@ -1,25 +0,0 @@ -from pyspark.sql import SparkSession, DataFrame - - -def get_taxis(spark: SparkSession) -> DataFrame: - return spark.read.table("samples.nyctaxi.trips") - - -# Create a new Databricks Connect session. If this fails, -# check that you have configured Databricks Connect correctly. -# See https://docs.databricks.com/dev-tools/databricks-connect.html. -def get_spark() -> SparkSession: - try: - from databricks.connect import DatabricksSession - - return DatabricksSession.builder.getOrCreate() - except ImportError: - return SparkSession.builder.getOrCreate() - - -def main(): - get_taxis(get_spark()).show(5) - - -if __name__ == "__main__": - main() diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/notebook.ipynb b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/notebook.ipynb deleted file mode 100644 index 227c7cc558..0000000000 --- a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/notebook.ipynb +++ /dev/null @@ -1,75 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "application/vnd.databricks.v1+cell": { - "cellMetadata": {}, - "inputWidgets": {}, - "nuid": "[UUID]", - "showTitle": false, - "title": "" - } - }, - "source": [ - "# Default notebook\n", - "\n", - "This default notebook is executed using Databricks Workflows as defined in resources/my_jobs_as_code.job.yml." - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "%load_ext autoreload\n", - "%autoreload 2" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "application/vnd.databricks.v1+cell": { - "cellMetadata": { - "byteLimit": 2048000, - "rowLimit": 10000 - }, - "inputWidgets": {}, - "nuid": "[UUID]", - "showTitle": false, - "title": "" - } - }, - "outputs": [], - "source": [ - "from my_jobs_as_code import main\n", - "\n", - "main.get_taxis(spark).show(10)" - ] - } - ], - "metadata": { - "application/vnd.databricks.v1+notebook": { - "dashboards": [], - "language": "python", - "notebookMetadata": { - "pythonIndentUnit": 2 - }, - "notebookName": "notebook", - "widgets": {} - }, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "name": "python", - "version": "3.11.4" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/tests/main_test.py b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/tests/main_test.py deleted file mode 100644 index 13e100ee2e..0000000000 --- a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/tests/main_test.py +++ /dev/null @@ -1,8 +0,0 @@ -from my_jobs_as_code.main import get_taxis, get_spark - -# running tests requires installing databricks-connect, e.g. by uncommenting it in pyproject.toml - - -def test_main(): - taxis = get_taxis(get_spark()) - assert taxis.count() > 5 diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/script b/acceptance/bundle/templates/experimental-jobs-as-code/script deleted file mode 100644 index 31fa7b0742..0000000000 --- a/acceptance/bundle/templates/experimental-jobs-as-code/script +++ /dev/null @@ -1,19 +0,0 @@ -trace $CLI bundle init experimental-jobs-as-code --config-file ./input.json --output-dir output - -cd output/my_jobs_as_code - -# with -f we add pre-built wheel, in addition to vendored packages; -# if PyPi package is not yet published, it will be used instead. -# Note: -f overrides UV_FIND_LINKS, so we must pass vendored dir explicitly. -uv -q sync --no-index -f $VENDORED_PY_PACKAGES -f $(dirname $DATABRICKS_BUNDLES_WHEEL) - -trace $CLI bundle validate -t dev --output json | jq ".resources" - -uv build -q --no-index -trace unzip -Z1 dist/my_jobs_as_code-0.0.1-py3-none-any.whl - -rm -fr .venv resources/__pycache__ uv.lock src/my_jobs_as_code.egg-info dist - -# Do not affect this repository's git behaviour #2318 -mv .gitignore out.gitignore -rm .databricks/.gitignore diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/test.toml b/acceptance/bundle/templates/experimental-jobs-as-code/test.toml deleted file mode 100644 index 3b56f132b8..0000000000 --- a/acceptance/bundle/templates/experimental-jobs-as-code/test.toml +++ /dev/null @@ -1,9 +0,0 @@ -Ignore = [ - '.venv', -] -Timeout = '40s' -TimeoutWindows = '120s' - -[[Repls]] -Old = '"databricks-bundles==0.\d+.\d+"' -New = '"databricks-bundles==x.y.z"' diff --git a/libs/template/reader_test.go b/libs/template/reader_test.go index 77117786ab..e6a854d800 100644 --- a/libs/template/reader_test.go +++ b/libs/template/reader_test.go @@ -17,7 +17,7 @@ func TestBuiltInReader(t *testing.T) { "default-python", "default-sql", "dbt-sql", - "experimental-jobs-as-code", + "pydabs", } for _, name := range exists { diff --git a/libs/template/template.go b/libs/template/template.go index 66a6610713..dc30de4bd0 100644 --- a/libs/template/template.go +++ b/libs/template/template.go @@ -31,13 +31,12 @@ const ( DefaultSql TemplateName = "default-sql" LakeflowPipelines TemplateName = "lakeflow-pipelines" // CLIPipelines is deprecated. Use LakeflowPipelines instead - CLIPipelines TemplateName = "cli-pipelines" - DbtSql TemplateName = "dbt-sql" - MlopsStacks TemplateName = "mlops-stacks" - Pydabs TemplateName = "pydabs" - Custom TemplateName = "custom" - ExperimentalJobsAsCode TemplateName = "experimental-jobs-as-code" - Default TemplateName = "default" + CLIPipelines TemplateName = "cli-pipelines" + DbtSql TemplateName = "dbt-sql" + MlopsStacks TemplateName = "mlops-stacks" + Pydabs TemplateName = "pydabs" + Custom TemplateName = "custom" + Default TemplateName = "default" ) var databricksTemplates = []Template{ @@ -99,13 +98,6 @@ var databricksTemplates = []Template{ Reader: &builtinReader{name: string(Pydabs)}, Writer: &writerWithFullTelemetry{defaultWriter: defaultWriter{name: Pydabs}}, }, - { - name: ExperimentalJobsAsCode, - hidden: true, - description: "Jobs as code template (experimental)", - Reader: &builtinReader{name: string(ExperimentalJobsAsCode)}, - Writer: &writerWithFullTelemetry{defaultWriter: defaultWriter{name: ExperimentalJobsAsCode}}, - }, } func HelpDescriptions() string { diff --git a/libs/template/template_test.go b/libs/template/template_test.go index 2ceeb9d731..4692f0acb2 100644 --- a/libs/template/template_test.go +++ b/libs/template/template_test.go @@ -73,6 +73,7 @@ func TestTemplateGetDatabricksTemplate(t *testing.T) { notExist := []string{ "/some/path", "doesnotexist", + "experimental-jobs-as-code", "https://www.someurl.com", } @@ -81,6 +82,6 @@ func TestTemplateGetDatabricksTemplate(t *testing.T) { assert.Nil(t, tmpl) } - // Assert the alias works. + // Assert aliases work. assert.Equal(t, MlopsStacks, GetDatabricksTemplate(TemplateName("mlops-stack")).name) } diff --git a/libs/template/templates/experimental-jobs-as-code/databricks_template_schema.json b/libs/template/templates/experimental-jobs-as-code/databricks_template_schema.json deleted file mode 100644 index 574ce59259..0000000000 --- a/libs/template/templates/experimental-jobs-as-code/databricks_template_schema.json +++ /dev/null @@ -1,35 +0,0 @@ -{ - "welcome_message": "\nWelcome to (EXPERIMENTAL) \"Jobs as code\" template for Declarative Automation Bundles!", - "properties": { - "project_name": { - "type": "string", - "default": "jobs_as_code_project", - "description": "Please provide the following details to tailor the template to your preferences.\n\nUnique name for this project", - "order": 1, - "pattern": "^[A-Za-z0-9_]+$", - "pattern_match_failure_message": "Name must consist of letters, numbers, and underscores." - }, - "include_notebook": { - "type": "string", - "default": "yes", - "enum": ["yes", "no"], - "description": "Include a stub (sample) notebook in '{{.project_name}}{{path_separator}}src'", - "order": 2 - }, - "include_dlt": { - "type": "string", - "default": "yes", - "enum": ["yes", "no"], - "description": "Include a stub (sample) Delta Live Tables pipeline in '{{.project_name}}{{path_separator}}src'", - "order": 3 - }, - "include_python": { - "type": "string", - "default": "yes", - "enum": ["yes", "no"], - "description": "Include a stub (sample) Python package in '{{.project_name}}/src'", - "order": 4 - } - }, - "success_message": "Workspace to use (auto-detected, edit in '{{.project_name}}/databricks.yml'): {{workspace_host}}\n\n✨ Your new project has been created in the '{{.project_name}}' directory!\n\nPlease refer to the README.md file for \"getting started\" instructions.\nSee also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html." -} diff --git a/libs/template/templates/experimental-jobs-as-code/library/versions.tmpl b/libs/template/templates/experimental-jobs-as-code/library/versions.tmpl deleted file mode 100644 index cab0335541..0000000000 --- a/libs/template/templates/experimental-jobs-as-code/library/versions.tmpl +++ /dev/null @@ -1,9 +0,0 @@ -{{define "latest_lts_dbr_version" -}} - 15.4.x-scala2.12 -{{- end}} - -{{define "latest_lts_db_connect_version_spec" -}} - >=15.4,<15.5 -{{- end}} - -{{define "latest_databricks_bundles_version" -}}0.297.0{{- end}} diff --git a/libs/template/templates/experimental-jobs-as-code/template/__preamble.tmpl b/libs/template/templates/experimental-jobs-as-code/template/__preamble.tmpl deleted file mode 100644 index bd284b0252..0000000000 --- a/libs/template/templates/experimental-jobs-as-code/template/__preamble.tmpl +++ /dev/null @@ -1,29 +0,0 @@ -# Preamble - -This file only contains template directives; it is skipped for the actual output. - -{{skip "__preamble"}} - -{{$notDLT := not (eq .include_dlt "yes")}} -{{$notNotebook := not (eq .include_notebook "yes")}} -{{$notPython := not (eq .include_python "yes")}} - -{{if $notPython}} - {{skip "{{.project_name}}/src/{{.project_name}}"}} - {{skip "{{.project_name}}/tests/main_test.py"}} -{{end}} - -{{if $notDLT}} - {{skip "{{.project_name}}/src/dlt_pipeline.ipynb"}} - {{skip "{{.project_name}}/resources/{{.project_name}}_pipeline.py"}} -{{end}} - -{{if $notNotebook}} - {{skip "{{.project_name}}/src/notebook.ipynb"}} -{{end}} - -{{if (and $notDLT $notNotebook $notPython)}} - {{skip "{{.project_name}}/resources/{{.project_name}}_job.py"}} -{{else}} - {{skip "{{.project_name}}/resources/.gitkeep"}} -{{end}} diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/.gitignore b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/.gitignore deleted file mode 100644 index 0dab7f4995..0000000000 --- a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/.gitignore +++ /dev/null @@ -1,8 +0,0 @@ -.databricks/ -build/ -dist/ -__pycache__/ -*.egg-info -.venv/ -scratch/** -!scratch/README.md diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/README.md.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/README.md.tmpl deleted file mode 100644 index 37e7040846..0000000000 --- a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/README.md.tmpl +++ /dev/null @@ -1,60 +0,0 @@ -# {{.project_name}} - -The '{{.project_name}}' project was generated by using the "Jobs as code" template. - -## Prerequisites - -1. Install Databricks CLI 0.238 or later. - See [Install or update the Databricks CLI](https://docs.databricks.com/en/dev-tools/cli/install.html). - -2. Install uv. See [Installing uv](https://docs.astral.sh/uv/getting-started/installation/). - We use uv to create a virtual environment and install the required dependencies. - -3. Authenticate to your Databricks workspace if you have not done so already: - ``` - $ databricks configure - ``` - -4. Optionally, install developer tools such as the Databricks extension for Visual Studio Code from - https://docs.databricks.com/dev-tools/vscode-ext.html. - {{- if (eq .include_python "yes") }} Or read the "getting started" documentation for - **Databricks Connect** for instructions on running the included Python code from a different IDE. - {{- end}} - -5. For documentation on the Declarative Automation Bundles format used - for this project, and for CI/CD configuration, see - https://docs.databricks.com/dev-tools/bundles/index.html. - -## Deploy and run jobs - -1. Create a new virtual environment and install the required dependencies: - ``` - $ uv sync - ``` - -2. To deploy the bundle to the development target: - ``` - $ databricks bundle deploy --target dev - ``` - - *(Note that "dev" is the default target, so the `--target` parameter is optional here.)* - - This deploys everything that's defined for this project. - For example, the default template would deploy a job called - `[dev yourname] {{.project_name}}_job` to your workspace. - You can find that job by opening your workspace and clicking on **Workflows**. - -3. Similarly, to deploy a production copy, type: - ``` - $ databricks bundle deploy --target prod - ``` - - Note that the default job from the template has a schedule that runs every day - (defined in resources/{{.project_name}}_job.py). The schedule - is paused when deploying in development mode (see [Databricks Asset Bundle deployment modes]( - https://docs.databricks.com/dev-tools/bundles/deployment-modes.html)). - -4. To run a job: - ``` - $ databricks bundle run - ``` diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/databricks.yml.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/databricks.yml.tmpl deleted file mode 100644 index 3069fdaade..0000000000 --- a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/databricks.yml.tmpl +++ /dev/null @@ -1,50 +0,0 @@ -# This is a Databricks asset bundle definition for {{.project_name}}. -# See https://docs.databricks.com/dev-tools/bundles/index.html for documentation. -bundle: - name: {{.project_name}} - uuid: {{bundle_uuid}} - databricks_cli_version: ">= 0.248.0" - -python: - # Activate virtual environment before loading resources defined in Python. - # If disabled, defaults to using the Python interpreter available in the current shell. - venv_path: .venv - # Functions called to load resources defined in Python. See resources/__init__.py - resources: - - "resources:load_resources" - -{{ if .include_python -}} -artifacts: - default: - type: whl - path: . - # We use timestamp as Local version identifier (https://peps.python.org/pep-0440/#local-version-identifiers.) - # to ensure that changes to wheel package are picked up when used on all-purpose clusters - build: LOCAL_VERSION=$(date +%Y%m%d.%H%M%S) uv build - -{{ end -}} -include: - - resources/*.yml - -targets: - dev: - # The default target uses 'mode: development' to create a development copy. - # - Deployed resources get prefixed with '[dev my_user_name]' - # - Any job schedules and triggers are paused by default. - # See also https://docs.databricks.com/dev-tools/bundles/deployment-modes.html. - mode: development - default: true - workspace: - host: {{workspace_host}} - - prod: - mode: production - workspace: - host: {{workspace_host}} - # We explicitly specify /Workspace/Users/{{user_name}} to make sure we only have a single copy. - root_path: /Workspace/Users/{{user_name}}/.bundle/${bundle.name}/${bundle.target} - permissions: - - {{if is_service_principal}}service_principal{{else}}user{{end}}_name: {{user_name}} - level: CAN_MANAGE - run_as: - {{if is_service_principal}}service_principal{{else}}user{{end}}_name: {{user_name}} diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/fixtures/.gitkeep.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/fixtures/.gitkeep.tmpl deleted file mode 100644 index ee95703028..0000000000 --- a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/fixtures/.gitkeep.tmpl +++ /dev/null @@ -1,27 +0,0 @@ -# Fixtures -{{- /* -We don't want to have too many README.md files, since they -stand out so much. But we do need to have a file here to make -sure the folder is added to Git. -*/}} - -This folder is reserved for fixtures, such as CSV files. - -Below is an example of how to load fixtures as a data frame: - -``` -import pandas as pd -import os - -def get_absolute_path(*relative_parts): - if 'dbutils' in globals(): - base_dir = os.path.dirname(dbutils.notebook.entry_point.getDbutils().notebook().getContext().notebookPath().get()) # type: ignore - path = os.path.normpath(os.path.join(base_dir, *relative_parts)) - return path if path.startswith("/Workspace") else "/Workspace" + path - else: - return os.path.join(*relative_parts) - -csv_file = get_absolute_path("..", "fixtures", "mycsv.csv") -df = pd.read_csv(csv_file) -display(df) -``` diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/pyproject.toml.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/pyproject.toml.tmpl deleted file mode 100644 index 4cb0e6d9ee..0000000000 --- a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/pyproject.toml.tmpl +++ /dev/null @@ -1,58 +0,0 @@ -[build-system] -requires = ["setuptools>=61.0"] -build-backend = "setuptools.build_meta" - -[project] -name = "{{.project_name}}" -requires-python = ">=3.10" -description = "wheel file based on {{.project_name}}" - -# Dependencies in case the output wheel file is used as a library dependency. -# For defining dependencies, when this package is used in Databricks, see: -# https://docs.databricks.com/dev-tools/bundles/library-dependencies.html -# -# Example: -# dependencies = [ -# "requests==x.y.z", -# ] -dependencies = [ -] - -# see setup.py -dynamic = ["version"] - -{{ if eq .include_python "yes" -}} -[project.entry-points.packages] -main = "{{.project_name}}.main:main" - -{{ end -}} - -{{ if eq .include_python "yes" -}} -[tool.setuptools.packages.find] -where = ["src"] - -{{ else -}} -[tool.setuptools] -py-modules = [] - -{{ end -}} -[tool.uv] -## Dependencies for local development -dev-dependencies = [ - "databricks-bundles=={{template "latest_databricks_bundles_version"}}", - - ## Add code completion support for DLT - # "databricks-dlt", - - ## databricks-connect can be used to run parts of this project locally. - ## See https://docs.databricks.com/dev-tools/databricks-connect.html. - ## - ## Uncomment line below to install a version of db-connect that corresponds to - ## the Databricks Runtime version used for this project. - # "databricks-connect{{template "latest_lts_db_connect_version_spec"}}", -] - -override-dependencies = [ - # pyspark package conflicts with 'databricks-connect' - "pyspark; sys_platform == 'never'", -] diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/resources/__init__.py b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/resources/__init__.py deleted file mode 100644 index fbcb9dc5f0..0000000000 --- a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/resources/__init__.py +++ /dev/null @@ -1,16 +0,0 @@ -from databricks.bundles.core import ( - Bundle, - Resources, - load_resources_from_current_package_module, -) - - -def load_resources(bundle: Bundle) -> Resources: - """ - 'load_resources' function is referenced in databricks.yml and is responsible for loading - bundle resources defined in Python code. This function is called by Databricks CLI during - bundle deployment. After deployment, this function is not used. - """ - - # the default implementation loads all Python files in 'resources' directory - return load_resources_from_current_package_module() diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/resources/{{.project_name}}_job.py.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/resources/{{.project_name}}_job.py.tmpl deleted file mode 100644 index ff554c45c5..0000000000 --- a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/resources/{{.project_name}}_job.py.tmpl +++ /dev/null @@ -1,106 +0,0 @@ -{{$include_dlt := "no" -}} -from databricks.bundles.jobs import Job - -""" -The main job for {{.project_name}}. - -{{- /* Clarify what this job is for for DLT-only users. */}} -{{if and (eq $include_dlt "yes") (and (eq .include_notebook "no") (eq .include_python "no")) -}} -This job runs {{.project_name}}_pipeline on a schedule. -{{end -}} -""" - - -{{.project_name}}_job = Job.from_dict( - { - "name": "{{.project_name}}_job", - "trigger": { - # Run this job every day, exactly one day from the last run; see https://docs.databricks.com/api/workspace/jobs/create#trigger - "periodic": { - "interval": 1, - "unit": "DAYS", - }, - }, - # "email_notifications": { - # "on_failure": [ - # "{{user_name}}", - # ], - # }, - "tasks": [ - {{- if eq .include_notebook "yes" -}} - {{- "\n " -}} - { - "task_key": "notebook_task", - "job_cluster_key": "job_cluster", - "notebook_task": { - "notebook_path": "src/notebook.ipynb", - }, - }, - {{- end -}} - {{- if (eq $include_dlt "yes") -}} - {{- "\n " -}} - { - "task_key": "refresh_pipeline", - {{- if (eq .include_notebook "yes" )}} - "depends_on": [ - { - "task_key": "notebook_task", - }, - ], - {{- end}} - "pipeline_task": { - {{- /* TODO: we should find a way that doesn't use magics for the below, like ./{{project_name}}.pipeline.yml */}} - "pipeline_id": "${resources.pipelines.{{.project_name}}_pipeline.id}", - }, - }, - {{- end -}} - {{- if (eq .include_python "yes") -}} - {{- "\n " -}} - { - "task_key": "main_task", - {{- if (eq $include_dlt "yes") }} - "depends_on": [ - { - "task_key": "refresh_pipeline", - }, - ], - {{- else if (eq .include_notebook "yes" )}} - "depends_on": [ - { - "task_key": "notebook_task", - }, - ], - {{- end}} - "job_cluster_key": "job_cluster", - "python_wheel_task": { - "package_name": "{{.project_name}}", - "entry_point": "main", - }, - "libraries": [ - # By default we just include the .whl file generated for the {{.project_name}} package. - # See https://docs.databricks.com/dev-tools/bundles/library-dependencies.html - # for more information on how to add other libraries. - { - "whl": "dist/*.whl", - }, - ], - }, - {{- end -}} - {{""}} - ], - "job_clusters": [ - { - "job_cluster_key": "job_cluster", - "new_cluster": { - "spark_version": "{{template "latest_lts_dbr_version"}}", - "node_type_id": "{{smallest_node_type}}", - "data_security_mode": "SINGLE_USER", - "autoscale": { - "min_workers": 1, - "max_workers": 4, - }, - }, - }, - ], - } -) diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/resources/{{.project_name}}_pipeline.py.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/resources/{{.project_name}}_pipeline.py.tmpl deleted file mode 100644 index c8579ae659..0000000000 --- a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/resources/{{.project_name}}_pipeline.py.tmpl +++ /dev/null @@ -1,24 +0,0 @@ -from databricks.bundles.pipelines import Pipeline - -{{.project_name}}_pipeline = Pipeline.from_dict( - { - "name": "{{.project_name}}_pipeline", - "target": "{{.project_name}}_${bundle.target}", - {{- if or (eq default_catalog "") (eq default_catalog "hive_metastore")}} - ## Specify the 'catalog' field to configure this pipeline to make use of Unity Catalog: - "catalog": "catalog_name", - {{- else}} - "catalog": "{{default_catalog}}", - {{- end}} - "libraries": [ - { - "notebook": { - "path": "src/dlt_pipeline.ipynb", - }, - }, - ], - "configuration": { - "bundle.sourcePath": "${workspace.file_path}/src", - }, - } -) diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/scratch/README.md b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/scratch/README.md deleted file mode 100644 index e6cfb81b46..0000000000 --- a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/scratch/README.md +++ /dev/null @@ -1,4 +0,0 @@ -# scratch - -This folder is reserved for personal, exploratory notebooks. -By default these are not committed to Git, as 'scratch' is listed in .gitignore. diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/setup.py.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/setup.py.tmpl deleted file mode 100644 index 19c9d0ebee..0000000000 --- a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/setup.py.tmpl +++ /dev/null @@ -1,18 +0,0 @@ -""" -setup.py configuration script describing how to build and package this project. - -This file is primarily used by the setuptools library and typically should not -be executed directly. See README.md for how to deploy, test, and run -the {{.project_name}} project. -""" - -import os - -from setuptools import setup - -local_version = os.getenv("LOCAL_VERSION") -version = "0.0.1" - -setup( - version=f"{version}+{local_version}" if local_version else version, -) diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/dlt_pipeline.ipynb.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/dlt_pipeline.ipynb.tmpl deleted file mode 100644 index 62c4fb1f12..0000000000 --- a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/dlt_pipeline.ipynb.tmpl +++ /dev/null @@ -1,104 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "application/vnd.databricks.v1+cell": { - "cellMetadata": {}, - "inputWidgets": {}, - "nuid": "9a626959-61c8-4bba-84d2-2a4ecab1f7ec", - "showTitle": false, - "title": "" - } - }, - "source": [ - "# DLT pipeline\n", - "\n", - "This Lakeflow Spark Declarative Pipeline definition is executed using a pipeline defined in resources/{{.project_name}}.pipeline.yml." - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "application/vnd.databricks.v1+cell": { - "cellMetadata": {}, - "inputWidgets": {}, - "nuid": "9198e987-5606-403d-9f6d-8f14e6a4017f", - "showTitle": false, - "title": "" - } - }, - "outputs": [], - "source": [ - {{- if (eq .include_python "yes") }} - "# Import DLT and src/{{.project_name}}\n", - "import dlt\n", - "import sys\n", - "\n", - "sys.path.append(spark.conf.get(\"bundle.sourcePath\", \".\"))\n", - "from pyspark.sql.functions import expr\n", - "from {{.project_name}} import main" - {{else}} - "import dlt\n", - "from pyspark.sql.functions import expr\n", - "from pyspark.sql import SparkSession\n", - "\n", - "spark = SparkSession.builder.getOrCreate()" - {{end -}} - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "application/vnd.databricks.v1+cell": { - "cellMetadata": {}, - "inputWidgets": {}, - "nuid": "3fc19dba-61fd-4a89-8f8c-24fee63bfb14", - "showTitle": false, - "title": "" - } - }, - "outputs": [], - "source": [ - {{- if (eq .include_python "yes") }} - "@dlt.view\n", - "def taxi_raw():\n", - " return main.get_taxis(spark)\n", - {{else}} - "@dlt.view\n", - "def taxi_raw():\n", - " return spark.read.format(\"json\").load(\"/databricks-datasets/nyctaxi/sample/json/\")\n", - {{end -}} - "\n", - "\n", - "@dlt.table\n", - "def filtered_taxis():\n", - " return dlt.read(\"taxi_raw\").filter(expr(\"fare_amount < 30\"))" - ] - } - ], - "metadata": { - "application/vnd.databricks.v1+notebook": { - "dashboards": [], - "language": "python", - "notebookMetadata": { - "pythonIndentUnit": 2 - }, - "notebookName": "dlt_pipeline", - "widgets": {} - }, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "name": "python", - "version": "3.11.4" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/notebook.ipynb.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/notebook.ipynb.tmpl deleted file mode 100644 index 6782a053ba..0000000000 --- a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/notebook.ipynb.tmpl +++ /dev/null @@ -1,79 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "application/vnd.databricks.v1+cell": { - "cellMetadata": {}, - "inputWidgets": {}, - "nuid": "ee353e42-ff58-4955-9608-12865bd0950e", - "showTitle": false, - "title": "" - } - }, - "source": [ - "# Default notebook\n", - "\n", - "This default notebook is executed using Databricks Workflows as defined in resources/{{.project_name}}.job.yml." - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "%load_ext autoreload\n", - "%autoreload 2" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "application/vnd.databricks.v1+cell": { - "cellMetadata": { - "byteLimit": 2048000, - "rowLimit": 10000 - }, - "inputWidgets": {}, - "nuid": "6bca260b-13d1-448f-8082-30b60a85c9ae", - "showTitle": false, - "title": "" - } - }, - "outputs": [], - "source": [ - {{- if (eq .include_python "yes") }} - "from {{.project_name}} import main\n", - "\n", - "main.get_taxis(spark).show(10)" - {{else}} - "spark.range(10)" - {{end -}} - ] - } - ], - "metadata": { - "application/vnd.databricks.v1+notebook": { - "dashboards": [], - "language": "python", - "notebookMetadata": { - "pythonIndentUnit": 2 - }, - "notebookName": "notebook", - "widgets": {} - }, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "name": "python", - "version": "3.11.4" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/{{.project_name}}/__init__.py.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/{{.project_name}}/__init__.py.tmpl deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/{{.project_name}}/main.py.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/{{.project_name}}/main.py.tmpl deleted file mode 100644 index 5ae344c7e2..0000000000 --- a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/{{.project_name}}/main.py.tmpl +++ /dev/null @@ -1,25 +0,0 @@ -from pyspark.sql import SparkSession, DataFrame - - -def get_taxis(spark: SparkSession) -> DataFrame: - return spark.read.table("samples.nyctaxi.trips") - - -# Create a new Databricks Connect session. If this fails, -# check that you have configured Databricks Connect correctly. -# See https://docs.databricks.com/dev-tools/databricks-connect.html. -def get_spark() -> SparkSession: - try: - from databricks.connect import DatabricksSession - - return DatabricksSession.builder.getOrCreate() - except ImportError: - return SparkSession.builder.getOrCreate() - - -def main(): - get_taxis(get_spark()).show(5) - - -if __name__ == "__main__": - main() diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/tests/main_test.py.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/tests/main_test.py.tmpl deleted file mode 100644 index 6f89fca538..0000000000 --- a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/tests/main_test.py.tmpl +++ /dev/null @@ -1,8 +0,0 @@ -from {{.project_name}}.main import get_taxis, get_spark - -# running tests requires installing databricks-connect, e.g. by uncommenting it in pyproject.toml - - -def test_main(): - taxis = get_taxis(get_spark()) - assert taxis.count() > 5 diff --git a/python/README.md b/python/README.md index 04459253b3..c68891558c 100644 --- a/python/README.md +++ b/python/README.md @@ -19,7 +19,7 @@ To use `databricks-bundles`, you must first: ```bash databricks configure ``` -3. To create a new project, initialize a bundle using the `experimental-jobs-as-code` template: +3. To create a new project, initialize a bundle using the `pydabs` template: ```bash databricks bundle init pydabs