diff --git a/.github/workflows/code-coverage.yml b/.github/workflows/code-coverage.yml index c188c5b3a..0e8cb6808 100644 --- a/.github/workflows/code-coverage.yml +++ b/.github/workflows/code-coverage.yml @@ -1,10 +1,15 @@ -name: Code Coverage +name: E2E Tests and Code Coverage permissions: contents: read id-token: write -on: [pull_request, workflow_dispatch] +on: + push: + branches: + - main + pull_request: + workflow_dispatch: jobs: test-with-coverage: @@ -32,25 +37,16 @@ jobs: with: python-version: "3.10" install-args: "--all-extras" - - name: Run parallel tests with coverage + - name: Run all tests with coverage continue-on-error: false run: | poetry run pytest tests/unit tests/e2e \ - -m "not serial" \ -n auto \ + --dist=loadgroup \ --cov=src \ --cov-report=xml \ --cov-report=term \ -v - - name: Run telemetry tests with coverage (isolated) - continue-on-error: false - run: | - poetry run pytest tests/e2e/test_concurrent_telemetry.py \ - --cov=src \ - --cov-append \ - --cov-report=xml \ - --cov-report=term \ - -v - name: Check for coverage override id: override env: diff --git a/.github/workflows/code-quality-checks.yml b/.github/workflows/code-quality-checks.yml index ecc238263..4071a6e51 100644 --- a/.github/workflows/code-quality-checks.yml +++ b/.github/workflows/code-quality-checks.yml @@ -78,7 +78,6 @@ jobs: with: python-version: ${{ matrix.python-version }} install-args: "--all-extras" - cache-path: ".venv-pyarrow" cache-suffix: "pyarrow-${{ matrix.dependency-version }}-" - name: Install Python tools for custom versions if: matrix.dependency-version != 'default' diff --git a/.github/workflows/daily-telemetry-e2e.yml b/.github/workflows/daily-telemetry-e2e.yml deleted file mode 100644 index b6f78726c..000000000 --- a/.github/workflows/daily-telemetry-e2e.yml +++ /dev/null @@ -1,58 +0,0 @@ -name: Daily Telemetry E2E Tests - -on: - schedule: - - cron: '0 0 * * 0' # Run every Sunday at midnight UTC - - workflow_dispatch: # Allow manual triggering - inputs: - test_pattern: - description: 'Test pattern to run (default: tests/e2e/test_telemetry_e2e.py)' - required: false - default: 'tests/e2e/test_telemetry_e2e.py' - type: string - -permissions: - contents: read - id-token: write - -jobs: - telemetry-e2e-tests: - runs-on: - group: databricks-protected-runner-group - labels: linux-ubuntu-latest - environment: azure-prod - - env: - DATABRICKS_SERVER_HOSTNAME: ${{ secrets.DATABRICKS_HOST }} - DATABRICKS_HTTP_PATH: ${{ secrets.TEST_PECO_WAREHOUSE_HTTP_PATH }} - DATABRICKS_TOKEN: ${{ secrets.DATABRICKS_TOKEN }} - DATABRICKS_CATALOG: peco - DATABRICKS_USER: ${{ secrets.TEST_PECO_SP_ID }} - - steps: - - name: Check out repository - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 - - name: Install Kerberos system dependencies - run: | - sudo apt-get update - sudo apt-get install -y libkrb5-dev - - name: Setup Poetry - uses: ./.github/actions/setup-poetry - with: - python-version: "3.10" - install-args: "--all-extras" - - name: Run telemetry E2E tests - run: | - TEST_PATTERN="${{ github.event.inputs.test_pattern || 'tests/e2e/test_telemetry_e2e.py' }}" - echo "Running tests: $TEST_PATTERN" - poetry run python -m pytest $TEST_PATTERN -v -s - - name: Upload test results on failure - if: failure() - uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4 - with: - name: telemetry-test-results - path: | - .pytest_cache/ - tests-unsafe.log - retention-days: 7 diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml deleted file mode 100644 index 6c0cc7059..000000000 --- a/.github/workflows/integration.yml +++ /dev/null @@ -1,71 +0,0 @@ -name: Integration Tests - -on: - push: - branches: - - main - pull_request: - -permissions: - contents: read - id-token: write - -jobs: - run-non-telemetry-tests: - runs-on: - group: databricks-protected-runner-group - labels: linux-ubuntu-latest - environment: azure-prod - env: - DATABRICKS_SERVER_HOSTNAME: ${{ secrets.DATABRICKS_HOST }} - DATABRICKS_HTTP_PATH: ${{ secrets.TEST_PECO_WAREHOUSE_HTTP_PATH }} - DATABRICKS_TOKEN: ${{ secrets.DATABRICKS_TOKEN }} - DATABRICKS_CATALOG: peco - DATABRICKS_USER: ${{ secrets.TEST_PECO_SP_ID }} - steps: - - name: Check out repository - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 - - name: Install Kerberos system dependencies - run: | - sudo apt-get update - sudo apt-get install -y libkrb5-dev - - name: Setup Poetry - uses: ./.github/actions/setup-poetry - with: - python-version: "3.10" - install-args: "--all-extras" - - name: Run non-telemetry e2e tests - run: | - poetry run python -m pytest tests/e2e \ - --ignore=tests/e2e/test_telemetry_e2e.py \ - --ignore=tests/e2e/test_concurrent_telemetry.py \ - -n auto - - run-telemetry-tests: - runs-on: - group: databricks-protected-runner-group - labels: linux-ubuntu-latest - needs: run-non-telemetry-tests - environment: azure-prod - env: - DATABRICKS_SERVER_HOSTNAME: ${{ secrets.DATABRICKS_HOST }} - DATABRICKS_HTTP_PATH: ${{ secrets.TEST_PECO_WAREHOUSE_HTTP_PATH }} - DATABRICKS_TOKEN: ${{ secrets.DATABRICKS_TOKEN }} - DATABRICKS_CATALOG: peco - DATABRICKS_USER: ${{ secrets.TEST_PECO_SP_ID }} - steps: - - name: Check out repository - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 - - name: Install system dependencies - run: | - sudo apt-get update - sudo apt-get install -y libkrb5-dev - - name: Setup Poetry - uses: ./.github/actions/setup-poetry - with: - python-version: "3.10" - install-args: "--all-extras" - - name: Run telemetry tests in isolation - run: | - poetry run python -m pytest tests/e2e/test_concurrent_telemetry.py \ - -n auto --dist=loadgroup -v diff --git a/tests/e2e/common/large_queries_mixin.py b/tests/e2e/common/large_queries_mixin.py index dd7c56996..e0505c2dd 100644 --- a/tests/e2e/common/large_queries_mixin.py +++ b/tests/e2e/common/large_queries_mixin.py @@ -7,10 +7,8 @@ log = logging.getLogger(__name__) -class LargeQueriesMixin: - """ - This mixin expects to be mixed with a CursorTest-like class - """ +class LargeQueriesFetchMixin: + """Shared fetch helper for large query test classes.""" def fetch_rows(self, cursor, row_count, fetchmany_size): """ @@ -44,6 +42,10 @@ def fetch_rows(self, cursor, row_count, fetchmany_size): + "assuming 10K fetch size." ) + +class LargeWideResultSetMixin(LargeQueriesFetchMixin): + """Test mixin for large wide result set queries.""" + @pytest.mark.parametrize( "extra_params", [ @@ -52,7 +54,7 @@ def fetch_rows(self, cursor, row_count, fetchmany_size): ], ) def test_query_with_large_wide_result_set(self, extra_params): - resultSize = 300 * 1000 * 1000 # 300 MB + resultSize = 100 * 1000 * 1000 # 100 MB width = 8192 # B rows = resultSize // width cols = width // 36 @@ -77,6 +79,10 @@ def test_query_with_large_wide_result_set(self, extra_params): assert row[0] == row_id # Verify no rows are dropped in the middle. assert len(row[1]) == 36 + +class LargeNarrowResultSetMixin(LargeQueriesFetchMixin): + """Test mixin for large narrow result set queries.""" + @pytest.mark.parametrize( "extra_params", [ @@ -85,7 +91,7 @@ def test_query_with_large_wide_result_set(self, extra_params): ], ) def test_query_with_large_narrow_result_set(self, extra_params): - resultSize = 300 * 1000 * 1000 # 300 MB + resultSize = 100 * 1000 * 1000 # 100 MB width = 8 # sizeof(long) rows = resultSize / width @@ -98,6 +104,10 @@ def test_query_with_large_narrow_result_set(self, extra_params): for row_id, row in enumerate(self.fetch_rows(cursor, rows, fetchmany_size)): assert row[0] == row_id + +class LongRunningQueryMixin: + """Test mixin for long running queries.""" + @pytest.mark.parametrize( "extra_params", [ @@ -114,7 +124,7 @@ def test_long_running_query(self, extra_params): duration = -1 scale0 = 10000 - scale_factor = 1 + scale_factor = 50 with self.cursor(extra_params) as cursor: while duration < min_duration: assert scale_factor < 4096, "Detected infinite loop" @@ -138,3 +148,8 @@ def test_long_running_query(self, extra_params): print("Took {} s with scale factor={}".format(duration, scale_factor)) # Extrapolate linearly to reach 3 min and add 50% padding to push over the limit scale_factor = math.ceil(1.5 * scale_factor / current_fraction) + + +# Keep backward-compatible alias that combines all three +class LargeQueriesMixin(LargeWideResultSetMixin, LargeNarrowResultSetMixin, LongRunningQueryMixin): + pass diff --git a/tests/e2e/test_driver.py b/tests/e2e/test_driver.py index e04e348c9..166529c30 100644 --- a/tests/e2e/test_driver.py +++ b/tests/e2e/test_driver.py @@ -39,7 +39,11 @@ ) from databricks.sql.thrift_api.TCLIService import ttypes from tests.e2e.common.core_tests import CoreTestMixin, SmokeTestMixin -from tests.e2e.common.large_queries_mixin import LargeQueriesMixin +from tests.e2e.common.large_queries_mixin import ( + LargeWideResultSetMixin, + LargeNarrowResultSetMixin, + LongRunningQueryMixin, +) from tests.e2e.common.timestamp_tests import TimestampTestsMixin from tests.e2e.common.decimal_tests import DecimalTestsMixin from tests.e2e.common.retry_test_mixins import ( @@ -138,7 +142,9 @@ def assertEqualRowValues(self, actual, expected): assert act[i] == exp[i] -class TestPySQLLargeQueriesSuite(PySQLPytestTestCase, LargeQueriesMixin): +class _LargeQueryRowHelper: + """Shared helper for fetching rows one at a time in large query tests.""" + def get_some_rows(self, cursor, fetchmany_size): row = cursor.fetchone() if row: @@ -146,16 +152,26 @@ def get_some_rows(self, cursor, fetchmany_size): else: return None + +class TestPySQLLargeWideResultSet(PySQLPytestTestCase, _LargeQueryRowHelper, LargeWideResultSetMixin): + pass + + +class TestPySQLLargeNarrowResultSet(PySQLPytestTestCase, _LargeQueryRowHelper, LargeNarrowResultSetMixin): + pass + + +class TestPySQLLongRunningQuery(PySQLPytestTestCase, LongRunningQueryMixin): + pass + + +class TestPySQLCloudFetch(PySQLPytestTestCase): @skipUnless(pysql_supports_arrow(), "needs arrow support") @pytest.mark.skip("This test requires a previously uploaded data set") def test_cloud_fetch(self): - # This test can take several minutes to run limits = [100000, 300000] threads = [10, 25] self.arraysize = 100000 - # This test requires a large table with many rows to properly initiate cloud fetch. - # e2-dogfood host > hive_metastore catalog > main schema has such a table called store_sales. - # If this table is deleted or this test is run on a different host, a different table may need to be used. base_query = "SELECT * FROM store_sales WHERE ss_sold_date_sk = 2452234 " for num_limit, num_threads, lz4_compression in itertools.product( limits, threads, [True, False] diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 5b6991931..4a8cb0b68 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -87,6 +87,7 @@ class ClientTestSuite(unittest.TestCase): "server_hostname": "foo", "http_path": "dummy_path", "access_token": "tok", + "enable_telemetry": False, } @patch("%s.session.ThriftDatabricksClient" % PACKAGE_NAME) @@ -644,6 +645,7 @@ class TransactionTestSuite(unittest.TestCase): "server_hostname": "foo", "http_path": "dummy_path", "access_token": "tok", + "enable_telemetry": False, } def _setup_mock_session_with_http_client(self, mock_session): diff --git a/tests/unit/test_session.py b/tests/unit/test_session.py index 3a43c1a75..aa7e7f02b 100644 --- a/tests/unit/test_session.py +++ b/tests/unit/test_session.py @@ -22,6 +22,7 @@ class TestSession: "server_hostname": "foo", "http_path": "dummy_path", "access_token": "tok", + "enable_telemetry": False, } @patch("%s.session.ThriftDatabricksClient" % PACKAGE_NAME) @@ -50,6 +51,7 @@ def test_auth_args(self, mock_client_class): "server_hostname": "foo", "http_path": None, "access_token": "tok", + "enable_telemetry": False, }, { "server_hostname": "foo", @@ -57,6 +59,7 @@ def test_auth_args(self, mock_client_class): "_tls_client_cert_file": "something", "_use_cert_as_auth": True, "access_token": None, + "enable_telemetry": False, }, ]