From cd49dabad7358689d238fcaba92f47b614df97cd Mon Sep 17 00:00:00 2001 From: samikshya-chand_data Date: Mon, 1 Dec 2025 12:32:50 +0000 Subject: [PATCH 01/10] Prepare for a release with telemetry on by default Signed-off-by: samikshya-chand_data --- CHANGELOG.md | 8 ++++++++ README.md | 3 ++- src/databricks/sql/auth/common.py | 2 +- src/databricks/sql/client.py | 2 +- 4 files changed, 12 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5b902e976..0dadd3a8c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,13 @@ # Release History +# 4.2.1 (2025-11-20) + +- Change default use_hybrid_disposition to False by @samikshya-db in https://github.com/databricks/databricks-sql-python/pull/714 +- Circuit breaker changes using pybreaker by @nikhilsuri-db in https://github.com/databricks/databricks-sql-python/pull/705 +- perf: Optimize telemetry latency logging to reduce overhead by @samikshya-db in https://github.com/databricks/databricks-sql-python/pull/715 +- basic e2e test for force telemetry verification by @nikhilsuri-db in https://github.com/databricks/databricks-sql-python/pull/708 +- Telemetry is ON by default to track connection stats. (Note : This strictly excludes PII, query text, and results) by @samikshya-db in https://github.com/databricks/databricks-sql-python/pull/717 + # 4.2.1 (2025-11-20) - Ignore transactions by default (databricks/databricks-sql-python#711 by @jayantsing-db) diff --git a/README.md b/README.md index ec82a3637..71a0fc1bf 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,4 @@ + # Databricks SQL Connector for Python [![PyPI](https://img.shields.io/pypi/v/databricks-sql-connector?style=flat-square)](https://pypi.org/project/databricks-sql-connector/) @@ -13,7 +14,7 @@ You are welcome to file an issue here for general use cases. You can also contac ## Requirements -Python 3.8 or above is required. +Python 3.9 or above is required. 
## Documentation diff --git a/src/databricks/sql/auth/common.py b/src/databricks/sql/auth/common.py index a764b036d..0e3a01918 100644 --- a/src/databricks/sql/auth/common.py +++ b/src/databricks/sql/auth/common.py @@ -51,7 +51,7 @@ def __init__( pool_connections: Optional[int] = None, pool_maxsize: Optional[int] = None, user_agent: Optional[str] = None, - telemetry_circuit_breaker_enabled: Optional[bool] = None, + telemetry_circuit_breaker_enabled: Optional[bool] = True, ): self.hostname = hostname self.access_token = access_token diff --git a/src/databricks/sql/client.py b/src/databricks/sql/client.py index c873700bc..1f17d54f2 100755 --- a/src/databricks/sql/client.py +++ b/src/databricks/sql/client.py @@ -328,7 +328,7 @@ def read(self) -> Optional[OAuthToken]: self.ignore_transactions = ignore_transactions self.force_enable_telemetry = kwargs.get("force_enable_telemetry", False) - self.enable_telemetry = kwargs.get("enable_telemetry", False) + self.enable_telemetry = kwargs.get("enable_telemetry", True) self.telemetry_enabled = TelemetryHelper.is_telemetry_enabled(self) TelemetryClientFactory.initialize_telemetry_client( From a04d6da46f90b96cc909431f6f93e60d2fc1d76a Mon Sep 17 00:00:00 2001 From: samikshya-chand_data Date: Mon, 1 Dec 2025 12:51:08 +0000 Subject: [PATCH 02/10] Make edits Signed-off-by: samikshya-chand_data --- CHANGELOG.md | 3 +-- README.md | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0dadd3a8c..e5b6e877d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,7 +1,6 @@ # Release History -# 4.2.1 (2025-11-20) - +# 4.2.2 (2025-12-01) - Change default use_hybrid_disposition to False by @samikshya-db in https://github.com/databricks/databricks-sql-python/pull/714 - Circuit breaker changes using pybreaker by @nikhilsuri-db in https://github.com/databricks/databricks-sql-python/pull/705 - perf: Optimize telemetry latency logging to reduce overhead by @samikshya-db in 
https://github.com/databricks/databricks-sql-python/pull/715 diff --git a/README.md b/README.md index 71a0fc1bf..047515ba4 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,3 @@ - # Databricks SQL Connector for Python [![PyPI](https://img.shields.io/pypi/v/databricks-sql-connector?style=flat-square)](https://pypi.org/project/databricks-sql-connector/) From 0eda9f0167b009aacd50a1fa9792d049232bf1de Mon Sep 17 00:00:00 2001 From: samikshya-chand_data Date: Mon, 1 Dec 2025 12:54:46 +0000 Subject: [PATCH 03/10] Update version Signed-off-by: samikshya-chand_data --- pyproject.toml | 2 +- src/databricks/sql/__init__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 61c248e98..c53f4223c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "databricks-sql-connector" -version = "4.2.1" +version = "4.2.2" description = "Databricks SQL Connector for Python" authors = ["Databricks "] license = "Apache-2.0" diff --git a/src/databricks/sql/__init__.py b/src/databricks/sql/__init__.py index cd37e6ce1..7cf631e83 100644 --- a/src/databricks/sql/__init__.py +++ b/src/databricks/sql/__init__.py @@ -71,7 +71,7 @@ def __repr__(self): DATE = DBAPITypeObject("date") ROWID = DBAPITypeObject() -__version__ = "4.2.1" +__version__ = "4.2.2" USER_AGENT_NAME = "PyDatabricksSqlConnector" # These two functions are pyhive legacy From ad01fc28eb94935ef99105aa4d85794b235fd41f Mon Sep 17 00:00:00 2001 From: samikshya-chand_data Date: Wed, 3 Dec 2025 15:24:38 +0000 Subject: [PATCH 04/10] Fix CHANGELOG formatting to match previous style Signed-off-by: samikshya-chand_data --- CHANGELOG.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e5b6e877d..6be2dacaa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,11 +1,11 @@ # Release History # 4.2.2 (2025-12-01) -- Change default use_hybrid_disposition to False by @samikshya-db in 
https://github.com/databricks/databricks-sql-python/pull/714 -- Circuit breaker changes using pybreaker by @nikhilsuri-db in https://github.com/databricks/databricks-sql-python/pull/705 -- perf: Optimize telemetry latency logging to reduce overhead by @samikshya-db in https://github.com/databricks/databricks-sql-python/pull/715 -- basic e2e test for force telemetry verification by @nikhilsuri-db in https://github.com/databricks/databricks-sql-python/pull/708 -- Telemetry is ON by default to track connection stats. (Note : This strictly excludes PII, query text, and results) by @samikshya-db in https://github.com/databricks/databricks-sql-python/pull/717 +- Change default use_hybrid_disposition to False (databricks/databricks-sql-python#714 by @samikshya-db) +- Circuit breaker changes using pybreaker (databricks/databricks-sql-python#705 by @nikhilsuri-db) +- perf: Optimize telemetry latency logging to reduce overhead (databricks/databricks-sql-python#715 by @samikshya-db) +- basic e2e test for force telemetry verification (databricks/databricks-sql-python#708 by @nikhilsuri-db) +- Telemetry is ON by default to track connection stats. 
(Note: This strictly excludes PII, query text, and results) (databricks/databricks-sql-python#717 by @samikshya-db) # 4.2.1 (2025-11-20) - Ignore transactions by default (databricks/databricks-sql-python#711 by @jayantsing-db) From 38230c6d106e167391a7eb84278f75286b19d6f3 Mon Sep 17 00:00:00 2001 From: samikshya-chand_data Date: Wed, 3 Dec 2025 19:11:29 +0000 Subject: [PATCH 05/10] Fix telemetry e2e tests for default-enabled behavior - Update test expectations to reflect telemetry being enabled by default - Add feature flags cache cleanup in teardown to prevent state leakage between tests - This ensures each test runs with fresh feature flag state --- tests/e2e/test_telemetry_e2e.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/tests/e2e/test_telemetry_e2e.py b/tests/e2e/test_telemetry_e2e.py index 917c8e5eb..efc748036 100644 --- a/tests/e2e/test_telemetry_e2e.py +++ b/tests/e2e/test_telemetry_e2e.py @@ -58,6 +58,14 @@ def telemetry_setup_teardown(self): TelemetryClientFactory._stop_flush_thread() TelemetryClientFactory._initialized = False + # Clear feature flags cache to prevent state leakage between tests + from databricks.sql.common.feature_flag import FeatureFlagsContextFactory + with FeatureFlagsContextFactory._lock: + FeatureFlagsContextFactory._context_map.clear() + if FeatureFlagsContextFactory._executor: + FeatureFlagsContextFactory._executor.shutdown(wait=False) + FeatureFlagsContextFactory._executor = None + @pytest.fixture def telemetry_interceptors(self): """Setup reusable telemetry interceptors as a fixture""" @@ -163,7 +171,7 @@ def verify_events(self, captured_events, captured_futures, expected_count): (True, False, 2, "enable_on_force_off"), (False, True, 2, "enable_off_force_on"), (False, False, 0, "both_off"), - (None, None, 0, "default_behavior"), + (None, None, 2, "default_behavior"), ]) def test_telemetry_flags(self, telemetry_interceptors, enable_telemetry, force_enable, expected_count, test_id): From 
6d6e45abe0ab9687c642aa0ab03bbbf3a15f0fe8 Mon Sep 17 00:00:00 2001 From: samikshya-chand_data Date: Wed, 3 Dec 2025 20:06:49 +0000 Subject: [PATCH 06/10] Add wait after connection close for async telemetry submission --- tests/e2e/test_telemetry_e2e.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/e2e/test_telemetry_e2e.py b/tests/e2e/test_telemetry_e2e.py index efc748036..65d2fc927 100644 --- a/tests/e2e/test_telemetry_e2e.py +++ b/tests/e2e/test_telemetry_e2e.py @@ -193,6 +193,8 @@ def test_telemetry_flags(self, telemetry_interceptors, enable_telemetry, cursor.execute("SELECT 1") cursor.fetchone() + # Give time for async telemetry submission after connection closes + time.sleep(0.5) self.verify_events(captured_events, captured_futures, expected_count) # Assert statement execution on latency event (if events exist) From fb8e86d4792e20c5e2dd649435ebb4b5c8954fe8 Mon Sep 17 00:00:00 2001 From: samikshya-chand_data Date: Thu, 4 Dec 2025 05:07:44 +0000 Subject: [PATCH 07/10] Remove debug logging from telemetry tests --- tests/e2e/test_telemetry_e2e.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/e2e/test_telemetry_e2e.py b/tests/e2e/test_telemetry_e2e.py index 65d2fc927..94dbeb344 100644 --- a/tests/e2e/test_telemetry_e2e.py +++ b/tests/e2e/test_telemetry_e2e.py @@ -150,7 +150,7 @@ def verify_events(self, captured_events, captured_futures, expected_count): else: assert len(captured_events) == expected_count, \ f"Expected {expected_count} events, got {len(captured_events)}" - + time.sleep(2) done, _ = wait(captured_futures, timeout=10) assert len(done) == expected_count, \ From e898be67f86d097621577a7567a97bf37684dc03 Mon Sep 17 00:00:00 2001 From: samikshya-chand_data Date: Thu, 4 Dec 2025 06:24:00 +0000 Subject: [PATCH 08/10] Mark telemetry e2e tests as serial - must not run in parallel Root cause: Telemetry tests share host-level client across pytest-xdist workers, causing test isolation issues with patches. 
Tests pass serially but fail with -n auto. Solution: Add @pytest.mark.serial marker. CI needs to run these separately without -n auto. --- pyproject.toml | 5 ++++- tests/e2e/test_telemetry_e2e.py | 3 ++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index c53f4223c..d2739c7d4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -62,7 +62,10 @@ exclude = ['ttypes\.py$', 'TCLIService\.py$'] exclude = '/(\.eggs|\.git|\.hg|\.mypy_cache|\.nox|\.tox|\.venv|\.svn|_build|buck-out|build|dist|thrift_api)/' [tool.pytest.ini_options] -markers = {"reviewed" = "Test case has been reviewed by Databricks"} +markers = [ + "reviewed: Test case has been reviewed by Databricks", + "serial: Tests that must run serially (not parallelized)" +] minversion = "6.0" log_cli = "false" log_cli_level = "INFO" diff --git a/tests/e2e/test_telemetry_e2e.py b/tests/e2e/test_telemetry_e2e.py index 94dbeb344..0a57edd3c 100644 --- a/tests/e2e/test_telemetry_e2e.py +++ b/tests/e2e/test_telemetry_e2e.py @@ -43,8 +43,9 @@ def connection(self, extra_params=()): conn.close() +@pytest.mark.serial class TestTelemetryE2E(TelemetryTestBase): - """E2E tests for telemetry scenarios""" + """E2E tests for telemetry scenarios - must run serially due to shared host-level telemetry client""" @pytest.fixture(autouse=True) def telemetry_setup_teardown(self): From 98aa3820d1fe48e93d7b7fca8c2d12a7fbbc2a93 Mon Sep 17 00:00:00 2001 From: samikshya-chand_data Date: Thu, 4 Dec 2025 07:35:27 +0000 Subject: [PATCH 09/10] Split test execution to run serial tests separately Telemetry e2e tests must run serially due to shared host-level telemetry client across pytest-xdist workers. Running with -n auto causes test isolation issues where futures aren't properly captured. 
Changes: - Run parallel tests with -m 'not serial' -n auto - Run serial tests with -m 'serial' without parallelization - Use --cov-append for serial tests to combine coverage - Mark telemetry e2e tests with @pytest.mark.serial - Update test expectations for default telemetry behavior - Add feature flags cache cleanup in test teardown --- .github/workflows/code-coverage.yml | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/.github/workflows/code-coverage.yml b/.github/workflows/code-coverage.yml index d9954d051..3c76be728 100644 --- a/.github/workflows/code-coverage.yml +++ b/.github/workflows/code-coverage.yml @@ -61,17 +61,32 @@ jobs: - name: Install library run: poetry install --no-interaction --all-extras #---------------------------------------------- - # run all tests with coverage + # run parallel tests with coverage #---------------------------------------------- - - name: Run all tests with coverage + - name: Run parallel tests with coverage continue-on-error: false run: | poetry run pytest tests/unit tests/e2e \ + -m "not serial" \ -n auto \ --cov=src \ --cov-report=xml \ --cov-report=term \ -v + + #---------------------------------------------- + # run serial tests with coverage + #---------------------------------------------- + - name: Run serial tests with coverage + continue-on-error: false + run: | + poetry run pytest tests/e2e \ + -m "serial" \ + --cov=src \ + --cov-append \ + --cov-report=xml \ + --cov-report=term \ + -v #---------------------------------------------- # check for coverage override From 5921b084b8ee414e0b633c8cedac931518f3826f Mon Sep 17 00:00:00 2001 From: samikshya-chand_data Date: Thu, 4 Dec 2025 08:52:38 +0000 Subject: [PATCH 10/10] Mark telemetry e2e tests as serial - must not run in parallel The concurrent telemetry e2e test globally patches telemetry methods to capture events. 
When run in parallel with other tests via pytest-xdist, it captures telemetry events from other concurrent tests, causing assertion failures (expected 60 events, got 88). All telemetry e2e tests must run serially to avoid cross-test interference with the shared host-level telemetry client. --- tests/e2e/test_concurrent_telemetry.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/e2e/test_concurrent_telemetry.py b/tests/e2e/test_concurrent_telemetry.py index 546a2b8b2..bed348c2c 100644 --- a/tests/e2e/test_concurrent_telemetry.py +++ b/tests/e2e/test_concurrent_telemetry.py @@ -26,6 +26,7 @@ def run_in_threads(target, num_threads, pass_index=False): t.join() +@pytest.mark.serial class TestE2ETelemetry(PySQLPytestTestCase): @pytest.fixture(autouse=True) def telemetry_setup_teardown(self):