From 5e42c330bccf37ba185462d963e74dfd387c509f Mon Sep 17 00:00:00 2001 From: Vikrant Puppala Date: Tue, 24 Feb 2026 07:19:36 +0000 Subject: [PATCH] Add AI coding agent detection to User-Agent header Detect when the Python SQL connector is invoked by an AI coding agent (e.g. Claude Code, Cursor, Gemini CLI) by checking well-known environment variables, and append `agent/` to the User-Agent string. This enables Databricks to understand how much driver usage originates from AI coding agents. Detection only succeeds when exactly one agent is detected to avoid ambiguous attribution. Mirrors the approach in databricks/cli#4287. Co-Authored-By: Claude Opus 4.6 Signed-off-by: Vikrant Puppala --- src/databricks/sql/common/agent.py | 52 ++++++++++++++++++++++++++++++ src/databricks/sql/session.py | 5 +++ src/databricks/sql/utils.py | 6 ++++ tests/unit/test_agent_detection.py | 51 +++++++++++++++++++++++++++++ 4 files changed, 114 insertions(+) create mode 100644 src/databricks/sql/common/agent.py create mode 100644 tests/unit/test_agent_detection.py diff --git a/src/databricks/sql/common/agent.py b/src/databricks/sql/common/agent.py new file mode 100644 index 000000000..79d1b2b7a --- /dev/null +++ b/src/databricks/sql/common/agent.py @@ -0,0 +1,52 @@ +""" +Detects whether the Python SQL connector is being invoked by an AI coding agent +by checking for well-known environment variables that agents set in their spawned +shell processes. + +Detection only succeeds when exactly one agent environment variable is present, +to avoid ambiguous attribution when multiple agent environments overlap. + +Adding a new agent requires only a new entry in KNOWN_AGENTS. + +References for each environment variable: + - ANTIGRAVITY_AGENT: Closed source. Google Antigravity sets this variable. + - CLAUDECODE: https://github.com/anthropics/claude-code (sets CLAUDECODE=1) + - CLINE_ACTIVE: https://github.com/cline/cline (shipped in v3.24.0) + - CODEX_CI: https://github.com/openai/codex (part of UNIFIED_EXEC_ENV array in codex-rs) + - CURSOR_AGENT: Closed source. Referenced in a gist by johnlindquist. + - GEMINI_CLI: https://google-gemini.github.io/gemini-cli/docs/tools/shell.html (sets GEMINI_CLI=1) + - OPENCODE: https://github.com/opencode-ai/opencode (sets OPENCODE=1) +""" + +import os + +KNOWN_AGENTS = [ + ("ANTIGRAVITY_AGENT", "antigravity"), + ("CLAUDECODE", "claude-code"), + ("CLINE_ACTIVE", "cline"), + ("CODEX_CI", "codex"), + ("CURSOR_AGENT", "cursor"), + ("GEMINI_CLI", "gemini-cli"), + ("OPENCODE", "opencode"), +] + + +def detect(env=None): + """Detect which AI coding agent (if any) is driving the current process. + + Args: + env: Optional dict-like object for environment variable lookup. + Defaults to os.environ. Exists for testability. + + Returns: + The agent product string if exactly one agent is detected, + or an empty string otherwise. + """ + if env is None: + env = os.environ + + detected = [product for var, product in KNOWN_AGENTS if env.get(var)] + + if len(detected) == 1: + return detected[0] + return "" diff --git a/src/databricks/sql/session.py b/src/databricks/sql/session.py index 0f723d144..1588d9f79 100644 --- a/src/databricks/sql/session.py +++ b/src/databricks/sql/session.py @@ -13,6 +13,7 @@ from databricks.sql.backend.databricks_client import DatabricksClient from databricks.sql.backend.types import SessionId, BackendType from databricks.sql.common.unified_http_client import UnifiedHttpClient +from databricks.sql.common.agent import detect as detect_agent logger = logging.getLogger(__name__) @@ -64,6 +65,10 @@ def __init__( else: self.useragent_header = "{}/{}".format(USER_AGENT_NAME, __version__) + agent_product = detect_agent() + if agent_product: + self.useragent_header += " agent/{}".format(agent_product) + base_headers = [("User-Agent", self.useragent_header)] all_headers = (http_headers or []) + base_headers diff --git a/src/databricks/sql/utils.py b/src/databricks/sql/utils.py index 043183ac2..b0aea8eb7 100644 --- a/src/databricks/sql/utils.py +++ b/src/databricks/sql/utils.py @@ -914,12 +914,18 @@ def build_client_context(server_hostname: str, version: str, **kwargs): ) # Build user agent + from databricks.sql.common.agent import detect as detect_agent + user_agent_entry = kwargs.get("user_agent_entry", "") if user_agent_entry: user_agent = f"PyDatabricksSqlConnector/{version} ({user_agent_entry})" else: user_agent = f"PyDatabricksSqlConnector/{version}" + agent_product = detect_agent() + if agent_product: + user_agent += f" agent/{agent_product}" + # Explicitly construct ClientContext with proper types return ClientContext( hostname=server_hostname, diff --git a/tests/unit/test_agent_detection.py b/tests/unit/test_agent_detection.py new file mode 100644 index 000000000..0be404a1d --- /dev/null +++ b/tests/unit/test_agent_detection.py @@ -0,0 +1,51 @@ +import pytest +from databricks.sql.common.agent import detect, KNOWN_AGENTS + + +class TestAgentDetection: + def test_detects_single_agent_claude_code(self): + assert detect({"CLAUDECODE": "1"}) == "claude-code" + + def test_detects_single_agent_cursor(self): + assert detect({"CURSOR_AGENT": "1"}) == "cursor" + + def test_detects_single_agent_gemini_cli(self): + assert detect({"GEMINI_CLI": "1"}) == "gemini-cli" + + def test_detects_single_agent_cline(self): + assert detect({"CLINE_ACTIVE": "1"}) == "cline" + + def test_detects_single_agent_codex(self): + assert detect({"CODEX_CI": "1"}) == "codex" + + def test_detects_single_agent_opencode(self): + assert detect({"OPENCODE": "1"}) == "opencode" + + def test_detects_single_agent_antigravity(self): + assert detect({"ANTIGRAVITY_AGENT": "1"}) == "antigravity" + + def test_returns_empty_when_no_agent_detected(self): + assert detect({}) == "" + + def test_returns_empty_when_multiple_agents_detected(self): + assert detect({"CLAUDECODE": "1", "CURSOR_AGENT": "1"}) == "" + + def test_ignores_empty_env_var_values(self): + assert detect({"CLAUDECODE": ""}) == "" + + def test_all_known_agents_are_covered(self): + for env_var, product in KNOWN_AGENTS: + assert detect({env_var: "1"}) == product, ( + f"Agent with env var {env_var} should be detected as {product}" + ) + + def test_defaults_to_os_environ(self, monkeypatch): + monkeypatch.delenv("CLAUDECODE", raising=False) + monkeypatch.delenv("CURSOR_AGENT", raising=False) + monkeypatch.delenv("GEMINI_CLI", raising=False) + monkeypatch.delenv("CLINE_ACTIVE", raising=False) + monkeypatch.delenv("CODEX_CI", raising=False) + monkeypatch.delenv("OPENCODE", raising=False) + monkeypatch.delenv("ANTIGRAVITY_AGENT", raising=False) + # With all agent vars cleared, detect() should return empty + assert detect() == ""