Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 52 additions & 0 deletions src/databricks/sql/common/agent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
"""
Detects whether the Python SQL connector is being invoked by an AI coding agent
by checking for well-known environment variables that agents set in their spawned
shell processes.

Detection only succeeds when exactly one agent environment variable is present,
to avoid ambiguous attribution when multiple agent environments overlap.

Adding a new agent requires only a new entry in KNOWN_AGENTS.

References for each environment variable:
- ANTIGRAVITY_AGENT: Closed source. Google Antigravity sets this variable.
- CLAUDECODE: https://github.com/anthropics/claude-code (sets CLAUDECODE=1)
- CLINE_ACTIVE: https://github.com/cline/cline (shipped in v3.24.0)
- CODEX_CI: https://github.com/openai/codex (part of UNIFIED_EXEC_ENV array in codex-rs)
- CURSOR_AGENT: Closed source. Referenced in a gist by johnlindquist.
- GEMINI_CLI: https://google-gemini.github.io/gemini-cli/docs/tools/shell.html (sets GEMINI_CLI=1)
- OPENCODE: https://github.com/opencode-ai/opencode (sets OPENCODE=1)
"""

import os

# Lookup table of (environment variable name, agent product string) pairs.
# detect() scans this table, so supporting a new agent only needs one more row.
KNOWN_AGENTS = [
    ("ANTIGRAVITY_AGENT", "antigravity"),
    ("CLAUDECODE", "claude-code"),
    ("CLINE_ACTIVE", "cline"),
    ("CODEX_CI", "codex"),
    ("CURSOR_AGENT", "cursor"),
    ("GEMINI_CLI", "gemini-cli"),
    ("OPENCODE", "opencode"),
]


def detect(env=None):
"""Detect which AI coding agent (if any) is driving the current process.

Args:
env: Optional dict-like object for environment variable lookup.
Defaults to os.environ. Exists for testability.

Returns:
The agent product string if exactly one agent is detected,
or an empty string otherwise.
"""
if env is None:
env = os.environ

detected = [product for var, product in KNOWN_AGENTS if env.get(var)]
Copy link

Copilot AI Feb 24, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The detection logic treats any truthy value as a positive detection. This means environment variables set to "0" or "false" will be incorrectly detected as active agents. Consider checking if the value is non-empty and not a false-like value (e.g., "0", "false", "False", "FALSE"). For example: detected = [product for var, product in KNOWN_AGENTS if env.get(var) and env.get(var).lower() not in ("0", "false")]

Suggested change
detected = [product for var, product in KNOWN_AGENTS if env.get(var)]
detected = [
product
for var, product in KNOWN_AGENTS
if (val := env.get(var)) and str(val).lower() not in ("0", "false")
]

Copilot uses AI. Check for mistakes.
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is intentional — we are mirroring the behavior of the reference implementation in databricks/cli libs/agent/agent.go, which only checks for non-empty (!= ""). The agents themselves always set their env var to "1" (e.g. CLAUDECODE=1), so "0" or "false" would indicate a misconfiguration, not a valid state. Keeping the logic identical across all drivers and the CLI avoids subtle behavioral divergence.


if len(detected) == 1:
return detected[0]
return ""
5 changes: 5 additions & 0 deletions src/databricks/sql/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from databricks.sql.backend.databricks_client import DatabricksClient
from databricks.sql.backend.types import SessionId, BackendType
from databricks.sql.common.unified_http_client import UnifiedHttpClient
from databricks.sql.common.agent import detect as detect_agent

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -64,6 +65,10 @@ def __init__(
else:
self.useragent_header = "{}/{}".format(USER_AGENT_NAME, __version__)

agent_product = detect_agent()
if agent_product:
self.useragent_header += " agent/{}".format(agent_product)
Comment on lines +68 to +70
Copy link

Copilot AI Feb 24, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Consider adding an integration test in test_session.py that verifies the User-Agent header correctly includes the agent suffix when an agent environment variable is set. This would ensure the agent detection integration works end-to-end. For example, using monkeypatch to set an agent environment variable and verify the resulting User-Agent header contains "agent/claude-code".

Copilot uses AI. Check for mistakes.
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The detection logic is fully covered by unit tests in test_agent_detection.py. The integration in session.py is a 3-line append that is straightforward. Adding an integration test here would require mocking the full Session constructor which adds complexity without meaningful coverage gain.


base_headers = [("User-Agent", self.useragent_header)]
all_headers = (http_headers or []) + base_headers

Expand Down
6 changes: 6 additions & 0 deletions src/databricks/sql/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -914,12 +914,18 @@ def build_client_context(server_hostname: str, version: str, **kwargs):
)

# Build user agent
from databricks.sql.common.agent import detect as detect_agent

user_agent_entry = kwargs.get("user_agent_entry", "")
if user_agent_entry:
user_agent = f"PyDatabricksSqlConnector/{version} ({user_agent_entry})"
else:
user_agent = f"PyDatabricksSqlConnector/{version}"

agent_product = detect_agent()
if agent_product:
user_agent += f" agent/{agent_product}"
Comment on lines +925 to +927
Copy link

Copilot AI Feb 24, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Consider adding a test for build_client_context that verifies the user_agent field correctly includes the agent suffix when an agent environment variable is set. This would ensure the agent detection integration works correctly in the SEA path.

Copilot uses AI. Check for mistakes.
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same as above — the detection is unit-tested and the integration is a trivial string append. The SEA path goes through build_client_context which uses the same detect() function already covered by tests.


# Explicitly construct ClientContext with proper types
return ClientContext(
hostname=server_hostname,
Expand Down
51 changes: 51 additions & 0 deletions tests/unit/test_agent_detection.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import pytest
from databricks.sql.common.agent import detect, KNOWN_AGENTS


class TestAgentDetection:
def test_detects_single_agent_claude_code(self):
    """CLAUDECODE set on its own resolves to the claude-code product."""
    env = {"CLAUDECODE": "1"}
    assert detect(env) == "claude-code"

def test_detects_single_agent_cursor(self):
    """CURSOR_AGENT set on its own resolves to the cursor product."""
    env = {"CURSOR_AGENT": "1"}
    assert detect(env) == "cursor"

def test_detects_single_agent_gemini_cli(self):
    """GEMINI_CLI set on its own resolves to the gemini-cli product."""
    env = {"GEMINI_CLI": "1"}
    assert detect(env) == "gemini-cli"

def test_detects_single_agent_cline(self):
    """CLINE_ACTIVE set on its own resolves to the cline product."""
    env = {"CLINE_ACTIVE": "1"}
    assert detect(env) == "cline"

def test_detects_single_agent_codex(self):
    """CODEX_CI set on its own resolves to the codex product."""
    env = {"CODEX_CI": "1"}
    assert detect(env) == "codex"

def test_detects_single_agent_opencode(self):
    """OPENCODE set on its own resolves to the opencode product."""
    env = {"OPENCODE": "1"}
    assert detect(env) == "opencode"

def test_detects_single_agent_antigravity(self):
    """ANTIGRAVITY_AGENT set on its own resolves to the antigravity product."""
    env = {"ANTIGRAVITY_AGENT": "1"}
    assert detect(env) == "antigravity"

def test_returns_empty_when_no_agent_detected(self):
    """An environment with no agent variables yields the empty string."""
    empty_env = {}
    assert detect(empty_env) == ""

def test_returns_empty_when_multiple_agents_detected(self):
    """Two agent variables at once are ambiguous, so nothing is reported."""
    env = {"CLAUDECODE": "1", "CURSOR_AGENT": "1"}
    assert detect(env) == ""

def test_ignores_empty_env_var_values(self):
    """An agent variable set to the empty string does not count as present."""
    env = {"CLAUDECODE": ""}
    assert detect(env) == ""
Copy link

Copilot AI Feb 24, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The test should also verify that environment variables set to "0" or "false" are ignored, not just empty strings. Currently, the detection logic treats any truthy string value (including "0" or "false") as a positive detection, which could lead to false positives if an agent sets these variables to "0" to indicate it's disabled.

Suggested change
assert detect({"CLAUDECODE": ""}) == ""
assert detect({"CLAUDECODE": ""}) == ""
assert detect({"CLAUDECODE": "0"}) == ""
assert detect({"CLAUDECODE": "false"}) == ""

Copilot uses AI. Check for mistakes.
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same reasoning as above — the CLI reference implementation does not treat "0" or "false" as non-detection, so we intentionally match that behavior. The agents always set =1.


def test_all_known_agents_are_covered(self):
    """Each KNOWN_AGENTS row is detected when only its variable is set."""
    for env_var, product in KNOWN_AGENTS:
        result = detect({env_var: "1"})
        failure_message = (
            f"Agent with env var {env_var} should be detected as {product}"
        )
        assert result == product, failure_message

def test_defaults_to_os_environ(self, monkeypatch):
    """Calling detect() with no argument reads the real process environment.

    Args:
        monkeypatch: pytest fixture used to remove agent variables for the
            duration of this test only.
    """
    # Derive the variables to clear from KNOWN_AGENTS rather than a
    # hand-copied list, so a newly added agent is cleared automatically and
    # the test stays isolated even when run from inside that agent's shell.
    for env_var, _product in KNOWN_AGENTS:
        monkeypatch.delenv(env_var, raising=False)

    # With all agent vars cleared, detect() should return empty
    assert detect() == ""
Loading