-
Notifications
You must be signed in to change notification settings - Fork 136
[PECOBLR-1928] Add AI coding agent detection to User-Agent header #739
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,52 @@ | ||
| """ | ||
| Detects whether the Python SQL connector is being invoked by an AI coding agent | ||
| by checking for well-known environment variables that agents set in their spawned | ||
| shell processes. | ||
|
|
||
| Detection only succeeds when exactly one agent environment variable is present, | ||
| to avoid ambiguous attribution when multiple agent environments overlap. | ||
|
|
||
| Adding a new agent requires only a new entry in KNOWN_AGENTS. | ||
|
|
||
| References for each environment variable: | ||
| - ANTIGRAVITY_AGENT: Closed source. Google Antigravity sets this variable. | ||
| - CLAUDECODE: https://github.com/anthropics/claude-code (sets CLAUDECODE=1) | ||
| - CLINE_ACTIVE: https://github.com/cline/cline (shipped in v3.24.0) | ||
| - CODEX_CI: https://github.com/openai/codex (part of UNIFIED_EXEC_ENV array in codex-rs) | ||
| - CURSOR_AGENT: Closed source. Referenced in a gist by johnlindquist. | ||
| - GEMINI_CLI: https://google-gemini.github.io/gemini-cli/docs/tools/shell.html (sets GEMINI_CLI=1) | ||
| - OPENCODE: https://github.com/opencode-ai/opencode (sets OPENCODE=1) | ||
| """ | ||
|
|
||
| import os | ||
|
|
||
# (environment variable, product name) pairs consulted by detect().
# Supporting another agent means adding one more pair here.
KNOWN_AGENTS = [
    ("ANTIGRAVITY_AGENT", "antigravity"),
    ("CLAUDECODE", "claude-code"),
    ("CLINE_ACTIVE", "cline"),
    ("CODEX_CI", "codex"),
    ("CURSOR_AGENT", "cursor"),
    ("GEMINI_CLI", "gemini-cli"),
    ("OPENCODE", "opencode"),
]


def detect(env=None):
    """Return the product name of the AI coding agent driving this process.

    Every variable listed in KNOWN_AGENTS is looked up in ``env``; a variable
    counts as present when its value is truthy, so an empty string does not
    count but any non-empty string (including "0" or "false") does.

    Args:
        env: Optional dict-like object used for the lookups. When omitted,
            os.environ is used. The parameter exists so tests can inject
            their own mapping.

    Returns:
        The product string when exactly one known agent variable is present;
        otherwise (none present, or more than one) an empty string.
    """
    environment = os.environ if env is None else env

    hits = 0
    matched_product = ""
    for variable, product in KNOWN_AGENTS:
        if environment.get(variable):
            hits += 1
            matched_product = product

    # Attribution is only reported when it is unambiguous.
    return matched_product if hits == 1 else ""
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -13,6 +13,7 @@ | |
| from databricks.sql.backend.databricks_client import DatabricksClient | ||
| from databricks.sql.backend.types import SessionId, BackendType | ||
| from databricks.sql.common.unified_http_client import UnifiedHttpClient | ||
| from databricks.sql.common.agent import detect as detect_agent | ||
|
|
||
| logger = logging.getLogger(__name__) | ||
|
|
||
|
|
@@ -64,6 +65,10 @@ def __init__( | |
| else: | ||
| self.useragent_header = "{}/{}".format(USER_AGENT_NAME, __version__) | ||
|
|
||
| agent_product = detect_agent() | ||
| if agent_product: | ||
| self.useragent_header += " agent/{}".format(agent_product) | ||
|
Comment on lines
+68
to
+70
|
||
|
|
||
| base_headers = [("User-Agent", self.useragent_header)] | ||
| all_headers = (http_headers or []) + base_headers | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -914,12 +914,18 @@ def build_client_context(server_hostname: str, version: str, **kwargs): | |
| ) | ||
|
|
||
| # Build user agent | ||
| from databricks.sql.common.agent import detect as detect_agent | ||
|
|
||
| user_agent_entry = kwargs.get("user_agent_entry", "") | ||
| if user_agent_entry: | ||
| user_agent = f"PyDatabricksSqlConnector/{version} ({user_agent_entry})" | ||
| else: | ||
| user_agent = f"PyDatabricksSqlConnector/{version}" | ||
|
|
||
| agent_product = detect_agent() | ||
| if agent_product: | ||
| user_agent += f" agent/{agent_product}" | ||
|
Comment on lines
+925
to
+927
|
||
|
|
||
| # Explicitly construct ClientContext with proper types | ||
| return ClientContext( | ||
| hostname=server_hostname, | ||
|
|
||
| Original file line number | Diff line number | Diff line change | ||||||||
|---|---|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,51 @@ | ||||||||||
| import pytest | ||||||||||
| from databricks.sql.common.agent import detect, KNOWN_AGENTS | ||||||||||
|
|
||||||||||
|
|
||||||||||
class TestAgentDetection:
    """Unit tests for ``detect``, exercised through its injectable ``env`` mapping."""

    def test_detects_single_agent_claude_code(self):
        assert detect({"CLAUDECODE": "1"}) == "claude-code"

    def test_detects_single_agent_cursor(self):
        assert detect({"CURSOR_AGENT": "1"}) == "cursor"

    def test_detects_single_agent_gemini_cli(self):
        assert detect({"GEMINI_CLI": "1"}) == "gemini-cli"

    def test_detects_single_agent_cline(self):
        assert detect({"CLINE_ACTIVE": "1"}) == "cline"

    def test_detects_single_agent_codex(self):
        assert detect({"CODEX_CI": "1"}) == "codex"

    def test_detects_single_agent_opencode(self):
        assert detect({"OPENCODE": "1"}) == "opencode"

    def test_detects_single_agent_antigravity(self):
        assert detect({"ANTIGRAVITY_AGENT": "1"}) == "antigravity"

    def test_detects_every_known_agent_in_isolation(self):
        # Driving the check from KNOWN_AGENTS keeps coverage in step with the
        # table: an agent added there is exercised without a new test method.
        for var, product in KNOWN_AGENTS:
            assert detect({var: "1"}) == product, var

    def test_returns_empty_when_no_agent_detected(self):
        assert detect({}) == ""

    def test_returns_empty_when_multiple_agents_detected(self):
        assert detect({"CLAUDECODE": "1", "CURSOR_AGENT": "1"}) == ""

    def test_ignores_empty_env_var_values(self):
        assert detect({"CLAUDECODE": ""}) == ""

    def test_treats_any_non_empty_value_as_detection(self):
        # detect() only checks that the value is non-empty, so "0" and
        # "false" still count as the agent being present.
        assert detect({"CLAUDECODE": "0"}) == "claude-code"
        assert detect({"CLAUDECODE": "false"}) == "claude-code"
|
||||||||||
| assert detect({"CLAUDECODE": ""}) == "" | |
| assert detect({"CLAUDECODE": ""}) == "" | |
| assert detect({"CLAUDECODE": "0"}) == "" | |
| assert detect({"CLAUDECODE": "false"}) == "" |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Same reasoning as above — the CLI reference implementation does not treat "0" or "false" as non-detection, so we intentionally match that behavior. The agents always set =1.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The detection logic treats any truthy value as a positive detection. This means environment variables set to "0" or "false" will be incorrectly detected as active agents. Consider checking if the value is non-empty and not a false-like value (e.g., "0", "false", "False", "FALSE"). For example:
detected = [product for var, product in KNOWN_AGENTS if env.get(var) and env.get(var).lower() not in ("0", "false")]
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is intentional — we are mirroring the behavior of the reference implementation in databricks/cli libs/agent/agent.go, which only checks for non-empty (!= ""). The agents themselves always set their env var to "1" (e.g. CLAUDECODE=1), so "0" or "false" would indicate a misconfiguration, not a valid state. Keeping the logic identical across all drivers and the CLI avoids subtle behavioral divergence.