From 4bba05a0ceb144375af0a86335b50b7e8fdbaca5 Mon Sep 17 00:00:00 2001
From: kartik <kartikdeshpande2810@gmail.com>
Date: Sun, 8 Mar 2026 13:41:44 +0530
Subject: [PATCH] add graceful prompt-based fallback for local LLMs without
 tool calling support

---
 SETUP.md                                   | 26 +++++--
 backend/app/services/llms_service.py       | 88 +++++++++++++++++++++-
 backend/app/utils/tool_selection_prompt.py | 16 ++++
 3 files changed, 120 insertions(+), 10 deletions(-)
 create mode 100644 backend/app/utils/tool_selection_prompt.py

diff --git a/SETUP.md b/SETUP.md
index 7f77253..562783b 100644
--- a/SETUP.md
+++ b/SETUP.md
@@ -132,18 +132,29 @@ You will need **two models** to run the app properly:
 1. A **chat LLM** (for conversations and reasoning)
 2. An **embedding model** (for knowledge base search and retrieval)
 
-Examples:
+**Recommended (supports tool calling):**
 
 ```bash
-# Pull a chat model (choose one depending on your system resources)
-ollama pull deepseek-r1:8b
-# or smaller / lighter
-ollama pull deepseek-r1:1.5b
+# Pull a chat model that supports tool/function calling
+ollama pull llama3.1:8b
 
 # Pull an embedding model
 ollama pull nomic-embed-text
 ```
 
+**Alternative chat models (with limitations):**
+
+```bash
+# These models do NOT support tool calling — the backend will
+# automatically fall back to a prompt-based approach, which may
+# be less accurate for biomodel searches and knowledge base queries.
+ollama pull deepseek-r1:8b
+# smaller / lighter
+ollama pull deepseek-r1:1.5b
+```
+
+> **Note:** The backend uses [tool/function calling](https://platform.openai.com/docs/guides/function-calling) to interact with the VCell API and knowledge base. Models that support this feature (like `llama3.1`) will provide the best experience. Models without tool calling support (like `deepseek-r1`) will still work, but with a prompt-based fallback.
+
 ### 4.3 Run Ollama Service
 
 Start the Ollama background service:
@@ -157,7 +168,7 @@ ollama serve
 Open a new terminal and run:
 
 ```bash
-ollama run deepseek-r1:1.5b "Hello, how are you?"
+ollama run llama3.1:8b "Hello, how are you?"
 ```
 
 If this works, Ollama is running correctly.
@@ -177,7 +188,7 @@ AZURE_ENDPOINT=http://localhost:11434/v1
 ...
 
 # Models: one LLM + one embedding model
-AZURE_DEPLOYMENT_NAME=deepseek-r1:1.5b
+AZURE_DEPLOYMENT_NAME=llama3.1:8b
 AZURE_EMBEDDING_DEPLOYMENT_NAME=nomic-embed-text
 ```
 
@@ -185,6 +196,7 @@ AZURE_EMBEDDING_DEPLOYMENT_NAME=nomic-embed-text
 
 * When `PROVIDER=azure`, the backend uses Azure OpenAI (default).
 * When `PROVIDER=local`, the backend connects to the **Ollama server** and uses the models you specify in `.env`.
+* If the selected model does not support tool calling, the backend **automatically falls back** to a prompt-based approach and logs a warning.
 
 ---
 
diff --git a/backend/app/services/llms_service.py b/backend/app/services/llms_service.py
index 5d5e91d..a305fcf 100644
--- a/backend/app/services/llms_service.py
+++ b/backend/app/services/llms_service.py
@@ -10,6 +10,7 @@
 )
 
 from app.utils.system_prompt import SYSTEM_PROMPT
+from app.utils.tool_selection_prompt import TOOL_SELECTION_PROMPT
 
 from app.schemas.vcelldb_schema import BiomodelRequestParams
 from app.core.singleton import get_openai_client
@@ -58,6 +59,28 @@ async def get_response_with_tools(conversation_history: list[dict]):
 
     logger.info(f"User prompt: {user_prompt}")
 
+    # Try native tool calling first; fall back to prompt-based approach if not supported
+    try:
+        final_response, bmkeys = await _get_response_with_native_tools(messages)
+    except Exception as e:
+        error_str = str(e).lower()
+        if "does not support tools" in error_str or "tool" in error_str and "400" in error_str:
+            logger.warning(
+                f"Model '{settings.AZURE_DEPLOYMENT_NAME}' does not support native tool calling. "
+                "Falling back to prompt-based tool selection. For best results, use a model that "
+                "supports tool calling (e.g., llama3.1:8b)."
+            )
+            final_response, bmkeys = await _get_response_with_prompt_tools(messages, user_prompt)
+        else:
+            raise
+
+    logger.info(f"LLM Response: {final_response}")
+
+    return final_response, bmkeys
+
+
+async def _get_response_with_native_tools(messages: list[dict]):
+    """Use native OpenAI/Azure tool calling API."""
     response = client.chat.completions.create(
         name="GET_RESPONSE_WITH_TOOLS::RETRIEVE_TOOLS",
         model=settings.AZURE_DEPLOYMENT_NAME,
@@ -107,11 +130,70 @@ async def get_response_with_tools(conversation_history: list[dict]):
         },
     )
 
-    final_response = completion.choices[0].message.content
+    return completion.choices[0].message.content, bmkeys
 
-    logger.info(f"LLM Response: {final_response}")
 
-    return final_response, bmkeys
+async def _get_response_with_prompt_tools(messages: list[dict], user_prompt: str):
+    """Answer via prompt-based tool selection when native tool calling is unavailable.
+
+    Asks the model for a JSON tool decision, runs the chosen tool (if any), then asks
+    for the final answer. Returns `(final_response, bmkeys)`; `messages` is not mutated."""
+    bmkeys = []
+    tool_selection_messages = [
+        {"role": "system", "content": TOOL_SELECTION_PROMPT},
+        {"role": "user", "content": user_prompt},
+    ]
+
+    tool_response = client.chat.completions.create(
+        name="GET_RESPONSE_WITH_TOOLS::PROMPT_TOOL_SELECTION",
+        model=settings.AZURE_DEPLOYMENT_NAME,
+        messages=tool_selection_messages,
+    )
+
+    # The API allows `content` to be None; treat that as an empty decision, not a crash.
+    tool_decision_raw = (tool_response.choices[0].message.content or "").strip()
+    logger.info(f"Tool decision (raw): {tool_decision_raw}")
+
+    tool_result = None
+    tool_name = None
+    try:
+        # Extract the JSON object, tolerating extra text around it. Reasoning models may
+        # emit a <think>...</think> preamble, so only text after the last </think> is scanned.
+        candidate = tool_decision_raw.rpartition("</think>")[2]
+        json_start = candidate.find("{")
+        json_end = candidate.rfind("}") + 1
+        if json_start != -1 and json_end > json_start:
+            tool_decision = json.loads(candidate[json_start:json_end])
+            tool_name = tool_decision.get("tool", "none")
+
+            if tool_name and tool_name != "none":
+                # `or {}` also covers an explicit `"args": null` in the model's JSON.
+                args = tool_decision.get("args") or {}
+                logger.info(f"Prompt-based Tool Call: {tool_name} with args: {args}")
+                tool_result = await execute_tool(tool_name, args)
+                logger.info(f"Tool Result: {str(tool_result)[:500]}")
+                if isinstance(tool_result, dict):
+                    bmkeys = tool_result.get("unique_model_keys (bmkey)", [])
+    except (json.JSONDecodeError, KeyError, TypeError) as e:
+        logger.warning(f"Failed to parse tool decision: {e}. Proceeding without tools.")
+
+    # Step 2: final answer. Work on a copy so the caller's message list is not mutated.
+    final_messages = list(messages)
+    if tool_result is not None:
+        final_messages.append(
+            {
+                "role": "user",
+                "content": f"[Tool '{tool_name}' returned the following data]\n{str(tool_result)}\n\n[Now answer the original question using this data]",
+            }
+        )
+
+    completion = client.chat.completions.create(
+        name="GET_RESPONSE_WITH_TOOLS::PROMPT_FINAL_RESPONSE",
+        model=settings.AZURE_DEPLOYMENT_NAME,
+        messages=final_messages,
+    )
+
+    return completion.choices[0].message.content, bmkeys
 
 
 async def analyse_vcml(biomodel_id: str):
diff --git a/backend/app/utils/tool_selection_prompt.py b/backend/app/utils/tool_selection_prompt.py
new file mode 100644
index 0000000..d87005d
--- /dev/null
+++ b/backend/app/utils/tool_selection_prompt.py
@@ -0,0 +1,16 @@
+TOOL_SELECTION_PROMPT = """You have access to the following tools. Based on the user's message, decide if any tool should be called.
+
+Available tools:
+1. fetch_biomodels - Search/retrieve biomodels from VCell database. Args: bmId (str), bmName (str), category (str: all|public|shared|tutorial|educational), owner (str), startRow (int, default 1), maxRows (int, default 1000), orderBy (str: date_desc|date_asc|name_desc|name_asc)
+2. fetch_simulation_details - Get details of a specific simulation. Args: bmId (str), simId (str)
+3. get_vcml_file - Get VCML file content for a biomodel. Args: biomodel_id (str)
+4. search_vcell_knowledge_base - Search VCell knowledge base for general info, concepts, tutorials. Args: query (str), limit (int, default 5)
+5. fetch_publications - Get list of publications from VCell. No args required.
+
+If the user's question requires a tool, respond with ONLY a JSON object like:
+{"tool": "tool_name", "args": {"arg1": "value1"}}
+
+If the user's question is general conversation or doesn't need a tool, respond with:
+{"tool": "none"}
+
+IMPORTANT: Respond with ONLY the JSON object, nothing else."""  # System prompt for the prompt-based tool-selection fallback in app.services.llms_service
\ No newline at end of file