From 4bba05a0ceb144375af0a86335b50b7e8fdbaca5 Mon Sep 17 00:00:00 2001 From: kartik Date: Sun, 8 Mar 2026 13:41:44 +0530 Subject: [PATCH] add graceful prompt-based fallback for local LLMs without tool calling support --- SETUP.md | 26 +++++-- backend/app/services/llms_service.py | 88 +++++++++++++++++++++- backend/app/utils/tool_selection_prompt.py | 16 ++++ 3 files changed, 120 insertions(+), 10 deletions(-) create mode 100644 backend/app/utils/tool_selection_prompt.py diff --git a/SETUP.md b/SETUP.md index 7f77253..562783b 100644 --- a/SETUP.md +++ b/SETUP.md @@ -132,18 +132,29 @@ You will need **two models** to run the app properly: 1. A **chat LLM** (for conversations and reasoning) 2. An **embedding model** (for knowledge base search and retrieval) -Examples: +**Recommended (supports tool calling):** ```bash -# Pull a chat model (choose one depending on your system resources) -ollama pull deepseek-r1:8b -# or smaller / lighter -ollama pull deepseek-r1:1.5b +# Pull a chat model that supports tool/function calling +ollama pull llama3.1:8b # Pull an embedding model ollama pull nomic-embed-text ``` +**Alternative chat models (with limitations):** + +```bash +# These models do NOT support tool calling — the backend will +# automatically fall back to a prompt-based approach, which may +# be less accurate for biomodel searches and knowledge base queries. +ollama pull deepseek-r1:8b +# smaller / lighter +ollama pull deepseek-r1:1.5b +``` + +> **Note:** The backend uses [tool/function calling](https://platform.openai.com/docs/guides/function-calling) to interact with the VCell API and knowledge base. Models that support this feature (like `llama3.1`) will provide the best experience. Models without tool calling support (like `deepseek-r1`) will still work, but with a prompt-based fallback. + ### 4.3 Run Ollama Service Start the Ollama background service: @@ -157,7 +168,7 @@ ollama serve Open a new terminal and run: ```bash -ollama run deepseek-r1:1.5b "Hello, how are you?" +ollama run llama3.1:8b "Hello, how are you?" ``` If this works, Ollama is running correctly. @@ -177,7 +188,7 @@ AZURE_ENDPOINT=http://localhost:11434/v1 ... # Models: one LLM + one embedding model -AZURE_DEPLOYMENT_NAME=deepseek-r1:1.5b +AZURE_DEPLOYMENT_NAME=llama3.1:8b AZURE_EMBEDDING_DEPLOYMENT_NAME=nomic-embed-text ``` @@ -185,6 +196,7 @@ AZURE_EMBEDDING_DEPLOYMENT_NAME=nomic-embed-text * When `PROVIDER=azure`, the backend uses Azure OpenAI (default). * When `PROVIDER=local`, the backend connects to the **Ollama server** and uses the models you specify in `.env`. +* If the selected model does not support tool calling, the backend **automatically falls back** to a prompt-based approach and logs a warning. --- diff --git a/backend/app/services/llms_service.py b/backend/app/services/llms_service.py index 5d5e91d..a305fcf 100644 --- a/backend/app/services/llms_service.py +++ b/backend/app/services/llms_service.py @@ -10,6 +10,7 @@ ) from app.utils.system_prompt import SYSTEM_PROMPT +from app.utils.tool_selection_prompt import TOOL_SELECTION_PROMPT from app.schemas.vcelldb_schema import BiomodelRequestParams from app.core.singleton import get_openai_client @@ -58,6 +59,28 @@ async def get_response_with_tools(conversation_history: list[dict]): logger.info(f"User prompt: {user_prompt}") + # Try native tool calling first; fall back to prompt-based approach if not supported + try: + final_response, bmkeys = await _get_response_with_native_tools(messages) + except Exception as e: + error_str = str(e).lower() + if "does not support tools" in error_str or "tool" in error_str and "400" in error_str: + logger.warning( + f"Model '{settings.AZURE_DEPLOYMENT_NAME}' does not support native tool calling. " + "Falling back to prompt-based tool selection. For best results, use a model that " + "supports tool calling (e.g., llama3.1:8b)." + ) + final_response, bmkeys = await _get_response_with_prompt_tools(messages, user_prompt) + else: + raise + + logger.info(f"LLM Response: {final_response}") + + return final_response, bmkeys + + +async def _get_response_with_native_tools(messages: list[dict]): + """Use native OpenAI/Azure tool calling API.""" response = client.chat.completions.create( name="GET_RESPONSE_WITH_TOOLS::RETRIEVE_TOOLS", model=settings.AZURE_DEPLOYMENT_NAME, @@ -107,11 +130,70 @@ async def get_response_with_tools(conversation_history: list[dict]): }, ) - final_response = completion.choices[0].message.content + return completion.choices[0].message.content, bmkeys - logger.info(f"LLM Response: {final_response}") - return final_response, bmkeys +async def _get_response_with_prompt_tools(messages: list[dict], user_prompt: str): + """Fallback for local LLMs that don't support native tool calling. + Uses a two-step prompt approach: first ask LLM which tool to use, then + call the tool and ask LLM to generate a final response with the tool result.""" + + bmkeys = [] + + # Step 1: Ask the LLM which tool to call + tool_selection_messages = [ + {"role": "system", "content": TOOL_SELECTION_PROMPT}, + {"role": "user", "content": user_prompt}, + ] + + tool_response = client.chat.completions.create( + name="GET_RESPONSE_WITH_TOOLS::PROMPT_TOOL_SELECTION", + model=settings.AZURE_DEPLOYMENT_NAME, + messages=tool_selection_messages, + ) + + tool_decision_raw = tool_response.choices[0].message.content.strip() + logger.info(f"Tool decision (raw): {tool_decision_raw}") + + # Try to parse the tool decision + tool_result = None + tool_name = None + try: + # Extract JSON from the response (handle models that add extra text) + json_start = tool_decision_raw.find("{") + json_end = tool_decision_raw.rfind("}") + 1 + if json_start != -1 and json_end > json_start: + tool_decision = json.loads(tool_decision_raw[json_start:json_end]) + tool_name = tool_decision.get("tool", "none") + + if tool_name and tool_name != "none": + args = tool_decision.get("args", {}) + logger.info(f"Prompt-based Tool Call: {tool_name} with args: {args}") + + tool_result = await execute_tool(tool_name, args) + logger.info(f"Tool Result: {str(tool_result)[:500]}") + + if isinstance(tool_result, dict): + bmkeys = tool_result.get("unique_model_keys (bmkey)", []) + except (json.JSONDecodeError, KeyError, TypeError) as e: + logger.warning(f"Failed to parse tool decision: {e}. Proceeding without tools.") + + # Step 2: Generate final response with or without tool results + if tool_result is not None: + messages.append( + { + "role": "user", + "content": f"[Tool '{tool_name}' returned the following data]\n{str(tool_result)}\n\n[Now answer the original question using this data]", + } + ) + + completion = client.chat.completions.create( + name="GET_RESPONSE_WITH_TOOLS::PROMPT_FINAL_RESPONSE", + model=settings.AZURE_DEPLOYMENT_NAME, + messages=messages, + ) + + return completion.choices[0].message.content, bmkeys async def analyse_vcml(biomodel_id: str): diff --git a/backend/app/utils/tool_selection_prompt.py b/backend/app/utils/tool_selection_prompt.py new file mode 100644 index 0000000..d87005d --- /dev/null +++ b/backend/app/utils/tool_selection_prompt.py @@ -0,0 +1,16 @@ +TOOL_SELECTION_PROMPT = """You have access to the following tools. Based on the user's message, decide if any tool should be called. + +Available tools: +1. fetch_biomodels - Search/retrieve biomodels from VCell database. Args: bmId (str), bmName (str), category (str: all|public|shared|tutorial|educational), owner (str), startRow (int, default 1), maxRows (int, default 1000), orderBy (str: date_desc|date_asc|name_desc|name_asc) +2. fetch_simulation_details - Get details of a specific simulation. Args: bmId (str), simId (str) +3. get_vcml_file - Get VCML file content for a biomodel. Args: biomodel_id (str) +4. search_vcell_knowledge_base - Search VCell knowledge base for general info, concepts, tutorials. Args: query (str), limit (int, default 5) +5. fetch_publications - Get list of publications from VCell. No args required. + +If the user's question requires a tool, respond with ONLY a JSON object like: +{"tool": "tool_name", "args": {"arg1": "value1"}} + +If the user's question is general conversation or doesn't need a tool, respond with: +{"tool": "none"} + +IMPORTANT: Respond with ONLY the JSON object, nothing else.""" \ No newline at end of file