Files
GooseStrike/app/agents/llm_router.py

79 lines
2.8 KiB
Python

"""LLM routing helpers with Claude -> HackGPT fallback."""
from __future__ import annotations

import os
from typing import Dict, List, Tuple
from urllib.parse import urlsplit

import requests
class LLMProviderError(RuntimeError):
    """Signals that every attempted downstream LLM provider failed."""
def _call_provider(name: str, url: str, prompt: str, *, timeout: float = 30) -> str:
    """POST *prompt* to a generic HTTP LLM provider and return the reply text.

    A bearer token is read from the ``<NAME>_API_KEY`` environment variable
    when present; unauthenticated (e.g. self-hosted) endpoints work too.
    JSON replies are probed for the common ``response`` / ``answer`` /
    ``text`` keys, falling back to the raw response body.

    Args:
        name: Provider name; uppercased to build the API-key env var.
        url: Full endpoint URL to POST to.
        prompt: The prompt text to send.
        timeout: Request timeout in seconds (keyword-only; default 30).

    Raises:
        requests.HTTPError: if the provider returns a non-2xx status.
    """
    payload = {"prompt": prompt}
    headers = {"Content-Type": "application/json"}
    api_key = os.getenv(f"{name.upper()}_API_KEY")
    if api_key:
        headers["Authorization"] = f"Bearer {api_key}"
    response = requests.post(url, json=payload, headers=headers, timeout=timeout)
    response.raise_for_status()
    # Header *values* are not case-normalized by requests; compare lowercased.
    content_type = response.headers.get("content-type", "").lower()
    data: Dict[str, str] = (
        response.json() if content_type.startswith("application/json") else {}
    )
    return data.get("response") or data.get("answer") or data.get("text") or response.text
def normalize_ollama_url(base_url: str) -> str:
    """Return a usable Ollama generate endpoint for the supplied base URL.

    Accepts a bare host (``http://host:11434``), an API root
    (``.../api``), or a full endpoint (``.../api/generate``) and returns
    the full generate endpoint in each case.

    Bug fix: the previous substring test (``"/api" in base_url``) also
    matched the *hostname* — e.g. ``http://api.example.com`` contains
    ``/api`` inside ``//api...`` — so such hosts skipped the ``/api``
    path segment entirely.  Only the URL path is inspected now.
    """
    base_url = base_url.rstrip("/")
    # Examine the path component only, never the scheme/authority.
    path = urlsplit(base_url).path
    if "/api" in path:
        if base_url.endswith("/generate"):
            return base_url
        return f"{base_url}/generate"
    return f"{base_url}/api/generate"
def _call_ollama(base_url: str, prompt: str) -> str:
    """Send *prompt* to a local Ollama instance and return its reply text.

    The model name comes from ``OLLAMA_MODEL`` (default ``llama3``);
    streaming is disabled so the reply arrives as a single JSON object.
    """
    endpoint = normalize_ollama_url(base_url)
    body = {
        "model": os.getenv("OLLAMA_MODEL", "llama3"),
        "prompt": prompt,
        "stream": False,
    }
    resp = requests.post(endpoint, json=body, timeout=30)
    resp.raise_for_status()
    is_json = resp.headers.get("content-type", "").startswith("application/json")
    data: Dict[str, str] = resp.json() if is_json else {}
    return data.get("response") or data.get("output") or resp.text
def call_llm_with_fallback(prompt: str) -> str:
    """Route *prompt* through the first working configured provider.

    Providers are tried in order — Claude, HackGPT, then a local Ollama
    instance — for whichever of ``CLAUDE_API_URL``, ``HACKGPT_API_URL``
    and ``OLLAMA_API_URL`` (or ``OLLAMA_BASE_URL``) is set.

    Returns:
        The first successful provider reply, or a placeholder string when
        no provider is configured at all.

    Raises:
        LLMProviderError: when at least one provider is configured but
            every configured provider failed.
    """
    order: List[Tuple[str, str]] = []
    claude_url = os.getenv("CLAUDE_API_URL")
    hackgpt_url = os.getenv("HACKGPT_API_URL")
    ollama_base = os.getenv("OLLAMA_API_URL") or os.getenv("OLLAMA_BASE_URL")
    if claude_url:
        order.append(("claude", claude_url))
    if hackgpt_url:
        order.append(("hackgpt", hackgpt_url))
    if ollama_base:
        order.append(("ollama", ollama_base))

    errors: List[str] = []
    for name, url in order:
        try:
            # Ollama speaks its own wire format; the others share one shape.
            if name == "ollama":
                return _call_ollama(url, prompt)
            return _call_provider(name, url, prompt)
        except Exception as exc:  # pragma: no cover - network dependent
            # Record the failure and fall through to the next provider.
            errors.append(f"{name} failed: {exc}")
    if errors:
        raise LLMProviderError("; ".join(errors))
    return (
        "LLM response placeholder. Configure CLAUDE_API_URL, HACKGPT_API_URL "
        "or OLLAMA_API_URL to enable live replies."
    )