# Mirror of https://github.com/mblanke/GooseStrike.git
# Synced 2026-03-01 05:50:22 -05:00
"""LLM routing helpers with Claude -> HackGPT fallback."""
|
|
from __future__ import annotations
|
|
|
|
import os
|
|
from typing import Dict, List, Tuple
|
|
|
|
import requests
|
|
|
|
|
|
class LLMProviderError(RuntimeError):
    """Signals that every attempted downstream LLM provider call failed."""
def _call_provider(name: str, url: str, prompt: str) -> str:
    """POST *prompt* to a generic provider endpoint and return its reply text.

    Reads ``{NAME}_API_KEY`` from the environment and, when set, sends it as a
    bearer token.  The reply body is treated as JSON only when the response
    content type says so; the first truthy value among the ``response``,
    ``answer`` and ``text`` keys wins, falling back to the raw body text.

    Raises ``requests.HTTPError`` on a non-2xx status.
    """
    headers = {"Content-Type": "application/json"}
    token = os.getenv(f"{name.upper()}_API_KEY")
    if token:
        headers["Authorization"] = f"Bearer {token}"

    resp = requests.post(url, json={"prompt": prompt}, headers=headers, timeout=30)
    resp.raise_for_status()

    body: Dict[str, str] = {}
    if resp.headers.get("content-type", "").startswith("application/json"):
        body = resp.json()
    for key in ("response", "answer", "text"):
        if body.get(key):
            return body[key]
    return resp.text
def normalize_ollama_url(base_url: str) -> str:
    """Return a usable Ollama generate endpoint for the supplied base URL.

    Accepts the common shapes and normalizes them:

    * ``http://host:11434``              -> ``http://host:11434/api/generate``
    * ``http://host:11434/api``          -> ``http://host:11434/api/generate``
    * ``http://host:11434/api/generate`` -> unchanged

    The previous implementation tested ``"/api" in base_url``, a substring
    check that falsely matched hostnames such as ``http://apihost:11434``
    (via the ``//apihost`` run) and then produced ``.../generate`` without
    the ``/api`` path segment.  Suffix checks avoid that false positive.
    """
    base_url = base_url.rstrip("/")
    if base_url.endswith("/generate"):
        # Already a full generate endpoint (e.g. .../api/generate).
        return base_url
    if base_url.endswith("/api"):
        return f"{base_url}/generate"
    return f"{base_url}/api/generate"
def _call_ollama(base_url: str, prompt: str) -> str:
    """Send *prompt* to a local Ollama server and return the generated text.

    The model name comes from ``OLLAMA_MODEL`` (default ``llama3``); streaming
    is disabled so the reply arrives as a single JSON document.  Non-JSON
    replies fall back to the raw response body.

    Raises ``requests.HTTPError`` on a non-2xx status.
    """
    endpoint = normalize_ollama_url(base_url)
    request_body = {
        "model": os.getenv("OLLAMA_MODEL", "llama3"),
        "prompt": prompt,
        "stream": False,
    }
    resp = requests.post(endpoint, json=request_body, timeout=30)
    resp.raise_for_status()

    payload: Dict[str, str] = {}
    if resp.headers.get("content-type", "").startswith("application/json"):
        payload = resp.json()
    return payload.get("response") or payload.get("output") or resp.text
def call_llm_with_fallback(prompt: str) -> str:
    """Route *prompt* through the configured providers in priority order.

    Providers are tried in the order Claude, HackGPT, Ollama — each only when
    its URL environment variable is set (``CLAUDE_API_URL``,
    ``HACKGPT_API_URL``, ``OLLAMA_API_URL``/``OLLAMA_BASE_URL``).  The first
    successful reply is returned.  If every configured provider fails, an
    ``LLMProviderError`` listing all failures is raised; if no provider is
    configured at all, a static placeholder string is returned instead.
    """
    candidates = (
        ("claude", os.getenv("CLAUDE_API_URL")),
        ("hackgpt", os.getenv("HACKGPT_API_URL")),
        ("ollama", os.getenv("OLLAMA_API_URL") or os.getenv("OLLAMA_BASE_URL")),
    )
    providers: List[Tuple[str, str]] = [
        (name, url) for name, url in candidates if url
    ]

    failures: List[str] = []
    for name, url in providers:
        try:
            # Ollama speaks its own protocol; everything else shares the
            # generic prompt/response helper.
            if name == "ollama":
                return _call_ollama(url, prompt)
            return _call_provider(name, url, prompt)
        except Exception as exc:  # pragma: no cover - network dependent
            failures.append(f"{name} failed: {exc}")

    if failures:
        raise LLMProviderError("; ".join(failures))

    return "LLM response placeholder. Configure CLAUDE_API_URL or HACKGPT_API_URL to enable live replies."