feat: Add interactive installer and multi-endpoint LLM load balancing

- Add install.ps1 (PowerShell) and install.sh (Bash) interactive installers
- Support local, networked, and cloud AI providers in installer
- Add multi-Ollama endpoint configuration with high-speed NIC support
- Implement load balancing strategies: round-robin, failover, random
- Update LLM router with endpoint health checking and automatic failover
- Add /endpoints API for monitoring all Ollama instances
- Update docker-compose with OLLAMA_ENDPOINTS and LOAD_BALANCE_STRATEGY
- Rebrand to GooseStrike with custom icon and flag assets
This commit is contained in:
2025-11-28 12:59:45 -05:00
parent b9428df6df
commit 486fd38aff
7 changed files with 1155 additions and 29 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 310 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.7 KiB

View File

@@ -3,7 +3,7 @@
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>StrikePackageGPT - Security Analysis Dashboard</title>
<title>GooseStrike - Security Analysis Dashboard</title>
<script src="https://cdn.tailwindcss.com"></script>
<script src="https://unpkg.com/alpinejs@3.x.x/dist/cdn.min.js" defer></script>
<script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
@@ -62,9 +62,9 @@
<div class="flex items-center justify-between">
<div class="flex items-center gap-4">
<div class="flex items-center gap-3">
<img src="/static/icon.png" alt="StrikePackageGPT" class="h-10 w-10" onerror="this.style.display='none'">
<img src="/static/icon.png" alt="GooseStrike" class="h-10 w-10" onerror="this.style.display='none'">
<div>
<h1 class="text-2xl font-bold text-sp-red">StrikePackageGPT</h1>
<h1 class="text-2xl font-bold text-sp-red">GooseStrike</h1>
<span class="text-xs text-sp-white-muted">AI-Powered Penetration Testing Platform</span>
</div>
</div>
@@ -623,10 +623,8 @@
this.messages.push({
role: 'assistant',
phase: 'Reconnaissance',
content: `# Welcome to StrikePackageGPT! 🍁
I'm your AI-powered penetration testing assistant, following the **6-Phase Enterprise Methodology**:
phase: 'Reconnaissance',
content: `# Welcome to GooseStrike! 🍁
I'm your AI-powered penetration testing assistant, following the **6-Phase Enterprise Methodology**:
| Phase | Purpose |
|-------|---------|

View File

@@ -1,6 +1,7 @@
"""
LLM Router Service
Routes requests to different LLM providers (OpenAI, Anthropic, Ollama)
Supports multiple Ollama endpoints with load balancing
"""
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
@@ -8,11 +9,15 @@ from pydantic import BaseModel
from typing import Optional, Literal
import httpx
import os
import random
import asyncio
from dataclasses import dataclass
from datetime import datetime, timedelta
# FastAPI application object exposing the LLM-routing API.
# NOTE(review): the rendered diff left BOTH the old and new `description`/
# `version` keyword arguments in place, which is a SyntaxError (duplicate
# keyword arguments); only the current values are kept.
app = FastAPI(
    title="LLM Router",
    description="Routes requests to multiple LLM providers with load balancing",
    version="0.2.0",
)
app.add_middleware(
@@ -26,7 +31,22 @@ app.add_middleware(
# Configuration from environment
# API keys: an empty string (the default) marks the provider unavailable
# (see list_providers, which gates on bool(KEY)).
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY", "")
# NOTE(review): legacy single-endpoint setting — shown as removed in this
# commit's diff; kept here because OLLAMA_ENDPOINTS_STR below reads the same
# env var as its fallback. Safe to delete once nothing references it.
OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL", "http://192.168.1.50:11434")
# Support multiple Ollama endpoints (comma-separated)
# Precedence: OLLAMA_ENDPOINTS > OLLAMA_BASE_URL > hard-coded default.
OLLAMA_ENDPOINTS_STR = os.getenv("OLLAMA_ENDPOINTS", os.getenv("OLLAMA_BASE_URL", "http://192.168.1.50:11434"))
# Split on commas, trimming whitespace and dropping empty entries
# (tolerates trailing commas and padded lists like "a, b ,").
OLLAMA_ENDPOINTS = [url.strip() for url in OLLAMA_ENDPOINTS_STR.split(",") if url.strip()]
# Strategy consumed by get_healthy_endpoint(); unrecognized values fall
# through to round-robin there.
LOAD_BALANCE_STRATEGY = os.getenv("LOAD_BALANCE_STRATEGY", "round-robin") # round-robin, random, failover
@dataclass
class EndpointHealth:
    """Mutable health record for one Ollama endpoint.

    Tracks the most recent probe result, when it was taken, the number of
    consecutive failures observed, and the model names the endpoint
    reported from its /api/tags listing.

    Fix: `last_check` and `models` default to None, so their annotations
    must be Optional — the originals (`datetime = None`, `list = None`)
    declared types that None does not satisfy.
    """

    url: str
    # Assumed healthy until a probe says otherwise.
    healthy: bool = True
    # None until the first health check runs.
    last_check: Optional[datetime] = None
    # Consecutive failures; reset to 0 on any successful probe.
    failure_count: int = 0
    # Model names from /api/tags; None until populated by a probe.
    models: Optional[list] = None
# Track endpoint health
# One record per configured endpoint, keyed by URL; models starts as an
# empty list and is refreshed by the health-check/probe code paths.
endpoint_health: dict[str, EndpointHealth] = {url: EndpointHealth(url=url, models=[]) for url in OLLAMA_ENDPOINTS}
# Shared round-robin cursor into OLLAMA_ENDPOINTS; advanced (pre-increment)
# by get_healthy_endpoint() under the default strategy.
current_endpoint_index = 0
class ChatMessage(BaseModel):
@@ -52,33 +72,119 @@ class ChatResponse(BaseModel):
@app.get("/health")
async def health_check():
    """Liveness probe: report service identity and configured endpoint count.

    Fix: the rendered diff left the pre-load-balancing return statement in
    place ahead of the new one, making the current response unreachable;
    the stale return is removed.
    """
    return {"status": "healthy", "service": "llm-router", "endpoints": len(OLLAMA_ENDPOINTS)}
async def check_endpoint_health(url: str) -> tuple[bool, list]:
    """Probe a single Ollama endpoint.

    Issues GET ``{url}/api/tags`` with a 5 second timeout and returns
    ``(healthy, model_names)``.  Any transport error, timeout, JSON
    problem, or non-200 status yields ``(False, [])``.
    """
    try:
        async with httpx.AsyncClient() as probe:
            resp = await probe.get(f"{url}/api/tags", timeout=5.0)
            if resp.status_code == 200:
                listing = resp.json().get("models", [])
                return True, [entry["name"] for entry in listing]
    except Exception:
        # Best-effort probe: treat every failure mode as "unhealthy".
        pass
    return False, []
async def get_healthy_endpoint() -> Optional[str]:
    """Select a healthy Ollama endpoint per LOAD_BALANCE_STRATEGY.

    Health records that have never been probed, or whose probe is older
    than 30 seconds, are refreshed first.  Returns None when no endpoint
    is healthy.
    """
    global current_endpoint_index

    # Refresh every stale record (no probe yet, or probe older than 30s).
    moment = datetime.now()
    stale_after = timedelta(seconds=30)
    for target, record in endpoint_health.items():
        if record.last_check is not None and (moment - record.last_check) <= stale_after:
            continue
        alive, model_list = await check_endpoint_health(target)
        record.healthy = alive
        record.models = model_list
        record.last_check = moment
        if alive:
            record.failure_count = 0

    alive_urls = [u for u, rec in endpoint_health.items() if rec.healthy]
    if not alive_urls:
        return None

    if LOAD_BALANCE_STRATEGY == "random":
        return random.choice(alive_urls)
    if LOAD_BALANCE_STRATEGY == "failover":
        # Failover pins traffic to the first healthy endpoint in config order.
        return alive_urls[0]

    # Default: round-robin. Advance the shared cursor (pre-increment) until
    # it lands on a healthy endpoint, giving up after one full rotation.
    usable = set(alive_urls)
    for _ in OLLAMA_ENDPOINTS:
        current_endpoint_index = (current_endpoint_index + 1) % len(OLLAMA_ENDPOINTS)
        candidate = OLLAMA_ENDPOINTS[current_endpoint_index]
        if candidate in usable:
            return candidate
    return alive_urls[0]
@app.get("/providers")
async def list_providers():
    """List available LLM providers and their status.

    OpenAI/Anthropic availability is inferred from API-key presence.  Every
    configured Ollama endpoint is probed, its shared health record is
    refreshed, and the union of models across healthy endpoints is reported.

    Fix: the rendered diff left the removed single-endpoint code in place,
    producing a duplicate "ollama" dict key with a missing comma
    (SyntaxError) and references to the deleted OLLAMA_BASE_URL path; only
    the multi-endpoint version is kept.
    """
    # Check all Ollama endpoints
    ollama_info = []
    all_models = set()
    any_available = False
    for url in OLLAMA_ENDPOINTS:
        is_healthy, models = await check_endpoint_health(url)
        # Keep the shared health record in sync with this probe.
        endpoint_health[url].healthy = is_healthy
        endpoint_health[url].models = models
        endpoint_health[url].last_check = datetime.now()
        ollama_info.append({
            "url": url,
            "available": is_healthy,
            "models": models
        })
        if is_healthy:
            any_available = True
            all_models.update(models)
    providers = {
        "openai": {"available": bool(OPENAI_API_KEY), "models": ["gpt-4o", "gpt-4o-mini", "gpt-4-turbo"]},
        "anthropic": {"available": bool(ANTHROPIC_API_KEY), "models": ["claude-sonnet-4-20250514", "claude-3-5-haiku-20241022"]},
        "ollama": {
            "available": any_available,
            "endpoints": ollama_info,
            "load_balance_strategy": LOAD_BALANCE_STRATEGY,
            # Static fallback list when no endpoint is reachable.
            "models": list(all_models) if all_models else ["llama3", "mistral", "codellama"]
        }
    }
    return providers
@app.get("/endpoints")
async def list_endpoints():
    """List all Ollama endpoints with detailed per-endpoint status."""
    snapshot = []
    for target in OLLAMA_ENDPOINTS:
        alive, model_list = await check_endpoint_health(target)
        # Sync the shared health record with this fresh probe.
        record = endpoint_health[target]
        record.healthy = alive
        record.models = model_list
        record.last_check = datetime.now()
        snapshot.append({
            "url": target,
            "healthy": alive,
            "models": model_list,
            "failure_count": record.failure_count
        })
    healthy_total = sum(1 for entry in snapshot if entry["healthy"])
    return {
        "strategy": LOAD_BALANCE_STRATEGY,
        "endpoints": snapshot,
        "healthy_count": healthy_total,
        "total_count": len(snapshot)
    }
@app.post("/chat", response_model=ChatResponse)
async def chat(request: ChatRequest):
"""Route chat request to specified LLM provider"""
@@ -174,11 +280,16 @@ async def _call_anthropic(request: ChatRequest) -> ChatResponse:
async def _call_ollama(request: ChatRequest) -> ChatResponse:
"""Call Ollama API (local models)"""
"""Call Ollama API with load balancing across endpoints"""
endpoint = await get_healthy_endpoint()
if not endpoint:
raise HTTPException(status_code=503, detail="No healthy Ollama endpoints available")
async with httpx.AsyncClient() as client:
try:
response = await client.post(
f"{OLLAMA_BASE_URL}/api/chat",
f"{endpoint}/api/chat",
json={
"model": request.model,
"messages": [m.model_dump() for m in request.messages],
@@ -192,8 +303,15 @@ async def _call_ollama(request: ChatRequest) -> ChatResponse:
)
if response.status_code != 200:
# Mark endpoint as failed
endpoint_health[endpoint].failure_count += 1
if endpoint_health[endpoint].failure_count >= 3:
endpoint_health[endpoint].healthy = False
raise HTTPException(status_code=response.status_code, detail=response.text)
# Reset failure count on success
endpoint_health[endpoint].failure_count = 0
data = response.json()
return ChatResponse(
provider="ollama",
@@ -201,11 +319,22 @@ async def _call_ollama(request: ChatRequest) -> ChatResponse:
content=data["message"]["content"],
usage={
"prompt_tokens": data.get("prompt_eval_count", 0),
"completion_tokens": data.get("eval_count", 0)
"completion_tokens": data.get("eval_count", 0),
"endpoint": endpoint
}
)
except httpx.ConnectError:
raise HTTPException(status_code=503, detail="Ollama service not available")
# Mark endpoint as unhealthy
endpoint_health[endpoint].healthy = False
endpoint_health[endpoint].failure_count += 1
# Try another endpoint if available
other_endpoint = await get_healthy_endpoint()
if other_endpoint and other_endpoint != endpoint:
# Recursive call will use different endpoint
return await _call_ollama(request)
raise HTTPException(status_code=503, detail="All Ollama endpoints unavailable")
if __name__ == "__main__":