feat: Separate Local and Networked Ollama providers

- Add distinct 'Local Ollama' and 'Networked Ollama' options in dropdown
- Local uses localhost, Networked uses remote endpoints with load balancing
- Color-coded: Yellow for Local, Blue for Networked
- Icons: 🦙 Local, 🌐 Networked
- Backend supports OLLAMA_LOCAL_URL and OLLAMA_NETWORK_URLS env vars
- Updated installer to generate new env var format
- Legacy 'ollama' provider still works for backward compatibility
This commit is contained in:
2025-11-28 14:34:18 -05:00
parent 523eba9613
commit 707232ff83
3 changed files with 188 additions and 87 deletions

View File

@@ -425,24 +425,24 @@ if (-not (Test-Path $configDir)) {
New-Item -ItemType Directory -Path $configDir -Force | Out-Null New-Item -ItemType Directory -Path $configDir -Force | Out-Null
} }
# Build OLLAMA_ENDPOINTS string # Build OLLAMA_LOCAL_URL and OLLAMA_NETWORK_URLS strings
$ollamaEndpoints = @() $ollamaLocalUrl = ""
$ollamaNetworkUrls = @()
if ($config.local.enabled) { if ($config.local.enabled) {
$ollamaEndpoints += $config.local.url $ollamaLocalUrl = $config.local.url
} }
foreach ($ep in $config.networked.endpoints) { foreach ($ep in $config.networked.endpoints) {
if ($ep.enabled) { if ($ep.enabled) {
if ($ep.prefer_high_speed -and $ep.alt_url) { if ($ep.prefer_high_speed -and $ep.alt_url) {
$ollamaEndpoints += $ep.alt_url $ollamaNetworkUrls += $ep.alt_url
} else { } else {
$ollamaEndpoints += $ep.url $ollamaNetworkUrls += $ep.url
} }
} }
} }
$ollamaEndpointsStr = $ollamaEndpoints -join "," $ollamaNetworkUrlsStr = $ollamaNetworkUrls -join ","
if ([string]::IsNullOrEmpty($ollamaEndpointsStr)) {
$ollamaEndpointsStr = "http://localhost:11434"
}
# Generate .env file # Generate .env file
Write-Step "Generating .env file..." Write-Step "Generating .env file..."
@@ -454,8 +454,11 @@ $envLines = @(
"# Generated by installer on $timestamp", "# Generated by installer on $timestamp",
"# ======================================================================", "# ======================================================================",
"", "",
"# Ollama Endpoints (comma-separated for load balancing)", "# Local Ollama (on this machine)",
"OLLAMA_ENDPOINTS=$ollamaEndpointsStr", "OLLAMA_LOCAL_URL=$ollamaLocalUrl",
"",
"# Networked Ollama (comma-separated for load balancing)",
"OLLAMA_NETWORK_URLS=$ollamaNetworkUrlsStr",
"", "",
"# Load Balancing Strategy: round-robin, failover, random", "# Load Balancing Strategy: round-robin, failover, random",
"LOAD_BALANCE_STRATEGY=$($config.load_balancing)", "LOAD_BALANCE_STRATEGY=$($config.load_balancing)",

View File

@@ -106,7 +106,8 @@
</div> </div>
<select x-model="selectedProvider" @change="updateModels()" <select x-model="selectedProvider" @change="updateModels()"
class="bg-sp-grey border border-sp-grey-mid rounded px-3 py-1.5 text-sm text-sp-white focus:border-sp-red focus:outline-none"> class="bg-sp-grey border border-sp-grey-mid rounded px-3 py-1.5 text-sm text-sp-white focus:border-sp-red focus:outline-none">
<option value="ollama">🦙 Ollama</option> <option value="ollama-local">🦙 Local Ollama</option>
<option value="ollama-network">🌐 Networked Ollama</option>
<option value="openai">🤖 OpenAI</option> <option value="openai">🤖 OpenAI</option>
<option value="anthropic">🧠 Anthropic</option> <option value="anthropic">🧠 Anthropic</option>
</select> </select>
@@ -738,7 +739,7 @@
userInput: '', userInput: '',
isLoading: false, isLoading: false,
services: {}, services: {},
selectedProvider: 'ollama', selectedProvider: 'ollama-local',
selectedModel: 'llama3.2', selectedModel: 'llama3.2',
availableModels: ['llama3.2', 'codellama', 'mistral'], availableModels: ['llama3.2', 'codellama', 'mistral'],
providers: {}, providers: {},
@@ -1158,7 +1159,8 @@ Select a phase above to begin, or use the quick actions in the sidebar!`
// AI Provider display helpers // AI Provider display helpers
getProviderIcon(provider) { getProviderIcon(provider) {
const icons = { const icons = {
ollama: '🦙', 'ollama-local': '🦙',
'ollama-network': '🌐',
openai: '🤖', openai: '🤖',
anthropic: '🧠' anthropic: '🧠'
}; };
@@ -1167,7 +1169,8 @@ Select a phase above to begin, or use the quick actions in the sidebar!`
getProviderName(provider) { getProviderName(provider) {
const names = { const names = {
ollama: 'Ollama', 'ollama-local': 'Local',
'ollama-network': 'Networked',
openai: 'OpenAI', openai: 'OpenAI',
anthropic: 'Anthropic' anthropic: 'Anthropic'
}; };
@@ -1176,7 +1179,8 @@ Select a phase above to begin, or use the quick actions in the sidebar!`
getProviderStyle(provider) { getProviderStyle(provider) {
const styles = { const styles = {
ollama: 'bg-yellow-500/20 border-yellow-500/50 text-yellow-400', 'ollama-local': 'bg-yellow-500/20 border-yellow-500/50 text-yellow-400',
'ollama-network': 'bg-blue-500/20 border-blue-500/50 text-blue-400',
openai: 'bg-green-500/20 border-green-500/50 text-green-400', openai: 'bg-green-500/20 border-green-500/50 text-green-400',
anthropic: 'bg-orange-500/20 border-orange-500/50 text-orange-400' anthropic: 'bg-orange-500/20 border-orange-500/50 text-orange-400'
}; };
@@ -1185,7 +1189,8 @@ Select a phase above to begin, or use the quick actions in the sidebar!`
getProviderBadgeStyle(provider) { getProviderBadgeStyle(provider) {
const styles = { const styles = {
ollama: 'bg-yellow-500/20 text-yellow-400 border border-yellow-500/30', 'ollama-local': 'bg-yellow-500/20 text-yellow-400 border border-yellow-500/30',
'ollama-network': 'bg-blue-500/20 text-blue-400 border border-blue-500/30',
openai: 'bg-green-500/20 text-green-400 border border-green-500/30', openai: 'bg-green-500/20 text-green-400 border border-green-500/30',
anthropic: 'bg-orange-500/20 text-orange-400 border border-orange-500/30' anthropic: 'bg-orange-500/20 text-orange-400 border border-orange-500/30'
}; };

View File

@@ -31,9 +31,13 @@ app.add_middleware(
# Configuration from environment # Configuration from environment
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "") OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY", "") ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY", "")
# Support multiple Ollama endpoints (comma-separated)
OLLAMA_ENDPOINTS_STR = os.getenv("OLLAMA_ENDPOINTS", os.getenv("OLLAMA_BASE_URL", "http://192.168.1.50:11434")) # Separate local and networked Ollama endpoints
OLLAMA_ENDPOINTS = [url.strip() for url in OLLAMA_ENDPOINTS_STR.split(",") if url.strip()] OLLAMA_LOCAL_URL = os.getenv("OLLAMA_LOCAL_URL", "http://localhost:11434")
OLLAMA_NETWORK_URLS_STR = os.getenv("OLLAMA_NETWORK_URLS", os.getenv("OLLAMA_ENDPOINTS", os.getenv("OLLAMA_BASE_URL", "")))
OLLAMA_NETWORK_URLS = [url.strip() for url in OLLAMA_NETWORK_URLS_STR.split(",") if url.strip()]
# Legacy support: if only OLLAMA_ENDPOINTS is set, use it for network
LOAD_BALANCE_STRATEGY = os.getenv("LOAD_BALANCE_STRATEGY", "round-robin") # round-robin, random, failover LOAD_BALANCE_STRATEGY = os.getenv("LOAD_BALANCE_STRATEGY", "round-robin") # round-robin, random, failover
@dataclass @dataclass
@@ -44,9 +48,10 @@ class EndpointHealth:
failure_count: int = 0 failure_count: int = 0
models: list = None models: list = None
# Track endpoint health # Track endpoint health for both local and network
endpoint_health: dict[str, EndpointHealth] = {url: EndpointHealth(url=url, models=[]) for url in OLLAMA_ENDPOINTS} all_ollama_endpoints = [OLLAMA_LOCAL_URL] + OLLAMA_NETWORK_URLS if OLLAMA_LOCAL_URL else OLLAMA_NETWORK_URLS
current_endpoint_index = 0 endpoint_health: dict[str, EndpointHealth] = {url: EndpointHealth(url=url, models=[]) for url in all_ollama_endpoints}
current_network_endpoint_index = 0
class ChatMessage(BaseModel): class ChatMessage(BaseModel):
@@ -55,7 +60,7 @@ class ChatMessage(BaseModel):
class ChatRequest(BaseModel): class ChatRequest(BaseModel):
provider: Literal["openai", "anthropic", "ollama"] = "ollama" provider: Literal["openai", "anthropic", "ollama", "ollama-local", "ollama-network"] = "ollama-local"
model: str = "llama3.2" model: str = "llama3.2"
messages: list[ChatMessage] messages: list[ChatMessage]
temperature: float = 0.7 temperature: float = 0.7
@@ -72,7 +77,12 @@ class ChatResponse(BaseModel):
@app.get("/health") @app.get("/health")
async def health_check(): async def health_check():
"""Health check endpoint""" """Health check endpoint"""
return {"status": "healthy", "service": "llm-router", "endpoints": len(OLLAMA_ENDPOINTS)} return {
"status": "healthy",
"service": "llm-router",
"local_endpoint": OLLAMA_LOCAL_URL,
"network_endpoints": len(OLLAMA_NETWORK_URLS)
}
async def check_endpoint_health(url: str) -> tuple[bool, list]: async def check_endpoint_health(url: str) -> tuple[bool, list]:
@@ -89,13 +99,19 @@ async def check_endpoint_health(url: str) -> tuple[bool, list]:
return False, [] return False, []
async def get_healthy_endpoint() -> Optional[str]: async def get_healthy_endpoint(endpoints: list[str]) -> Optional[str]:
"""Get a healthy Ollama endpoint based on load balancing strategy""" """Get a healthy Ollama endpoint from the given list based on load balancing strategy"""
global current_endpoint_index global current_network_endpoint_index
if not endpoints:
return None
# Refresh health status for stale checks (older than 30 seconds) # Refresh health status for stale checks (older than 30 seconds)
now = datetime.now() now = datetime.now()
for url, health in endpoint_health.items(): for url in endpoints:
if url not in endpoint_health:
endpoint_health[url] = EndpointHealth(url=url, models=[])
health = endpoint_health[url]
if health.last_check is None or (now - health.last_check) > timedelta(seconds=30): if health.last_check is None or (now - health.last_check) > timedelta(seconds=30):
is_healthy, models = await check_endpoint_health(url) is_healthy, models = await check_endpoint_health(url)
health.healthy = is_healthy health.healthy = is_healthy
@@ -104,7 +120,7 @@ async def get_healthy_endpoint() -> Optional[str]:
if is_healthy: if is_healthy:
health.failure_count = 0 health.failure_count = 0
healthy_endpoints = [url for url, h in endpoint_health.items() if h.healthy] healthy_endpoints = [url for url in endpoints if endpoint_health.get(url, EndpointHealth(url=url)).healthy]
if not healthy_endpoints: if not healthy_endpoints:
return None return None
@@ -116,9 +132,9 @@ async def get_healthy_endpoint() -> Optional[str]:
return healthy_endpoints[0] return healthy_endpoints[0]
else: # round-robin (default) else: # round-robin (default)
# Find next healthy endpoint in rotation # Find next healthy endpoint in rotation
for _ in range(len(OLLAMA_ENDPOINTS)): for _ in range(len(endpoints)):
current_endpoint_index = (current_endpoint_index + 1) % len(OLLAMA_ENDPOINTS) current_network_endpoint_index = (current_network_endpoint_index + 1) % len(endpoints)
url = OLLAMA_ENDPOINTS[current_endpoint_index] url = endpoints[current_network_endpoint_index]
if url in healthy_endpoints: if url in healthy_endpoints:
return url return url
return healthy_endpoints[0] return healthy_endpoints[0]
@@ -127,61 +143,105 @@ async def get_healthy_endpoint() -> Optional[str]:
@app.get("/providers") @app.get("/providers")
async def list_providers(): async def list_providers():
"""List available LLM providers and their status""" """List available LLM providers and their status"""
# Check all Ollama endpoints providers = {
ollama_info = [] "openai": {"available": bool(OPENAI_API_KEY), "models": ["gpt-4o", "gpt-4o-mini", "gpt-4-turbo"]},
all_models = set() "anthropic": {"available": bool(ANTHROPIC_API_KEY), "models": ["claude-sonnet-4-20250514", "claude-3-5-haiku-20241022"]},
any_available = False }
for url in OLLAMA_ENDPOINTS: # Check local Ollama endpoint
if OLLAMA_LOCAL_URL:
is_healthy, models = await check_endpoint_health(OLLAMA_LOCAL_URL)
endpoint_health[OLLAMA_LOCAL_URL] = EndpointHealth(
url=OLLAMA_LOCAL_URL,
healthy=is_healthy,
models=models,
last_check=datetime.now()
)
providers["ollama-local"] = {
"available": is_healthy,
"endpoint": OLLAMA_LOCAL_URL,
"models": models if models else ["llama3", "mistral", "codellama"]
}
else:
providers["ollama-local"] = {"available": False, "models": []}
# Check networked Ollama endpoints
network_info = []
network_models = set()
any_network_available = False
for url in OLLAMA_NETWORK_URLS:
is_healthy, models = await check_endpoint_health(url) is_healthy, models = await check_endpoint_health(url)
endpoint_health[url].healthy = is_healthy endpoint_health[url] = EndpointHealth(
endpoint_health[url].models = models url=url,
endpoint_health[url].last_check = datetime.now() healthy=is_healthy,
models=models,
ollama_info.append({ last_check=datetime.now()
)
network_info.append({
"url": url, "url": url,
"available": is_healthy, "available": is_healthy,
"models": models "models": models
}) })
if is_healthy: if is_healthy:
any_available = True any_network_available = True
all_models.update(models) network_models.update(models)
providers = { providers["ollama-network"] = {
"openai": {"available": bool(OPENAI_API_KEY), "models": ["gpt-4o", "gpt-4o-mini", "gpt-4-turbo"]}, "available": any_network_available,
"anthropic": {"available": bool(ANTHROPIC_API_KEY), "models": ["claude-sonnet-4-20250514", "claude-3-5-haiku-20241022"]}, "endpoints": network_info,
"ollama": { "load_balance_strategy": LOAD_BALANCE_STRATEGY,
"available": any_available, "models": list(network_models) if network_models else ["llama3", "mistral", "codellama"]
"endpoints": ollama_info,
"load_balance_strategy": LOAD_BALANCE_STRATEGY,
"models": list(all_models) if all_models else ["llama3", "mistral", "codellama"]
}
} }
# Legacy: also provide combined "ollama" for backward compatibility
all_ollama_models = set()
if providers["ollama-local"]["available"]:
all_ollama_models.update(providers["ollama-local"]["models"])
if providers["ollama-network"]["available"]:
all_ollama_models.update(providers["ollama-network"]["models"])
providers["ollama"] = {
"available": providers["ollama-local"]["available"] or providers["ollama-network"]["available"],
"models": list(all_ollama_models) if all_ollama_models else ["llama3", "mistral", "codellama"]
}
return providers return providers
@app.get("/endpoints") @app.get("/endpoints")
async def list_endpoints(): async def list_endpoints():
"""List all Ollama endpoints with detailed status""" """List all Ollama endpoints with detailed status"""
results = [] results = {
for url in OLLAMA_ENDPOINTS: "local": None,
is_healthy, models = await check_endpoint_health(url) "network": []
endpoint_health[url].healthy = is_healthy }
endpoint_health[url].models = models
endpoint_health[url].last_check = datetime.now()
results.append({ # Local endpoint
if OLLAMA_LOCAL_URL:
is_healthy, models = await check_endpoint_health(OLLAMA_LOCAL_URL)
results["local"] = {
"url": OLLAMA_LOCAL_URL,
"healthy": is_healthy,
"models": models,
"failure_count": endpoint_health.get(OLLAMA_LOCAL_URL, EndpointHealth(url=OLLAMA_LOCAL_URL)).failure_count
}
# Network endpoints
for url in OLLAMA_NETWORK_URLS:
is_healthy, models = await check_endpoint_health(url)
results["network"].append({
"url": url, "url": url,
"healthy": is_healthy, "healthy": is_healthy,
"models": models, "models": models,
"failure_count": endpoint_health[url].failure_count "failure_count": endpoint_health.get(url, EndpointHealth(url=url)).failure_count
}) })
return { return {
"strategy": LOAD_BALANCE_STRATEGY, "strategy": LOAD_BALANCE_STRATEGY,
"endpoints": results, "endpoints": results,
"healthy_count": sum(1 for r in results if r["healthy"]), "network_healthy_count": sum(1 for r in results["network"] if r["healthy"]),
"total_count": len(results) "network_total_count": len(results["network"])
} }
@@ -193,8 +253,23 @@ async def chat(request: ChatRequest):
return await _call_openai(request) return await _call_openai(request)
elif request.provider == "anthropic": elif request.provider == "anthropic":
return await _call_anthropic(request) return await _call_anthropic(request)
elif request.provider == "ollama-local":
return await _call_ollama_local(request)
elif request.provider == "ollama-network":
return await _call_ollama_network(request)
elif request.provider == "ollama": elif request.provider == "ollama":
return await _call_ollama(request) # Legacy: try local first, then network
if OLLAMA_LOCAL_URL:
try:
return await _call_ollama_local(request)
except HTTPException:
if OLLAMA_NETWORK_URLS:
return await _call_ollama_network(request)
raise
elif OLLAMA_NETWORK_URLS:
return await _call_ollama_network(request)
else:
raise HTTPException(status_code=503, detail="No Ollama endpoints configured")
else: else:
raise HTTPException(status_code=400, detail=f"Unknown provider: {request.provider}") raise HTTPException(status_code=400, detail=f"Unknown provider: {request.provider}")
@@ -279,13 +354,8 @@ async def _call_anthropic(request: ChatRequest) -> ChatResponse:
) )
async def _call_ollama(request: ChatRequest) -> ChatResponse: async def _call_ollama_endpoint(request: ChatRequest, endpoint: str, provider_label: str) -> ChatResponse:
"""Call Ollama API with load balancing across endpoints""" """Call a specific Ollama endpoint"""
endpoint = await get_healthy_endpoint()
if not endpoint:
raise HTTPException(status_code=503, detail="No healthy Ollama endpoints available")
async with httpx.AsyncClient() as client: async with httpx.AsyncClient() as client:
try: try:
response = await client.post( response = await client.post(
@@ -304,17 +374,19 @@ async def _call_ollama(request: ChatRequest) -> ChatResponse:
if response.status_code != 200: if response.status_code != 200:
# Mark endpoint as failed # Mark endpoint as failed
endpoint_health[endpoint].failure_count += 1 if endpoint in endpoint_health:
if endpoint_health[endpoint].failure_count >= 3: endpoint_health[endpoint].failure_count += 1
endpoint_health[endpoint].healthy = False if endpoint_health[endpoint].failure_count >= 3:
endpoint_health[endpoint].healthy = False
raise HTTPException(status_code=response.status_code, detail=response.text) raise HTTPException(status_code=response.status_code, detail=response.text)
# Reset failure count on success # Reset failure count on success
endpoint_health[endpoint].failure_count = 0 if endpoint in endpoint_health:
endpoint_health[endpoint].failure_count = 0
data = response.json() data = response.json()
return ChatResponse( return ChatResponse(
provider="ollama", provider=provider_label,
model=request.model, model=request.model,
content=data["message"]["content"], content=data["message"]["content"],
usage={ usage={
@@ -325,16 +397,37 @@ async def _call_ollama(request: ChatRequest) -> ChatResponse:
) )
except httpx.ConnectError: except httpx.ConnectError:
# Mark endpoint as unhealthy # Mark endpoint as unhealthy
endpoint_health[endpoint].healthy = False if endpoint in endpoint_health:
endpoint_health[endpoint].failure_count += 1 endpoint_health[endpoint].healthy = False
endpoint_health[endpoint].failure_count += 1
raise HTTPException(status_code=503, detail=f"Ollama endpoint unavailable: {endpoint}")
# Try another endpoint if available
other_endpoint = await get_healthy_endpoint()
if other_endpoint and other_endpoint != endpoint:
# Recursive call will use different endpoint
return await _call_ollama(request)
raise HTTPException(status_code=503, detail="All Ollama endpoints unavailable") async def _call_ollama_local(request: ChatRequest) -> ChatResponse:
"""Call local Ollama instance"""
if not OLLAMA_LOCAL_URL:
raise HTTPException(status_code=503, detail="Local Ollama not configured")
return await _call_ollama_endpoint(request, OLLAMA_LOCAL_URL, "ollama-local")
async def _call_ollama_network(request: ChatRequest) -> ChatResponse:
"""Call networked Ollama with load balancing across endpoints"""
if not OLLAMA_NETWORK_URLS:
raise HTTPException(status_code=503, detail="No networked Ollama endpoints configured")
endpoint = await get_healthy_endpoint(OLLAMA_NETWORK_URLS)
if not endpoint:
raise HTTPException(status_code=503, detail="No healthy networked Ollama endpoints available")
try:
return await _call_ollama_endpoint(request, endpoint, "ollama-network")
except HTTPException:
# Try another endpoint if available
other_endpoint = await get_healthy_endpoint(OLLAMA_NETWORK_URLS)
if other_endpoint and other_endpoint != endpoint:
return await _call_ollama_endpoint(request, other_endpoint, "ollama-network")
raise
if __name__ == "__main__": if __name__ == "__main__":