mirror of
https://github.com/mblanke/StrikePackageGPT.git
synced 2026-03-01 06:10:21 -05:00
feat: Separate Local and Networked Ollama providers
- Add distinct 'Local Ollama' and 'Networked Ollama' options in the provider dropdown
- Local uses localhost; Networked uses remote endpoints with load balancing
- Color-coded: yellow for Local, blue for Networked
- Icons: 🦙 Local, 🌐 Networked
- Backend supports the OLLAMA_LOCAL_URL and OLLAMA_NETWORK_URLS env vars
- Updated installer to generate the new env var format
- Legacy 'ollama' provider still works for backward compatibility
This commit is contained in:
@@ -425,24 +425,24 @@ if (-not (Test-Path $configDir)) {
|
|||||||
New-Item -ItemType Directory -Path $configDir -Force | Out-Null
|
New-Item -ItemType Directory -Path $configDir -Force | Out-Null
|
||||||
}
|
}
|
||||||
|
|
||||||
# Build OLLAMA_ENDPOINTS string
|
# Build OLLAMA_LOCAL_URL and OLLAMA_NETWORK_URLS strings
|
||||||
$ollamaEndpoints = @()
|
$ollamaLocalUrl = ""
|
||||||
|
$ollamaNetworkUrls = @()
|
||||||
|
|
||||||
if ($config.local.enabled) {
|
if ($config.local.enabled) {
|
||||||
$ollamaEndpoints += $config.local.url
|
$ollamaLocalUrl = $config.local.url
|
||||||
}
|
}
|
||||||
|
|
||||||
foreach ($ep in $config.networked.endpoints) {
|
foreach ($ep in $config.networked.endpoints) {
|
||||||
if ($ep.enabled) {
|
if ($ep.enabled) {
|
||||||
if ($ep.prefer_high_speed -and $ep.alt_url) {
|
if ($ep.prefer_high_speed -and $ep.alt_url) {
|
||||||
$ollamaEndpoints += $ep.alt_url
|
$ollamaNetworkUrls += $ep.alt_url
|
||||||
} else {
|
} else {
|
||||||
$ollamaEndpoints += $ep.url
|
$ollamaNetworkUrls += $ep.url
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
$ollamaEndpointsStr = $ollamaEndpoints -join ","
|
$ollamaNetworkUrlsStr = $ollamaNetworkUrls -join ","
|
||||||
if ([string]::IsNullOrEmpty($ollamaEndpointsStr)) {
|
|
||||||
$ollamaEndpointsStr = "http://localhost:11434"
|
|
||||||
}
|
|
||||||
|
|
||||||
# Generate .env file
|
# Generate .env file
|
||||||
Write-Step "Generating .env file..."
|
Write-Step "Generating .env file..."
|
||||||
@@ -454,8 +454,11 @@ $envLines = @(
|
|||||||
"# Generated by installer on $timestamp",
|
"# Generated by installer on $timestamp",
|
||||||
"# ======================================================================",
|
"# ======================================================================",
|
||||||
"",
|
"",
|
||||||
"# Ollama Endpoints (comma-separated for load balancing)",
|
"# Local Ollama (on this machine)",
|
||||||
"OLLAMA_ENDPOINTS=$ollamaEndpointsStr",
|
"OLLAMA_LOCAL_URL=$ollamaLocalUrl",
|
||||||
|
"",
|
||||||
|
"# Networked Ollama (comma-separated for load balancing)",
|
||||||
|
"OLLAMA_NETWORK_URLS=$ollamaNetworkUrlsStr",
|
||||||
"",
|
"",
|
||||||
"# Load Balancing Strategy: round-robin, failover, random",
|
"# Load Balancing Strategy: round-robin, failover, random",
|
||||||
"LOAD_BALANCE_STRATEGY=$($config.load_balancing)",
|
"LOAD_BALANCE_STRATEGY=$($config.load_balancing)",
|
||||||
|
|||||||
@@ -106,7 +106,8 @@
|
|||||||
</div>
|
</div>
|
||||||
<select x-model="selectedProvider" @change="updateModels()"
|
<select x-model="selectedProvider" @change="updateModels()"
|
||||||
class="bg-sp-grey border border-sp-grey-mid rounded px-3 py-1.5 text-sm text-sp-white focus:border-sp-red focus:outline-none">
|
class="bg-sp-grey border border-sp-grey-mid rounded px-3 py-1.5 text-sm text-sp-white focus:border-sp-red focus:outline-none">
|
||||||
<option value="ollama">🦙 Ollama</option>
|
<option value="ollama-local">🦙 Local Ollama</option>
|
||||||
|
<option value="ollama-network">🌐 Networked Ollama</option>
|
||||||
<option value="openai">🤖 OpenAI</option>
|
<option value="openai">🤖 OpenAI</option>
|
||||||
<option value="anthropic">🧠 Anthropic</option>
|
<option value="anthropic">🧠 Anthropic</option>
|
||||||
</select>
|
</select>
|
||||||
@@ -738,7 +739,7 @@
|
|||||||
userInput: '',
|
userInput: '',
|
||||||
isLoading: false,
|
isLoading: false,
|
||||||
services: {},
|
services: {},
|
||||||
selectedProvider: 'ollama',
|
selectedProvider: 'ollama-local',
|
||||||
selectedModel: 'llama3.2',
|
selectedModel: 'llama3.2',
|
||||||
availableModels: ['llama3.2', 'codellama', 'mistral'],
|
availableModels: ['llama3.2', 'codellama', 'mistral'],
|
||||||
providers: {},
|
providers: {},
|
||||||
@@ -1158,7 +1159,8 @@ Select a phase above to begin, or use the quick actions in the sidebar!`
|
|||||||
// AI Provider display helpers
|
// AI Provider display helpers
|
||||||
getProviderIcon(provider) {
|
getProviderIcon(provider) {
|
||||||
const icons = {
|
const icons = {
|
||||||
ollama: '🦙',
|
'ollama-local': '🦙',
|
||||||
|
'ollama-network': '🌐',
|
||||||
openai: '🤖',
|
openai: '🤖',
|
||||||
anthropic: '🧠'
|
anthropic: '🧠'
|
||||||
};
|
};
|
||||||
@@ -1167,7 +1169,8 @@ Select a phase above to begin, or use the quick actions in the sidebar!`
|
|||||||
|
|
||||||
getProviderName(provider) {
|
getProviderName(provider) {
|
||||||
const names = {
|
const names = {
|
||||||
ollama: 'Ollama',
|
'ollama-local': 'Local',
|
||||||
|
'ollama-network': 'Networked',
|
||||||
openai: 'OpenAI',
|
openai: 'OpenAI',
|
||||||
anthropic: 'Anthropic'
|
anthropic: 'Anthropic'
|
||||||
};
|
};
|
||||||
@@ -1176,7 +1179,8 @@ Select a phase above to begin, or use the quick actions in the sidebar!`
|
|||||||
|
|
||||||
getProviderStyle(provider) {
|
getProviderStyle(provider) {
|
||||||
const styles = {
|
const styles = {
|
||||||
ollama: 'bg-yellow-500/20 border-yellow-500/50 text-yellow-400',
|
'ollama-local': 'bg-yellow-500/20 border-yellow-500/50 text-yellow-400',
|
||||||
|
'ollama-network': 'bg-blue-500/20 border-blue-500/50 text-blue-400',
|
||||||
openai: 'bg-green-500/20 border-green-500/50 text-green-400',
|
openai: 'bg-green-500/20 border-green-500/50 text-green-400',
|
||||||
anthropic: 'bg-orange-500/20 border-orange-500/50 text-orange-400'
|
anthropic: 'bg-orange-500/20 border-orange-500/50 text-orange-400'
|
||||||
};
|
};
|
||||||
@@ -1185,7 +1189,8 @@ Select a phase above to begin, or use the quick actions in the sidebar!`
|
|||||||
|
|
||||||
getProviderBadgeStyle(provider) {
|
getProviderBadgeStyle(provider) {
|
||||||
const styles = {
|
const styles = {
|
||||||
ollama: 'bg-yellow-500/20 text-yellow-400 border border-yellow-500/30',
|
'ollama-local': 'bg-yellow-500/20 text-yellow-400 border border-yellow-500/30',
|
||||||
|
'ollama-network': 'bg-blue-500/20 text-blue-400 border border-blue-500/30',
|
||||||
openai: 'bg-green-500/20 text-green-400 border border-green-500/30',
|
openai: 'bg-green-500/20 text-green-400 border border-green-500/30',
|
||||||
anthropic: 'bg-orange-500/20 text-orange-400 border border-orange-500/30'
|
anthropic: 'bg-orange-500/20 text-orange-400 border border-orange-500/30'
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -31,9 +31,13 @@ app.add_middleware(
|
|||||||
# Configuration from environment
|
# Configuration from environment
|
||||||
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
|
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
|
||||||
ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY", "")
|
ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY", "")
|
||||||
# Support multiple Ollama endpoints (comma-separated)
|
|
||||||
OLLAMA_ENDPOINTS_STR = os.getenv("OLLAMA_ENDPOINTS", os.getenv("OLLAMA_BASE_URL", "http://192.168.1.50:11434"))
|
# Separate local and networked Ollama endpoints
|
||||||
OLLAMA_ENDPOINTS = [url.strip() for url in OLLAMA_ENDPOINTS_STR.split(",") if url.strip()]
|
OLLAMA_LOCAL_URL = os.getenv("OLLAMA_LOCAL_URL", "http://localhost:11434")
|
||||||
|
OLLAMA_NETWORK_URLS_STR = os.getenv("OLLAMA_NETWORK_URLS", os.getenv("OLLAMA_ENDPOINTS", os.getenv("OLLAMA_BASE_URL", "")))
|
||||||
|
OLLAMA_NETWORK_URLS = [url.strip() for url in OLLAMA_NETWORK_URLS_STR.split(",") if url.strip()]
|
||||||
|
|
||||||
|
# Legacy support: if only OLLAMA_ENDPOINTS is set, use it for network
|
||||||
LOAD_BALANCE_STRATEGY = os.getenv("LOAD_BALANCE_STRATEGY", "round-robin") # round-robin, random, failover
|
LOAD_BALANCE_STRATEGY = os.getenv("LOAD_BALANCE_STRATEGY", "round-robin") # round-robin, random, failover
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
@@ -44,9 +48,10 @@ class EndpointHealth:
|
|||||||
failure_count: int = 0
|
failure_count: int = 0
|
||||||
models: list = None
|
models: list = None
|
||||||
|
|
||||||
# Track endpoint health
|
# Track endpoint health for both local and network
|
||||||
endpoint_health: dict[str, EndpointHealth] = {url: EndpointHealth(url=url, models=[]) for url in OLLAMA_ENDPOINTS}
|
all_ollama_endpoints = [OLLAMA_LOCAL_URL] + OLLAMA_NETWORK_URLS if OLLAMA_LOCAL_URL else OLLAMA_NETWORK_URLS
|
||||||
current_endpoint_index = 0
|
endpoint_health: dict[str, EndpointHealth] = {url: EndpointHealth(url=url, models=[]) for url in all_ollama_endpoints}
|
||||||
|
current_network_endpoint_index = 0
|
||||||
|
|
||||||
|
|
||||||
class ChatMessage(BaseModel):
|
class ChatMessage(BaseModel):
|
||||||
@@ -55,7 +60,7 @@ class ChatMessage(BaseModel):
|
|||||||
|
|
||||||
|
|
||||||
class ChatRequest(BaseModel):
|
class ChatRequest(BaseModel):
|
||||||
provider: Literal["openai", "anthropic", "ollama"] = "ollama"
|
provider: Literal["openai", "anthropic", "ollama", "ollama-local", "ollama-network"] = "ollama-local"
|
||||||
model: str = "llama3.2"
|
model: str = "llama3.2"
|
||||||
messages: list[ChatMessage]
|
messages: list[ChatMessage]
|
||||||
temperature: float = 0.7
|
temperature: float = 0.7
|
||||||
@@ -72,7 +77,12 @@ class ChatResponse(BaseModel):
|
|||||||
@app.get("/health")
|
@app.get("/health")
|
||||||
async def health_check():
|
async def health_check():
|
||||||
"""Health check endpoint"""
|
"""Health check endpoint"""
|
||||||
return {"status": "healthy", "service": "llm-router", "endpoints": len(OLLAMA_ENDPOINTS)}
|
return {
|
||||||
|
"status": "healthy",
|
||||||
|
"service": "llm-router",
|
||||||
|
"local_endpoint": OLLAMA_LOCAL_URL,
|
||||||
|
"network_endpoints": len(OLLAMA_NETWORK_URLS)
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
async def check_endpoint_health(url: str) -> tuple[bool, list]:
|
async def check_endpoint_health(url: str) -> tuple[bool, list]:
|
||||||
@@ -89,13 +99,19 @@ async def check_endpoint_health(url: str) -> tuple[bool, list]:
|
|||||||
return False, []
|
return False, []
|
||||||
|
|
||||||
|
|
||||||
async def get_healthy_endpoint() -> Optional[str]:
|
async def get_healthy_endpoint(endpoints: list[str]) -> Optional[str]:
|
||||||
"""Get a healthy Ollama endpoint based on load balancing strategy"""
|
"""Get a healthy Ollama endpoint from the given list based on load balancing strategy"""
|
||||||
global current_endpoint_index
|
global current_network_endpoint_index
|
||||||
|
|
||||||
|
if not endpoints:
|
||||||
|
return None
|
||||||
|
|
||||||
# Refresh health status for stale checks (older than 30 seconds)
|
# Refresh health status for stale checks (older than 30 seconds)
|
||||||
now = datetime.now()
|
now = datetime.now()
|
||||||
for url, health in endpoint_health.items():
|
for url in endpoints:
|
||||||
|
if url not in endpoint_health:
|
||||||
|
endpoint_health[url] = EndpointHealth(url=url, models=[])
|
||||||
|
health = endpoint_health[url]
|
||||||
if health.last_check is None or (now - health.last_check) > timedelta(seconds=30):
|
if health.last_check is None or (now - health.last_check) > timedelta(seconds=30):
|
||||||
is_healthy, models = await check_endpoint_health(url)
|
is_healthy, models = await check_endpoint_health(url)
|
||||||
health.healthy = is_healthy
|
health.healthy = is_healthy
|
||||||
@@ -104,7 +120,7 @@ async def get_healthy_endpoint() -> Optional[str]:
|
|||||||
if is_healthy:
|
if is_healthy:
|
||||||
health.failure_count = 0
|
health.failure_count = 0
|
||||||
|
|
||||||
healthy_endpoints = [url for url, h in endpoint_health.items() if h.healthy]
|
healthy_endpoints = [url for url in endpoints if endpoint_health.get(url, EndpointHealth(url=url)).healthy]
|
||||||
|
|
||||||
if not healthy_endpoints:
|
if not healthy_endpoints:
|
||||||
return None
|
return None
|
||||||
@@ -116,9 +132,9 @@ async def get_healthy_endpoint() -> Optional[str]:
|
|||||||
return healthy_endpoints[0]
|
return healthy_endpoints[0]
|
||||||
else: # round-robin (default)
|
else: # round-robin (default)
|
||||||
# Find next healthy endpoint in rotation
|
# Find next healthy endpoint in rotation
|
||||||
for _ in range(len(OLLAMA_ENDPOINTS)):
|
for _ in range(len(endpoints)):
|
||||||
current_endpoint_index = (current_endpoint_index + 1) % len(OLLAMA_ENDPOINTS)
|
current_network_endpoint_index = (current_network_endpoint_index + 1) % len(endpoints)
|
||||||
url = OLLAMA_ENDPOINTS[current_endpoint_index]
|
url = endpoints[current_network_endpoint_index]
|
||||||
if url in healthy_endpoints:
|
if url in healthy_endpoints:
|
||||||
return url
|
return url
|
||||||
return healthy_endpoints[0]
|
return healthy_endpoints[0]
|
||||||
@@ -127,61 +143,105 @@ async def get_healthy_endpoint() -> Optional[str]:
|
|||||||
@app.get("/providers")
|
@app.get("/providers")
|
||||||
async def list_providers():
|
async def list_providers():
|
||||||
"""List available LLM providers and their status"""
|
"""List available LLM providers and their status"""
|
||||||
# Check all Ollama endpoints
|
providers = {
|
||||||
ollama_info = []
|
"openai": {"available": bool(OPENAI_API_KEY), "models": ["gpt-4o", "gpt-4o-mini", "gpt-4-turbo"]},
|
||||||
all_models = set()
|
"anthropic": {"available": bool(ANTHROPIC_API_KEY), "models": ["claude-sonnet-4-20250514", "claude-3-5-haiku-20241022"]},
|
||||||
any_available = False
|
}
|
||||||
|
|
||||||
for url in OLLAMA_ENDPOINTS:
|
# Check local Ollama endpoint
|
||||||
|
if OLLAMA_LOCAL_URL:
|
||||||
|
is_healthy, models = await check_endpoint_health(OLLAMA_LOCAL_URL)
|
||||||
|
endpoint_health[OLLAMA_LOCAL_URL] = EndpointHealth(
|
||||||
|
url=OLLAMA_LOCAL_URL,
|
||||||
|
healthy=is_healthy,
|
||||||
|
models=models,
|
||||||
|
last_check=datetime.now()
|
||||||
|
)
|
||||||
|
providers["ollama-local"] = {
|
||||||
|
"available": is_healthy,
|
||||||
|
"endpoint": OLLAMA_LOCAL_URL,
|
||||||
|
"models": models if models else ["llama3", "mistral", "codellama"]
|
||||||
|
}
|
||||||
|
else:
|
||||||
|
providers["ollama-local"] = {"available": False, "models": []}
|
||||||
|
|
||||||
|
# Check networked Ollama endpoints
|
||||||
|
network_info = []
|
||||||
|
network_models = set()
|
||||||
|
any_network_available = False
|
||||||
|
|
||||||
|
for url in OLLAMA_NETWORK_URLS:
|
||||||
is_healthy, models = await check_endpoint_health(url)
|
is_healthy, models = await check_endpoint_health(url)
|
||||||
endpoint_health[url].healthy = is_healthy
|
endpoint_health[url] = EndpointHealth(
|
||||||
endpoint_health[url].models = models
|
url=url,
|
||||||
endpoint_health[url].last_check = datetime.now()
|
healthy=is_healthy,
|
||||||
|
models=models,
|
||||||
ollama_info.append({
|
last_check=datetime.now()
|
||||||
|
)
|
||||||
|
network_info.append({
|
||||||
"url": url,
|
"url": url,
|
||||||
"available": is_healthy,
|
"available": is_healthy,
|
||||||
"models": models
|
"models": models
|
||||||
})
|
})
|
||||||
if is_healthy:
|
if is_healthy:
|
||||||
any_available = True
|
any_network_available = True
|
||||||
all_models.update(models)
|
network_models.update(models)
|
||||||
|
|
||||||
providers = {
|
providers["ollama-network"] = {
|
||||||
"openai": {"available": bool(OPENAI_API_KEY), "models": ["gpt-4o", "gpt-4o-mini", "gpt-4-turbo"]},
|
"available": any_network_available,
|
||||||
"anthropic": {"available": bool(ANTHROPIC_API_KEY), "models": ["claude-sonnet-4-20250514", "claude-3-5-haiku-20241022"]},
|
"endpoints": network_info,
|
||||||
"ollama": {
|
"load_balance_strategy": LOAD_BALANCE_STRATEGY,
|
||||||
"available": any_available,
|
"models": list(network_models) if network_models else ["llama3", "mistral", "codellama"]
|
||||||
"endpoints": ollama_info,
|
|
||||||
"load_balance_strategy": LOAD_BALANCE_STRATEGY,
|
|
||||||
"models": list(all_models) if all_models else ["llama3", "mistral", "codellama"]
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Legacy: also provide combined "ollama" for backward compatibility
|
||||||
|
all_ollama_models = set()
|
||||||
|
if providers["ollama-local"]["available"]:
|
||||||
|
all_ollama_models.update(providers["ollama-local"]["models"])
|
||||||
|
if providers["ollama-network"]["available"]:
|
||||||
|
all_ollama_models.update(providers["ollama-network"]["models"])
|
||||||
|
|
||||||
|
providers["ollama"] = {
|
||||||
|
"available": providers["ollama-local"]["available"] or providers["ollama-network"]["available"],
|
||||||
|
"models": list(all_ollama_models) if all_ollama_models else ["llama3", "mistral", "codellama"]
|
||||||
|
}
|
||||||
|
|
||||||
return providers
|
return providers
|
||||||
|
|
||||||
|
|
||||||
@app.get("/endpoints")
|
@app.get("/endpoints")
|
||||||
async def list_endpoints():
|
async def list_endpoints():
|
||||||
"""List all Ollama endpoints with detailed status"""
|
"""List all Ollama endpoints with detailed status"""
|
||||||
results = []
|
results = {
|
||||||
for url in OLLAMA_ENDPOINTS:
|
"local": None,
|
||||||
is_healthy, models = await check_endpoint_health(url)
|
"network": []
|
||||||
endpoint_health[url].healthy = is_healthy
|
}
|
||||||
endpoint_health[url].models = models
|
|
||||||
endpoint_health[url].last_check = datetime.now()
|
|
||||||
|
|
||||||
results.append({
|
# Local endpoint
|
||||||
|
if OLLAMA_LOCAL_URL:
|
||||||
|
is_healthy, models = await check_endpoint_health(OLLAMA_LOCAL_URL)
|
||||||
|
results["local"] = {
|
||||||
|
"url": OLLAMA_LOCAL_URL,
|
||||||
|
"healthy": is_healthy,
|
||||||
|
"models": models,
|
||||||
|
"failure_count": endpoint_health.get(OLLAMA_LOCAL_URL, EndpointHealth(url=OLLAMA_LOCAL_URL)).failure_count
|
||||||
|
}
|
||||||
|
|
||||||
|
# Network endpoints
|
||||||
|
for url in OLLAMA_NETWORK_URLS:
|
||||||
|
is_healthy, models = await check_endpoint_health(url)
|
||||||
|
results["network"].append({
|
||||||
"url": url,
|
"url": url,
|
||||||
"healthy": is_healthy,
|
"healthy": is_healthy,
|
||||||
"models": models,
|
"models": models,
|
||||||
"failure_count": endpoint_health[url].failure_count
|
"failure_count": endpoint_health.get(url, EndpointHealth(url=url)).failure_count
|
||||||
})
|
})
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"strategy": LOAD_BALANCE_STRATEGY,
|
"strategy": LOAD_BALANCE_STRATEGY,
|
||||||
"endpoints": results,
|
"endpoints": results,
|
||||||
"healthy_count": sum(1 for r in results if r["healthy"]),
|
"network_healthy_count": sum(1 for r in results["network"] if r["healthy"]),
|
||||||
"total_count": len(results)
|
"network_total_count": len(results["network"])
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -193,8 +253,23 @@ async def chat(request: ChatRequest):
|
|||||||
return await _call_openai(request)
|
return await _call_openai(request)
|
||||||
elif request.provider == "anthropic":
|
elif request.provider == "anthropic":
|
||||||
return await _call_anthropic(request)
|
return await _call_anthropic(request)
|
||||||
|
elif request.provider == "ollama-local":
|
||||||
|
return await _call_ollama_local(request)
|
||||||
|
elif request.provider == "ollama-network":
|
||||||
|
return await _call_ollama_network(request)
|
||||||
elif request.provider == "ollama":
|
elif request.provider == "ollama":
|
||||||
return await _call_ollama(request)
|
# Legacy: try local first, then network
|
||||||
|
if OLLAMA_LOCAL_URL:
|
||||||
|
try:
|
||||||
|
return await _call_ollama_local(request)
|
||||||
|
except HTTPException:
|
||||||
|
if OLLAMA_NETWORK_URLS:
|
||||||
|
return await _call_ollama_network(request)
|
||||||
|
raise
|
||||||
|
elif OLLAMA_NETWORK_URLS:
|
||||||
|
return await _call_ollama_network(request)
|
||||||
|
else:
|
||||||
|
raise HTTPException(status_code=503, detail="No Ollama endpoints configured")
|
||||||
else:
|
else:
|
||||||
raise HTTPException(status_code=400, detail=f"Unknown provider: {request.provider}")
|
raise HTTPException(status_code=400, detail=f"Unknown provider: {request.provider}")
|
||||||
|
|
||||||
@@ -279,13 +354,8 @@ async def _call_anthropic(request: ChatRequest) -> ChatResponse:
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
async def _call_ollama(request: ChatRequest) -> ChatResponse:
|
async def _call_ollama_endpoint(request: ChatRequest, endpoint: str, provider_label: str) -> ChatResponse:
|
||||||
"""Call Ollama API with load balancing across endpoints"""
|
"""Call a specific Ollama endpoint"""
|
||||||
endpoint = await get_healthy_endpoint()
|
|
||||||
|
|
||||||
if not endpoint:
|
|
||||||
raise HTTPException(status_code=503, detail="No healthy Ollama endpoints available")
|
|
||||||
|
|
||||||
async with httpx.AsyncClient() as client:
|
async with httpx.AsyncClient() as client:
|
||||||
try:
|
try:
|
||||||
response = await client.post(
|
response = await client.post(
|
||||||
@@ -304,17 +374,19 @@ async def _call_ollama(request: ChatRequest) -> ChatResponse:
|
|||||||
|
|
||||||
if response.status_code != 200:
|
if response.status_code != 200:
|
||||||
# Mark endpoint as failed
|
# Mark endpoint as failed
|
||||||
endpoint_health[endpoint].failure_count += 1
|
if endpoint in endpoint_health:
|
||||||
if endpoint_health[endpoint].failure_count >= 3:
|
endpoint_health[endpoint].failure_count += 1
|
||||||
endpoint_health[endpoint].healthy = False
|
if endpoint_health[endpoint].failure_count >= 3:
|
||||||
|
endpoint_health[endpoint].healthy = False
|
||||||
raise HTTPException(status_code=response.status_code, detail=response.text)
|
raise HTTPException(status_code=response.status_code, detail=response.text)
|
||||||
|
|
||||||
# Reset failure count on success
|
# Reset failure count on success
|
||||||
endpoint_health[endpoint].failure_count = 0
|
if endpoint in endpoint_health:
|
||||||
|
endpoint_health[endpoint].failure_count = 0
|
||||||
|
|
||||||
data = response.json()
|
data = response.json()
|
||||||
return ChatResponse(
|
return ChatResponse(
|
||||||
provider="ollama",
|
provider=provider_label,
|
||||||
model=request.model,
|
model=request.model,
|
||||||
content=data["message"]["content"],
|
content=data["message"]["content"],
|
||||||
usage={
|
usage={
|
||||||
@@ -325,16 +397,37 @@ async def _call_ollama(request: ChatRequest) -> ChatResponse:
|
|||||||
)
|
)
|
||||||
except httpx.ConnectError:
|
except httpx.ConnectError:
|
||||||
# Mark endpoint as unhealthy
|
# Mark endpoint as unhealthy
|
||||||
endpoint_health[endpoint].healthy = False
|
if endpoint in endpoint_health:
|
||||||
endpoint_health[endpoint].failure_count += 1
|
endpoint_health[endpoint].healthy = False
|
||||||
|
endpoint_health[endpoint].failure_count += 1
|
||||||
|
raise HTTPException(status_code=503, detail=f"Ollama endpoint unavailable: {endpoint}")
|
||||||
|
|
||||||
# Try another endpoint if available
|
|
||||||
other_endpoint = await get_healthy_endpoint()
|
|
||||||
if other_endpoint and other_endpoint != endpoint:
|
|
||||||
# Recursive call will use different endpoint
|
|
||||||
return await _call_ollama(request)
|
|
||||||
|
|
||||||
raise HTTPException(status_code=503, detail="All Ollama endpoints unavailable")
|
async def _call_ollama_local(request: ChatRequest) -> ChatResponse:
|
||||||
|
"""Call local Ollama instance"""
|
||||||
|
if not OLLAMA_LOCAL_URL:
|
||||||
|
raise HTTPException(status_code=503, detail="Local Ollama not configured")
|
||||||
|
return await _call_ollama_endpoint(request, OLLAMA_LOCAL_URL, "ollama-local")
|
||||||
|
|
||||||
|
|
||||||
|
async def _call_ollama_network(request: ChatRequest) -> ChatResponse:
|
||||||
|
"""Call networked Ollama with load balancing across endpoints"""
|
||||||
|
if not OLLAMA_NETWORK_URLS:
|
||||||
|
raise HTTPException(status_code=503, detail="No networked Ollama endpoints configured")
|
||||||
|
|
||||||
|
endpoint = await get_healthy_endpoint(OLLAMA_NETWORK_URLS)
|
||||||
|
|
||||||
|
if not endpoint:
|
||||||
|
raise HTTPException(status_code=503, detail="No healthy networked Ollama endpoints available")
|
||||||
|
|
||||||
|
try:
|
||||||
|
return await _call_ollama_endpoint(request, endpoint, "ollama-network")
|
||||||
|
except HTTPException:
|
||||||
|
# Try another endpoint if available
|
||||||
|
other_endpoint = await get_healthy_endpoint(OLLAMA_NETWORK_URLS)
|
||||||
|
if other_endpoint and other_endpoint != endpoint:
|
||||||
|
return await _call_ollama_endpoint(request, other_endpoint, "ollama-network")
|
||||||
|
raise
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
Reference in New Issue
Block a user