Files
llm-gateway-sub-backup/workspace/services/model_service.py
2025-08-11 18:56:38 +09:00

70 lines
2.2 KiB
Python

import logging
from typing import Dict
import httpx
from config.setting import OLLAMA_URL
from fastapi.responses import JSONResponse
logger = logging.getLogger(__name__)
class ModelInfoService:
    """Aggregate model availability for the gateway.

    Combines the models reported by each local Ollama node (the "inner"
    models, discovered at request time via each node's ``/api/tags``
    endpoint) with a fixed catalogue of external ("outer") models.
    """

    # External (non-Ollama) models exposed through the gateway.
    OUTER_MODELS = [
        "claude-sonnet-4-20250514",
        "claude-3-7-sonnet-20250219",
        "claude-3-5-haiku-20241022",
        "gemini-2.5-pro",
        "gemini-2.5-flash",
        "gpt-4.1",
        "gpt-4o",
    ]

    # Defaults surfaced by get_model_info(); hoisted from inline literals.
    DEFAULT_INNER_MODEL = "gpt-oss:20b"  # previously gemma3:27b
    DEFAULT_OUTER_MODEL = "gpt-4.1"

    @staticmethod
    async def _fetch_models(client: httpx.AsyncClient, url: str) -> list:
        """Return the model names served by the Ollama node at *url*.

        *url* is assumed to be a generate-endpoint URL ending in
        ``/api/generate``; the matching ``/api/tags`` endpoint is queried.
        (The two public methods previously derived the tags URL with
        different replace patterns — unified here.)

        Raises on HTTP/network errors; per-node failure handling is the
        caller's responsibility.
        """
        tags_url = url.replace("/api/generate", "/api/tags")
        res = await client.get(tags_url)
        res.raise_for_status()
        return [m["name"] for m in res.json().get("models", [])]

    @staticmethod
    async def get_ollama_model_map() -> Dict[str, str]:
        """Map each available Ollama model name to the node URL serving it.

        If several nodes serve the same model, the last node queried wins.
        Nodes that fail to respond are logged and skipped.

        Returns:
            Dict[str, str]: model name -> generate-endpoint URL.
        """
        model_url_map: Dict[str, str] = {}
        # One client for the whole scan instead of one per node.
        async with httpx.AsyncClient(timeout=3.0) as client:
            for url in OLLAMA_URL:
                try:
                    for name in await ModelInfoService._fetch_models(client, url):
                        model_url_map[name] = url
                except Exception as e:
                    # Best-effort scan: a dead node must not break discovery.
                    logger.error("[ERROR] %s 모델 조회 실패: %s", url, e)
        return model_url_map

    @staticmethod
    async def get_model_info() -> JSONResponse:
        """Return the inner (Ollama) and outer model catalogue as JSON.

        Unreachable Ollama nodes are logged and skipped, so the inner list
        may be empty. Duplicate model names across nodes are de-duplicated
        (order not guaranteed).
        """
        inner_models: list = []
        async with httpx.AsyncClient(timeout=3.0) as client:
            for url in OLLAMA_URL:
                try:
                    inner_models.extend(
                        await ModelInfoService._fetch_models(client, url)
                    )
                except Exception as e:
                    logger.error(
                        "[API-INFO-ERROR] Ollama 모델 조회 실패 (%s): %s", url, e
                    )
        inner_models = list(set(inner_models))  # de-duplicate across nodes
        return JSONResponse(
            content={
                "models": {
                    "inner_model": {
                        "default_model": ModelInfoService.DEFAULT_INNER_MODEL,
                        "model_list": inner_models,
                    },
                    "outer_model": {
                        "default_model": ModelInfoService.DEFAULT_OUTER_MODEL,
                        "model_list": ModelInfoService.OUTER_MODELS,
                    },
                }
            }
        )