Files
llm-gateway-sub-backup/workspace/utils/text_generator.py
2025-08-11 18:56:38 +09:00

480 lines
18 KiB
Python

import copy
import json
import logging
from collections import OrderedDict
from typing import Optional
import anthropic
import google.generativeai as genai
import requests
from anthropic._exceptions import BadRequestError, OverloadedError
from fastapi import HTTPException
from google.api_core.exceptions import ResourceExhausted
from openai import OpenAI
from utils.checking_keys import APIKeyLoader
from utils.logging_utils import log_generation_info, log_ollama_stats
from utils.text_formatter import PromptFormatter
# Module-level logger, named after this module so records can be filtered per module.
logger = logging.getLogger(__name__)
# ✅ 1. Ollama Gen
class OllamaGenerator:
    """Text generator that talks to an Ollama HTTP generate endpoint.

    Args:
        model: Model tag placed in every request payload.
        api_url: Full URL the payload is POSTed to.
        timeout: Value forwarded to ``requests.post(timeout=...)``. ``None``
            (the default) keeps the previous behaviour of waiting
            indefinitely; pass a number of seconds to bound the call.
    """

    def __init__(
        self,
        model="gemma3:27b",
        api_url="http://pgn_ollama_gemma:11534/api/generate",
        timeout: Optional[float] = None,
    ):
        self.model = model
        self.api_url = api_url
        self.timeout = timeout

    def _post(self, payload: dict) -> dict:
        """POST *payload* to the endpoint and return the decoded JSON body.

        Raises:
            requests.RequestException: On transport errors or non-2xx status.
            ValueError: If the decoded body has no ``"response"`` key.
        """
        response = requests.post(self.api_url, json=payload, timeout=self.timeout)
        response.raise_for_status()
        res = response.json()
        if "response" not in res:
            raise ValueError(
                "[GENERATE-OLLAMA-ERROR] LLM 응답에 'response' 키가 없습니다."
            )
        # Log the inference statistics carried by the response body.
        log_ollama_stats(res)
        return res

    # 1-1. General (free-text) generation
    def generate(
        self, text, user_prompt=None, custom_mode=False, prompt_mode: str = "extract"
    ):
        """Generate free text for *text*.

        Returns:
            Tuple ``(response_text, model, api_url)``.

        Raises:
            HTTPException: Status 500 for any failure while calling the server
                or validating its response.
        """
        log_generation_info(custom_mode, user_prompt)
        prompt = PromptFormatter.format(text, user_prompt, custom_mode, prompt_mode)
        # Automatically append "/no_think" for qwen models.
        if "qwen" in self.model.lower():
            prompt += " /no_think"
        payload = {"model": self.model, "prompt": prompt, "stream": False}
        try:
            res = self._post(payload)
            return res["response"], self.model, self.api_url
        # Deliberately broad: every failure on this path is reported to the
        # client as the same 500 response.
        except Exception as e:
            logger.error(f"[OLLAMA-ERROR] 서버 연결 실패: {e}")
            raise HTTPException(
                status_code=500,
                detail="Ollama 서빙 서버에 연결할 수 없습니다.\n서버가 실행 중인지 확인하세요.",
            ) from e

    # 1-2. Structured (JSON) generation
    def structured_generate(
        self,
        text,
        user_prompt=None,
        custom_mode=False,
        schema_override: Optional[dict] = None,
    ):
        """Generate a JSON object, optionally constrained by *schema_override*.

        Returns:
            Tuple ``(parsed_json, model, api_url)``.

        Raises:
            requests.RequestException: On transport errors or non-2xx status.
            ValueError: If the body lacks ``"response"`` or it is not valid JSON.
        """
        log_generation_info(custom_mode, user_prompt)
        prompt = PromptFormatter.format(text, user_prompt, custom_mode)
        payload = {"model": self.model, "prompt": prompt, "stream": False}
        # Only send "format" when a schema was actually supplied, instead of
        # posting an explicit null.
        if schema_override is not None:
            payload["format"] = schema_override
        res = self._post(payload)
        # No class-based validation here: the response is only parsed as JSON.
        try:
            structured = json.loads(res["response"])
            return structured, self.model, self.api_url
        except json.JSONDecodeError as e:
            logger.error(f"[PARSE-ERROR] LLM 응답이 JSON으로 파싱되지 않음: {e}")
            raise ValueError("LLM 응답이 JSON 형식이 아닙니다.") from e
# ✅ 2. Gemini Gen
class GeminiGenerator:
    """Text generator backed by the ``google.generativeai`` SDK.

    Args:
        model: Gemini model name passed to ``genai.GenerativeModel``.
    """

    # Schema keywords removed before a schema is handed to Gemini.
    _UNSUPPORTED_SCHEMA_KEYS = ("title", "$schema")
    # Keywords whose value maps user-chosen names to sub-schemas; the names
    # themselves must never be stripped, even if one is literally "title".
    _NAME_MAP_KEYS = ("properties", "$defs", "definitions")
    # Keywords holding literal data rather than sub-schemas; copied untouched.
    _LITERAL_KEYS = ("default", "enum", "const", "example", "examples")

    def __init__(self, model="gemini-2.5-pro-exp-03-25"):
        self.api_key = APIKeyLoader.load_gemini_key()
        genai.configure(api_key=self.api_key)
        self.model = genai.GenerativeModel(model)

    def clean_schema_for_gemini(self, schema: dict) -> dict:
        """Return a copy of *schema* without the keywords Gemini does not allow.

        ``title`` and ``$schema`` are removed at every nesting level (not only
        at the top), while property names are preserved. The input is not
        mutated.
        """
        return self._strip_unsupported(dict(schema))

    @classmethod
    def _strip_unsupported(cls, node):
        """Recursively rebuild *node* without the unsupported schema keywords."""
        if isinstance(node, list):
            return [cls._strip_unsupported(item) for item in node]
        if not isinstance(node, dict):
            return node
        cleaned = {}
        for key, value in node.items():
            if key in cls._UNSUPPORTED_SCHEMA_KEYS:
                continue
            if key in cls._LITERAL_KEYS:
                cleaned[key] = value
            elif key in cls._NAME_MAP_KEYS and isinstance(value, dict):
                # Keys here are property/definition names: keep them all and
                # only clean the sub-schemas they point to.
                cleaned[key] = {
                    name: cls._strip_unsupported(sub) for name, sub in value.items()
                }
            else:
                cleaned[key] = cls._strip_unsupported(value)
        return cleaned

    @staticmethod
    def _coerce_to_object(parsed):
        """Reduce a decoded Gemini JSON payload to a single object.

        Accepts a dict, a non-empty list whose first item is a dict, or a
        non-empty list whose first item is a JSON string.

        Raises:
            ValueError: For any other shape, including an empty list.
        """
        if isinstance(parsed, dict):
            return parsed
        if isinstance(parsed, list) and parsed:
            first = parsed[0]
            if isinstance(first, dict):
                return first
            if isinstance(first, str):
                return json.loads(first)
        raise ValueError("❌ 응답 형식이 예상과 다릅니다.")

    def _result(self, payload):
        """Build the ``(payload, short_model_name, backend_label)`` tuple."""
        return (
            payload,
            self.model.model_name.split("/")[-1],
            "google.generativeai SDK",
        )

    # 2-1. General (free-text) generation
    def generate(
        self, text, user_prompt=None, custom_mode=False, prompt_mode: str = "extract"
    ):
        """Generate free text for *text*.

        Returns:
            Tuple ``(response_text, model_name, "google.generativeai SDK")``.

        Raises:
            ValueError: If the response text is empty.
            HTTPException: Status 500 when the SDK raises ``ResourceExhausted``.
        """
        log_generation_info(custom_mode, user_prompt)
        prompt = PromptFormatter.format(text, user_prompt, custom_mode, prompt_mode)
        try:
            response = self.model.generate_content(prompt)
            if not response.text:
                raise ValueError(
                    "[GENERATE-GEMINI-ERROR] LLM 응답에 'response' 가 없습니다."
                )
            return self._result(response.text)
        # Gemini API quota exceeded.
        except ResourceExhausted as e:
            logger.error(f"[GEMINI-ERROR] 할당량 초과: {e}")
            raise HTTPException(
                status_code=500,
                detail="Gemini 모델의 일일 API 사용량이 초과되었습니다.\n'claude-3-7-sonnet-20250219' 또는 'gpt-4.1' 모델로 다시 시도하세요.",
            ) from e

    # 2-2. Structured (JSON) generation
    def structured_generate(
        self,
        text,
        user_prompt=None,
        custom_mode=False,
        schema_override: Optional[dict] = None,
    ):
        """Generate a JSON object, optionally constrained by *schema_override*.

        When the schema has ``properties``, the result is rebuilt as an
        ``OrderedDict`` holding exactly those keys in schema order (missing
        keys become ``None``).

        Returns:
            Tuple ``(structured, model_name, "google.generativeai SDK")``.

        Raises:
            ValueError: If the response is empty, not JSON, or has an
                unexpected shape.
            HTTPException: Status 500 when the SDK raises ``ResourceExhausted``.
        """
        log_generation_info(custom_mode, user_prompt)
        prompt = PromptFormatter.format(text, user_prompt, custom_mode)
        response_schema = (
            self.clean_schema_for_gemini(schema_override) if schema_override else None
        )
        try:
            response = self.model.generate_content(
                contents=prompt,
                generation_config=genai.GenerationConfig(
                    response_mime_type="application/json",
                    response_schema=response_schema,
                ),
            )
            if not response.text:
                raise ValueError(
                    "❌ Gemini 응답에서 구조화된 데이터를 파싱하지 못했습니다."
                )
            structured = self._coerce_to_object(json.loads(response.text))
            # Re-order the fields to follow the schema's property order.
            if schema_override and "properties" in schema_override:
                ordered_keys = list(schema_override["properties"].keys())
                structured = OrderedDict(
                    (key, structured.get(key)) for key in ordered_keys
                )
            return self._result(structured)
        # Gemini API quota exceeded.
        except ResourceExhausted as e:
            logger.error(f"[GEMINI-STRUCTURED-ERROR] 할당량 초과: {e}")
            raise HTTPException(
                status_code=500,
                detail="'Gemini' 모델의 일일 API 사용량이 초과되었습니다.\n'claude-3-7-sonnet-20250219' 또는 'gpt-4.1' 모델로 다시 시도하세요.",
            ) from e
        except json.JSONDecodeError as e:
            logger.error(f"[GEMINI-STRUCTURED-PARSE-ERROR] 응답 JSON 파싱 실패: {e}")
            raise ValueError("Gemini 응답이 JSON 형식이 아닙니다.") from e

    def generate_multimodal(self, images, prompt, schema_override=None):
        """Generate text from *prompt* plus a sequence of raw image bytes.

        ``schema_override`` is accepted but not used by this method.

        Returns:
            Tuple ``(response_text, model_name, "google.generativeai SDK")``.

        Raises:
            HTTPException: Status 400 if an image cannot be opened; status 500
                for quota exhaustion or any other generation failure.
        """
        import io

        from PIL import Image

        content = [prompt]
        for image_bytes in images:
            try:
                img = Image.open(io.BytesIO(image_bytes))
                content.append(img)
            except Exception as e:
                logger.error(f"[GEMINI-MULTIMODAL-ERROR] 이미지 처리 실패: {e}")
                raise HTTPException(
                    status_code=400, detail=f"이미지 파일을 처리할 수 없습니다: {e}"
                ) from e
        try:
            response = self.model.generate_content(content)
            if not response.text:
                raise ValueError(
                    "[GENERATE-GEMINI-ERROR] LLM 응답에 'response' 가 없습니다."
                )
            return self._result(response.text)
        except ResourceExhausted as e:
            logger.error(f"[GEMINI-MULTIMODAL-ERROR] 할당량 초과: {e}")
            raise HTTPException(
                status_code=500,
                detail="Gemini 모델의 일일 API 사용량이 초과되었습니다.\n'claude-3-7-sonnet-20250219' 또는 'gpt-4.1' 모델로 다시 시도하세요.",
            ) from e
        except Exception as e:
            logger.error(f"[GEMINI-MULTIMODAL-ERROR] Gemini 응답 파싱 실패: {e}")
            raise HTTPException(
                status_code=500, detail=f"❌ Gemini 응답 생성에 실패했습니다: {e}"
            ) from e
# ✅ 3. Claude Gen
class ClaudeGenerator:
    """Text generator backed by the ``anthropic`` SDK.

    Args:
        model: Claude model name sent with every request.
    """

    def __init__(self, model="claude-3-7-sonnet-20250219"):
        self.api_key = APIKeyLoader.load_claude_key()
        self.client = anthropic.Anthropic(api_key=self.api_key)
        self.model = model

    @staticmethod
    def _validate_schema_keys(schema) -> None:
        """Reject schemas whose top-level property names contain non-ASCII characters.

        Raises:
            HTTPException: Status 400 with the field-name message when a
                property name is not pure ASCII, or status 400 with a generic
                schema-processing message if the schema cannot be inspected.
        """
        try:
            for field_name in schema.get("properties", {}).keys():
                # Any code point above 127 (e.g. Hangul) is rejected.
                if not field_name.isascii():
                    raise HTTPException(
                        status_code=400,
                        detail="❌ Claude 모델은 JSON Schema의 필드명이 영어로만 구성되어 있어야 합니다. 필드명을 영문으로 수정해 주세요.",
                    )
        except HTTPException:
            # Let the specific validation error through unchanged; the generic
            # handler below would otherwise re-wrap it and garble its message.
            raise
        except Exception as e:
            raise HTTPException(
                status_code=400, detail=f"스키마 처리 중 오류 발생: {str(e)}"
            ) from e

    # 3-1. General (free-text) generation
    def generate(
        self, text, user_prompt=None, custom_mode=False, prompt_mode: str = "extract"
    ):
        """Generate free text for *text*.

        Returns:
            Tuple ``(response_text, model, "anthropic.Anthropic SDK")``.

        Raises:
            ValueError: If the first content block has empty text.
            HTTPException: Status 500 when the SDK raises ``BadRequestError``
                or ``OverloadedError``.
        """
        log_generation_info(custom_mode, user_prompt)
        prompt = PromptFormatter.format(text, user_prompt, custom_mode, prompt_mode)
        try:
            response = self.client.messages.create(
                model=self.model,
                max_tokens=4096,
                messages=[{"role": "user", "content": prompt}],
            )
            answer = response.content[0].text
            if not answer:
                raise ValueError(
                    "[GENERATE-CLAUDE-ERROR] LLM 응답에 'response' 가 없습니다."
                )
            return answer, self.model, "anthropic.Anthropic SDK"
        # Claude API rejected the request or is overloaded.
        except (BadRequestError, OverloadedError) as e:
            # Tag corrected: this is the non-structured path.
            logger.error(f"[CLAUDE-ERROR] Claude API 에러 발생: {e}")
            raise HTTPException(
                status_code=500,
                detail="Claude 모델의 일일 API 사용량이 초과되었습니다.\n'gemini-2.5-pro-exp-03-25' 또는 'gpt-4.1' 모델로 다시 시도하세요.",
            ) from e

    # 3-2. Structured (JSON) generation
    def structured_generate(
        self,
        text,
        user_prompt=None,
        custom_mode=False,
        schema_override: Optional[dict] = None,
    ):
        """Generate a JSON object by forcing a tool call built from *schema_override*.

        Returns:
            Tuple ``(tool_input, model, "anthropic.Anthropic SDK")`` where
            ``tool_input`` is the ``input`` of the first content block.

        Raises:
            HTTPException: Status 400 for an invalid schema; status 500 when
                the SDK raises ``BadRequestError`` or ``OverloadedError``.
        """
        log_generation_info(custom_mode, user_prompt)
        prompt = PromptFormatter.format(text, user_prompt, custom_mode)
        # Claude requires every JSON Schema key to be written in English.
        if schema_override:
            self._validate_schema_keys(schema_override)
        # NOTE(review): when schema_override is None, "input_schema" is sent
        # as None — confirm whether callers always supply a schema.
        tools = [
            {
                "name": "build_text_analysis_result",
                "description": "Extract structured fields from OCR text in document format",
                "input_schema": schema_override,
            }
        ]
        try:
            response = self.client.messages.create(
                model=self.model,
                max_tokens=4096,
                messages=[{"role": "user", "content": prompt}],
                tools=tools,
                tool_choice={"type": "tool", "name": "build_text_analysis_result"},
            )
            structured = response.content[0].input
            return structured, self.model, "anthropic.Anthropic SDK"
        # Claude API rejected the request or is overloaded.
        except (BadRequestError, OverloadedError) as e:
            logger.error(f"[CLAUDE-STRUCTURED-ERROR] Claude API 에러 발생: {e}")
            raise HTTPException(
                status_code=500,
                detail="Claude 모델의 일일 API 사용량이 초과되었습니다.\n'gemini-2.5-pro-exp-03-25' 또는 'gpt-4.1' 모델로 다시 시도하세요.",
            ) from e
# ✅ 4. GPT Gen
class GptGenerator:
    """Text generator backed by the ``openai`` SDK.

    Args:
        model: OpenAI model name sent with every request.

    Raises:
        RuntimeError: If the API key is missing or does not start with ``sk-``.
    """

    def __init__(self, model="gpt-4o"):
        # Load the OpenAI API key and validate its shape.
        raw = APIKeyLoader.load_gpt_key()
        if not raw:
            raise RuntimeError("OPENAI_API_KEY가 설정되지 않았습니다.")
        self.api_key = raw.strip()
        # "sk-proj-..." keys also start with "sk-", so one prefix covers both.
        if not self.api_key.startswith("sk-"):
            raise RuntimeError("유효하지 않은 OpenAI API Key 형식입니다.")
        self.client = OpenAI(api_key=self.api_key)
        self.model = model

    def enforce_strict_schema(self, schema: dict) -> dict:
        """Return a deep copy of *schema* adjusted for strict function calling.

        For the root schema and every nested object schema:

        * ``required`` lists every property. Entries already present keep
          their order and missing property names are appended in property
          order, so the result is deterministic.
        * ``additionalProperties`` is set to ``False`` unless already present.

        The input schema is never mutated.
        """
        strict_schema = copy.deepcopy(schema)
        self._make_strict(strict_schema, force=True)
        return strict_schema

    @classmethod
    def _make_strict(cls, node, force=False):
        """Apply the strict-mode rules to *node* in place, then to its sub-schemas."""
        if isinstance(node, list):
            for item in node:
                cls._make_strict(item)
            return
        if not isinstance(node, dict):
            return

        props = node.get("properties")
        is_object = isinstance(props, dict) or node.get("type") == "object"
        # The root is always processed (force) to match the original contract.
        if force or is_object:
            prop_names = list(props) if isinstance(props, dict) else []
            # dict.fromkeys de-duplicates while preserving first-seen order.
            node["required"] = list(
                dict.fromkeys(list(node.get("required", [])) + prop_names)
            )
            node.setdefault("additionalProperties", False)

        if isinstance(props, dict):
            for sub_schema in props.values():
                cls._make_strict(sub_schema)
        for defs_key in ("$defs", "definitions"):
            definitions = node.get(defs_key)
            if isinstance(definitions, dict):
                for sub_schema in definitions.values():
                    cls._make_strict(sub_schema)
        for nested_key in ("items", "prefixItems", "anyOf", "allOf", "oneOf"):
            cls._make_strict(node.get(nested_key))

    @staticmethod
    def _sniff_image_mime(image_bytes) -> str:
        """Guess an image MIME type from its leading bytes; defaults to PNG."""
        head = bytes(image_bytes[:12])
        if head.startswith(b"\xff\xd8\xff"):
            return "image/jpeg"
        if head.startswith((b"GIF87a", b"GIF89a")):
            return "image/gif"
        if head[:4] == b"RIFF" and head[8:12] == b"WEBP":
            return "image/webp"
        return "image/png"

    # 4-1. General (free-text) generation
    def generate(
        self, text, user_prompt=None, custom_mode=False, prompt_mode: str = "extract"
    ):
        """Generate free text for *text* through the Responses API.

        Returns:
            Tuple ``(response_text, model, "OpenAI Python SDK")``.

        Raises:
            RuntimeError: If the API call fails or the response has no text.
        """
        log_generation_info(custom_mode, user_prompt)
        prompt = PromptFormatter.format(text, user_prompt, custom_mode, prompt_mode)
        try:
            response = self.client.responses.create(model=self.model, input=prompt)
        except Exception as e:
            logger.error(f"[GENERATE-GPT-ERROR] OpenAI API 호출 중 예외 발생: {e}")
            raise RuntimeError("GPT 생성 요청 중 오류가 발생했습니다.") from e
        try:
            if (
                not response.output
                or not response.output[0].content
                or not response.output[0].content[0].text
            ):
                raise ValueError("LLM 응답에 'response'가 없습니다.")
        except Exception as e:
            logger.error(
                f"[GENERATE-GPT-ERROR] 응답 파싱 실패: {e} | 원본 응답: {response}"
            )
            raise RuntimeError("GPT 응답 파싱 중 오류가 발생했습니다.") from e
        return response.output[0].content[0].text, self.model, "OpenAI Python SDK"

    # 4-2. Structured (JSON) generation
    def structured_generate(
        self,
        text,
        user_prompt=None,
        custom_mode=False,
        schema_override: Optional[dict] = None,
    ):
        """Generate a JSON object by forcing a strict function call.

        Returns:
            Tuple ``(structured, model, "OpenAI Function Calling")`` where
            ``structured`` is the decoded arguments of the first tool call.

        Raises:
            HTTPException: Status 500 for any failure while calling the API
                or decoding the tool-call arguments.
        """
        log_generation_info(custom_mode, user_prompt)
        prompt = PromptFormatter.format(text, user_prompt, custom_mode)
        schema = self.enforce_strict_schema(schema_override) if schema_override else {}
        # The schema is delivered through the function-calling interface.
        tools = [
            {
                "type": "function",
                "function": {
                    "name": "build_summary",
                    "description": "Extract structured document summary from OCR text.",
                    "parameters": schema,
                    "strict": True,
                },
            }
        ]
        try:
            response = self.client.beta.chat.completions.parse(
                model=self.model,
                messages=[
                    {
                        "role": "system",
                        "content": "You are an assistant that extracts structured document summary from OCR text.",
                    },
                    {"role": "user", "content": prompt},
                ],
                tools=tools,
                tool_choice={"type": "function", "function": {"name": "build_summary"}},
            )
            tool_call = response.choices[0].message.tool_calls[0]
            structured = json.loads(tool_call.function.arguments)
            return structured, self.model, "OpenAI Function Calling"
        except Exception as e:
            logger.error(f"[GPT-STRUCTURED-ERROR] GPT 응답 파싱 실패: {e}")
            raise HTTPException(
                status_code=500, detail="❌ GPT 구조화 응답 생성에 실패했습니다."
            ) from e

    def generate_multimodal(self, images, prompt, schema_override=None):
        """Generate text from *prompt* plus a sequence of raw image bytes.

        Each image is embedded as a base64 ``data:`` URL whose MIME type is
        sniffed from the image's leading bytes. ``schema_override`` is
        accepted but not used by this method.

        Returns:
            Tuple ``(generated_text, model, "OpenAI Python SDK")``.

        Raises:
            HTTPException: Status 500 for any failure while calling the API.
        """
        import base64

        content = [{"type": "text", "text": prompt}]
        for image_bytes in images:
            base64_image = base64.b64encode(image_bytes).decode("utf-8")
            mime_type = self._sniff_image_mime(image_bytes)
            content.append(
                {
                    "type": "image_url",
                    "image_url": {"url": f"data:{mime_type};base64,{base64_image}"},
                }
            )
        messages = [{"role": "user", "content": content}]
        try:
            response = self.client.chat.completions.create(
                model=self.model, messages=messages, max_tokens=4096
            )
            generated_text = response.choices[0].message.content
            return generated_text, self.model, "OpenAI Python SDK"
        except Exception as e:
            logger.error(f"[GPT-MULTIMODAL-ERROR] GPT-4o 응답 파싱 실패: {e}")
            raise HTTPException(
                status_code=500, detail="❌ GPT-4o 응답 생성에 실패했습니다."
            ) from e