# test/handlers/briefing/processor.py

# -*- coding: utf-8 -*-
"""
기획서(briefing) 처리 로직
- 1~2페이지 압축형 보고서
- Navy 양식
"""

import os
import json
from pathlib import Path
from flask import jsonify, session

from handlers.common import call_claude, extract_json, extract_html, load_prompt, client


class BriefingProcessor:
    """기획서 처리 클래스"""

    def __init__(self):
        """Point the processor at the ``prompts`` directory next to this module."""
        module_dir = Path(__file__).parent
        self.prompts_dir = module_dir.joinpath('prompts')

    def _load_prompt(self, filename: str) -> str:
        """Load the prompt file ``filename`` from this processor's prompts directory.

        Delegates to the shared ``load_prompt`` helper, passing the prompts
        directory as a string.
        """
        prompts_root = str(self.prompts_dir)
        return load_prompt(prompts_root, filename)

    def _get_step1_prompt(self) -> str:
        """Return the system prompt for step 1 (structure extraction).

        Uses ``step1_extract.txt`` when loading it yields a non-empty value;
        otherwise falls back to the built-in default text.
        """
        fallback = """HTML 문서를 분석하여 JSON 구조로 추출하세요.
원본 텍스트를 그대로 보존하고, 구조만 정확히 파악하세요."""
        return self._load_prompt('step1_extract.txt') or fallback

    def _get_step1_5_prompt(self) -> str:
        """Return the system prompt for step 1.5 (page layout planning).

        Uses ``step1_5_plan.txt`` when loading it yields a non-empty value;
        otherwise falls back to the built-in default text.
        """
        fallback = """JSON 구조를 분석하여 페이지 배치 계획을 수립하세요."""
        return self._load_prompt('step1_5_plan.txt') or fallback

    def _get_step2_prompt(self) -> str:
        """Return the system prompt for step 2 (HTML generation).

        Uses ``step2_generate.txt`` when loading it yields a non-empty value;
        otherwise falls back to the built-in default text.
        """
        fallback = """JSON 구조를 각인된 양식의 HTML로 변환하세요.
Navy 색상 테마, A4 크기, Noto Sans KR 폰트를 사용하세요."""
        return self._load_prompt('step2_generate.txt') or fallback

    def _content_too_long(self, html: str, max_sections_per_page: int = 4,
                          max_list_items_per_page: int = 12,
                          max_steps_per_page: int = 6) -> bool:
        """Report whether any page of the HTML holds too much content.

        A page is a ``<div class="sheet">``. A page is considered overfilled
        when it contains more ``<div class="section">`` elements, more
        ``<li>`` elements, or more ``<div class="process-step">`` elements
        than the corresponding limit.

        Args:
            html: HTML document to inspect.
            max_sections_per_page: Allowed ``section`` divs per sheet.
            max_list_items_per_page: Allowed ``<li>`` elements per sheet.
            max_steps_per_page: Allowed ``process-step`` divs per sheet.

        Returns:
            True if at least one sheet exceeds any limit, otherwise False.
            Empty or missing HTML is never "too long".
        """
        # Nothing to measure; this also avoids handing None to the parser.
        if not html:
            return False

        # Imported lazily so the dependency is only needed when HTML is checked.
        from bs4 import BeautifulSoup
        soup = BeautifulSoup(html, 'html.parser')

        for sheet in soup.find_all('div', class_='sheet'):
            if len(sheet.find_all('div', class_='section')) > max_sections_per_page:
                return True
            if len(sheet.find_all('li')) > max_list_items_per_page:
                return True
            if len(sheet.find_all('div', class_='process-step')) > max_steps_per_page:
                return True

        return False

    def generate(self, content: str, options: dict) -> dict:
        """Generate a briefing report as HTML from the source content.

        Pipeline:
            1. Extract the document structure as JSON.
            2. Build a page layout plan from that structure.
            3. Generate the final HTML from plan + structure.
            4. If any page is overfilled, run one compression pass.

        The result is also stored in the Flask ``session`` under
        ``original_html``, ``current_html``, ``structure_json`` and
        ``conversation`` (reset to an empty list).

        Args:
            content: Source document text/HTML to convert.
            options: Optional settings. Recognised keys:
                ``page_option`` ('1', '2' or 'n'; default '1'),
                ``department`` (default '총괄기획실'),
                ``instruction`` (extra user request; default '').
                ``None`` is treated as an empty dict.

        Returns:
            ``{'success': True, 'html': ..., 'structure': ...}`` on success,
            ``{'error': ...}`` for missing content, or
            ``{'error': ..., 'trace': ...}`` if any step raises.
        """
        try:
            # Treat None the same as blank input instead of failing on .strip().
            if not content or not content.strip():
                return {'error': '내용을 입력하거나 파일을 업로드해주세요.'}

            options = options or {}
            # Normalise to str so that e.g. an integer 2 still selects the
            # '2' page instruction instead of silently falling back to '1'.
            page_option = str(options.get('page_option', '1'))
            department = options.get('department', '총괄기획실')
            additional_prompt = options.get('instruction', '')

            # ============== Step 1: structure extraction ==============
            step1_prompt = self._get_step1_prompt()
            step1_message = f"""다음 HTML 문서의 구조를 분석하여 JSON으로 추출해주세요.

## 원본 HTML
{content}

---
위 문서를 분석하여 JSON 구조로 출력하세요. 설명 없이 JSON만 출력."""

            step1_response = call_claude(step1_prompt, step1_message, max_tokens=4000)
            structure_json = extract_json(step1_response)

            # If no JSON could be extracted, carry the raw content forward so
            # the later steps still have something to work with.
            if not structure_json:
                structure_json = {"raw_content": content, "parse_failed": True}

            # Serialised once; the same text feeds both later prompts.
            structure_text = json.dumps(structure_json, ensure_ascii=False, indent=2)

            # ============== Step 1.5: layout plan ==============
            step1_5_prompt = self._get_step1_5_prompt()
            step1_5_message = f"""다음 JSON 구조를 분석하여 페이지 배치 계획을 수립해주세요.

## 문서 구조 (JSON)
{structure_text}

## 페이지 수
{page_option}페이지

---
배치 계획 JSON만 출력하세요. 설명 없이 JSON만."""

            step1_5_response = call_claude(step1_5_prompt, step1_5_message, max_tokens=4000)
            page_plan = extract_json(step1_5_response)

            if not page_plan:
                page_plan = {"page_plan": {}, "parse_failed": True}

            # ============== Step 2: HTML generation ==============
            page_instructions = {
                '1': '1페이지로 핵심 내용만 압축하여 작성하세요.',
                '2': '2페이지로 작성하세요. 1페이지는 본문, 2페이지는 [첨부]입니다.',
                'n': '여러 페이지로 작성하세요. 1페이지는 본문, 나머지는 [첨부] 형태로 분할합니다.'
            }

            step2_prompt = self._get_step2_prompt()
            step2_message = f"""다음 배치 계획과 문서 구조를 기반으로 각인된 양식의 HTML 보고서를 생성해주세요.

## 배치 계획
{json.dumps(page_plan, ensure_ascii=False, indent=2)}

## 문서 구조 (JSON)
{structure_text}

## 페이지 옵션
{page_instructions.get(page_option, page_instructions['1'])}

## 부서명
{department}

## 추가 요청사항
{additional_prompt if additional_prompt else '없음'}

---
위 JSON을 바탕으로 완전한 HTML 문서를 생성하세요.
코드 블록(```) 없이 <!DOCTYPE html>부터 </html>까지 순수 HTML만 출력."""

            step2_response = call_claude(step2_prompt, step2_message, max_tokens=8000)
            html_content = extract_html(step2_response)

            # Post-generation check: one compression pass if a page is overfilled.
            if self._content_too_long(html_content):
                compress_message = f"""다음 HTML이 페이지당 콘텐츠가 너무 많습니다.
각 페이지당 섹션 3~4개, 리스트 항목 8개 이하로 압축해주세요.

{html_content}

코드 블록 없이 압축된 완전한 HTML만 출력하세요."""

                compress_response = call_claude(step2_prompt, compress_message, max_tokens=8000)
                compressed_html = extract_html(compress_response)
                # Keep the uncompressed document if the compression pass
                # produced nothing usable, rather than discarding good HTML.
                if compressed_html:
                    html_content = compressed_html

            # Persist the result in the session for later refinement calls.
            session['original_html'] = content
            session['current_html'] = html_content
            session['structure_json'] = json.dumps(structure_json, ensure_ascii=False)
            session['conversation'] = []

            return {
                'success': True,
                'html': html_content,
                'structure': structure_json
            }

        except Exception as e:
            # NOTE(review): the traceback is returned to the caller; confirm it
            # is not forwarded to end users, or log it server-side instead.
            import traceback
            return {'error': str(e), 'trace': traceback.format_exc()}

    def refine(self, feedback: str, current_html: str, original_html: str = '') -> dict:
        """Apply user feedback to the current HTML report.

        Builds a single prompt containing an excerpt of the original
        document (first 3000 characters), the current HTML and the feedback,
        asks the model for a revised document, and stores the result in
        ``session['current_html']``.

        Args:
            feedback: The user's change request.
            current_html: HTML document to be modified.
            original_html: Optional source document used as reference.

        Returns:
            ``{'success': True, 'html': ...}`` on success, or
            ``{'error': ...}`` when input is missing or a step raises.
        """
        try:
            # Treat None the same as blank input instead of failing on .strip().
            if not feedback or not feedback.strip():
                return {'error': '피드백 내용을 입력해주세요.'}

            if not current_html:
                return {'error': '수정할 HTML이 없습니다.'}

            # Only mark the excerpt with an ellipsis when it was actually cut;
            # otherwise the prompt would claim truncation that did not happen.
            excerpt_limit = 3000
            if original_html:
                original_excerpt = original_html[:excerpt_limit]
                if len(original_html) > excerpt_limit:
                    original_excerpt += '...'
            else:
                original_excerpt = '없음'

            refine_prompt = f"""당신은 HTML 보고서 수정 전문가입니다.

사용자의 피드백을 반영하여 현재 HTML을 수정합니다.

## 규칙
1. 피드백에서 언급된 부분만 정확히 수정
2. 나머지 구조와 스타일은 그대로 유지
3. 완전한 HTML 문서로 출력 (<!DOCTYPE html> ~ </html>)
4. 코드 블록(```) 없이 순수 HTML만 출력
5. 원본 문서의 텍스트를 참조하여 누락된 내용 복구 가능

## 원본 HTML (참고용)
{original_excerpt}

## 현재 HTML
{current_html}

## 사용자 피드백
{feedback}

---
위 피드백을 반영하여 수정된 완전한 HTML을 출력하세요."""

            # The whole instruction is sent as the user message; no system prompt.
            response = call_claude("", refine_prompt, max_tokens=8000)
            new_html = extract_html(response)

            session['current_html'] = new_html

            return {
                'success': True,
                'html': new_html
            }

        except Exception as e:
            return {'error': str(e)}

    def refine_selection(self, current_html: str, selected_text: str, user_request: str) -> dict:
        """Rewrite only the user-selected part of the document.

        Sends the full document (for context), the selected text and the
        request to the model, which is instructed to answer in the form::

            TYPE: (TEXT or STRUCTURE)
            CONTENT:
            (modified content)

        Args:
            current_html: Full HTML document, used as context.
            selected_text: The text the user selected for modification.
            user_request: What the user wants done with the selection.

        Returns:
            ``{'success': True, 'type': 'TEXT' | 'STRUCTURE', 'html': ...}``
            on success, or ``{'error': ...}`` when input is missing or the
            call raises. If the reply does not follow the TYPE/CONTENT
            format, the whole reply is returned as ``TEXT``.
        """
        try:
            if not current_html or not selected_text or not user_request:
                return {'error': '필수 데이터가 없습니다.'}

            prompt = f"""HTML 문서에서 지정된 부분만 수정해주세요.

## 전체 문서 (컨텍스트 파악용)
{current_html}

## 수정 대상 텍스트
"{selected_text}"

## 수정 요청
{user_request}

## 규칙
1. 요청을 분석하여 수정 유형을 판단:
   - TEXT: 텍스트 내용만 수정 (요약, 문장 변경, 단어 수정, 번역 등)
   - STRUCTURE: HTML 구조 변경 필요 (표 생성, 박스 추가, 레이아웃 변경 등)

2. 반드시 다음 형식으로만 출력:

TYPE: (TEXT 또는 STRUCTURE)
CONTENT:
(수정된 내용)

3. TEXT인 경우: 순수 텍스트만 출력 (HTML 태그 없이)
4. STRUCTURE인 경우: 완전한 HTML 요소 출력 (기존 클래스명 유지)
5. 개조식 문체 유지 (~임, ~함, ~필요)
"""

            message = client.messages.create(
                model="claude-sonnet-4-20250514",
                max_tokens=8000,
                messages=[{"role": "user", "content": prompt}]
            )

            # Drop any code fences the model added despite the instructions.
            result = message.content[0].text
            result = result.replace('```html', '').replace('```', '').strip()

            edit_type = 'TEXT'
            content = result

            if 'TYPE:' in result and 'CONTENT:' in result:
                # Split on the FIRST marker only, so a literal "CONTENT:"
                # inside the model's answer does not truncate the result.
                header, _, body = result.partition('CONTENT:')
                if 'STRUCTURE' in header:
                    edit_type = 'STRUCTURE'
                content = body.strip()

            return {
                'success': True,
                'type': edit_type,
                'html': content
            }

        except Exception as e:
            return {'error': str(e)}