# test/app.py

# -*- coding: utf-8 -*-
"""
글벗 Light v2.0
2단계 API 변환 + 대화형 피드백 시스템

Flask + Claude API + Railway
"""

import os
import json
import anthropic
from flask import Flask, render_template, request, jsonify, Response, session
from datetime import datetime
import io
import re
from flask import send_file
from datetime import datetime
import tempfile
from converters.pipeline.router import process_document
from api_config import API_KEYS


# Flask application object; every route in this file is registered on it.
app = Flask(__name__)
app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024  # 16MB max
# NOTE(review): when the SECRET_KEY environment variable is not set, the fixed
# fallback string below is used as the Flask secret key. Set SECRET_KEY
# explicitly in any real deployment.
app.config['SECRET_KEY'] = os.environ.get('SECRET_KEY', 'geulbeot-light-secret-key-v2')

# Claude API client (an empty key is passed when CLAUDE_API_KEY is missing from API_KEYS)
client = anthropic.Anthropic(api_key=API_KEYS.get('CLAUDE_API_KEY', ''))


# ============== 프롬프트 로드 ==============

def load_prompt(filename):
    """Read a prompt file from the ``prompts`` directory next to this module.

    Args:
        filename: File name inside the ``prompts`` directory.

    Returns:
        The file's text decoded as UTF-8, or ``None`` when the file does not
        exist.
    """
    module_dir = os.path.dirname(__file__)
    target = os.path.join(module_dir, 'prompts', filename)
    try:
        handle = open(target, 'r', encoding='utf-8')
    except FileNotFoundError:
        # A missing prompt file is expected; callers fall back to defaults.
        return None
    with handle:
        return handle.read()


def get_step1_prompt():
    """Return the system prompt for step 1 (structure extraction).

    The contents of ``step1_extract.txt`` are used when ``load_prompt``
    returns a non-empty value; otherwise a short built-in prompt is returned.
    """
    loaded = load_prompt('step1_extract.txt')
    if not loaded:
        # Built-in default used when the prompt file is missing or empty.
        return """HTML 문서를 분석하여 JSON 구조로 추출하세요.
원본 텍스트를 그대로 보존하고, 구조만 정확히 파악하세요."""
    return loaded


def get_step2_prompt():
    """Return the system prompt for step 2 (HTML generation).

    The contents of ``step2_generate.txt`` are used when ``load_prompt``
    returns a non-empty value; otherwise a short built-in prompt is returned.
    """
    # Built-in default used when the prompt file is missing or empty.
    default_prompt = """JSON 구조를 각인된 양식의 HTML로 변환하세요.
Navy 색상 테마, A4 크기, Noto Sans KR 폰트를 사용하세요."""
    from_file = load_prompt('step2_generate.txt')
    return from_file if from_file else default_prompt

def get_step1_5_prompt():
    """Return the system prompt for step 1.5 (page layout planning).

    The contents of ``step1_5_plan.txt`` are used when ``load_prompt``
    returns a non-empty value; otherwise a one-line built-in prompt is
    returned.
    """
    fallback = """JSON 구조를 분석하여 페이지 배치 계획을 수립하세요."""
    return load_prompt('step1_5_plan.txt') or fallback

def get_refine_prompt():
    """Return the prompt template used to apply user feedback to an HTML report.

    The returned text contains the literal placeholders ``{current_html}``
    and ``{feedback}``; this function does not substitute them.
    """
    template = """당신은 HTML 보고서 수정 전문가입니다.

사용자의 피드백을 반영하여 현재 HTML을 수정합니다.

## 규칙
1. 피드백에서 언급된 부분만 정확히 수정
2. 나머지 구조와 스타일은 그대로 유지
3. 완전한 HTML 문서로 출력 (<!DOCTYPE html> ~ </html>)
4. 코드 블록(```) 없이 순수 HTML만 출력

## 현재 HTML
{current_html}

## 사용자 피드백
{feedback}

위 피드백을 반영하여 수정된 완전한 HTML을 출력하세요."""
    return template

# ============== API 호출 함수 ==============

def call_claude(system_prompt, user_message, max_tokens=8000):
    """Send a single user message to the Claude Messages API.

    Args:
        system_prompt: Text passed as the ``system`` argument.
        user_message: Text sent as the only ``user`` message.
        max_tokens: Value passed as ``max_tokens`` for the reply.

    Returns:
        The ``text`` attribute of the first content block of the response.
    """
    request_args = {
        "model": "claude-sonnet-4-20250514",
        "max_tokens": max_tokens,
        "system": system_prompt,
        "messages": [{"role": "user", "content": user_message}],
    }
    reply = client.messages.create(**request_args)
    first_block = reply.content[0]
    return first_block.text


def extract_json(text):
    """Extract a JSON value from model output text.

    A Markdown code fence, when present, is stripped first. The remaining
    text is parsed as JSON; if that fails, the JSON object starting at the
    first ``{`` is decoded and any trailing prose after it is ignored.

    Args:
        text: Raw text returned by the model.

    Returns:
        The decoded JSON value, or ``None`` when ``text`` is not a string,
        is empty, or contains no parsable JSON.
    """
    if not isinstance(text, str):
        return None

    # Strip a code fence; the ```json form is checked before the bare form.
    if '```json' in text:
        text = text.split('```json')[1].split('```')[0]
    elif '```' in text:
        text = text.split('```')[1].split('```')[0]

    text = text.strip()
    if not text:
        return None

    # First attempt: the whole text is JSON.
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        pass

    # Fallback: decode the object beginning at the first '{'. raw_decode stops
    # at the end of that object, so trailing text (even text containing
    # braces) no longer breaks parsing. Only the first '{' is tried so that a
    # truncated document yields None rather than a nested fragment.
    start = text.find('{')
    if start == -1:
        return None
    try:
        value, _end = json.JSONDecoder().raw_decode(text, start)
    except json.JSONDecodeError:
        return None
    return value


def extract_html(text):
    """Extract an HTML document from model output text.

    A Markdown code fence, when present, is stripped first. If the remaining
    text does not already start with ``<!DOCTYPE`` or ``<html``, the span from
    the first ``<!DOCTYPE html`` or ``<html`` tag to the last ``</html>`` is
    extracted (case-insensitive), dropping surrounding prose.

    Args:
        text: Raw text returned by the model.

    Returns:
        The extracted HTML, or the stripped text unchanged when no HTML
        document can be located in it.
    """
    # Strip a code fence; the ```html form is checked before the bare form.
    if '```html' in text:
        text = text.split('```html')[1].split('```')[0]
    elif '```' in text:
        parts = text.split('```')
        if len(parts) >= 2:
            text = parts[1]

    text = text.strip()

    if not text.startswith(('<!DOCTYPE', '<html')):
        # Also accept documents that begin with <html> and have no DOCTYPE;
        # previously those were returned together with the surrounding prose.
        match = re.search(
            r'((?:<!DOCTYPE html|<html[\s>])[\s\S]*</html>)',
            text,
            re.IGNORECASE,
        )
        if match:
            text = match.group(1)

    return text

def content_too_long(html, max_sections_per_page=4, max_list_items=12,
                     max_process_steps=6):
    """Check whether any page of the generated HTML holds too much content.

    Every ``div`` with class ``sheet`` is inspected separately.

    Args:
        html: HTML document text.
        max_sections_per_page: Allowed number of ``div.section`` elements
            inside one sheet.
        max_list_items: Allowed number of ``li`` elements inside one sheet.
        max_process_steps: Allowed number of ``div.process-step`` elements
            inside one sheet.

    Returns:
        ``True`` as soon as one sheet exceeds any limit, otherwise ``False``
        (including when the document has no sheets).
    """
    from bs4 import BeautifulSoup
    soup = BeautifulSoup(html, 'html.parser')

    for sheet in soup.find_all('div', class_='sheet'):
        # Section count
        if len(sheet.find_all('div', class_='section')) > max_sections_per_page:
            return True

        # List item count
        if len(sheet.find_all('li')) > max_list_items:
            return True

        # Process step count
        if len(sheet.find_all('div', class_='process-step')) > max_process_steps:
            return True

    return False


# ============== 라우트 ==============

@app.route('/')
def index():
    """Serve the main page by rendering the ``index.html`` template."""
    page = render_template('index.html')
    return page


@app.route('/generate', methods=['POST'])
def generate():
    """Report generation API: structure extraction, layout planning, HTML output.

    Input is taken from the uploaded ``file`` (decoded as UTF-8) or, failing
    that, from the ``content`` form field. Optional form fields are
    ``page_option`` (default ``'1'``), ``department`` and
    ``additional_prompt``.

    The document goes through three Claude calls (step 1: structure JSON,
    step 1.5: page layout plan, step 2: final HTML). If ``content_too_long``
    flags the generated HTML, one extra call asks the model to compress it.
    The source text, the generated HTML and the structure are stored in the
    session for the feedback endpoints.

    Returns:
        JSON ``{'success': True, 'html': ..., 'structure': ...}`` on success;
        JSON ``{'error': ...}`` with status 400 for empty or non-UTF-8 input
        and status 500 for API or server errors. A ``trace`` field is added
        to the 500 response only when the app runs in debug mode.
    """
    try:
        # Read the input: an uploaded file takes precedence over the text field.
        content = ""

        if 'file' in request.files and request.files['file'].filename:
            upload = request.files['file']
            try:
                content = upload.read().decode('utf-8')
            except UnicodeDecodeError:
                # A non-UTF-8 upload is a client error, not a server fault.
                return jsonify({'error': 'UTF-8로 인코딩된 파일만 지원합니다.'}), 400
        elif 'content' in request.form:
            content = request.form.get('content', '')

        if not content.strip():
            return jsonify({'error': '내용을 입력하거나 파일을 업로드해주세요.'}), 400

        # Options
        page_option = request.form.get('page_option', '1')
        department = request.form.get('department', '총괄기획실')
        additional_prompt = request.form.get('additional_prompt', '')

        # ============== Step 1: structure extraction ==============
        step1_prompt = get_step1_prompt()
        step1_message = f"""다음 HTML 문서의 구조를 분석하여 JSON으로 추출해주세요.

## 원본 HTML
{content}

---
위 문서를 분석하여 JSON 구조로 출력하세요. 설명 없이 JSON만 출력."""

        step1_response = call_claude(step1_prompt, step1_message, max_tokens=4000)
        structure_json = extract_json(step1_response)

        if not structure_json:
            # JSON extraction failed: pass the raw source through instead.
            structure_json = {"raw_content": content, "parse_failed": True}

        # ============== Step 1.5: page layout plan ==============
        step1_5_prompt = get_step1_5_prompt()
        step1_5_message = f"""다음 JSON 구조를 분석하여 페이지 배치 계획을 수립해주세요.

## 문서 구조 (JSON)
{json.dumps(structure_json, ensure_ascii=False, indent=2)}

## 페이지 수
{page_option}페이지

---
배치 계획 JSON만 출력하세요. 설명 없이 JSON만."""

        step1_5_response = call_claude(step1_5_prompt, step1_5_message, max_tokens=4000)
        page_plan = extract_json(step1_5_response)

        if not page_plan:
            page_plan = {"page_plan": {}, "parse_failed": True}

        # ============== Step 2: HTML generation ==============
        page_instructions = {
            '1': '1페이지로 핵심 내용만 압축하여 작성하세요. 내용이 넘치면 텍스트를 줄이거나 줄간격을 조정하세요.',
            '2': '2페이지로 작성하세요. 1페이지는 본문(개요, 핵심 내용), 2페이지는 [첨부]로 시작하는 상세 내용입니다.',
            'n': '여러 페이지로 작성하세요. 1페이지는 본문, 나머지는 [첨부 1], [첨부 2] 형태로 분할합니다.'
        }

        step2_prompt = get_step2_prompt()
        step2_message = f"""다음 배치 계획과 문서 구조를 기반으로 각인된 양식의 HTML 보고서를 생성해주세요.

## 배치 계획
{json.dumps(page_plan, ensure_ascii=False, indent=2)}

## 문서 구조 (JSON)
{json.dumps(structure_json, ensure_ascii=False, indent=2)}

## 페이지 옵션
{page_instructions.get(page_option, page_instructions['1'])}

## 부서명
{department}

## 추가 요청사항
{additional_prompt if additional_prompt else '없음'}

---
위 JSON을 바탕으로 완전한 HTML 문서를 생성하세요.
코드 블록(```) 없이 <!DOCTYPE html>부터 </html>까지 순수 HTML만 출력."""

        step2_response = call_claude(step2_prompt, step2_message, max_tokens=8000)
        html_content = extract_html(step2_response)

        # Post-check: if a page carries too much content, ask once for a
        # compressed version.
        if content_too_long(html_content):
            compress_message = f"""다음 HTML이 페이지당 콘텐츠가 너무 많습니다.
각 페이지당 섹션 3~4개, 리스트 항목 8개 이하로 압축해주세요.
텍스트를 줄이거나 덜 중요한 내용은 생략하세요.

{html_content}

코드 블록 없이 압축된 완전한 HTML만 출력하세요."""

            compress_response = call_claude(step2_prompt, compress_message, max_tokens=8000)
            html_content = extract_html(compress_response)

        # Keep state in the session for the feedback endpoints.
        # NOTE(review): if the default cookie-backed Flask session is in use,
        # whole HTML documents stored here may exceed browser cookie size
        # limits -- confirm which session backend is configured.
        session['original_html'] = content
        session['current_html'] = html_content
        session['structure_json'] = json.dumps(structure_json, ensure_ascii=False)
        session['conversation'] = []

        return jsonify({
            'success': True,
            'html': html_content,
            'structure': structure_json
        })

    except anthropic.APIError as e:
        return jsonify({'error': f'Claude API 오류: {str(e)}'}), 500
    except Exception as e:
        import traceback
        # Always log the full traceback server-side; expose it to the client
        # only in debug mode so internals are not leaked in production.
        app.logger.exception('Report generation failed')
        payload = {'error': f'서버 오류: {str(e)}'}
        if app.debug:
            payload['trace'] = traceback.format_exc()
        return jsonify(payload), 500


@app.route('/refine', methods=['POST'])
def refine():
    """Feedback API: apply free-form user feedback to the current HTML.

    Expects a JSON body with ``feedback`` and optionally ``current_html``;
    when ``current_html`` is absent or empty the value stored in the session
    is used. Up to the first 3000 characters of the original document from
    the session are included in the prompt as reference.

    Returns:
        JSON ``{'success': True, 'html': ...}`` on success; JSON
        ``{'error': ...}`` with status 400 for missing input (including a
        missing or non-JSON request body) and status 500 for API or server
        errors.
    """
    try:
        # silent=True: a missing or malformed JSON body is treated as empty
        # input and rejected with 400 below instead of surfacing as a 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            data = {}

        feedback = data.get('feedback', '')
        current_html = data.get('current_html', '') or session.get('current_html', '')

        if not isinstance(feedback, str) or not feedback.strip():
            return jsonify({'error': '피드백 내용을 입력해주세요.'}), 400

        if not current_html:
            return jsonify({'error': '수정할 HTML이 없습니다. 먼저 변환을 실행해주세요.'}), 400

        # The original document is added to the prompt as context.
        original_html = session.get('original_html', '')

        # Prompt that applies the feedback
        refine_prompt = f"""당신은 HTML 보고서 수정 전문가입니다.

사용자의 피드백을 반영하여 현재 HTML을 수정합니다.

## 규칙
1. 피드백에서 언급된 부분만 정확히 수정
2. 나머지 구조와 스타일은 그대로 유지
3. 완전한 HTML 문서로 출력 (<!DOCTYPE html> ~ </html>)
4. 코드 블록(```) 없이 순수 HTML만 출력
5. 원본 문서의 텍스트를 참조하여 누락된 내용 복구 가능

## 원본 HTML (참고용)
{original_html[:3000] if original_html else '없음'}...

## 현재 HTML
{current_html}

## 사용자 피드백
{feedback}

---
위 피드백을 반영하여 수정된 완전한 HTML을 출력하세요."""

        response = call_claude("", refine_prompt, max_tokens=8000)
        new_html = extract_html(response)

        # Update the session with the revised document.
        session['current_html'] = new_html

        # Record the exchange in the conversation history.
        conversation = session.get('conversation', [])
        conversation.append({'role': 'user', 'content': feedback})
        conversation.append({'role': 'assistant', 'content': '수정 완료'})
        session['conversation'] = conversation

        return jsonify({
            'success': True,
            'html': new_html
        })

    except anthropic.APIError as e:
        return jsonify({'error': f'Claude API 오류: {str(e)}'}), 500
    except Exception as e:
        return jsonify({'error': f'서버 오류: {str(e)}'}), 500


@app.route('/refine-selection', methods=['POST'])
def refine_selection():
    """Revise only the selected part of the document.

    Expects a JSON body with ``current_html``, ``selected_text`` and
    ``request``. The model is asked to answer in a ``TYPE:`` / ``CONTENT:``
    format; the reply is parsed into an edit type (``TEXT`` or ``STRUCTURE``)
    and the replacement content.

    Returns:
        JSON ``{'success': True, 'type': ..., 'html': ...}`` on success; JSON
        ``{'error': ...}`` with status 400 when required data is missing
        (including a missing or non-JSON body) and status 500 on any other
        error.
    """
    try:
        # silent=True: a missing or malformed JSON body becomes an empty dict
        # and is rejected with 400 below instead of raising AttributeError.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            data = {}

        current_html = data.get('current_html', '')
        selected_text = data.get('selected_text', '')
        user_request = data.get('request', '')

        if not current_html or not selected_text or not user_request:
            return jsonify({'error': '필수 데이터가 없습니다.'}), 400

        prompt = f"""HTML 문서에서 지정된 부분만 수정해주세요.

## 전체 문서 (컨텍스트 파악용)
{current_html}

## 수정 대상 텍스트
"{selected_text}"

## 수정 요청
{user_request}

## 규칙
1. 요청을 분석하여 수정 유형을 판단:
   - TEXT: 텍스트 내용만 수정 (요약, 문장 변경, 단어 수정, 번역 등)
   - STRUCTURE: HTML 구조 변경 필요 (표 생성, 박스 추가, 레이아웃 변경 등)

2. 반드시 다음 형식으로만 출력:

TYPE: (TEXT 또는 STRUCTURE)
CONTENT:
(수정된 내용)

3. TEXT인 경우: 순수 텍스트만 출력 (HTML 태그 없이)
4. STRUCTURE인 경우: 완전한 HTML 요소 출력 (기존 클래스명 유지)
5. 개조식 문체 유지 (~임, ~함, ~필요)
"""

        # Claude API call (no system prompt is sent for this request)
        message = client.messages.create(
            model="claude-sonnet-4-20250514",
            max_tokens=8000,
            messages=[{"role": "user", "content": prompt}]
        )

        result = message.content[0].text
        result = result.replace('```html', '').replace('```', '').strip()

        # Parse TYPE and CONTENT; default to a plain-text edit holding the
        # whole reply when the expected markers are absent.
        edit_type = 'TEXT'
        content = result

        if 'TYPE:' in result and 'CONTENT:' in result:
            # Split on the first marker only, so revised content that itself
            # contains "CONTENT:" is not truncated.
            header, _, body = result.partition('CONTENT:')
            if 'STRUCTURE' in header:
                edit_type = 'STRUCTURE'
            content = body.strip()

        return jsonify({
            'success': True,
            'type': edit_type,
            'html': content
        })

    except Exception as e:
        return jsonify({'error': str(e)}), 500


@app.route('/download/html', methods=['POST'])
def download_html():
    """Return the posted ``html`` form field as a downloadable HTML file.

    The attachment is named ``report_<YYYYmmdd_HHMMSS>.html`` using the
    current local time. Responds with status 400 when the field is empty.
    """
    html_content = request.form.get('html', '')
    if html_content:
        stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        attachment_name = f'report_{stamp}.html'
        disposition = {'Content-Disposition': f'attachment; filename={attachment_name}'}
        return Response(html_content, mimetype='text/html', headers=disposition)

    return "No content", 400


@app.route('/download/pdf', methods=['POST'])
def download_pdf():
    """Render the posted ``html`` form field to PDF and return it as a download.

    weasyprint is imported lazily; when it is not installed the endpoint
    answers 501 with a hint to print from the browser instead. An empty
    ``html`` field yields 400 and any rendering failure yields 500.
    """
    try:
        from weasyprint import HTML

        source_html = request.form.get('html', '')
        if not source_html:
            return "No content", 400

        # NOTE(review): the HTML comes straight from the request -- confirm
        # which external resources weasyprint is allowed to fetch for it.
        sink = io.BytesIO()
        document = HTML(string=source_html)
        document.write_pdf(sink)
        sink.seek(0)
        pdf_bytes = sink.getvalue()

        stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        attachment_name = f'report_{stamp}.pdf'
        disposition = {'Content-Disposition': f'attachment; filename={attachment_name}'}

        return Response(pdf_bytes, mimetype='application/pdf', headers=disposition)
    except ImportError:
        return jsonify({'error': 'PDF 변환 미지원. HTML 다운로드 후 브라우저에서 인쇄하세요.'}), 501
    except Exception as exc:
        return jsonify({'error': f'PDF 변환 오류: {str(exc)}'}), 500


@app.route('/hwp-script')
def hwp_script():
    """Serve the HWP conversion script guide (``hwp_guide.html`` template)."""
    guide_page = render_template('hwp_guide.html')
    return guide_page

@app.route('/generate-report', methods=['POST'])
def generate_report_api():
    """Report generation API backed by the pipeline router.

    Expects a JSON body with ``content`` plus the optional keys
    ``folder_path``, ``cover``, ``toc``, ``divider`` and ``instruction``,
    which are forwarded to ``process_document`` as options.

    Returns:
        The router's result as JSON when it reports success; JSON
        ``{'error': ...}`` with status 400 for empty content (including a
        missing or non-JSON body) and status 500 otherwise. A ``trace`` field
        is added to the 500 response only when the app runs in debug mode.
    """
    try:
        # silent=True: a missing or malformed JSON body is treated as empty
        # input and rejected with 400 below instead of surfacing as a 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            data = {}

        # HTML content (read from a folder or entered directly)
        content = data.get('content', '')

        # Options
        options = {
            'folder_path': data.get('folder_path', ''),
            'cover': data.get('cover', False),
            'toc': data.get('toc', False),
            'divider': data.get('divider', False),
            'instruction': data.get('instruction', '')
        }

        if not isinstance(content, str) or not content.strip():
            return jsonify({'error': '내용이 비어있습니다.'}), 400

        # Hand off to the router
        result = process_document(content, options)

        if result.get('success'):
            return jsonify(result)
        return jsonify({'error': result.get('error', '처리 실패')}), 500

    except Exception as e:
        import traceback
        # Always log the full traceback server-side; expose it to the client
        # only in debug mode so internals are not leaked in production.
        app.logger.exception('Router-based report generation failed')
        payload = {'error': str(e)}
        if app.debug:
            payload['trace'] = traceback.format_exc()
        return jsonify(payload), 500

@app.route('/assets/<path:filename>')
def serve_assets(filename):
    """Serve a file from the local assets folder.

    ``send_from_directory`` is used instead of joining the path by hand so
    that a request path containing ``..`` segments or an absolute path cannot
    escape the assets folder; such requests, and missing files, get a 404.

    Args:
        filename: Path of the requested file relative to the assets folder.
    """
    from flask import send_from_directory

    # NOTE(review): hard-coded local Windows path; this directory will not
    # exist on other hosts -- consider making it configurable.
    assets_dir = r"D:\for python\geulbeot-light\geulbeot-light\output\assets"
    return send_from_directory(assets_dir, filename)


@app.route('/health')
def health():
    """Health check endpoint reporting a fixed status and version."""
    status_payload = {'status': 'healthy', 'version': '2.0.0'}
    return jsonify(status_payload)


# ===== HWP 변환 =====
@app.route('/export-hwp', methods=['POST'])
def export_hwp():
    """Convert posted HTML to an HWP document and return it as a download.

    Expects a JSON body with ``html`` and optionally ``doc_type`` (default
    ``'briefing'``), which selects the converter module.

    Each request works in its own temporary directory, so concurrent requests
    cannot overwrite each other's files. The converter is always closed, and
    the directory is removed once the result has been read into memory.

    Returns:
        The HWP file as an attachment on success; JSON ``{'error': ...}``
        with status 400 when ``html`` is missing and status 500 on import or
        conversion errors.
    """
    import shutil

    work_dir = None
    try:
        # silent=True: a missing or malformed JSON body becomes an empty dict
        # and is rejected with 400 below instead of raising AttributeError.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            data = {}

        html_content = data.get('html', '')
        doc_type = data.get('doc_type', 'briefing')

        if not html_content:
            return jsonify({'error': 'HTML 내용이 없습니다'}), 400

        # Per-request scratch directory (file names kept from the original).
        work_dir = tempfile.mkdtemp(prefix='geulbeot_')
        html_path = os.path.join(work_dir, 'geulbeot_temp.html')
        hwp_path = os.path.join(work_dir, 'geulbeot_output.hwp')

        # Save the HTML for the converter to read.
        with open(html_path, 'w', encoding='utf-8') as f:
            f.write(html_content)

        # Import the converter that matches the document type and run it.
        if doc_type == 'briefing':
            from converters.html_to_hwp_briefing import HtmlToHwpConverter
        else:
            from converters.html_to_hwp import HtmlToHwpConverter

        converter = HtmlToHwpConverter(visible=False)
        try:
            converter.convert(html_path, hwp_path)
        finally:
            # Release the converter even when the conversion fails.
            converter.close()

        # Read the result into memory so the scratch directory can be removed
        # before the response is streamed.
        with open(hwp_path, 'rb') as f:
            hwp_bytes = f.read()

        return send_file(
            io.BytesIO(hwp_bytes),
            as_attachment=True,
            download_name=f'report_{datetime.now().strftime("%Y%m%d_%H%M%S")}.hwp',
            mimetype='application/x-hwp'
        )

    except ImportError as e:
        return jsonify({'error': f'pyhwpx 필요: {str(e)}'}), 500
    except Exception as e:
        return jsonify({'error': str(e)}), 500
    finally:
        if work_dir is not None:
            shutil.rmtree(work_dir, ignore_errors=True)


if __name__ == '__main__':
    # Listening port comes from the PORT environment variable (default 5000).
    port = int(os.environ.get('PORT', 5000))
    # Debug mode is enabled only when FLASK_DEBUG equals "true" (case-insensitive).
    debug = os.environ.get('FLASK_DEBUG', 'False').lower() == 'true'
    # Start the Flask server, listening on all network interfaces.
    app.run(host='0.0.0.0', port=port, debug=debug)