# -*- coding: utf-8 -*-
from dotenv import load_dotenv
load_dotenv()

"""
router.py

기능:
- HTML 파일 처리 로직의 메인 라우터
- 문서 길이에 따라 Short Pipeline 또는 Long Pipeline으로 분기
- Short Pipeline: 단순 HTML 변환 (step7, 8, 9 생략 가능)
- Long Pipeline: RAG 기반의 문서 재구성 (step3~9 전체 과정)
"""

import re
import os
from typing import Dict, Any

# Document-length cutoff used to choose between the two pipelines.
LONG_DOC_THRESHOLD = 5000  # 5000 or more text characters -> Long Pipeline

# Base path of the in-app assets (development and deployment). Read from the
# ASSETS_BASE_PATH environment variable, falling back to "/tmp/assets".
# Do NOT write this path with an r-prefixed (raw) string!
ASSETS_BASE_PATH = os.environ.get("ASSETS_BASE_PATH", "/tmp/assets")


def count_characters(html_content: str) -> int:
    """Count the text characters of an HTML string, ignoring its markup.

    Every ``<...>`` tag is deleted, each run of whitespace is collapsed to a
    single space, and leading/trailing whitespace is dropped before the
    length is taken. Text between tags (including the contents of elements
    such as ``<style>``) and HTML entities are counted as written.

    Args:
        html_content: HTML source to measure.

    Returns:
        Length of the tag-free, whitespace-normalized text.
    """
    # Strip the tags, then let split()/join() normalize the whitespace.
    words = re.sub(r'<[^>]+>', '', html_content).split()
    return len(' '.join(words))


def is_long_document(html_content: str) -> bool:
    """Report whether a document counts as "long".

    Args:
        html_content: HTML source to classify.

    Returns:
        True when the tag-free character count from ``count_characters`` is
        greater than or equal to ``LONG_DOC_THRESHOLD``, otherwise False.
    """
    return count_characters(html_content) >= LONG_DOC_THRESHOLD


def convert_image_paths(html_content: str) -> str:
    """Rewrite relative ``assets/`` sources to root-relative ``/assets/`` paths.

    Every ``src`` attribute value that starts with ``assets/`` gets a leading
    slash (``assets/x.png`` -> ``/assets/x.png``); the original note says this
    is for Flask static-file serving. All other values -- URLs, absolute
    paths, drive-letter paths and other relative paths -- are left untouched.

    Both double-quoted and single-quoted attribute values are handled, and
    the original quote character is preserved in the output.

    Args:
        html_content: HTML source to process.

    Returns:
        The HTML with matching ``src`` values rewritten.
    """

    def replace_src(match):
        quote, original_path = match.group(1), match.group(2)

        # Only bundled assets are rewritten. A value starting with 'assets/'
        # can never also be a URL or an absolute path, so no separate
        # exclusion list for those prefixes is needed.
        if not original_path.startswith('assets/'):
            return match.group(0)

        return f'src={quote}/{original_path}{quote}'

    # Matches src="..." and src='...'. The back-reference forces the closing
    # quote to equal the opening one; DOTALL lets a value span lines, the same
    # way a negated character class would.
    return re.sub(
        r'''src=(["'])((?:(?!\1).)+)\1''',
        replace_src,
        html_content,
        flags=re.DOTALL,
    )


def run_short_pipeline(html_content: str, options: dict) -> Dict[str, Any]:
    """Run the short pipeline, used for documents below the long threshold.

    Only the image-path rewrite is applied to the HTML.

    Args:
        html_content: HTML source to process.
        options: Extra options; accepted but not read by this function.

    Returns:
        On success, a dict with ``success`` (True), ``pipeline`` ('short'),
        ``char_count`` and ``html``. If anything raises, a dict with
        ``success`` (False), ``error`` (the exception text) and ``pipeline``.
    """
    try:
        # Image path rewrite, then the text length of the untouched input.
        rewritten = convert_image_paths(html_content)
        text_length = count_characters(html_content)
    except Exception as exc:
        return dict(success=False, error=str(exc), pipeline='short')

    # TODO: optimize the step7, step8 and step9 stages
    return dict(
        success=True,
        pipeline='short',
        char_count=text_length,
        html=rewritten,
    )


def inject_template_css(html_content: str, template_css: str) -> str:
    """Insert template CSS into an HTML document.

    The first placement that applies is used:
      1. Immediately before the first ``</style>``, i.e. appended to the end
         of the first existing style block.
      2. In a new ``<style>`` element right after a literal ``<head>`` tag.
      3. In a new ``<style>`` element prepended to the document.

    Tag matching is an exact, case-sensitive substring search.

    Args:
        html_content: HTML source to modify.
        template_css: CSS text to inject; when empty or None the HTML is
            returned unchanged.

    Returns:
        The HTML with the CSS block inserted.
    """
    if not template_css:
        return html_content

    css_block = f"\n/* ===== 템플릿 스타일 추가 ===== */\n{template_css}\n"

    # Case 1: extend the first existing style block.
    before, closing_tag, after = html_content.partition('</style>')
    if closing_tag:
        return f'{before}{css_block}{closing_tag}{after}'

    # Case 2: open a new style element directly after <head>.
    before, head_tag, after = html_content.partition('<head>')
    if head_tag:
        return f'{before}{head_tag}\n<style>{css_block}</style>{after}'

    # Case 3: no head at all, so put the style element at the very front.
    return f'<style>{css_block}</style>\n{html_content}'


def run_long_pipeline(html_content: str, options: dict) -> Dict[str, Any]:
    """Run the long pipeline entry step, used for documents at/above the threshold.

    The router itself only rewrites image paths and passes the HTML through.
    When ``options['folder_path']`` is set, the result is additionally
    flagged with ``needs_pipeline`` so the front end can branch; per the
    original note, the remaining work is then handled by
    ``/api/generate-toc`` and ``/api/generate-report-from-toc``.

    Args:
        html_content: HTML source to process.
        options: Extra options; only ``folder_path`` is read here.

    Returns:
        On success, a dict with ``success`` (True), ``pipeline`` ('long'),
        ``char_count`` and ``html``, plus ``needs_pipeline`` (True) when a
        folder path was supplied. If anything raises, a dict with
        ``success`` (False), ``error`` (the exception text) and ``pipeline``.
    """
    try:
        processed_html = convert_image_paths(html_content)
        folder_path = options.get('folder_path', '')

        result = {
            'success': True,
            'pipeline': 'long',
            'char_count': count_characters(html_content),
            'html': processed_html,
        }

        # Without a folder only the HTML is processed; with one, tell the
        # front end that the step-by-step pipeline still has to run.
        if folder_path:
            result['needs_pipeline'] = True

        return result

    except Exception as e:
        return {'success': False, 'error': str(e), 'pipeline': 'long'}


def process_document(content: str, options: dict = None) -> Dict[str, Any]:
    """Main entry point for document processing.

    Routes the document to the long or short pipeline according to its text
    length, then adds the shared ``char_count`` and ``threshold`` fields and,
    when requested, injects template CSS into the resulting HTML.

    Args:
        content: HTML content to process.
        options: Extra options (e.g. ``template_css``, ``folder_path``);
            treated as an empty dict when None.

    Returns:
        The pipeline's result dict, extended with ``char_count`` and
        ``threshold``. For empty or whitespace-only content, a dict holding
        only ``success`` (False) and ``error``.
    """
    # Reject missing or blank input before any pipeline work.
    if not (content and content.strip()):
        return {'success': False, 'error': '내용이 비어있습니다.'}

    opts = {} if options is None else options

    char_count = count_characters(content)

    # Pick the pipeline by document length and run it.
    pipeline = run_long_pipeline if is_long_document(content) else run_short_pipeline
    result = pipeline(content, opts)

    # Fields common to both pipelines (also set on failure results).
    result.update(char_count=char_count, threshold=LONG_DOC_THRESHOLD)

    # Template CSS is injected only into a successful, non-empty HTML result.
    template_css = opts.get('template_css')
    if template_css and result.get('success') and result.get('html'):
        result['html'] = inject_template_css(result['html'], template_css)

    return result