# test/converters/hwpx_style_injector.py

"""
HWPX 스타일 주입기
pyhwpx로 생성된 HWPX 파일에 커스텀 스타일을 후처리로 주입

워크플로우:
1. HWPX 압축 해제
2. header.xml에 커스텀 스타일 정의 추가
3. section*.xml에서 역할별 styleIDRef 매핑
4. 다시 압축
"""

import os
import re
import zipfile
import shutil
import tempfile
from pathlib import Path
from typing import Dict, List, Optional
from dataclasses import dataclass


@dataclass
class StyleDefinition:
    """Declarative description of one paragraph style to inject into an HWPX file.

    The fields are rendered into ``hh:charPr`` / ``hh:paraPr`` / ``hh:style``
    XML by ``HwpxStyleInjector``.
    """
    # NOTE(review): the injector assigns its own sequential IDs when writing
    # header.xml and does not appear to read this field — confirm before
    # relying on it.
    id: int
    name: str
    font_size: int      # hwpunit (pt * 100)
    font_bold: bool
    font_color: str     # #RRGGBB
    align: str          # LEFT, CENTER, RIGHT, JUSTIFY
    line_spacing: int   # percent (160 = 160%)
    indent_left: int    # hwpunit
    indent_first: int   # hwpunit
    space_before: int   # hwpunit
    space_after: int    # hwpunit
    outline_level: int = -1  # outline level (-1 = none, 0 = level 1, 1 = level 2, ...)


# Mapping from semantic role name to its style definition.
ROLE_STYLES: Dict[str, StyleDefinition] = {
    # Outline paragraphs (outline_level >= 0, rendered with type="OUTLINE" headings)
    'H1': StyleDefinition(
        id=101, name='제1장 제목', font_size=2200, font_bold=True,
        font_color='#006400', align='CENTER', line_spacing=200,
        indent_left=0, indent_first=0, space_before=400, space_after=200,
        outline_level=0  # numbering pattern: 제^1장
    ),
    'H2': StyleDefinition(
        id=102, name='1.1 제목', font_size=1500, font_bold=True,
        font_color='#03581d', align='LEFT', line_spacing=200,
        indent_left=0, indent_first=0, space_before=300, space_after=100,
        outline_level=1  # numbering pattern: ^1.^2
    ),
    'H3': StyleDefinition(
        id=103, name='1.1.1 제목', font_size=1400, font_bold=True,
        font_color='#228B22', align='LEFT', line_spacing=200,
        indent_left=500, indent_first=0, space_before=200, space_after=100,
        outline_level=2  # numbering pattern: ^1.^2.^3
    ),
    'H4': StyleDefinition(
        id=104, name='가. 제목', font_size=1300, font_bold=True,
        font_color='#000000', align='LEFT', line_spacing=200,
        indent_left=1000, indent_first=0, space_before=150, space_after=50,
        outline_level=3  # numbering pattern: ^4.
    ),
    'H5': StyleDefinition(
        id=105, name='1) 제목', font_size=1200, font_bold=True,
        font_color='#000000', align='LEFT', line_spacing=200,
        indent_left=1500, indent_first=0, space_before=100, space_after=50,
        outline_level=4  # numbering pattern: ^5)
    ),
    'H6': StyleDefinition(
        id=106, name='가) 제목', font_size=1150, font_bold=True,
        font_color='#000000', align='LEFT', line_spacing=200,
        indent_left=2000, indent_first=0, space_before=100, space_after=50,
        outline_level=5  # numbering pattern: ^6)
    ),
    'H7': StyleDefinition(
        id=115, name='① 제목', font_size=1100, font_bold=True,
        font_color='#000000', align='LEFT', line_spacing=200,
        indent_left=2300, indent_first=0, space_before=100, space_after=50,
        outline_level=6  # numbering pattern: ^7 (circled digits)
    ),
    # Body and auxiliary styles (not outline paragraphs; outline_level stays -1)
    'BODY': StyleDefinition(
        id=107, name='○본문', font_size=1100, font_bold=False,
        font_color='#000000', align='JUSTIFY', line_spacing=200,
        indent_left=1500, indent_first=0, space_before=0, space_after=0
    ),
    'LIST_ITEM': StyleDefinition(
        id=108, name='●본문', font_size=1050, font_bold=False,
        font_color='#000000', align='JUSTIFY', line_spacing=200,
        indent_left=2500, indent_first=0, space_before=0, space_after=0
    ),
    'TABLE_CAPTION': StyleDefinition(
        id=109, name='<표 제목>', font_size=1100, font_bold=True,
        font_color='#000000', align='LEFT', line_spacing=130,
        indent_left=0, indent_first=0, space_before=200, space_after=100
    ),
    'FIGURE_CAPTION': StyleDefinition(
        id=110, name='<그림 제목>', font_size=1100, font_bold=True,
        font_color='#000000', align='CENTER', line_spacing=130,
        indent_left=0, indent_first=0, space_before=100, space_after=200
    ),
    'COVER_TITLE': StyleDefinition(
        id=111, name='표지제목', font_size=2800, font_bold=True,
        font_color='#1a365d', align='CENTER', line_spacing=150,
        indent_left=0, indent_first=0, space_before=0, space_after=200
    ),
    'COVER_SUBTITLE': StyleDefinition(
        id=112, name='표지부제', font_size=1800, font_bold=False,
        font_color='#2d3748', align='CENTER', line_spacing=150,
        indent_left=0, indent_first=0, space_before=0, space_after=100
    ),
    'TOC_1': StyleDefinition(
        id=113, name='목차1수준', font_size=1200, font_bold=True,
        font_color='#000000', align='LEFT', line_spacing=180,
        indent_left=0, indent_first=0, space_before=100, space_after=50
    ),
    'TOC_2': StyleDefinition(
        id=114, name='목차2수준', font_size=1100, font_bold=False,
        font_color='#000000', align='LEFT', line_spacing=180,
        indent_left=500, indent_first=0, space_before=0, space_after=0
    ),
}

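# NOTE: outline auto-numbering is enabled.
# Outline headings are written with idRef="0", which refers to numbering id=1, so replacing that numbering's patterns is what makes the custom outline numbers appear.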


class HwpxStyleInjector:
    """HWPX 스타일 주입기"""

    def __init__(self):
        self.temp_dir: Optional[Path] = None
        self.role_to_style_id: Dict[str, int] = {}
        self.role_to_para_id: Dict[str, int] = {}   # 🆕
        self.role_to_char_id: Dict[str, int] = {}   # 🆕
        self.next_char_id = 0
        self.next_para_id = 0
        self.next_style_id = 0

    def _find_max_ids(self):
        """기존 스타일 교체: 바탕글(id=0)만 유지, 나머지는 우리 스타일로 교체"""
        header_path = self.temp_dir / "Contents" / "header.xml"
        if not header_path.exists():
            self.next_char_id = 1
            self.next_para_id = 1
            self.next_style_id = 1
            return

        content = header_path.read_text(encoding='utf-8')

        # 🆕 기존 "본문", "개요 1~10" 등 스타일 제거 (id=1~22)
        # 바탕글(id=0)만 유지!

        # style id=1~30 제거 (바탕글 제외)
        content = re.sub(r'<hh:style id="([1-9]|[12]\d|30)"[^/]*/>\s*', '', content)

        # itemCnt는 나중에 _update_item_counts에서 자동 업데이트됨

        # 파일 저장
        header_path.write_text(content, encoding='utf-8')
        print(f"   [INFO] 기존 스타일(본문, 개요1~10 등) 제거 완료")

        # charPr, paraPr은 기존 것 다음부터 (참조 깨지지 않도록)
        char_ids = [int(m) for m in re.findall(r'<hh:charPr id="(\d+)"', content)]
        self.next_char_id = max(char_ids) + 1 if char_ids else 20

        para_ids = [int(m) for m in re.findall(r'<hh:paraPr id="(\d+)"', content)]
        self.next_para_id = max(para_ids) + 1 if para_ids else 20

        # 스타일은 1부터 시작! (Ctrl+2 = id=1, Ctrl+3 = id=2, ...)
        self.next_style_id = 1

    def inject(self, hwpx_path: str, role_positions: Dict[str, List[tuple]]) -> str:
        """
        HWPX 파일에 커스텀 스타일 주입

        Args:
            hwpx_path: 원본 HWPX 파일 경로
            role_positions: 역할별 위치 정보 {role: [(section_idx, para_idx), ...]}

        Returns:
            수정된 HWPX 파일 경로
        """
        print(f"\n🎨 HWPX 스타일 주입 시작...")
        print(f"   입력: {hwpx_path}")

        # 1. 임시 디렉토리에 압축 해제
        self.temp_dir = Path(tempfile.mkdtemp(prefix='hwpx_inject_'))
        print(f"   임시 폴더: {self.temp_dir}")

        try:
            with zipfile.ZipFile(hwpx_path, 'r') as zf:
                zf.extractall(self.temp_dir)

            # 압축 해제 직후 section 파일 크기 확인
            print(f"   [DEBUG] After unzip:")
            for sec in ['section0.xml', 'section1.xml', 'section2.xml']:
                sec_path = self.temp_dir / "Contents" / sec
                if sec_path.exists():
                    print(f"   [DEBUG] {sec} size: {sec_path.stat().st_size} bytes")

            # 🆕 기존 최대 ID 찾기 (연속 ID 할당을 위해)
            self._find_max_ids()
            print(f"   [DEBUG] Starting IDs: char={self.next_char_id}, para={self.next_para_id}, style={self.next_style_id}")

            # 2. header.xml에 스타일 정의 추가
            used_roles = set(role_positions.keys())
            self._inject_header_styles(used_roles)

            # 3. section*.xml에 styleIDRef 매핑
            self._inject_section_styles(role_positions)

            # 4. 다시 압축
            output_path = hwpx_path  # 원본 덮어쓰기
            self._repack_hwpx(output_path)

            print(f"   ✅ 스타일 주입 완료: {output_path}")
            return output_path

        finally:
            # 임시 폴더 정리
            if self.temp_dir and self.temp_dir.exists():
                shutil.rmtree(self.temp_dir)

    def _inject_header_styles(self, used_roles: set):
        """header.xml에 스타일 정의 추가 (모든 ROLE_STYLES 주입)"""
        header_path = self.temp_dir / "Contents" / "header.xml"
        if not header_path.exists():
            print("   [경고] header.xml 없음")
            return

        content = header_path.read_text(encoding='utf-8')

        # 🆕 모든 ROLE_STYLES 주입 (used_roles 무시)
        char_props = []
        para_props = []
        styles = []

        for role, style_def in ROLE_STYLES.items():
            char_id = self.next_char_id
            para_id = self.next_para_id
            style_id = self.next_style_id

            self.role_to_style_id[role] = style_id
            self.role_to_para_id[role] = para_id    # 🆕
            self.role_to_char_id[role] = char_id    # 🆕

            # charPr 생성
            char_props.append(self._make_char_pr(char_id, style_def))

            # paraPr 생성
            para_props.append(self._make_para_pr(para_id, style_def))

            # style 생성
            styles.append(self._make_style(style_id, style_def.name, para_id, char_id))

            self.next_char_id += 1
            self.next_para_id += 1
            self.next_style_id += 1

        if not styles:
            print("   [정보] 주입할 스타일 없음")
            return

        # charProperties에 추가
        content = self._insert_before_tag(
            content, '</hh:charProperties>', '\n'.join(char_props) + '\n'
        )

        # paraProperties에 추가
        content = self._insert_before_tag(
            content, '</hh:paraProperties>', '\n'.join(para_props) + '\n'
        )

        # styles에 추가
        content = self._insert_before_tag(
            content, '</hh:styles>', '\n'.join(styles) + '\n'
        )

        # 🆕 numbering id=1 패턴 교체 (idRef="0"이 참조하는 기본 번호 모양)
        # 이렇게 하면 개요 자동 번호가 "제1장, 1.1, 1.1.1..." 형식으로 동작!
        content = self._replace_default_numbering(content)

        # itemCnt 업데이트
        content = self._update_item_counts(content)

        header_path.write_text(content, encoding='utf-8')
        print(f"   → header.xml 수정 완료 ({len(styles)}개 스타일 추가)")

    def _make_char_pr(self, id: int, style: StyleDefinition) -> str:
        """charPr XML 생성 (한 줄로!)"""
        color = style.font_color.lstrip('#')
        font_id = "1" if style.font_bold else "0"

        return f'<hh:charPr id="{id}" height="{style.font_size}" textColor="#{color}" shadeColor="none" useFontSpace="0" useKerning="0" symMark="NONE" borderFillIDRef="1"><hh:fontRef hangul="{font_id}" latin="{font_id}" hanja="{font_id}" japanese="{font_id}" other="{font_id}" symbol="{font_id}" user="{font_id}"/><hh:ratio hangul="100" latin="100" hanja="100" japanese="100" other="100" symbol="100" user="100"/><hh:spacing hangul="0" latin="0" hanja="0" japanese="0" other="0" symbol="0" user="0"/><hh:relSz hangul="100" latin="100" hanja="100" japanese="100" other="100" symbol="100" user="100"/><hh:offset hangul="0" latin="0" hanja="0" japanese="0" other="0" symbol="0" user="0"/><hh:underline type="NONE" shape="SOLID" color="#000000"/><hh:strikeout shape="NONE" color="#000000"/><hh:outline type="NONE"/><hh:shadow type="NONE" color="#B2B2B2" offsetX="10" offsetY="10"/></hh:charPr>'

    def _make_para_pr(self, id: int, style: StyleDefinition) -> str:
        """paraPr XML 생성 (한 줄로!)"""
        # 개요 문단이면 type="OUTLINE", 아니면 type="NONE"
        # idRef="0"은 numbering id=1 (기본 번호 모양)을 참조
        if style.outline_level >= 0:
            heading = f'<hh:heading type="OUTLINE" idRef="0" level="{style.outline_level}"/>'
        else:
            heading = '<hh:heading type="NONE" idRef="0" level="0"/>'

        return f'<hh:paraPr id="{id}" tabPrIDRef="0" condense="0" fontLineHeight="0" snapToGrid="0" suppressLineNumbers="0" checked="0"><hh:align horizontal="{style.align}" vertical="BASELINE"/>{heading}<hh:breakSetting breakLatinWord="KEEP_WORD" breakNonLatinWord="KEEP_WORD" widowOrphan="0" keepWithNext="0" keepLines="0" pageBreakBefore="0" lineWrap="BREAK"/><hh:autoSpacing eAsianEng="0" eAsianNum="0"/><hh:margin><hc:intent value="{style.indent_first}" unit="HWPUNIT"/><hc:left value="{style.indent_left}" unit="HWPUNIT"/><hc:right value="0" unit="HWPUNIT"/><hc:prev value="{style.space_before}" unit="HWPUNIT"/><hc:next value="{style.space_after}" unit="HWPUNIT"/></hh:margin><hh:lineSpacing type="PERCENT" value="{style.line_spacing}" unit="HWPUNIT"/><hh:border borderFillIDRef="1" offsetLeft="0" offsetRight="0" offsetTop="0" offsetBottom="0" connect="0" ignoreMargin="0"/></hh:paraPr>'

    def _make_style(self, id: int, name: str, para_id: int, char_id: int) -> str:
        """style XML 생성"""
        safe_name = name.replace('<', '&lt;').replace('>', '&gt;')
        return f'<hh:style id="{id}" type="PARA" name="{safe_name}" engName="" paraPrIDRef="{para_id}" charPrIDRef="{char_id}" nextStyleIDRef="{id}" langID="1042" lockForm="0"/>'

    def _insert_before_tag(self, content: str, tag: str, insert_text: str) -> str:
        """특정 태그 앞에 텍스트 삽입"""
        return content.replace(tag, insert_text + tag)

    def _update_item_counts(self, content: str) -> str:
        """itemCnt 속성 업데이트"""
        # charProperties itemCnt
        char_count = content.count('<hh:charPr ')
        content = re.sub(
            r'<hh:charProperties itemCnt="(\d+)"',
            f'<hh:charProperties itemCnt="{char_count}"',
            content
        )

        # paraProperties itemCnt
        para_count = content.count('<hh:paraPr ')
        content = re.sub(
            r'<hh:paraProperties itemCnt="(\d+)"',
            f'<hh:paraProperties itemCnt="{para_count}"',
            content
        )

        # styles itemCnt
        style_count = content.count('<hh:style ')
        content = re.sub(
            r'<hh:styles itemCnt="(\d+)"',
            f'<hh:styles itemCnt="{style_count}"',
            content
        )

        # 🆕 numberings itemCnt
        numbering_count = content.count('<hh:numbering ')
        content = re.sub(
            r'<hh:numberings itemCnt="(\d+)"',
            f'<hh:numberings itemCnt="{numbering_count}"',
            content
        )

        return content

    def _replace_default_numbering(self, content: str) -> str:
        """numbering id=1의 패턴을 우리 패턴으로 교체"""
        # 우리가 원하는 개요 번호 패턴
        new_patterns = [
            {'level': '1', 'format': 'DIGIT', 'pattern': '제^1장'},
            {'level': '2', 'format': 'DIGIT', 'pattern': '^1.^2'},
            {'level': '3', 'format': 'DIGIT', 'pattern': '^1.^2.^3'},
            {'level': '4', 'format': 'HANGUL_SYLLABLE', 'pattern': '^4.'},
            {'level': '5', 'format': 'DIGIT', 'pattern': '^5)'},
            {'level': '6', 'format': 'HANGUL_SYLLABLE', 'pattern': '^6)'},
            {'level': '7', 'format': 'CIRCLED_DIGIT', 'pattern': '^7'},
        ]

        # numbering id="1" 찾기
        match = re.search(r'(<hh:numbering id="1"[^>]*>)(.*?)(</hh:numbering>)', content, re.DOTALL)
        if not match:
            print("   [경고] numbering id=1 없음, 교체 건너뜀")
            return content

        numbering_content = match.group(2)

        for np in new_patterns:
            level = np['level']
            fmt = np['format']
            pattern = np['pattern']

            # 해당 level의 paraHead 찾아서 교체
            def replace_parahead(m):
                tag = m.group(0)
                # numFormat 변경
                tag = re.sub(r'numFormat="[^"]*"', f'numFormat="{fmt}"', tag)
                # 패턴(텍스트 내용) 변경
                tag = re.sub(r'>([^<]*)</hh:paraHead>', f'>{pattern}</hh:paraHead>', tag)
                return tag

            numbering_content = re.sub(
                rf'<hh:paraHead[^>]*level="{level}"[^>]*>.*?</hh:paraHead>',
                replace_parahead,
                numbering_content
            )

        new_content = match.group(1) + numbering_content + match.group(3)
        print("   [INFO] numbering id=1 패턴 교체 완료 (제^1장, ^1.^2, ^1.^2.^3...)")
        return content.replace(match.group(0), new_content)

    def _adjust_tables(self, content: str) -> str:
        """표 셀 크기 자동 조정

        1. 행 높이: 최소 800 hwpunit (내용 잘림 방지)
        2. 열 너비: 표 전체 너비를 열 개수로 균등 분배 (또는 첫 열 좁게)
        """

        def adjust_table(match):
            tbl = match.group(0)

            # 표 전체 너비 추출
            sz_match = re.search(r'<hp:sz width="(\d+)"', tbl)
            table_width = int(sz_match.group(1)) if sz_match else 47624

            # 열 개수 추출
            col_match = re.search(r'colCnt="(\d+)"', tbl)
            col_cnt = int(col_match.group(1)) if col_match else 4

            # 열 너비 계산 (첫 열은 30%, 나머지 균등)
            first_col_width = int(table_width * 0.25)
            other_col_width = (table_width - first_col_width) // (col_cnt - 1) if col_cnt > 1 else table_width

            # 행 높이 최소값 설정
            min_height = 800  # 약 8mm

            # 셀 크기 조정
            col_idx = [0]  # closure용

            def adjust_cell_sz(cell_match):
                width = int(cell_match.group(1))
                height = int(cell_match.group(2))

                # 높이 조정
                new_height = max(height, min_height)

                return f'<hp:cellSz width="{width}" height="{new_height}"/>'

            tbl = re.sub(
                r'<hp:cellSz width="(\d+)" height="(\d+)"/>',
                adjust_cell_sz,
                tbl
            )

            return tbl

        return re.sub(r'<hp:tbl[^>]*>.*?</hp:tbl>', adjust_table, content, flags=re.DOTALL)

    def _inject_section_styles(self, role_positions: Dict[str, List[tuple]]):
        """section*.xml에 styleIDRef 매핑 (텍스트 매칭 방식)"""
        contents_dir = self.temp_dir / "Contents"

        # 🔍 디버그: role_to_style_id 확인
        print(f"   [DEBUG] role_to_style_id: {self.role_to_style_id}")

        # section 파일들 찾기
        section_files = sorted(contents_dir.glob("section*.xml"))
        print(f"   [DEBUG] section files: {[f.name for f in section_files]}")

        total_modified = 0

        for section_file in section_files:
            print(f"   [DEBUG] Processing: {section_file.name}")
            original_content = section_file.read_text(encoding='utf-8')
            print(f"   [DEBUG] File size: {len(original_content)} bytes")

            content = original_content  # 작업용 복사본

            # 🆕 머리말/꼬리말 영역 보존 (placeholder로 교체)
            header_footer_map = {}
            placeholder_idx = 0

            def save_header_footer(match):
                nonlocal placeholder_idx
                key = f"__HF_PLACEHOLDER_{placeholder_idx}__"
                header_footer_map[key] = match.group(0)
                placeholder_idx += 1
                return key

            # 머리말/꼬리말 임시 교체
            content = re.sub(r'<hp:header[^>]*>.*?</hp:header>', save_header_footer, content, flags=re.DOTALL)
            content = re.sub(r'<hp:footer[^>]*>.*?</hp:footer>', save_header_footer, content, flags=re.DOTALL)

            # 모든 <hp:p> 태그와 내부 텍스트 추출
            para_pattern = r'(<hp:p [^>]*>)(.*?)(</hp:p>)'

            section_modified = 0

            def replace_style(match):
                nonlocal total_modified, section_modified
                open_tag = match.group(1)
                inner = match.group(2)
                close_tag = match.group(3)

                # 텍스트 추출 (태그 제거)
                text = re.sub(r'<[^>]+>', '', inner).strip()
                if not text:
                    return match.group(0)

                # 텍스트 앞부분으로 역할 판단
                text_start = text[:50]  # 처음 50자로 판단

                matched_role = None
                matched_style_id = None
                matched_para_id = None
                matched_char_id = None

                # 제목 패턴 매칭 (앞에 특수문자 허용)
                # Unicode: ■\u25a0 ▸\u25b8 ◆\u25c6 ▶\u25b6 ●\u25cf ○\u25cb ▪\u25aa ►\u25ba ☞\u261e ★\u2605 ※\u203b ·\u00b7
                prefix = r'^[\u25a0\u25b8\u25c6\u25b6\u25cf\u25cb\u25aa\u25ba\u261e\u2605\u203b\u00b7\s]*'

                # 🆕 FIGURE_CAPTION: "[그림 1-1]", "[그림 1-2]" 등 (가장 먼저 체크!)
                # 그림 = \uadf8\ub9bc
                if re.match(r'^\[\uadf8\ub9bc\s*[\d-]+\]', text_start):
                    matched_role = 'FIGURE_CAPTION'
                # 🆕 TABLE_CAPTION: "<표 1-1>", "[표 1-1]" 등
                # 표 = \ud45c
                elif re.match(r'^[<\[]\ud45c\s*[\d-]+[>\]]', text_start):
                    matched_role = 'TABLE_CAPTION'
                # H1: "제1장", "1 개요" 등
                elif re.match(prefix + r'\uc81c?\s*\d+\uc7a5?\s', text_start) or re.match(prefix + r'[1-9]\s+[\uac00-\ud7a3]', text_start):
                    matched_role = 'H1'
                # H3: "1.1.1 " (H2보다 먼저 체크!)
                elif re.match(prefix + r'\d+\.\d+\.\d+\s', text_start):
                    matched_role = 'H3'
                # H2: "1.1 "
                elif re.match(prefix + r'\d+\.\d+\s', text_start):
                    matched_role = 'H2'
                # H4: "가. "
                elif re.match(prefix + r'[\uac00-\ud7a3]\.\s', text_start):
                    matched_role = 'H4'
                # H5: "1) "
                elif re.match(prefix + r'\d+\)\s', text_start):
                    matched_role = 'H5'
                # H6: "(1) " 또는 "가) "
                elif re.match(prefix + r'\(\d+\)\s', text_start):
                    matched_role = 'H6'
                elif re.match(prefix + r'[\uac00-\ud7a3]\)\s', text_start):
                    matched_role = 'H6'
                # LIST_ITEM: "○ ", "● ", "• " 등
                elif re.match(r'^[\u25cb\u25cf\u25e6\u2022\u2023\u25b8]\s', text_start):
                    matched_role = 'LIST_ITEM'
                elif re.match(r'^[-\u2013\u2014]\s', text_start):
                    matched_role = 'LIST_ITEM'

                # 매칭된 역할이 있고 스타일 ID가 있으면 적용
                if matched_role and matched_role in self.role_to_style_id:
                    matched_style_id = self.role_to_style_id[matched_role]
                    matched_para_id = self.role_to_para_id[matched_role]
                    matched_char_id = self.role_to_char_id[matched_role]
                elif 'BODY' in self.role_to_style_id and len(text) > 20:
                    # 긴 텍스트는 본문으로 간주
                    matched_role = 'BODY'
                    matched_style_id = self.role_to_style_id['BODY']
                    matched_para_id = self.role_to_para_id['BODY']
                    matched_char_id = self.role_to_char_id['BODY']

                if matched_style_id:
                    # 1. hp:p 태그의 styleIDRef 변경
                    if 'styleIDRef="' in open_tag:
                        new_open = re.sub(r'styleIDRef="[^"]*"', f'styleIDRef="{matched_style_id}"', open_tag)
                    else:
                        new_open = open_tag.replace('<hp:p ', f'<hp:p styleIDRef="{matched_style_id}" ')

                    # 2. hp:p 태그의 paraPrIDRef도 변경! (스타일의 paraPrIDRef와 일치!)
                    new_open = re.sub(r'paraPrIDRef="[^"]*"', f'paraPrIDRef="{matched_para_id}"', new_open)

                    # 3. inner에서 hp:run의 charPrIDRef도 변경! (스타일의 charPrIDRef와 일치!)
                    new_inner = re.sub(r'(<hp:run[^>]*charPrIDRef=")[^"]*(")', f'\\g<1>{matched_char_id}\\2', inner)

                    # 🆕 4. 개요 문단이면 수동 번호 제거 (자동 번호가 붙으니까!)
                    if matched_role in ROLE_STYLES and ROLE_STYLES[matched_role].outline_level >= 0:
                        new_inner = self._remove_manual_numbering(new_inner, matched_role)

                    total_modified += 1
                    section_modified += 1
                    return new_open + new_inner + close_tag

                return match.group(0)

            new_content = re.sub(para_pattern, replace_style, content, flags=re.DOTALL)

            # 🆕 표 크기 자동 조정
            new_content = self._adjust_tables(new_content)

            # 🆕 outlineShapeIDRef를 1로 변경 (우리가 교체한 numbering id=1 사용)
            new_content = re.sub(
                r'outlineShapeIDRef="[^"]*"',
                'outlineShapeIDRef="1"',
                new_content
            )


            # 🆕 머리말/꼬리말 복원
            for key, original in header_footer_map.items():
                new_content = new_content.replace(key, original)

            print(f"   [DEBUG] {section_file.name}: {section_modified} paras modified, content changed: {new_content != original_content}")

            if new_content != original_content:
                section_file.write_text(new_content, encoding='utf-8')
                print(f"   -> {section_file.name} saved")

        print(f"   -> Total {total_modified} paragraphs styled")

    def _update_para_style(self, content: str, para_idx: int, style_id: int) -> str:
        """특정 인덱스의 문단 styleIDRef 변경"""
        # <hp:p ...> 태그들 찾기
        pattern = r'<hp:p\s[^>]*>'
        matches = list(re.finditer(pattern, content))

        if para_idx >= len(matches):
            return content

        match = matches[para_idx]
        old_tag = match.group(0)

        # styleIDRef 속성 변경 또는 추가
        if 'styleIDRef=' in old_tag:
            new_tag = re.sub(r'styleIDRef="[^"]*"', f'styleIDRef="{style_id}"', old_tag)
        else:
            # 속성 추가
            new_tag = old_tag.replace('<hp:p ', f'<hp:p styleIDRef="{style_id}" ')

        return content[:match.start()] + new_tag + content[match.end():]

    def _remove_manual_numbering(self, inner: str, role: str) -> str:
        """🆕 개요 문단에서 수동 번호 제거 (자동 번호가 붙으니까!)

        HTML에서 "제1장 DX 개요" → "DX 개요" (자동으로 "제1장" 붙음)
        HTML에서 "1.1 측량 DX" → "측량 DX" (자동으로 "1.1" 붙음)
        """
        # 역할별 번호 패턴
        patterns = {
            'H1': r'^(제\s*\d+\s*장\s*)',          # "제1장 " → 제거
            'H2': r'^(\d+\.\d+\s+)',               # "1.1 " → 제거
            'H3': r'^(\d+\.\d+\.\d+\s+)',          # "1.1.1 " → 제거
            'H4': r'^([가-힣]\.\s+)',              # "가. " → 제거
            'H5': r'^(\d+\)\s+)',                  # "1) " → 제거
            'H6': r'^([가-힣]\)\s+|\(\d+\)\s+)',   # "가) " 또는 "(1) " → 제거
            'H7': r'^([①②③④⑤⑥⑦⑧⑨⑩]+\s*)',   # "① " → 제거
        }

        if role not in patterns:
            return inner

        pattern = patterns[role]

        # <hp:t> 태그 내 텍스트에서 번호 제거
        def remove_number(match):
            text = match.group(1)
            # 첫 번째 <hp:t> 내용에서만 번호 제거
            new_text = re.sub(pattern, '', text, count=1)
            return f'<hp:t>{new_text}</hp:t>'

        # 첫 번째 hp:t 태그만 처리
        new_inner = re.sub(r'<hp:t>([^<]*)</hp:t>', remove_number, inner, count=1)

        return new_inner

    def _repack_hwpx(self, output_path: str):
        """HWPX 재압축"""
        print(f"   [DEBUG] Repacking to: {output_path}")
        print(f"   [DEBUG] Source dir: {self.temp_dir}")

        # 압축 전 section 파일 크기 확인
        for sec in ['section0.xml', 'section1.xml', 'section2.xml']:
            sec_path = self.temp_dir / "Contents" / sec
            if sec_path.exists():
                print(f"   [DEBUG] {sec} size before zip: {sec_path.stat().st_size} bytes")

        # 🆕 임시 파일에 먼저 저장 (원본 파일 잠금 문제 회피)
        temp_output = output_path + ".tmp"

        with zipfile.ZipFile(temp_output, 'w', zipfile.ZIP_DEFLATED) as zf:
            # mimetype은 압축 없이 첫 번째로
            mimetype_path = self.temp_dir / "mimetype"
            if mimetype_path.exists():
                zf.write(mimetype_path, "mimetype", compress_type=zipfile.ZIP_STORED)

            # 나머지 파일들
            file_count = 0
            for root, dirs, files in os.walk(self.temp_dir):
                for file in files:
                    if file == "mimetype":
                        continue
                    file_path = Path(root) / file
                    arcname = file_path.relative_to(self.temp_dir)
                    zf.write(file_path, arcname)
                    file_count += 1

            print(f"   [DEBUG] Total files zipped: {file_count}")

        # 🆕 원본 삭제 후 임시 파일을 원본 이름으로 변경
        import time
        for attempt in range(3):
            try:
                if os.path.exists(output_path):
                    os.remove(output_path)
                os.rename(temp_output, output_path)
                break
            except PermissionError:
                print(f"   [DEBUG] 파일 잠금 대기 중... ({attempt + 1}/3)")
                time.sleep(0.5)
        else:
            # 3번 시도 실패 시 임시 파일 이름으로 유지
            print(f"   [경고] 원본 덮어쓰기 실패, 임시 파일 사용: {temp_output}")
            output_path = temp_output

        # 압축 후 결과 확인
        print(f"   [DEBUG] Output file size: {Path(output_path).stat().st_size} bytes")


def inject_styles_to_hwpx(hwpx_path: str, elements: list) -> str:
    """Convenience wrapper: derive role positions from ``elements`` and inject styles.

    Each element's ``role`` attribute is grouped with its index in the list;
    every position is recorded as ``(0, index)``, i.e. all elements are
    assumed to live in section 0 in list order.

    Args:
        hwpx_path: Path of the HWPX file to modify.
        elements: Sequence of objects exposing a ``role`` attribute.

    Returns:
        Whatever ``HwpxStyleInjector.inject`` returns for the file.
    """
    role_positions: Dict[str, List[tuple]] = {}
    for para_idx, element in enumerate(elements):
        # (section_idx, para_idx); the section index is fixed at 0 for now.
        role_positions.setdefault(element.role, []).append((0, para_idx))

    return HwpxStyleInjector().inject(hwpx_path, role_positions)


# Manual smoke test: running the module directly only confirms that it loads.
if __name__ == "__main__":
    # Sample {role: [(section_idx, para_idx), ...]} mapping for manual experiments.
    test_positions = {
        'H1': [(0, 0), (0, 5)],
        'H2': [(0, 1), (0, 6)],
        'BODY': [(0, 2), (0, 3), (0, 4)],
    }

    # Uncomment to run the injector against a local file:
    # injector = HwpxStyleInjector()
    # injector.inject("test.hwpx", test_positions)
    print("HwpxStyleInjector 모듈 로드 완료")