Phase W + V' 완료: before→filled→after 파이프라인 + 조립 로직 수정

Phase W: - weight 비율 초기 배정 (space_allocator header 높이 반영) - block_assembler 공통 조립 함수 (filled/assembled 통합) - filled → Selenium 측정 → context 저장 - sidebar overflow 확장 + body 재배분 - sub_layouts 사전 계산 (이미지 누락 해결) Phase V': - 팝업 링크 우측상단 배치 (인라인 → position:absolute) - 표 내용 Kei 판단 (공란 크기 계산 → 행/열 산출 → Kei 요약) - 출처 라벨 삭제 + 이미지 아래 캡션 배치 - after 공란 제거 (결론 바로 위까지 body/sidebar 채움) 추가: - V-10 bold 키워드: 기계적 추출 → Kei 문맥 판단 - ** 마크다운 → <strong> 변환 - [이미지:] 마커 제거 (bold 변환 전 처리) - grid-template-rows AFTER 크기 반영 (Sonnet final) - assemble_stage2 CSS font-size override, white-space fix - 하드코딩 전수 검토 완료 - 본심 여러 topic 텍스트 합침 Phase X 계획 문서 작성 (동적 역할 구조) Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-06 05:00:52 +09:00
parent 24eb1bc5ad
commit 1f7579cf64
64 changed files with 13955 additions and 696 deletions
--- a/src/block_reference.py
+++ b/src/block_reference.py
@@ -0,0 +1,557 @@
+"""Phase T-3: 참고 블록 선택 + 디자인 레퍼런스 HTML 생성.
+
+Stage 1.7에서 호출. relation_type + expression_hint → 참고 블록 결정론적 선택.
+블록을 "채울 틀"이 아니라 "참고할 디자인"으로 제공.
+
+핵심 차이 (Phase P~R vs Phase T):
+  P~R: 블록 선택 → 슬롯에 텍스트 채우기 (실패 — 구조 경직)
+  T:   블록을 참고 자료로 제공 → AI가 구조를 자유롭게 결정 (유연 + 다양)
+
+설계 근거:
+  - expression_hint 키워드 포함 매칭 (정확한 문자열 아님 — T-3 조사)
+  - LLM이 참고 HTML 구조를 70-90% 복사 (T-3 조사) → "디자인 레퍼런스" 프레이밍
+  - Gestalt 원칙: 폐합→벤, 근접→좌우, 연속→화살표 (T-3 조사)
+  - PPTAgent(EMNLP 2025): 참고 기반 생성의 효과 학술 입증
+"""
+from __future__ import annotations
+
+import logging
+import re
+from pathlib import Path
+from typing import Any
+
+import yaml
+from jinja2 import Environment, FileSystemLoader
+
+logger = logging.getLogger(__name__)
+
+# Template directory: "templates" under the parent of this module's directory.
+TEMPLATES_DIR = Path(__file__).parent.parent / "templates"
+
+# Jinja2 environment used for rendering block HTML; built lazily by _get_jinja_env().
+_jinja_env = None
+
+def _get_jinja_env() -> Environment:
+    """Return the shared Jinja2 environment, building it on first use.
+
+    The environment loads templates from ``TEMPLATES_DIR`` with autoescaping
+    turned off and is stored in the module-level ``_jinja_env`` so that later
+    calls reuse the same instance.
+    """
+    global _jinja_env
+    if _jinja_env is not None:
+        return _jinja_env
+    template_loader = FileSystemLoader(str(TEMPLATES_DIR))
+    _jinja_env = Environment(loader=template_loader, autoescape=False)
+    return _jinja_env
+
+
+# ══════════════════════════════════════
+# expression_hint → block mapping (keyword substring matching)
+# ══════════════════════════════════════
+
+# Matching keywords and the corresponding blocks for each visual type.
+# T-3 survey: 10 unique expression_hints → 5 visual types + 2 for future use
+VISUAL_TYPE_KEYWORDS: dict[str, dict[str, Any]] = {
+    "인과": {
+        "keywords": ["인과", "현상->결과", "야기", "원인", "문제 상황"],
+        "blocks": ["callout-warning", "dark-bullet-list"],
+    },
+    "나열_병렬": {
+        "keywords": ["독립적 나열", "병렬 나열", "개별 증거", "병렬"],
+        "blocks": ["dark-bullet-list", "card-icon-desc"],
+    },
+    "나열_정의": {
+        "keywords": ["독립적 정의", "참조용", "용어", "정의 나열"],
+        "blocks": ["card-numbered"],
+    },
+    "포함_계층": {
+        "keywords": ["상위-하위", "포함 관계", "계층적", "포함하는", "구성요소"],
+        "blocks": ["venn-diagram", "keyword-circle-row"],
+    },
+    "강조_결론": {
+        "keywords": ["핵심 메시지 강조", "임팩트", "한 줄 강조", "결론적 판단"],
+        "blocks": ["banner-gradient", "quote-big-mark"],
+    },
+    "비교": {
+        "keywords": ["대등 비교", "좌우 대조", "vs", "A vs B"],
+        "blocks": ["compare-2col-split", "compare-3col-badge", "comparison-2col"],
+    },
+    "순서": {
+        "keywords": ["시간 순서", "단계별", "A->B->C", "프로세스 흐름"],
+        "blocks": ["flow-arrow-horizontal", "process-horizontal"],
+    },
+}
+
+# Per-category fallback block (used when no candidate passes the filters)
+CATEGORY_FALLBACK: dict[str, str] = {
+    "cards": "card-numbered",
+    "emphasis": "dark-bullet-list",
+    "visuals": "venn-diagram",
+    "tables": "compare-2col-split",
+    "media": "image-side-text",
+    "headers": "topic-left-right",
+}
+
+# relation_type → block categories allowed by the first-stage filter
+RELATION_CATEGORY_MAP: dict[str, list[str]] = {
+    "hierarchy": ["visuals", "emphasis"],
+    "inclusion": ["visuals", "emphasis"],
+    "comparison": ["tables", "emphasis", "cards"],
+    "sequence": ["visuals"],
+    "definition": ["cards", "emphasis"],
+    "cause_effect": ["emphasis"],
+    "none": ["emphasis"],
+}
+
+
+# ══════════════════════════════════════
+# 카탈로그 로딩 (mtime 캐싱)
+# ══════════════════════════════════════
+
+_catalog_cache: dict[str, Any] = {"data": None, "mtime": 0}
+
+
+def _load_catalog() -> list[dict]:
+    """Load the block list from ``catalog.yaml``, cached by file mtime.
+
+    The parsed ``blocks`` list is kept in ``_catalog_cache`` and reused for as
+    long as the file's modification time is unchanged; the file is parsed
+    again once its mtime differs from the cached one.
+
+    Returns:
+        The list stored under the top-level ``blocks`` key. An empty list is
+        returned when the document is empty, is not a mapping, or its
+        ``blocks`` value is not a list.
+
+    Raises:
+        OSError: If ``catalog.yaml`` cannot be stat'ed or read.
+        yaml.YAMLError: If the file is not valid YAML.
+    """
+    path = TEMPLATES_DIR / "catalog.yaml"
+    mtime = path.stat().st_mtime
+    if _catalog_cache["data"] is not None and _catalog_cache["mtime"] == mtime:
+        return _catalog_cache["data"]
+
+    data = yaml.safe_load(path.read_text(encoding="utf-8"))
+    # safe_load() returns None for an empty document, and a bare "blocks:" key
+    # parses to None; both would otherwise crash here or in every caller that
+    # iterates the result, so normalise them to an empty list.
+    blocks = data.get("blocks") if isinstance(data, dict) else None
+    if not isinstance(blocks, list):
+        if blocks is not None or data is not None:
+            logger.warning(f"[T-3] {path} has no usable 'blocks' list; treating catalog as empty")
+        blocks = []
+    _catalog_cache["data"] = blocks
+    _catalog_cache["mtime"] = mtime
+    return blocks
+
+
+def _get_block_by_id(block_id: str) -> dict | None:
+    """Return the first catalog entry whose ``id`` equals *block_id*, or ``None``."""
+    matching_entries = (entry for entry in _load_catalog() if entry["id"] == block_id)
+    return next(matching_entries, None)
+
+
+# ══════════════════════════════════════
+# 블록 선택 (2단계 필터)
+# ══════════════════════════════════════
+
+def _match_visual_type(expression_hint: str) -> tuple[str, list[str]]:
+    """Map an expression_hint to a visual type and its candidate block ids.
+
+    Matching is by keyword containment (substring), not exact string equality:
+    per the T-3 survey, expression_hint is a long sentence, so partial
+    matching is required. Visual types are tried in the declaration order of
+    ``VISUAL_TYPE_KEYWORDS`` and the first type with any contained keyword wins.
+
+    Args:
+        expression_hint: Free-text hint. ``None`` or an empty string matches
+            nothing.
+
+    Returns:
+        ``(visual_type, block_ids)``, or ``("default", [])`` when no keyword
+        is contained in the hint.
+    """
+    # A topic dict may carry an explicit null for this field; `kw in None`
+    # would raise TypeError, so a missing hint is treated as "no match".
+    if not expression_hint:
+        return "default", []
+    for vtype, spec in VISUAL_TYPE_KEYWORDS.items():
+        if any(kw in expression_hint for kw in spec["keywords"]):
+            return vtype, spec["blocks"]
+    return "default", []
+
+
+# Dark-themed blocks that are excluded when the role is "배경" (background)
+DARK_BLOCKS = {"dark-bullet-list", "card-dark-overlay"}
+
+
+def _allowed_in_zone(block: dict, zone: str) -> bool:
+    """Return whether *block* may be placed in *zone*.
+
+    Only the "sidebar" zone is restricted: it rejects the "visuals" and
+    "media" categories and any block marked ``zone: full-width-only``.
+    """
+    if zone != "sidebar":
+        return True
+    return (
+        block.get("category") not in ("visuals", "media")
+        and block.get("zone") != "full-width-only"
+    )
+
+
+def select_reference_block(
+    relation_type: str,
+    expression_hint: str,
+    container_height_px: int,
+    zone: str = "body",
+    role: str = "",
+) -> dict[str, Any]:
+    """Select a reference block (filters + role/zone constraints + fallback).
+
+    Candidates are narrowed in this order: relation_type → allowed categories,
+    expression_hint keyword match, dark-block exclusion for the "배경" role,
+    zone constraint, and finally container-height fit. The first surviving
+    catalog entry is chosen. When nothing fits the height, the first
+    zone-compatible candidate is used; when nothing is zone-compatible, the
+    first remaining candidate is used; when there are no candidates at all, a
+    per-category fallback block is used.
+
+    Args:
+        relation_type: Key into ``RELATION_CATEGORY_MAP``; unknown values fall
+            back to the "emphasis" category.
+        expression_hint: Free-text hint matched against visual-type keywords.
+        container_height_px: Height available for the block, in pixels.
+        zone: Target zone; "sidebar" applies the sidebar restrictions.
+        role: Content role; "배경" excludes the blocks in ``DARK_BLOCKS``.
+
+    Returns:
+        {
+            "block_id": str,
+            "variant": str,
+            "visual_type": str,
+            "catalog_entry": dict,  # the full catalog.yaml entry of the block
+        }
+
+    Raises:
+        IndexError: If no candidate is found and the catalog is empty.
+    """
+    catalog = _load_catalog()
+
+    # ── Filter 1: relation_type → categories ──
+    allowed_categories = RELATION_CATEGORY_MAP.get(relation_type, ["emphasis"])
+    candidates_1 = [
+        b for b in catalog
+        if b.get("category") in allowed_categories
+    ]
+
+    # ── Filter 2: expression_hint keyword match ──
+    visual_type, hint_blocks = _match_visual_type(expression_hint)
+    if hint_blocks:
+        candidates_2 = [b for b in candidates_1 if b["id"] in hint_blocks]
+        if not candidates_2:
+            # The hinted blocks are outside the allowed categories: let the
+            # hint win and search the whole catalog.
+            candidates_2 = [b for b in catalog if b["id"] in hint_blocks]
+    else:
+        candidates_2 = candidates_1
+
+    # ── TP-1: the background role excludes dark blocks ──
+    if role == "배경":
+        candidates_2 = [b for b in candidates_2 if b["id"] not in DARK_BLOCKS]
+        if not candidates_2:
+            # Nothing left after removing dark blocks: fall back to light
+            # blocks from the category-filtered list.
+            candidates_2 = [b for b in candidates_1 if b["id"] not in DARK_BLOCKS]
+
+    # ── Zone constraint (sidebar forbids visuals/media/full-width-only) ──
+    # Applied before the height filter so that the "best effort" choice below
+    # cannot hand a forbidden block to the sidebar.
+    zone_ok = [b for b in candidates_2 if _allowed_in_zone(b, zone)]
+
+    # ── Filter 3: container-height fit ──
+    fitting = [
+        b for b in zone_ok
+        if b.get("min_height_px", 0) <= container_height_px
+    ]
+
+    # ── Final choice ──
+    if fitting:
+        selected = fitting[0]
+    elif zone_ok:
+        selected = zone_ok[0]  # does not fit the height, but is the best available
+        logger.warning(
+            f"[T-3] 컨테이너({container_height_px}px)에 맞는 블록 없음. "
+            f"최선 선택: {selected['id']} (min_height_px={selected.get('min_height_px')})"
+        )
+    elif candidates_2:
+        # Last resort: every candidate violates the zone constraint.
+        selected = candidates_2[0]
+        logger.warning(
+            f"[T-3] zone={zone} 제약을 만족하는 블록 없음. "
+            f"최선 선택: {selected['id']}"
+        )
+    else:
+        # Fallback: default block of the first allowed category.
+        fallback_category = allowed_categories[0] if allowed_categories else "emphasis"
+        fallback_id = CATEGORY_FALLBACK.get(fallback_category, "dark-bullet-list")
+        selected = _get_block_by_id(fallback_id)
+        if selected is None:
+            if not catalog:
+                raise IndexError(
+                    "[T-3] catalog is empty; cannot select a reference block"
+                )
+            selected = catalog[0]
+        visual_type = "fallback"
+        logger.warning(f"[T-3] 후보 없음. fallback: {selected['id']}")
+
+    # Variant: use "compact" when the block offers it and the container is
+    # shorter than twice the block's min_height_px.
+    block_min_h = selected.get("min_height_px", 0)
+    has_compact = any(
+        v.get("id") == "compact" for v in (selected.get("variants") or [])
+    )
+    variant = (
+        "compact"
+        if has_compact and container_height_px < block_min_h * 2
+        else "default"
+    )
+
+    return {
+        "block_id": selected["id"],
+        "variant": variant,
+        "visual_type": visual_type,
+        "catalog_entry": selected,
+    }
+
+
+# ══════════════════════════════════════
+# 디자인 레퍼런스 HTML 생성
+# ══════════════════════════════════════
+
+# Per-block sample data (substituted for the Jinja2 template variables)
+_SAMPLE_DATA: dict[str, dict[str, Any]] = {
+    # emphasis
+    "dark-bullet-list": {
+        "title": "핵심 요약",
+        "bullets": ["첫 번째 포인트", "두 번째 포인트", "세 번째 포인트"],
+    },
+    "callout-warning": {
+        "title": "주의사항",
+        "description": "현재 접근 방식에 잠재적 문제가 있습니다.",
+        "icon": "⚠️",
+    },
+    "callout-solution": {
+        "title": "해결 방향",
+        "description": "체계적 접근이 필요합니다.",
+        "icon": "💡",
+    },
+    "banner-gradient": {
+        "text": "핵심 메시지 한 줄",
+        "sub_text": "부연 설명",
+    },
+    "comparison-2col": {
+        "left_title": "항목 A",
+        "left_content": "A의 특징과 설명",
+        "right_title": "항목 B",
+        "right_content": "B의 특징과 설명",
+    },
+    "quote-big-mark": {
+        "quote_text": "중요한 인용문 텍스트",
+        "source": "출처",
+    },
+    # cards
+    "card-numbered": {
+        "items": [
+            {"title": "항목 1", "description": "첫 번째 항목 설명"},
+            {"title": "항목 2", "description": "두 번째 항목 설명"},
+            {"title": "항목 3", "description": "세 번째 항목 설명"},
+        ],
+    },
+    "card-icon-desc": {
+        "cards": [
+            {"icon": "🏗️", "title": "기술 A", "description": "기술 A 설명"},
+            {"icon": "🌍", "title": "기술 B", "description": "기술 B 설명"},
+            {"icon": "🔮", "title": "기술 C", "description": "기술 C 설명"},
+        ],
+    },
+    # visuals
+    "venn-diagram": {
+        "center_label": "DX",
+        "center_sub": "디지털 전환",
+        "items": [
+            {"label": "BIM", "color": "#ff6b35"},
+            {"label": "GIS", "color": "#00d4aa"},
+            {"label": "DT", "color": "#ffd700"},
+        ],
+    },
+    "keyword-circle-row": {
+        "keywords": [
+            {"letter": "B", "label": "BIM", "description": "건물정보모델링"},
+            {"letter": "G", "label": "GIS", "description": "지리정보시스템"},
+            {"letter": "D", "label": "DX", "description": "디지털 전환"},
+        ],
+    },
+    "flow-arrow-horizontal": {
+        "steps": [
+            {"label": "분석"},
+            {"label": "설계"},
+            {"label": "시공"},
+            {"label": "관리"},
+        ],
+    },
+    "process-horizontal": {
+        "steps": [
+            {"number": "1", "title": "현황 분석", "description": "현재 상태 진단"},
+            {"number": "2", "title": "전략 수립", "description": "로드맵 설계"},
+            {"number": "3", "title": "실행", "description": "단계적 도입"},
+        ],
+    },
+    # tables
+    "compare-2col-split": {
+        "left_title": "기존",
+        "right_title": "개선",
+        "rows": [
+            {"left": "수작업", "center": "프로세스", "right": "자동화"},
+            {"left": "2D 도면", "center": "설계 도구", "right": "3D BIM"},
+        ],
+    },
+    "compare-3col-badge": {
+        "headers": ["구분", "항목 A", "항목 B"],
+        "rows": [
+            ["범위", "넓음", "좁음"],
+            ["목적", "혁신", "관리"],
+        ],
+    },
+}
+
+
+def generate_design_reference(
+    block_id: str,
+    variant: str = "default",
+    catalog_entry: dict | None = None,
+) -> str:
+    """Build the design-reference HTML for a block.
+
+    The block's Jinja2 template is rendered with the sample data registered
+    for *block_id* in ``_SAMPLE_DATA``, and HTML comments describing the
+    block's intent and slot schema are prepended. The result is meant to be
+    handed to an LLM as a vetted structure to copy (the original design notes
+    cite 70-90% reuse) rather than something to "invent".
+
+    Args:
+        block_id: Catalog id of the block.
+        variant: Variant id; a non-default variant that declares its own
+            ``template`` overrides the block's template path.
+        catalog_entry: Catalog entry of the block; looked up by *block_id*
+            when omitted.
+
+    Returns:
+        The header comments followed by the rendered HTML. If rendering fails,
+        the raw template source (Jinja variables included) is used instead.
+        An empty string is returned when the block is not in the catalog, has
+        no template path, or neither rendering nor reading the template works.
+    """
+    if catalog_entry is None:
+        catalog_entry = _get_block_by_id(block_id)
+    if catalog_entry is None:
+        logger.warning(f"[T-3] 블록 {block_id} 카탈로그에 없음")
+        return ""
+
+    # Resolve the template path; `or []` tolerates an explicit `variants: null`.
+    template_path = catalog_entry.get("template", "")
+    if variant != "default":
+        for v in catalog_entry.get("variants") or []:
+            if v.get("id") == variant and v.get("template"):
+                template_path = v["template"]
+                break
+
+    if not template_path:
+        logger.warning(f"[T-3] 블록 {block_id} 템플릿 경로 없음")
+        return ""
+
+    # Render the template with the sample data.
+    sample = _SAMPLE_DATA.get(block_id, {})
+
+    try:
+        env = _get_jinja_env()
+        template = env.get_template(template_path)
+        rendered = template.render(**sample)
+    except Exception as e:
+        logger.warning(f"[T-3] 블록 {block_id} 렌더링 실패: {e}")
+        # Best effort: fall back to the raw template source.
+        try:
+            rendered = (TEMPLATES_DIR / template_path).read_text(encoding="utf-8")
+        except Exception as read_err:
+            # Still best effort, but leave a trace instead of failing silently.
+            logger.warning(f"[T-3] 블록 {block_id} 템플릿 원본 읽기 실패: {read_err}")
+            return ""
+
+    # Intent comments. `or ""` guards against explicit nulls in the catalog,
+    # which would otherwise raise TypeError when sliced.
+    visual = catalog_entry.get("visual") or ""
+    visual_diff = catalog_entry.get("visual_diff") or ""
+    when = catalog_entry.get("when") or ""
+
+    header = f"<!-- {block_id}: {visual[:80]} -->\n"
+    if visual_diff:
+        header += f"<!-- 차별점: {visual_diff[:100]} -->\n"
+    header += f"<!-- 적합 상황: {when[:80]} -->\n"
+
+    # Turn the schema into SLOT comments.
+    schema = catalog_entry.get("schema") or {}
+    if schema:
+        schema_comments = []
+        for slot_name, spec in schema.items():
+            if not isinstance(spec, dict):
+                # NOTE(review): the catalog schema shape is not visible here;
+                # a scalar spec (e.g. a bare number) is emitted verbatim
+                # instead of crashing on `.get` — confirm against catalog.yaml.
+                schema_comments.append(f"<!-- SLOT: {slot_name} = {spec} -->")
+            elif slot_name.startswith("max_"):
+                body_val = spec.get("body", "")
+                schema_comments.append(f"<!-- SLOT: {slot_name} = {body_val} -->")
+            else:
+                ml = spec.get("max_lines", "?")
+                fs = spec.get("font_size", "?")
+                rc = (spec.get("ref_chars") or {}).get("body", "?")
+                schema_comments.append(
+                    f"<!-- SLOT: {slot_name} ({ml}줄, {fs}px, max {rc}자) -->"
+                )
+        header += "\n".join(schema_comments) + "\n"
+
+    return header + rendered
+
+
+def _reference_for_topic(
+    topic: dict[str, Any],
+    topic_id: Any,
+    height_px: int,
+    zone: str,
+    role: str,
+) -> dict[str, Any]:
+    """Select a reference block for one topic and build its reference entry."""
+    selection = select_reference_block(
+        relation_type=topic.get("relation_type", "none"),
+        # An explicit null hint is treated the same as a missing one.
+        expression_hint=topic.get("expression_hint") or "",
+        container_height_px=height_px,
+        zone=zone,
+        role=role,
+    )
+    entry = selection["catalog_entry"]
+    ref_html = generate_design_reference(
+        block_id=selection["block_id"],
+        variant=selection["variant"],
+        catalog_entry=entry,
+    )
+    return {
+        "block_id": selection["block_id"],
+        "variant": selection["variant"],
+        "visual_type": selection["visual_type"],
+        "schema_info": entry.get("schema", {}),
+        "design_reference_html": ref_html,
+        "topic_id": topic_id,
+    }
+
+
+def select_and_generate_references(
+    topics: list[dict[str, Any]],
+    containers: dict[str, Any],
+    page_structure: dict[str, Any],
+) -> dict[str, list[dict[str, Any]]]:
+    """Select reference blocks per role and generate their design-reference HTML.
+
+    Called from Stage 1.7. For each role (본심/배경/첨부/결론) a reference block
+    is chosen from the topics' relation_type + expression_hint and rendered
+    into design-reference HTML.
+
+    Topics in one role whose ``layer`` values differ are treated as a
+    primary/subordinate group and share a single block chosen from the primary
+    topic; topics on the same layer each get their own block, with the
+    container height split evenly between them.
+
+    Args:
+        topics: Topic dicts; ``id``, ``layer``, ``relation_type`` and
+            ``expression_hint`` are read.
+        containers: Role → container, either an object exposing ``height_px``
+            and ``zone`` attributes or a dict with those keys.
+        page_structure: Role → info dict with a ``topic_ids`` list. Non-dict
+            values, roles without topic ids and roles without a container are
+            skipped.
+
+    Returns:
+        Role → list of reference entries, e.g.
+        {"본심": [{"block_id": ..., "design_reference_html": ..., ...}], ...}
+    """
+    references: dict[str, list[dict[str, Any]]] = {}
+    topic_map = {t.get("id"): t for t in topics}
+
+    # Lower value = higher priority when picking the primary topic.
+    # NOTE(review): the original comment read "core > intro > supporting >
+    # conclusion", but the values rank conclusion ahead of supporting. The
+    # values are kept as they were — confirm which order is intended.
+    layer_priority = {"core": 0, "intro": 1, "conclusion": 2, "supporting": 3}
+
+    # Gap between stacked blocks; loaded once, and only if a same-layer role
+    # actually needs it.
+    gap_between = None
+
+    for role, info in page_structure.items():
+        if not isinstance(info, dict):
+            continue
+        topic_ids = info.get("topic_ids", [])
+        if not topic_ids:
+            continue
+
+        # Container geometry
+        container = containers.get(role)
+        if container is None:
+            continue
+        if hasattr(container, "height_px"):
+            total_height_px = container.height_px
+            zone = container.zone
+        else:
+            # height_px is expected to be supplied by the previous stage.
+            total_height_px = container.get("height_px", 0)
+            zone = container.get("zone", "body")
+
+        # V-1 + Phase V: the layer relationship between the topics of one
+        # role decides the block structure.
+        #   different layers → primary/subordinate → one block
+        #   same layer       → peers → N parallel blocks
+        topic_layers = {tid: topic_map.get(tid, {}).get("layer", "") for tid in topic_ids}
+        unique_layers = set(topic_layers.values())
+        is_hierarchical = len(unique_layers) > 1 and len(topic_ids) > 1
+
+        if is_hierarchical:
+            # One block chosen from the primary topic; the remaining topics
+            # become sub-elements inside that block. sorted() is stable, so
+            # ties keep the order of topic_ids.
+            sorted_tids = sorted(
+                topic_ids,
+                key=lambda t: layer_priority.get(topic_layers.get(t, ""), 9),
+            )
+            primary_tid = sorted_tids[0]
+            supporting_tids = sorted_tids[1:]
+
+            entry = _reference_for_topic(
+                topic_map.get(primary_tid, {}),
+                primary_tid,
+                total_height_px,
+                zone,
+                role,
+            )
+            # A single block carries every topic of the role.
+            entry["supporting_topic_ids"] = supporting_tids
+            entry["is_hierarchical"] = True
+            role_refs = [entry]
+            logger.info(
+                f"[V-1] {role}: 주종 관계 → 블록 1개 ({entry['block_id']}), "
+                f"주={primary_tid}, 종={supporting_tids}"
+            )
+        else:
+            # Peers: pick a block per topic.
+            if gap_between is None:
+                from src.fit_verifier import _load_design_tokens
+                gap_between = _load_design_tokens()["spacing_small"]
+
+            topic_count = len(topic_ids)
+            available_for_topics = total_height_px - gap_between * max(0, topic_count - 1)
+            # Never budget less than the smallest positive min_height_px in
+            # the catalog.
+            min_block_height = min(
+                (b.get("min_height_px", 0) for b in _load_catalog() if b.get("min_height_px", 0) > 0),
+                default=1,
+            )
+            per_topic_height = max(min_block_height, available_for_topics // topic_count)
+
+            role_refs = []
+            for tid in topic_ids:
+                entry = _reference_for_topic(
+                    topic_map.get(tid, {}),
+                    tid,
+                    per_topic_height,
+                    zone,
+                    role,
+                )
+                role_refs.append(entry)
+
+                logger.info(
+                    f"[V-1] {role}/꼭지{tid}: {entry['block_id']} "
+                    f"(visual_type={entry['visual_type']}, variant={entry['variant']}, "
+                    f"budget={per_topic_height}px)"
+                )
+
+        references[role] = role_refs
+
+    return references