# C.E.L_Slide_test2/src/block_assembler_b2.py

"""유형 B'' 조립 함수 — slide-base.html + 블록 템플릿 사용.

변경 이력:
- 기존: f-string 하드코딩 HTML
- 현재: slide-base.html 래핑 + templates/blocks/ 블록 Jinja2 렌더링 + font_hierarchy 적용

원칙:
- 블록 CSS의 글씨 크기를 font_hierarchy에 맞게 조정 (프로세스 내 조정)
- 콘텐츠는 PipelineContext에서 가져옴 (하드코딩 아님)
- 블록은 콘텐츠에 맞게 재구성 (items 수 동적)
"""
from __future__ import annotations

import base64
import re
from pathlib import Path
from typing import TYPE_CHECKING

from jinja2 import Environment, FileSystemLoader

if TYPE_CHECKING:
    from src.pipeline_context import PipelineContext

# Root directory of the block templates (a relative path, so it is resolved
# against the process working directory).
BLOCKS_DIR = Path("templates/blocks")
# Image assets that _img_b64 inlines as data URIs live in the "svg" subfolder.
SVG_DIR = BLOCKS_DIR / "svg"

# Jinja2 environment rooted at BLOCKS_DIR. Autoescaping is disabled, so values
# rendered through it are emitted as raw HTML.
# NOTE(review): nothing in the visible part of this file uses _env — confirm it is still needed.
_env = Environment(loader=FileSystemLoader(str(BLOCKS_DIR)), autoescape=False)


def _img_b64(filename: str) -> str:
    """Return the asset *filename* from SVG_DIR encoded as a base64 data URI.

    An empty string is returned when the file does not exist. Names ending in
    ``.svg`` are labelled ``image/svg+xml``; every other name is labelled
    ``image/png``.
    """
    asset_path = SVG_DIR / filename
    if asset_path.exists():
        if filename.endswith(".svg"):
            subtype = "svg+xml"
        else:
            subtype = "png"
        encoded = base64.b64encode(asset_path.read_bytes()).decode()
        return f"data:image/{subtype};base64," + encoded
    return ""


def _strip_comments(html: str) -> str:
    """Remove every ``<!-- ... -->`` comment (multi-line ones included) and trim the result."""
    comment_pattern = re.compile(r'<!--.*?-->', re.DOTALL)
    without_comments = comment_pattern.sub('', html)
    return without_comments.strip()


def _render_slide_base(title: str, body_html: str, footer_text: str) -> str:
    """Wrap *body_html* in the shared slide-base.html shell.

    The template is not rendered through Jinja2 here: HTML comments are
    stripped, then the known placeholders and control tags are substituted
    textually in a fixed order, and the two image references are replaced by
    inline data URIs.
    """
    template_path = BLOCKS_DIR / "slide-base.html"
    page = _strip_comments(template_path.read_text(encoding="utf-8"))

    # Title placeholder (both default-text variants), then the body block.
    for title_tag in ('{{ title|default("슬라이드") }}', '{{ title|default("슬라이드 제목") }}'):
        page = page.replace(title_tag, title)
    page = page.replace('{% block body %}{% endblock %}', body_html)

    # Footer: drop the conditional tags, inline the pill image and remove the
    # "slide-footer--css" div.
    pill_uri = _img_b64("pill_scroll.png")
    page = page.replace('{% if footer_text %}', '')
    page = page.replace('{% if footer_pill_bg %}', '')
    page = page.replace('{{ footer_pill_bg }}', pill_uri)
    page = page.replace('{% else %}', '')
    page = page.replace('<div class="slide-footer-bg slide-footer--css"></div>', '')

    # Cut the last endif (unless it sits at index 0), then every remaining one.
    endif_tag = '{% endif %}'
    last_endif = page.rfind(endif_tag)
    if last_endif > 0:
        page = page[:last_endif] + page[last_endif + len(endif_tag):]
    page = page.replace(endif_tag, '')
    page = page.replace('{{ footer_text|safe }}', footer_text)

    # Inline the background texture and the divider line.
    texture_uri = _img_b64("bg_slide_texture.png")
    page = page.replace('src="svg/bg_slide_texture.png"', f'src="{texture_uri}"')
    divider_uri = _img_b64("line_divider.svg")
    page = page.replace('src="svg/line_divider.svg"', f'src="{divider_uri}"')
    return page


def _assemble_slide_html_type_b_double_prime(ctx: "PipelineContext", title_text: str = "") -> str:
    """Assemble the complete HTML for a type B'' slide.

    The slide is built from the shared slide-base.html shell, a top zone
    (``_render_top_zone``), a bottom zone (``_render_bottom_zone``) and a
    ``<style>`` override that applies the sizes from ``ctx.font_hierarchy``.

    Args:
        ctx: Pipeline context. This function reads ``font_hierarchy``
            (``key_msg``, ``core``, ``bg``, ``sidebar``), ``analysis.title``,
            ``analysis.core_message``, ``normalized.sections``,
            ``normalized.tables`` and ``enhancement_result``.
        title_text: Slide title; falls back to ``ctx.analysis.title`` when empty.

    Returns:
        The slide HTML returned by ``_render_slide_base``.
    """
    font_h = ctx.font_hierarchy
    title = title_text or ctx.analysis.title or ""
    core_message = ctx.analysis.core_message or ""
    norm_sections = ctx.normalized.sections or []
    norm_tables = ctx.normalized.tables or []
    enh = ctx.enhancement_result or {}
    bold_kw = enh.get("bold_keywords")
    if not isinstance(bold_kw, dict):
        bold_kw = {}

    def _bold(text, role=""):
        """Wrap each keyword registered for *role* in ``<strong>`` tags."""
        for kw in bold_kw.get(role, []):
            if kw in text:
                text = text.replace(kw, f"<strong>{kw}</strong>")
        return text

    # Top and bottom zone bodies.
    top_html = _render_top_zone(ctx, norm_sections, font_h, _bold)
    bottom_html = _render_bottom_zone(ctx, norm_sections, norm_tables, font_h, _bold)

    # font_hierarchy CSS override for the block stylesheets.
    font_css = f"""<style>
/* font_hierarchy: key_msg={font_h.key_msg}px, core={font_h.core}px, bg={font_h.bg}px, sidebar={font_h.sidebar}px */
.p3c-heading {{ font-size: {font_h.core}px !important; line-height: 1.5 !important; }}
.p3c-desc {{ font-size: {font_h.sidebar}px !important; line-height: 1.6 !important; }}
.p3c-desc .bul {{ padding-left: 12px; text-indent: -12px; }}
.p3c-vlabel {{ font-size: {font_h.key_msg}px !important; }}
.p3c-vlabel-sub {{ font-size: {font_h.core}px !important; }}
.p3c-kanji {{ display: none !important; }}
.p3c-vlabel-area {{ width: 56px !important; }}
.p3c-section {{ left: 60px !important; right: 6px !important; }}
.p3c-mid-line {{ left: 56px !important; }}
.p3c-col {{ min-height: 0 !important; height: 100% !important; }}
.block-p3c {{ height: 100% !important; }}
.pp2-header-text {{ font-size: {font_h.core + 1}px !important; font-weight: 900 !important; }}
.pp2-header-text--right {{ color: #ffffff !important; }}
.pp2-mid-title {{ font-size: {font_h.core}px !important; line-height: 1.5 !important; margin-top: 4px !important; }}
.pp2-mid-title:first-child {{ margin-top: 0 !important; }}
.pp2-body-text {{ font-size: {font_h.sidebar}px !important; line-height: 1.6 !important; padding-left: 12px !important; text-indent: -12px !important; font-weight: 500 !important; }}
</style>"""

    # Zone titles come from level-2 sections that carry no content of their own:
    # one immediately followed by a level-3 section titles the bottom zone (the
    # last such section wins); the first of the others titles the top zone.
    top_zone_title = ""
    bottom_zone_title = ""
    for i, s in enumerate(norm_sections):
        # "content" may be present but None, so normalise before stripping.
        if s.get("level") != 2 or (s.get("content") or "").strip():
            continue
        next_is_level3 = (
            i + 1 < len(norm_sections) and norm_sections[i + 1].get("level") == 3
        )
        if next_is_level3:
            bottom_zone_title = s.get("title", "")
        elif not top_zone_title:
            top_zone_title = s.get("title", "")

    # Assemble the slide body.
    body = f"""{font_css}
<div style="height:38%;margin-bottom:1%;padding-top:8px;">
  <div style="font-weight:700;font-size:{font_h.core + 1}px;color:#1a365d;margin-bottom:8px;">
    {top_zone_title}
  </div>
  <div style="height:calc(100% - 28px);padding:0 12px 0 24px;">{top_html}</div>
</div>
<div style="height:60%;margin-top:12px;">
  <div style="font-weight:700;font-size:{font_h.core + 1}px;color:#1a365d;margin-bottom:8px;">
    {bottom_zone_title}
  </div>
  <div style="height:calc(100% - 28px);padding:0 12px 0 24px;">{bottom_html}</div>
</div>"""

    # The footer shows the core message with one fixed phrase emphasised.
    footer_text_html = str(core_message).replace(
        '기대할 수 있다', '<em>기대할 수 있다</em>'
    )

    return _render_slide_base(title, body, footer_text_html)


def _get_zone_title(sections, level=2, index=0):
    """Return the title of the *index*-th section whose ``level`` equals *level*.

    Matching sections are counted from zero in list order. ``""`` is returned
    when there is no such section or the matching section has no ``title`` key.
    """
    at_level = (sec for sec in sections if sec.get("level") == level)
    for position, sec in enumerate(at_level):
        if position == index:
            return sec.get("title", "")
    return ""


def _render_top_zone(ctx, sections, font_h, bold_fn):
    """Render the top zone as one column per topic of the "top" role.

    Args:
        ctx: Pipeline context; reads ``page_structure.roles`` and ``topics``.
        sections: Normalized section dicts, used as a content fallback for a
            topic whose ``structured_text`` is empty (matched by title).
        font_h: Not used by this function.
        bold_fn: Not used by this function.

    Returns:
        The column markup followed by the first ``<style>`` element of
        ``new/prerequisites-3col.html``, or a short placeholder ``<div>`` when
        there is no top zone or no topic has content.
    """
    import logging  # function-scope import: logging is not among the file-level imports

    logger = logging.getLogger(__name__)

    # The first role whose zone is "top" supplies the topic ids.
    top_info = next(
        (
            info
            for info in ctx.page_structure.roles.values()
            if isinstance(info, dict) and info.get("zone") == "top"
        ),
        None,
    )
    if top_info is None:
        return "<div>상단 zone 없음</div>"

    topic_map = {t.id: t for t in ctx.topics}

    categories = []
    for tid in top_info.get("topic_ids", []):
        topic = topic_map.get(tid)
        if not topic:
            continue

        cat_name = topic.title or ""
        # Prefer structured_text; otherwise use the same-titled normalized section.
        content = topic.structured_text or ""
        if not content:
            content = next(
                (
                    s["content"]
                    for s in sections
                    if s.get("title") == cat_name and s.get("content")
                ),
                "",
            )
        if not content:
            continue

        headings = _parse_top_headings(content)
        categories.append({"name": cat_name, "headings": headings})
        logger.info("[B'' top] cat=%s, headings=%d", cat_name, len(headings))

    if not categories:
        return "<div>콘텐츠 없음</div>"

    # Block stylesheet: the first <style> element of the template file.
    p3c_raw = (BLOCKS_DIR / "new" / "prerequisites-3col.html").read_text(encoding="utf-8")
    p3c_css = re.search(r'<style>(.*?)</style>', p3c_raw, re.DOTALL)
    css_html = p3c_css.group(0) if p3c_css else ""

    # Colour pairs, cycled when there are more columns than entries.
    bar_gradients = [
        "linear-gradient(180deg, #0D78D0 0%, #023056 100%)",
        "linear-gradient(180deg, #FF9A23 0%, #CC5200 100%)",
        "linear-gradient(180deg, #39BE49 0%, #23742C 100%)",
        "linear-gradient(180deg, #7c3aed 0%, #4c1d95 100%)",
    ]
    heading_gradients = [
        "linear-gradient(180deg, #0D78D0 0%, #134D7F 100%)",
        "linear-gradient(180deg, #CC5200 0%, #883700 100%)",
        "linear-gradient(180deg, #39BE49 0%, #1E6328 100%)",
        "linear-gradient(180deg, #7c3aed 0%, #5b21b6 100%)",
    ]

    col_parts = []
    for ci, cat in enumerate(categories):
        # "기술(디지털)" -> name="기술", sub="디지털" (ASCII or full-width parentheses).
        name_match = re.match(r'^(.+?)[（(](.+?)[）)]$', cat["name"])
        if name_match:
            name, sub = name_match.group(1), name_match.group(2)
        else:
            name, sub = cat["name"], ""

        bar = bar_gradients[ci % len(bar_gradients)]
        hgrad = heading_gradients[ci % len(heading_gradients)]

        # The column height is shared equally between the heading groups.
        items = cat["headings"]
        n = max(len(items), 1)
        pct_h = int(95 / n)
        item_parts = []
        for i, item in enumerate(items):
            if not item["title"] and not item["bullets"]:
                continue
            pct_top = int(3 + i * (95 / n))
            bul = "".join(f'<div class="bul">• {b}</div>' for b in item["bullets"])
            item_parts.append(f"""
            <div class="p3c-section" style="position:absolute;left:60px;right:6px;top:{pct_top}%;height:{pct_h}%;">
              <div class="p3c-heading" style="background-image:{hgrad}">{item['title']}</div>
              <div class="p3c-desc">{bul}</div>
            </div>""")
            # Dashed separator after every group except the last one.
            if i < n - 1 and len(items) > 1:
                line_top = pct_top + pct_h
                item_parts.append(
                    f'<div class="p3c-mid-line" style="position:absolute;left:56px;right:0;top:{line_top}%;border-top:1.2px dashed #000;"></div>'
                )
        items_html = "".join(item_parts)
        sub_html = '<div class="p3c-vlabel-sub">' + sub + '</div>' if sub else ''

        col_parts.append(f"""
        <div class="p3c-col" style="flex:1;position:relative;height:100%;border-top:1.2px solid #000;border-bottom:1.2px solid #000;">
          <div class="p3c-bar" style="background:{bar};position:absolute;left:0;top:0;width:56px;height:100%;"></div>
          <div class="p3c-vlabel-area" style="position:absolute;left:0;top:0;width:56px;height:100%;display:flex;flex-direction:column;align-items:center;justify-content:center;gap:4px;z-index:3;">
            <div class="p3c-vlabel">{name}</div>
            {sub_html}
          </div>
          {items_html}
        </div>""")

    cols_html = "".join(col_parts)
    return f'<div class="block-p3c" style="display:flex;gap:12px;width:100%;height:100%;">{cols_html}</div>\n{css_html}'


def _parse_top_headings(content):
    """Split topic text into ``{"title": str, "bullets": [str]}`` groups.

    ``**x**`` becomes ``<strong>x</strong>``. A line with a ``D<n>:`` prefix of
    depth <= 1 (or no prefix) that contains bold text opens a new group; every
    other non-empty line is a bullet of the most recent group. Bullets seen
    before any group exists go into a group with an empty title.
    """
    headings = []
    for line in content.split("\n"):
        stripped = line.strip()
        if not stripped:
            continue
        stripped = re.sub(r'\*\*(.+?)\*\*', r'<strong>\1</strong>', stripped)

        # Optional "D<depth>:" marker; a line without one counts as depth 0.
        dm = re.match(r'^D(\d+):\s*', stripped)
        depth = 0
        if dm:
            depth = int(dm.group(1))
            stripped = stripped[dm.end():]
        clean = stripped.lstrip("•- ").strip()
        if not clean:
            continue

        if depth <= 1 and '<strong>' in clean:
            headings.append({"title": clean, "bullets": []})
            continue
        if not headings:
            headings.append({"title": "", "bullets": []})
        headings[-1]["bullets"].append(clean)
    return headings


def _render_bottom_zone(ctx, sections, tables, font_h, bold_fn):
    """Render the bottom zone as a two-column grid with row-aligned sub-headings.

    The first level-3 section fills the left column and the second one the
    right column. Both are parsed with ``_parse_sub_content`` and padded to the
    same number of rows. When *tables* is non-empty, its first table is
    rendered as a comparison table in the first left cell. A placeholder
    ``<div>`` is returned when there is no level-3 section.
    """
    level3 = [
        (sec.get("title", ""), sec.get("content", ""))
        for sec in sections
        if sec.get("level") == 3
    ]
    if not level3:
        return "<div>하단 콘텐츠 없음</div>"

    # Left column = first level-3 section, right column = second (if any).
    left_title, left_content = level3[0]
    if len(level3) > 1:
        right_title, right_content = level3[1]
    else:
        right_title, right_content = "", ""

    left_items = _parse_sub_content(left_content, tables, bold_fn)
    right_items = _parse_sub_content(right_content, [], bold_fn)

    # Pad the shorter side with empty rows so both columns line up.
    row_count = max(len(left_items), len(right_items))
    left_items += [("", []) for _ in range(row_count - len(left_items))]
    right_items += [("", []) for _ in range(row_count - len(right_items))]

    # Block stylesheet: the first <style> element of the template file.
    template_text = (BLOCKS_DIR / "BEPs" / "process-product-2col.html").read_text(encoding="utf-8")
    style_match = re.search(r'<style>(.*?)</style>', template_text, re.DOTALL)
    css_html = style_match.group(0) if style_match else ""

    arrow_uri = _img_b64("arrow_asis_tobe.png")

    # Grid cells are emitted pairwise (left, right), one pair per row.
    cells = []
    for row_idx, (left, right) in enumerate(zip(left_items, right_items)):
        left_heading, left_bullets = left
        right_heading, right_bullets = right
        pad = "2px 16px" if row_idx else "3px 16px"

        left_parts = [f'<div style="padding:{pad};">']
        if left_heading:
            left_parts.append(f'<div class="pp2-mid-title pp2-mid-title--left">{left_heading}</div>')
        # The comparison table, if any, goes into the first left cell.
        if row_idx == 0 and tables:
            left_parts.append(_render_compare_table(tables[0], arrow_uri, font_h))
        left_parts.extend(f'<div class="pp2-body-text">• {b}</div>' for b in left_bullets)
        left_parts.append('</div>')

        right_parts = [f'<div style="padding:{pad};">']
        if right_heading:
            right_parts.append(f'<div class="pp2-mid-title pp2-mid-title--right">{right_heading}</div>')
        right_parts.extend(f'<div class="pp2-body-text">• {b}</div>' for b in right_bullets)
        right_parts.append('</div>')

        cells.append("".join(left_parts))
        cells.append("".join(right_parts))
    rows_html = "".join(cells)

    # Header bars for both columns.
    header_html = f"""
    <div class="pp2-header-bar pp2-header-bar--left" style="background:linear-gradient(270deg,#a4a096 0%,#39311e 100%);border-radius:0 24px 24px 0;display:flex;align-items:center;justify-content:center;height:30px;margin-top:4px;">
      <span class="pp2-header-text pp2-header-text--left" style="color:#3e3523;">{left_title}</span>
    </div>
    <div class="pp2-header-bar pp2-header-bar--right" style="background:linear-gradient(90deg,#296b55 0%,#022017 100%);border-radius:24px 0 0 24px;display:flex;align-items:center;padding-left:20px;height:30px;margin-top:4px;">
      <span class="pp2-header-text pp2-header-text--right">{right_title}</span>
    </div>"""

    return f"""
<div style="position:relative;width:100%;height:100%;">
  <div style="position:absolute;left:0;top:0;width:50%;height:100%;background:linear-gradient(180deg,#ffffff 46%,#39311e 100%);z-index:0;"></div>
  <div style="position:absolute;left:50%;top:0;width:50%;height:100%;background:linear-gradient(0deg,#296b55 0%,#ffffff 56%);z-index:0;"></div>
  <div style="position:relative;z-index:1;display:grid;grid-template-columns:1fr 1fr;width:100%;height:100%;">
    {header_html}
    {rows_html}
  </div>
</div>
{css_html}"""


def _parse_sub_content(content, tables, bold_fn):
    """Parse section text into ``(sub_heading, bullets)`` pairs.

    ``**x**`` becomes ``<strong>x</strong>``. A line containing bold text and
    shorter than 80 characters opens a new pair; every other line is a bullet
    of the current pair. Lines whose plain text equals a header or cell of
    *tables*, or the arrow ``➠``, are skipped, and parsing stops at the first
    line containing ``[핵심요약:``.

    Args:
        content: Section text; ``None`` is treated as empty.
        tables: Table dicts with optional ``headers`` / ``rows``; ``None`` is
            treated as no tables.
        bold_fn: Not used by this function.

    Returns:
        A list of ``(title, [bullet, ...])`` tuples; the title is ``""`` for
        bullets that appear before the first sub-heading.
    """
    content = re.sub(r'\*\*(.+?)\*\*', r'<strong>\1</strong>', content or "")

    # Texts already shown by the table, so the body does not repeat them.
    # str() covers non-string headers and cells (e.g. numbers).
    table_texts = set()
    for td in tables or []:
        for h in td.get("headers") or []:
            table_texts.add(str(h).strip().strip("*"))
        for row in td.get("rows") or []:
            for c in row:
                table_texts.add(str(c).strip().strip("*"))

    items = []
    current_title = ""
    current_bullets = []
    for line in content.split("\n"):
        stripped = line.strip()
        if not stripped:
            continue

        # Drop a leading "D<n>:" marker, then any bullet characters.
        stripped = re.sub(r'^D\d+:\s*', '', stripped)
        clean = stripped.lstrip("•- ").strip()
        clean_plain = re.sub(r'<[^>]+>', '', clean).strip()

        if clean_plain in table_texts or clean_plain == "➠":
            continue
        if re.search(r'\[핵심요약:', clean):
            break
        if not clean:
            continue

        # A short line with bold text is a sub-heading.
        if '<strong>' in clean and len(clean) < 80:
            if current_title or current_bullets:
                items.append((current_title, current_bullets))
            current_title = clean
            current_bullets = []
        else:
            current_bullets.append(clean)

    if current_title or current_bullets:
        items.append((current_title, current_bullets))

    return items


def _render_compare_table(table_data, arrow_uri, font_h):
    """Render a table as two bullet columns with an arrow image between them.

    The left column lists column 0 of every row and the right column lists
    column 2; a row that is too short contributes an empty bullet. ``**bold**``
    markdown is removed from the cell text.

    Args:
        table_data: Dict with ``headers`` and ``rows`` (a list of sequences).
        arrow_uri: Value for the ``src`` attribute of the arrow ``<img>``.
        font_h: Not used by this function.

    Returns:
        The HTML fragment, or ``""`` when headers or rows are missing/empty.
    """
    headers = table_data.get("headers", [])
    rows = table_data.get("rows", [])
    if not headers or not rows:
        return ""

    def _cell_text(row, idx):
        """Return column *idx* of *row* as plain text ('' if the row is too short)."""
        raw = row[idx] if len(row) > idx else ""
        return re.sub(r'\*\*(.+?)\*\*', r'\1', str(raw))

    def _column(idx):
        """Render column *idx* of every row as one flex child of bullets."""
        bullets = "".join(
            f'<div class="pp2-body-text">• {_cell_text(row, idx)}</div>' for row in rows
        )
        return f'<div style="flex:1;">{bullets}</div>'

    arrow_html = f'<div style="flex-shrink:0;width:30px;text-align:center;"><img src="{arrow_uri}" style="width:30px;height:16px;object-fit:contain;" alt="→"></div>'
    return (
        '<div style="display:flex;align-items:center;gap:4px;margin-bottom:4px;">'
        + _column(0)
        + arrow_html
        + _column(2)
        + '</div>'
    )