"""Phase Z-2 Composition Planner v0.

Pipeline 의 빠진 layer = MDX 덩어리들을 *최종 zone unit* 으로 묶는 결정 layer.

위치 :
  parse_mdx → align_sections_to_v4_granularity → [본 모듈] → render

원칙 (절대 룰) :
  - 특정 MDX / frame / section 하드코딩 X (예: "04-2 면" / "F16 이면")
  - 모든 결정 = catalog 메타 + V4 evidence parametric
  - 같은 코드가 MDX 02/03/04/05/06... 모두 처리 — 결과는 케이스마다 다름
  - drilling 결과 = 입력 (재료), composition planner 결과 = 출력 (zone units)
  - slide-level layout = zone 까지만 나눔. zone 내부 분할은 frame partial 책임

8 layout preset vocabulary :
  L1 single / L2 horizontal-2 / L3 vertical-2
  L4 top-1-bottom-2 / L5 top-2-bottom-1
  L6 left-1-right-2 / L7 left-2-right-1
  L8 grid-2x2
"""

from __future__ import annotations

import re
from dataclasses import dataclass, field
from pathlib import Path
from typing import Optional

import yaml


# ─── 8 Layout Preset Vocabulary — catalog-loaded (사용자 lock 2026-05-07) ───
#
# Source of truth = templates/phase_z2/layouts/layouts.yaml (사람이 보고 추가/수정 가능).
# 코드 hardcoded dict 폐기 (Step 7-A catalog 화). logic 변경 X — backward compat.
#
# catalog 의 추가 필드 (render_ready / default_selection / candidate_when) 는
# 기존 사용처에서 무시됨 — Step 7-B (multiple 후보) / Step 9 (layout × frame
# fit eval) 진입 시 입력.

# Absolute location of the layout preset catalog: starting from the directory
# that contains this module, go one level up, then into
# templates/phase_z2/layouts/layouts.yaml.
_LAYOUTS_CATALOG_PATH = Path(__file__).resolve().parent.parent.joinpath(
    "templates", "phase_z2", "layouts", "layouts.yaml"
)


def load_layout_presets() -> dict[str, dict]:
    """Read the layout preset catalog (YAML) and return its parsed content.

    The file at ``_LAYOUTS_CATALOG_PATH`` is decoded as UTF-8 and parsed with
    ``yaml.safe_load``. A falsy parse result (e.g. an empty document) is
    replaced by an empty dict.

    Returns:
        The catalog mapping. By the catalog convention described in the
        project notes, keys are layout ids (single / horizontal-2 / ...) and
        each value carries zones / topology / positions / css_areas /
        css_cols / css_rows plus optional render_ready / default_selection /
        candidate_when fields. This function does not validate that shape.
    """
    catalog_text = _LAYOUTS_CATALOG_PATH.read_text(encoding="utf-8")
    parsed = yaml.safe_load(catalog_text)
    return parsed or {}


# Layout preset catalog, read from disk once when this module is imported.
# select_layout_candidates() iterates this dict in its stored order.
LAYOUT_PRESETS: dict[str, dict] = load_layout_presets()


def select_layout_candidates(unit_count: int) -> list[str]:
    """Return the ids of catalog layouts that fit ``unit_count`` placement units.

    A layout is a candidate when its catalog entry has a truthy
    ``render_ready`` and its ``candidate_when.unit_count`` equals
    ``unit_count``.

    Args:
        unit_count: Number of final layout placement units (per the project
            notes this is the Step 4 output count, not the raw MDX section
            count).

    Returns:
        Layout ids, with entries whose ``default_selection`` is truthy first
        and the remaining matches after them. Within each group the order is
        the iteration order of ``LAYOUT_PRESETS``.

    Raises:
        ValueError: if ``unit_count`` is below 1 or above 4.
    """
    if unit_count < 1 or 4 < unit_count:
        raise ValueError(
            f"unit_count {unit_count} out of catalog scope [1, 4]"
        )

    def _is_candidate(spec: dict) -> bool:
        # Layouts not flagged render-ready are never offered.
        if not spec.get("render_ready", False):
            return False
        when = spec.get("candidate_when") or {}
        return when.get("unit_count") == unit_count

    matching = [
        (layout_id, spec)
        for layout_id, spec in LAYOUT_PRESETS.items()
        if _is_candidate(spec)
    ]
    preferred = [
        layout_id for layout_id, spec in matching
        if spec.get("default_selection", False)
    ]
    others = [
        layout_id for layout_id, spec in matching
        if not spec.get("default_selection", False)
    ]
    return preferred + others


# ─── Region Layout Catalog — Step 8-B-1 (사용자 lock 2026-05-07) ────────
#
# Source = templates/phase_z2/regions/region_layouts.yaml (SPEC §2.5).
# load 함수 + select_region_layout_candidates().
# 호출처 박힘 (Step 8-conn 2026-05-08) — phase_z2_pipeline.py 의 step08 artifact 가
# 본 함수 결과 기록 (placeholder signals: region_count=1, Step 3/4 부재 종속).

# Absolute location of the region layout catalog: starting from the directory
# that contains this module, go one level up, then into
# templates/phase_z2/regions/region_layouts.yaml.
_REGION_LAYOUTS_CATALOG_PATH = Path(__file__).resolve().parent.parent.joinpath(
    "templates", "phase_z2", "regions", "region_layouts.yaml"
)


def load_region_layouts() -> dict[str, dict]:
    """Read the internal-region layout catalog (YAML) and return it as parsed.

    The file at ``_REGION_LAYOUTS_CATALOG_PATH`` is decoded as UTF-8 and
    parsed with ``yaml.safe_load``; a falsy parse result (e.g. an empty
    document) becomes an empty dict. The YAML file is the source of truth —
    this function only reads it and performs no validation.
    """
    catalog_text = _REGION_LAYOUTS_CATALOG_PATH.read_text(encoding="utf-8")
    parsed = yaml.safe_load(catalog_text)
    return parsed or {}


# Region layout catalog, read from disk once when this module is imported.
REGION_LAYOUTS: dict[str, dict] = load_region_layouts()


def select_region_layout_candidates(
    region_count: int,
    content_type_mix: Optional[list[str]] = None,
    details_presence: bool = False,
    role_pattern: Optional[str] = None,
    ratio_asymmetric: bool = False,
    flow_type: Optional[str] = None,
    has_visual_element: bool = False,
    large_table: bool = False,
    long_text: bool = False,
) -> list[str]:
    """Return internal-region layout candidates via a sequential decision tree.

    Rules are checked in order and the first one that holds wins:
      1. region_count == 1                                  -> region-single
      2. details_presence / large_table / long_text         -> region-preview-details
      3. region_count == 4 and flow_type == 'parallel_4'    -> region-grid-2x2
      4. region_count == 2 and role_pattern ==
         'primary_supporting' and ratio_asymmetric          -> region-main-support
      5. region_count == 2 and has_visual_element           -> region-horizontal-split
      6. nothing matched                                    -> region-vertical-stack

    Args:
        region_count: Number of regions; must be within 1..4.
        content_type_mix: Accepted for interface compatibility; not consulted
            by the current rules.
        details_presence, large_table, long_text: Any truthy value triggers
            rule 2.
        role_pattern, ratio_asymmetric: Inputs to rule 4.
        flow_type: Input to rule 3.
        has_visual_element: Input to rule 5.

    Returns:
        ``["region-single"]`` when region_count is 1 (no fallback appended);
        otherwise ``[matched, "region-vertical-stack"]`` when one of rules
        2-5 matched, or just ``["region-vertical-stack"]``.

    Raises:
        ValueError: if region_count is below 1 or above 4.
    """
    if region_count < 1 or 4 < region_count:
        raise ValueError(
            f"region_count {region_count} out of catalog scope [1, 4]"
        )

    # Rule 1 short-circuits everything else, including the fallback entry.
    if region_count == 1:
        return ["region-single"]

    stacked = "region-vertical-stack"
    is_pair = region_count == 2

    # Rules 2-5 as (condition, layout id), in priority order.
    ordered_rules = (
        (details_presence or large_table or long_text,
         "region-preview-details"),
        (region_count == 4 and flow_type == "parallel_4",
         "region-grid-2x2"),
        (is_pair and role_pattern == "primary_supporting" and ratio_asymmetric,
         "region-main-support"),
        (is_pair and has_visual_element,
         "region-horizontal-split"),
    )
    for holds, layout_id in ordered_rules:
        if holds:
            # The matched layout comes first, with the fallback as second choice.
            return [layout_id, stacked]

    # Rule 6: nothing matched.
    return [stacked]


# ─── Display Strategy Catalog — Step 8-B-2 (사용자 lock 2026-05-07) ────
#
# Source = templates/phase_z2/regions/display_strategies.yaml (4 entry).
# load 함수 + select_display_strategy_candidates().
# 호출처 박힘 (Step 8-conn 2026-05-08) — phase_z2_pipeline.py 의 step08 artifact 가
# 본 함수 결과 기록 (placeholder signals: content_type="text_block", Step 3/4 부재 종속).

# Absolute location of the display strategy catalog: starting from the
# directory that contains this module, go one level up, then into
# templates/phase_z2/regions/display_strategies.yaml.
_DISPLAY_STRATEGIES_CATALOG_PATH = Path(__file__).resolve().parent.parent.joinpath(
    "templates", "phase_z2", "regions", "display_strategies.yaml"
)


def load_display_strategies() -> dict[str, dict]:
    """Read the display strategy catalog (YAML) and return it as parsed.

    The file at ``_DISPLAY_STRATEGIES_CATALOG_PATH`` is decoded as UTF-8 and
    parsed with ``yaml.safe_load``; a falsy parse result (e.g. an empty
    document) becomes an empty dict. The YAML file is the source of truth —
    this function only reads it and performs no validation.
    """
    catalog_text = _DISPLAY_STRATEGIES_CATALOG_PATH.read_text(encoding="utf-8")
    parsed = yaml.safe_load(catalog_text)
    return parsed or {}


# Display strategy catalog, read from disk once when this module is imported.
# select_display_strategy_candidates() reads each entry's applies_to /
# forbidden_for lists from this dict.
DISPLAY_STRATEGIES: dict[str, dict] = load_display_strategies()


# Content types accepted by select_display_strategy_candidates(); any other
# value makes that function raise ValueError.
_KNOWN_CONTENT_TYPES = frozenset((
    "text_block",
    "table",
    "image",
    "details",
    "decorative_element",
))


def select_display_strategy_candidates(
    content_type: str,
    long_text: bool = False,
    large_table: bool = False,
    fits_in_region: Optional[bool] = None,
) -> list[str]:
    """Return display strategy candidates for one content type, best first.

    Only ``DISPLAY_STRATEGIES`` (display_strategies.yaml) is consulted.

    Hard filter (read straight from the catalog):
      - a strategy is eligible only if ``content_type`` is listed in its
        ``applies_to`` and is not listed in its ``forbidden_for``.
      - per the project notes, the catalog forbids ``dropped`` for
        text_block / table / image / details so their content is never lost;
        that guarantee lives in the catalog data, not in this function.

    Ranking (the eligible strategies are emitted in this order):
      decorative_element                      -> inline_full, dropped
      any other type, when long_text or
        large_table is truthy or
        fits_in_region is exactly False       -> inline_preview_with_details,
                                                 details_only, inline_full
      any other type, otherwise               -> inline_full,
                                                 inline_preview_with_details,
                                                 details_only

    Args:
        content_type: One of the values in ``_KNOWN_CONTENT_TYPES``.
        long_text: Hint that the text is long.
        large_table: Hint that the table is large.
        fits_in_region: Lightweight fit hint; only an explicit ``False``
            changes the ranking (``None`` is treated like "no signal").

    Returns:
        Strategy names from the applicable ranking that passed the hard
        filter; may be empty if the catalog allows none of them.

    Raises:
        ValueError: if ``content_type`` is not in ``_KNOWN_CONTENT_TYPES``.
    """
    if content_type not in _KNOWN_CONTENT_TYPES:
        raise ValueError(
            f"content_type {content_type!r} out of catalog scope "
            f"(known: {sorted(_KNOWN_CONTENT_TYPES)})"
        )

    def _permits(meta: dict) -> bool:
        # forbidden_for always overrides applies_to.
        if content_type in (meta.get("forbidden_for") or []):
            return False
        return content_type in (meta.get("applies_to") or [])

    eligible = {
        name for name, meta in DISPLAY_STRATEGIES.items() if _permits(meta)
    }

    if content_type == "decorative_element":
        ranking = ("inline_full", "dropped")
    elif long_text or large_table or fits_in_region is False:
        # Overflow risk signalled: prefer the preview/details strategies.
        ranking = (
            "inline_preview_with_details",
            "details_only",
            "inline_full",
        )
    else:
        ranking = (
            "inline_full",
            "inline_preview_with_details",
            "details_only",
        )

    return [strategy for strategy in ranking if strategy in eligible]


# ─── CompositionUnit ────────────────────────────────────────────

@dataclass
class CompositionUnit:
    """One zone candidate within a slide: MDX section(s) plus the matched frame.

    Attributes:
        source_section_ids: ids of the sections covered by this unit
            (one id = single, two or more = merged).
        merge_type: how the unit was formed —
            "single"                 : one section,
            "parent_merged"          : the parent has its own V4 entry (v0),
            "parent_merged_inferred" : no parent V4 entry; inferred from
                                       child evidence (v0.1).
        frame_template_id / frame_id / frame_number / confidence / label:
            taken as-is from the V4 evidence (no catalog meta, no hardcoding).
        phase_z_status: Phase Z status associated with ``label``.
        raw_content: section content carried by this unit.
        title: display title of the unit.
        v4_rank / selection_path / fallback_reason: provenance of the backing
            V4 match, when the match object exposes them.
        score: overall score.
        rationale: score breakdown, kept for traceability.
        auto_selectable: False excludes the unit from automatic selection.
        filter_reasons: why ``auto_selectable`` is False.
        notes: informational signals that do not affect ``auto_selectable``.
        v4_candidates: V4 candidate list in rank order (may be empty).
        provisional: True when the backing match is a provisional fallback.
    """
    source_section_ids: list[str]
    merge_type: str
    frame_template_id: str
    frame_id: str
    frame_number: int
    confidence: float
    label: str                         # use_as_is / light_edit / restructure / reject
    phase_z_status: str
    raw_content: str
    title: str
    v4_rank: Optional[int] = None
    selection_path: str = "rank_1"
    fallback_reason: Optional[str] = None
    score: float = 0.0
    rationale: dict = field(default_factory=dict)

    # Automatic-pipeline state (no review/UI concept here — only automatic
    # decisions plus an explicit record of why a candidate was excluded).
    # auto_selectable=False removes the unit from the automatic selection step;
    # filter_reasons holds the reason(s).
    # e.g. W1/W2/W3 for parent_merged_inferred (rep status / all reject /
    # majority not-auto-renderable).
    # Human/AI review is handled by a separate layer (interactive editor); this
    # dataclass only carries the completed automatic decision.
    auto_selectable: bool = True
    filter_reasons: list[str] = field(default_factory=list)
    # Informational signals — independent of auto_selectable. Reserved for
    # future axes to turn into scores.
    # e.g. "children disagree on rank-1 template_id" / "minority of children non-auto-renderable"
    notes: list[str] = field(default_factory=list)

    # Added for the Step 6-A axis (user lock 2026-05-08).
    # V4 candidate list (duck-typed, V4Match-shaped — this module does not
    # import the V4Match dataclass, to avoid a circular dependency). Each entry
    # has attrs: template_id / frame_id / frame_number / confidence / label.
    # List order = V4 rank (candidates[0] = rank-1 non-reject — matches the
    # single frame_template_id / frame_id / label / confidence fields;
    # backward-compat lock).
    # Empty list = "no_non_reject_v4_candidate" signal (Step 9 application_plan input).
    v4_candidates: list = field(default_factory=list)

    # IMP-30 u2 — provisional first-render flag. True when the V4Match
    # backing this unit was synthesized via lookup_v4_match_with_fallback
    # (allow_provisional=True) after chain_exhausted, or when u3 inserts
    # a last-resort provisional fill for an uncovered section. Carried as
    # data (not re-derived from label/selection_path downstream) so the
    # render path / status / zone template can surface "needs adaptation"
    # uniformly. Default False keeps non-provisional units byte-identical.
    provisional: bool = False


# ─── Heading Tree ──────────────────────────────────────────────

def derive_parent_id(section_id: str) -> Optional[str]:
    """Derive the parent section id from ``section_id`` using V4 key conventions.

    Two id shapes yield a parent:
      * canonical ordinal ``<parent>-sub-<digits>`` (checked first, IMP-08 B-3):
        the parent is everything before the final ``-sub-<digits>``;
      * legacy decimal ``<mdx>-<a>.<b>``: the part after the first ``-`` is cut
        at its first ``.``, giving ``<mdx>-<a>``.
    Any other id is treated as top-level.

    Examples (illustrative, not rules):
      - "03-1-sub-2" -> "03-1"
      - "04-2.1"     -> "04-2"
      - "04-1"       -> None
      - "04"         -> None

    Returns:
        The parent id, or None when the id has no recognisable parent.
    """
    ordinal = re.fullmatch(r"(.+?)-sub-(\d+)", section_id)
    if ordinal is not None:
        return ordinal.group(1)

    # Legacy decimal path: only the first "-" separates the mdx id from the rest.
    mdx_id, dash, suffix = section_id.partition("-")
    if not dash or "." not in suffix:
        return None
    parent_suffix = suffix.partition(".")[0]
    return f"{mdx_id}-{parent_suffix}"


def build_heading_tree(sections) -> dict:
    """Index sections by id and attach each one to its parent node.

    Args:
        sections: objects exposing a ``section_id`` attribute. The collection
            is iterated twice.

    Returns:
        ``{section_id: {"section": <section>, "children": [child ids]}}``.
        A child is recorded only when ``derive_parent_id`` yields a parent id
        that is itself present in ``sections``; ids whose parent is absent
        are kept as nodes but attached to nothing.
    """
    tree: dict = {}
    for section in sections:
        tree[section.section_id] = {"section": section, "children": []}

    for section in sections:
        parent_id = derive_parent_id(section.section_id)
        parent_node = tree.get(parent_id) if parent_id else None
        if parent_node is not None:
            parent_node["children"].append(section.section_id)
    return tree


# ─── Candidate Generation ──────────────────────────────────────

def _apply_capacity_fit(candidate: CompositionUnit, capacity_fit_fn) -> None:
    """Record the capacity-fit result on ``candidate`` and veto it on mismatch.

    Does nothing when ``capacity_fit_fn`` is None. Otherwise the function is
    called with ``(candidate.frame_template_id, candidate.raw_content)`` and
    its result dict is stored under ``candidate.rationale["capacity_fit"]``.

    A ``fit_status`` of 'ok' / 'no_contract' / 'unknown_source_shape' leaves
    ``auto_selectable`` untouched. Any other status sets
    ``auto_selectable=False`` and appends a 'C1: ...' entry to
    ``filter_reasons``. Per the project notes, those other statuses
    (strict_mismatch / exceeds_max / below_min / exceeds_truncate) would lead
    to silent loss or a mapper FitError, and 'no_contract' is handled
    separately by the catalog-only mapper.
    """
    if capacity_fit_fn is None:
        return

    fit = capacity_fit_fn(candidate.frame_template_id, candidate.raw_content)
    candidate.rationale["capacity_fit"] = fit

    status = fit["fit_status"]
    tolerated = status in {"ok", "no_contract", "unknown_source_shape"}
    if not tolerated:
        candidate.auto_selectable = False
        candidate.filter_reasons.append(
            f"C1: capacity mismatch ({status}) — {fit['mismatch_reason']}"
        )


def collect_candidates(sections, v4_lookup_fn, v4_label_to_status: dict,
                       auto_renderable_statuses: Optional[set[str]] = None,
                       capacity_fit_fn=None,
                       v4_candidates_lookup_fn=None):
    """Generate composition candidates.

    v0.1 candidate types :
      1. single                   : one per section that has a V4 match
      2. parent_merged            : the parent id itself has a V4 match
      3. parent_merged_inferred   : the parent has no V4 match; a representative
                                    template is inferred from child evidence

    Principles :
      - no hardcoded section_id / template_id / frame
      - every decision = derive_parent_id() + V4 evidence + v4_label_to_status
        mapping + injected functions (parametric)

    Args:
        sections : alignment result; each item exposes section_id / raw_content / title
        v4_lookup_fn : (section_id) -> V4Match | None  (rank-1 only, kept for compatibility)
        v4_label_to_status : V4 label -> Phase Z status mapping
        auto_renderable_statuses : set of statuses allowed to auto-render
            (input to the W1/W3 checks); None is treated as an empty set
        capacity_fit_fn : Optional (template_id, content) -> fit dict.
            When given it is applied to every candidate — a capacity mismatch
            sets auto_selectable=False (guards against silent truncation /
            mapper FitError).
        v4_candidates_lookup_fn : Optional (section_id) -> list[V4Match].
            Step 6-A axis (user lock 2026-05-08). Non-reject max-N candidate list.
            When given, every candidate's v4_candidates field is filled.
            When None, v4_candidates = [] (backward compat).
            This function absorbs the raw V4 dict structure — the composition
            module does not know the V4 yaml shape.

    Returns:
        list[CompositionUnit]
    """
    if auto_renderable_statuses is None:
        auto_renderable_statuses = set()

    def _v4_cands(section_id: str) -> list:
        # Empty list when v4_candidates_lookup_fn was not provided (backward compat).
        return v4_candidates_lookup_fn(section_id) if v4_candidates_lookup_fn else []

    candidates = []

    # 1. single — one candidate per section that has a V4 match
    for s in sections:
        match = v4_lookup_fn(s.section_id)
        if match is None:
            continue
        c = CompositionUnit(
            source_section_ids=[s.section_id],
            merge_type="single",
            frame_template_id=match.template_id,
            frame_id=match.frame_id,
            frame_number=match.frame_number,
            confidence=match.confidence,
            label=match.label,
            phase_z_status=v4_label_to_status.get(match.label, "unknown"),
            v4_rank=getattr(match, "v4_rank", None),
            selection_path=getattr(match, "selection_path", "rank_1"),
            fallback_reason=getattr(match, "fallback_reason", None),
            raw_content=s.raw_content,
            title=s.title,
            v4_candidates=_v4_cands(s.section_id),
            provisional=getattr(match, "provisional", False),
        )
        _apply_capacity_fit(c, capacity_fit_fn)
        candidates.append(c)

    # Group sections under their derived parent id (parent -> children).
    parent_to_children: dict[str, list] = {}
    for s in sections:
        pid = derive_parent_id(s.section_id)
        if pid:
            parent_to_children.setdefault(pid, []).append(s)

    # 2. parent_merged (the parent id itself has a V4 match)
    for pid, children in parent_to_children.items():
        parent_match = v4_lookup_fn(pid)
        if parent_match is None:
            continue                    # handled by branch 3
        if len(children) < 2:
            continue                    # merging a single child is pointless
        merged_raw = "\n\n".join(c.raw_content for c in children)
        c_pm = CompositionUnit(
            source_section_ids=[c.section_id for c in children],
            merge_type="parent_merged",
            frame_template_id=parent_match.template_id,
            frame_id=parent_match.frame_id,
            frame_number=parent_match.frame_number,
            confidence=parent_match.confidence,
            label=parent_match.label,
            phase_z_status=v4_label_to_status.get(parent_match.label, "unknown"),
            v4_rank=getattr(parent_match, "v4_rank", None),
            selection_path=getattr(parent_match, "selection_path", "rank_1"),
            fallback_reason=getattr(parent_match, "fallback_reason", None),
            raw_content=merged_raw,
            title=pid,
            v4_candidates=_v4_cands(pid),
            provisional=getattr(parent_match, "provisional", False),
        )
        _apply_capacity_fit(c_pm, capacity_fit_fn)
        candidates.append(c_pm)

    # 3. parent_merged_inferred (v0.1) — no parent V4 match, built from child evidence
    for pid, children in parent_to_children.items():
        if v4_lookup_fn(pid) is not None:
            continue                    # already handled by branch 2
        if len(children) < 2:
            continue
        # Only children that have a V4 match count as evidence.
        child_matches: list[tuple] = []
        for c in children:
            m = v4_lookup_fn(c.section_id)
            if m is not None:
                child_matches.append((c, m))
        if len(child_matches) < 2:
            continue                    # at least two pieces of child evidence required

        # representative = child match with the highest confidence (v0.1.1 simple rule)
        # future axes : top-k convergence, template family agreement, cardinality_fit, ...
        rep_child, rep_match = max(child_matches, key=lambda cm: cm[1].confidence)

        # auto_selectable defaults to True (strong inferred merge).
        # Any one of the following weak signals sets auto_selectable=False
        # (the reason goes into filter_reasons) :
        #   W1 : representative status is not auto-renderable -> auto render is blocked
        #   W2 : every child is labeled reject -> the merge has no basis
        #   W3 : non-auto-renderable child labels are the majority (>50%)
        # Informational notes (no effect on auto_selectable; future scoring axes) :
        #   N1 : children's rank-1 template_id differ -> top-k / family compat
        #   N2 : some (a minority of) child labels are non-auto-renderable
        rep_status = v4_label_to_status.get(rep_match.label, "unknown")
        child_labels = [m.label for _, m in child_matches]
        child_template_ids_unique = sorted({m.template_id for _, m in child_matches})
        n_children = len(child_matches)
        # Labels missing from v4_label_to_status map to None here, which counts
        # as not auto-renderable unless None is itself in auto_renderable_statuses.
        n_not_auto = sum(
            1 for l in child_labels
            if v4_label_to_status.get(l) not in auto_renderable_statuses
        )

        filter_reasons: list[str] = []
        notes: list[str] = []

        if rep_status not in auto_renderable_statuses:
            filter_reasons.append(
                f"W1: representative status '{rep_status}' (label={rep_match.label}) "
                f"not in auto_renderable_statuses={sorted(auto_renderable_statuses)}."
            )
        if all(l == "reject" for l in child_labels):
            filter_reasons.append(
                "W2: all children labeled 'reject' — merge has no fit basis."
            )
        if n_children > 0 and n_not_auto * 2 > n_children:
            non_auto_labels = sorted({
                l for l in child_labels
                if v4_label_to_status.get(l) not in auto_renderable_statuses
            })
            filter_reasons.append(
                f"W3: majority of children ({n_not_auto}/{n_children}) have "
                f"non-auto-renderable labels {non_auto_labels}."
            )

        if len(child_template_ids_unique) > 1:
            notes.append(
                f"N1: children's rank-1 template_id differs ({child_template_ids_unique}). "
                f"representative='{rep_match.template_id}' (highest child confidence). "
                f"top-k / family compatibility 평가는 future axis."
            )
        # NOTE(review): this condition does not look at rep_status, so N2 can be
        # emitted together with W1 (e.g. 3 children where the single
        # non-auto-renderable one is the representative). In that case the
        # message's "representative is auto-renderable" wording is inaccurate.
        if 0 < n_not_auto <= n_children // 2:
            non_auto_labels_minority = sorted({
                l for l in child_labels
                if v4_label_to_status.get(l) not in auto_renderable_statuses
            })
            notes.append(
                f"N2: minority ({n_not_auto}/{n_children}) of children non-auto-renderable "
                f"({non_auto_labels_minority}). representative is auto-renderable, merge proceeds."
            )

        auto_selectable = len(filter_reasons) == 0

        merged_raw = "\n\n".join(c.raw_content for c, _ in child_matches)
        c_inf = CompositionUnit(
            source_section_ids=[c.section_id for c, _ in child_matches],
            merge_type="parent_merged_inferred",
            frame_template_id=rep_match.template_id,
            frame_id=rep_match.frame_id,
            frame_number=rep_match.frame_number,
            confidence=rep_match.confidence,
            label=rep_match.label,
            phase_z_status=rep_status,
            v4_rank=getattr(rep_match, "v4_rank", None),
            selection_path=getattr(rep_match, "selection_path", "rank_1"),
            fallback_reason=getattr(rep_match, "fallback_reason", None),
            raw_content=merged_raw,
            title=pid,
            auto_selectable=auto_selectable,
            filter_reasons=filter_reasons,
            notes=notes,
            # V4 candidate list of rep_child (same origin as rep_match, so it
            # stays consistent with the frame_* fields).
            v4_candidates=_v4_cands(rep_child.section_id),
            # IMP-30 u2 — rep_match drives frame selection so its provisional
            # flag flows here. If a non-rep child match is provisional but the
            # rep is not, this unit is not provisional (the rep frame is real).
            provisional=getattr(rep_match, "provisional", False),
        )
        _apply_capacity_fit(c_inf, capacity_fit_fn)
        candidates.append(c_inf)

    return candidates


# ─── Scoring ───────────────────────────────────────────────────

# v0 label weights: score multiplier per V4 label (score_candidate multiplies
# a candidate's confidence by this value).
# To be extended when further axes are added
# (cardinality_fit / hierarchy_coherence / density).
V0_LABEL_WEIGHT = dict(
    use_as_is=1.0,
    light_edit=0.7,
    restructure=0.4,
    reject=0.0,
)


def score_candidate(c: CompositionUnit) -> CompositionUnit:
    """Score a candidate in place (v0): ``confidence * label weight``.

    The weight comes from ``V0_LABEL_WEIGHT``; a label missing from that table
    weighs 0.0. The result is written to ``c.score`` and the breakdown is
    merged into ``c.rationale`` — keys already present (e.g. ``capacity_fit``
    set while collecting candidates) are kept unless they share a name with a
    breakdown key.

    ``hierarchy_coherence`` and ``density_score`` are written as None: they
    are placeholders for axes that are not computed yet.

    Returns:
        The same ``c`` object, mutated.
    """
    weight = V0_LABEL_WEIGHT.get(c.label, 0.0)
    compat = c.confidence * weight
    c.score = compat

    breakdown = {
        "frame_compat": round(compat, 4),
        "confidence": c.confidence,
        "label": c.label,
        "label_weight": weight,
        "merge_type": c.merge_type,
        # placeholders for future axes
        "hierarchy_coherence": None,
        "density_score": None,
    }
    # update() rather than assignment, so pre-existing rationale entries survive.
    c.rationale.update(breakdown)
    return c


# ─── Selection ─────────────────────────────────────────────────

def select_composition_units(
    candidates,
    allowed_statuses: set[str],
    *,
    all_section_ids: Optional[list[str]] = None,
    allow_provisional_fill: bool = False,
) -> list[CompositionUnit]:
    """Pick non-overlapping units greedily by score, with a coverage tiebreak.

    Steps:
      1. Every candidate is scored via ``score_candidate`` (mutates it).
      2. Candidates are kept only if ``phase_z_status`` is in
         ``allowed_statuses`` and ``auto_selectable`` is truthy.
      3. They are ordered by (score desc, number of covered sections desc), so
         on equal score the candidate covering more sections goes first.
      4. Walking that order, a candidate is taken unless one of its sections
         is already covered by an earlier pick.

    Candidates with ``auto_selectable=False`` are never picked in the normal
    pass; they are left untouched for the caller's debug output.

    IMP-30 u3 — last-resort provisional fill (opt-in):
      When ``allow_provisional_fill`` is truthy and ``all_section_ids`` is
      non-empty, sections from ``all_section_ids`` still uncovered after the
      normal pass are offered to candidates with ``provisional`` truthy that
      touch at least one uncovered section. Those are ranked with the same
      key and taken under the same no-overlap rule. This pass ignores
      ``allowed_statuses`` and ``auto_selectable``. With the default
      ``allow_provisional_fill=False`` the pass is skipped entirely.

    Args:
        candidates: full candidate pool (e.g. from collect_candidates()).
        allowed_statuses: phase_z_status values considered auto-renderable.
        all_section_ids: section ids to check coverage against; consulted
            only when ``allow_provisional_fill`` is truthy. If it is None or
            empty the fill pass is silently skipped.
        allow_provisional_fill: enable the last-resort provisional fill.

    Returns:
        Selected units: normal-pass picks first, provisional fills after.
    """
    picked: list = []
    covered: set = set()

    def _rank(pool) -> list:
        # Stable sort: candidates with equal keys keep their incoming order.
        return sorted(
            pool,
            key=lambda unit: (unit.score, len(unit.source_section_ids)),
            reverse=True,
        )

    def _take_non_overlapping(ranked) -> None:
        for unit in ranked:
            if covered.isdisjoint(unit.source_section_ids):
                picked.append(unit)
                covered.update(unit.source_section_ids)

    scored = [score_candidate(unit) for unit in candidates]

    # Normal greedy pass over the auto-selectable candidates.
    _take_non_overlapping(_rank(
        unit for unit in scored
        if unit.phase_z_status in allowed_statuses and unit.auto_selectable
    ))

    # IMP-30 u3 — last-resort provisional fill (opt-in, default off).
    # Candidates filtered out above are not mutated here.
    if allow_provisional_fill and all_section_ids:
        uncovered = {sid for sid in all_section_ids if sid not in covered}
        if uncovered:
            _take_non_overlapping(_rank(
                unit for unit in scored
                if unit.provisional
                and not uncovered.isdisjoint(unit.source_section_ids)
            ))

    return picked


# ─── Layout Preset Selection ───────────────────────────────────

def select_layout_preset(units: list[CompositionUnit]) -> Optional[str]:
    """Choose the default layout preset from the number of units (v0).

    Mapping:
      0 units -> None
      1 unit  -> single
      2 units -> horizontal-2
      3 units -> top-1-bottom-2
      4 units -> grid-2x2

    v0 limitation: only ``len(units)`` is inspected. Aspect and content-weight
    signals are not considered, so 2 units always get horizontal-2 and 3 units
    always get top-1-bottom-2; the other 2-/3-zone variants are never chosen
    here.

    Raises:
        ValueError: for more than 4 units.
    """
    default_by_count = {
        0: None,
        1: "single",
        2: "horizontal-2",
        3: "top-1-bottom-2",
        4: "grid-2x2",
    }
    n = len(units)
    if n not in default_by_count:
        raise ValueError(
            f"Composition v0 : layout for {n} units not supported (max 4). "
            "Larger counts require split-into-multiple-slides decision (future)."
        )
    return default_by_count[n]


# ─── Public entry — composition pipeline ───────────────────────

def plan_composition(sections, v4_lookup_fn, v4_label_to_status: dict,
                     allowed_statuses: set[str],
                     capacity_fit_fn=None,
                     v4_candidates_lookup_fn=None,
                     *,
                     allow_provisional_fill: bool = False) -> tuple[list[CompositionUnit], Optional[str], dict]:
    """Composition planner v0.2 entry point.

    Collects candidates for ``sections``, scores them, selects the units to
    render, picks a layout preset from the selected unit count, and builds a
    debug report describing every candidate.

    Design notes carried with this entry point (the behavior itself lives in
    the helpers called below):

    v0.2 change:
      - When ``capacity_fit_fn`` is injected, every candidate gets an up-front
        capacity check (guards against silent truncation / mapper FitError).
        A mismatch results in auto_selectable=False plus a filter reason of
        the form 'C1: ...'.

    Step 6-A axis (user lock 2026-05-08):
      - When ``v4_candidates_lookup_fn`` is injected, every CompositionUnit
        gets ``v4_candidates`` populated. No logic change: the single
        frame_template_id / frame_id / label / confidence stay as they were
        and runtime results are unaffected. This is a schema extension that
        feeds the Step 9 application_plan.

    IMP-30 u3 — last-resort provisional fill (opt-in, default off):
      ``allow_provisional_fill`` is plumbed to select_composition_units().
      When True, uncovered sections receive a provisional fill from candidates
      whose backing V4Match was synthesized via ``allow_provisional=True``
      (IMP-30 u1). ``_candidate_state`` returns ``selected_provisional`` for
      those filled units so the debug summary distinguishes greedy selections
      from provisional fills. Default False keeps IMP-05 behavior identical.

    v0.1 / v0.1.1 behavior (retained):
      - parent_merged_inferred candidates are generated even without a
        parent V4 match.
      - There is no "review" concept; auto_selectable + filter_reasons alone
        drive the automatic decision.
      - Selection: score descending, with a larger-coverage tiebreak.

    Args:
        sections: sections to plan; forwarded to collect_candidates(). Each
            item's ``section_id`` is read only when
            ``allow_provisional_fill`` is True.
        v4_lookup_fn: forwarded to collect_candidates().
        v4_label_to_status: forwarded to collect_candidates().
        allowed_statuses: statuses treated as auto-renderable; forwarded to
            candidate collection and selection, and used here to classify
            non-selected candidates.
        capacity_fit_fn: optional capacity checker forwarded to
            collect_candidates().
        v4_candidates_lookup_fn: optional lookup forwarded to
            collect_candidates().
        allow_provisional_fill: forwarded to select_composition_units().

    Returns:
        units: the automatically selected composition units.
        layout_preset: one of the 8 layout vocabulary ids (or None).
        debug: all candidates + capacity_fit + filter_reasons + the reasoning
            behind the preset decision.

    Raises:
        ValueError: propagated from select_layout_preset() when the selected
            unit count has no supported layout.
    """
    raw_candidates = collect_candidates(
        sections, v4_lookup_fn, v4_label_to_status,
        auto_renderable_statuses=allowed_statuses,
        capacity_fit_fn=capacity_fit_fn,
        v4_candidates_lookup_fn=v4_candidates_lookup_fn,
    )
    scored_candidates = [score_candidate(cand) for cand in raw_candidates]

    # Section ids are only handed to the selector when provisional fill is on.
    section_ids = None
    if allow_provisional_fill:
        section_ids = [sec.section_id for sec in sections]

    chosen_units = select_composition_units(
        raw_candidates,
        allowed_statuses,
        all_section_ids=section_ids,
        allow_provisional_fill=allow_provisional_fill,
    )
    layout = select_layout_preset(chosen_units)

    def _candidate_state(cand: CompositionUnit) -> str:
        """Classify one candidate for the debug summary."""
        if cand in chosen_units:
            # IMP-30 u3 — provisional-fill units surface as a distinct state
            # so downstream debug consumers can tell greedy selection apart
            # from last-resort fill.
            return "selected_provisional" if cand.provisional else "selected"
        if cand.phase_z_status not in allowed_statuses:
            # Status is not in the allowed (auto-renderable) set.
            return "filtered_status"
        if cand.auto_selectable:
            # Passed the status and auto_selectable checks but was not selected.
            return "filtered_lost"
        # The filter_reasons prefix separates capacity ("C…") from the rest.
        capacity_blocked = any(
            reason.startswith("C") for reason in cand.filter_reasons
        )
        return "filtered_capacity" if capacity_blocked else "filtered_weak"

    def _summary_row(cand: CompositionUnit) -> dict:
        """Flatten one scored candidate into a debug-summary dict."""
        return {
            "source_section_ids": cand.source_section_ids,
            "merge_type": cand.merge_type,
            "template_id": cand.frame_template_id,
            "label": cand.label,
            "phase_z_status": cand.phase_z_status,
            "v4_rank": cand.v4_rank,
            "selection_path": cand.selection_path,
            "fallback_reason": cand.fallback_reason,
            "score": cand.score,
            "selection_state": _candidate_state(cand),
            "auto_selectable": cand.auto_selectable,
            "filter_reasons": list(cand.filter_reasons),
            "notes": list(cand.notes),
            "capacity_fit": cand.rationale.get("capacity_fit"),
        }

    summary_rows = [_summary_row(cand) for cand in scored_candidates]

    merged_kinds = {"parent_merged", "parent_merged_inferred"}
    merge_rows = [
        row for row in summary_rows if row["merge_type"] in merged_kinds
    ]
    capacity_rows = [
        row for row in summary_rows
        if row["selection_state"] == "filtered_capacity"
    ]

    # Candidates that pass both the status filter and the auto_selectable gate.
    viable_auto_count = sum(
        1
        for cand in scored_candidates
        if cand.phase_z_status in allowed_statuses and cand.auto_selectable
    )

    if layout:
        preset_rationale = f"v0 count-based: {len(chosen_units)} units → {layout}"
    else:
        preset_rationale = "no viable units"

    debug = {
        "planner_version": "v0.2",
        "selection_rule": (
            "score desc, then source_section_ids count desc (coverage tiebreak). "
            "filter = phase_z_status ∉ allowed_statuses OR auto_selectable=False. "
            "auto_selectable=False 사유 : C1 (capacity mismatch — silent truncate / FitError 차단), "
            "W1 (rep not auto-renderable), W2 (all children reject), W3 (majority children non-auto-renderable)."
        ),
        "candidates_total": len(scored_candidates),
        "candidates_viable_auto": viable_auto_count,
        "candidates_summary": summary_rows,
        "merge_candidates": merge_rows,
        "capacity_mismatches": capacity_rows,
        "selected_units_count": len(chosen_units),
        "layout_preset": layout,
        "layout_preset_rationale": preset_rationale,
    }

    return chosen_units, layout, debug