feat(#86): IMP-86 u1~u5 placeholder zones_data + invariant guard

Mapper FitError handler now appends a __empty__ placeholder to zones_data
and a matching debug_zone so the surviving cardinality stays in sync with
the active layout preset's grid rows. A pre-build_layout_css invariant
guard fails fast with preset/positions/count diagnostics if drift recurs.
Per-record telemetry (adapter_needed, mapper_fit_error, provisional) is
exposed on both placeholder records; authoritative slide_status.adapter_
needed_units schema is unchanged.

Closes mdx03 reject override regression: Step 12 AI router now reachable
without heights_px ValueError; default-path behavior unaffected.

u1 — FitError placeholder zones_data + debug_zone (src/phase_z2_pipeline.py)
u2 — pre-build_layout_css invariant guard (src/phase_z2_pipeline.py)
u3 — horizontal-2 normal+placeholder helper unit (test_compute_per_zone_geometry.py)
u4 — mdx03 reject override → Step 12 integration + default regression
u5 — placeholder telemetry surface (adapter_needed/mapper_fit_error/provisional)

Tests:
- u3 helper: 7 passed (0.06s)
- u4+u5 integration: 2 passed (7.87s)
- Phase Z2 + AI fallback regression: 544 passed (66.28s)
- Broader sweep (excl. matching/pipeline heavy): 1066 passed (96.12s)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-23 18:25:14 +09:00
parent cacc5b30db
commit c53722ad0b
3 changed files with 397 additions and 1 deletions

View File

@@ -4434,15 +4434,87 @@ def run_phase_z2_mvp1(
try:
slot_payload = map_mdx_to_slots(synth_section, unit.frame_template_id)
except FitError as e:
_fit_error_str = str(e)
_unit_provisional = bool(getattr(unit, "provisional", False))
adapter_record = {
"position": position,
"source_section_ids": unit.source_section_ids,
"merge_type": unit.merge_type,
"template_id": unit.frame_template_id,
"reason": "fit_error",
"fit_error": str(e),
"fit_error": _fit_error_str,
}
adapter_needed_units.append(adapter_record)
# IMP-86 u1 — placeholder zones_data + debug_zone keep the failed
# unit's preset position so downstream build_layout_css /
# _compute_per_zone_geometry observe len(zones_data) == active
# layout preset's css_areas rows (R). Without this both arrays
# drift relative to R, raising
# ValueError("heights_px length N != grid rows R=M") at
# _compute_per_zone_geometry. Mirrors the IMP-30 empty-shell
# pattern: zones_data uses template_id="__empty__" so render_slide
# short-circuits to empty partial_html; debug_zone keeps the
# original V4 evidence and adapter_needed_units stays the
# authoritative adapter signal.
# IMP-86 u5 — per-record telemetry. adapter_needed=True +
# mapper_fit_error=<str(FitError)> + provisional mirror the
# adapter signal directly on each placeholder record so
# debug.json / final.html consumers can identify the adapter
# contract surface from the zones array alone, without joining
# against slide_status.adapter_needed_units. adapter_needed_units
# itself is unchanged (still the authoritative per-slide list).
_placeholder_popup = compose_zone_popup_payload(unit, 0)
zones_data.append({
"position": position,
"template_id": "__empty__",
"slot_payload": {},
"content_weight": {"score": 0},
"min_height_px": min_height_px,
"assignment_source": "imp86_u1_adapter_needed",
"section_assignment_override": False,
"provisional": _unit_provisional,
"adapter_needed": True,
"mapper_fit_error": _fit_error_str,
**_placeholder_popup,
})
debug_zones.append({
"position": position,
"source_section_ids": list(unit.source_section_ids),
"merge_type": unit.merge_type,
"title": unit.title,
"v4_rank1_frame_id": unit.frame_id,
"v4_rank1_frame_number": unit.frame_number,
"v4_template_id": unit.frame_template_id,
"v4_label": unit.label,
"v4_confidence": float(unit.confidence or 0.0),
"v4_selected_rank": unit.v4_rank,
"selection_path": unit.selection_path,
"fallback_reason": unit.fallback_reason,
"fallback_used": bool(unit.selection_path and "fallback" in unit.selection_path),
"phase_z_status": unit.phase_z_status,
"composition_score": float(unit.score or 0.0),
"composition_rationale": dict(unit.rationale or {}),
"composition_notes": list(unit.notes),
"mapper_type": "adapter_needed",
"contract_id": unit.frame_template_id,
"contract_frame_id": contract_frame_id,
"builder": builder_name,
"min_height_px": min_height_px,
"slot_payload_keys": [],
"content_truncated_count": None,
"assets_dir": None,
"content_weight": {"score": 0},
"placement_trace": placement_trace,
"assignment_source": "imp86_u1_adapter_needed",
"section_assignment_override": False,
"replaced_auto_unit": None,
"skipped_collided_auto_units": [],
"uncovered_section_ids": [],
"skipped_reason": "imp86_u1_adapter_needed_mapper_fit_error",
"provisional": _unit_provisional,
"adapter_needed": True,
"mapper_fit_error": _fit_error_str,
})
print(f" adapter : zone--{position} {unit.source_section_ids}"
f"{unit.frame_template_id} FitError → adapter_needed (skip render)")
continue
@@ -4935,6 +5007,38 @@ def run_phase_z2_mvp1(
# 6. Build layout CSS — horizontal-2 = dynamic heights (regression preserve), 그 외 = fr default.
# Step D-ext : override_zone_geometries 가 들어오면 layout_css 강제.
# IMP-86 u2 — pre-build layout invariant guard. zones_data / debug_zones
# MUST be cardinality- and position-aligned with the active layout
# preset's css_areas tokens before build_layout_css derives heights_px
# / widths_px and _compute_per_zone_geometry validates them against R/C.
# If a mapper FitError path (IMP-86 u1) — or any future zone-drop path —
# forgets to append a placeholder, the resulting shape drift would
# surface as a confusing `heights_px length N != grid rows R=M`
# ValueError deep inside the geometry helper. This guard fails fast at
# the pipeline boundary with preset / expected positions / actual
# positions / count diagnostics so the root cause is obvious from the
# log line (factual_verification: value + path + upstream).
_active_preset = LAYOUT_PRESETS[layout_preset]
_expected_positions = _parse_css_areas(_active_preset["css_areas"])[1]
_actual_positions = [zd["position"] for zd in zones_data]
_debug_positions = [dz["position"] for dz in debug_zones]
if (
len(zones_data) != len(_expected_positions)
or len(debug_zones) != len(_expected_positions)
or sorted(_actual_positions) != sorted(_expected_positions)
or sorted(_debug_positions) != sorted(_expected_positions)
):
raise ValueError(
"phase_z2_pipeline pre-build layout invariant violation: "
f"layout_preset={layout_preset!r} "
f"css_areas={_active_preset['css_areas']!r} "
f"expected_positions={_expected_positions!r} "
f"zones_data_positions={_actual_positions!r} "
f"debug_zones_positions={_debug_positions!r} "
f"zones_count={len(zones_data)} "
f"debug_count={len(debug_zones)} "
f"expected_count={len(_expected_positions)}"
)
layout_css = build_layout_css(
layout_preset, zones_data, override_zone_geometries=override_zone_geometries
)

View File

@@ -99,3 +99,63 @@ def test_fr_default_single_returns_full_body():
per_zone = _compute_per_zone_geometry(layout_css, debug_zones, GRID_GAP)
assert per_zone[0]["zone_height_px"] == SLIDE_BODY_HEIGHT
assert per_zone[0]["zone_width_px"] == SLIDE_BODY_WIDTH
def _placeholder_zone(position: str) -> dict:
# IMP-86 u3 — mirror the u1 mapper-FitError placeholder zones_data
# record shape (`src/phase_z2_pipeline.py:4459-4469`) used to keep the
# failed unit's preset position in zones_data so build_layout_css /
# _compute_per_zone_geometry observe len(zones_data) == active preset's
# css_areas rows (R). content_weight.score == 0 ensures the placeholder
# does not steal weight from the surviving normal zone.
return {
"position": position,
"template_id": "__empty__",
"slot_payload": {},
"content_weight": {"score": 0},
"min_height_px": 100,
"assignment_source": "imp86_u1_adapter_needed",
"section_assignment_override": False,
"provisional": False,
}
def test_horizontal_2_normal_plus_placeholder_preserves_R2_cardinality():
"""IMP-86 u3 — horizontal-2 with one mapper-success zone + one
mapper-FitError placeholder (per IMP-86 u1) must keep heights_px /
debug_zones / per_zone cardinality locked at R=2.
Reproduces the bug scenario in the issue body (mdx03 reject override
where 03-2 hits adapter_needed) at the helper level: if the FitError
path forgets to append a placeholder, heights_px length 1 vs R=2
raises ValueError at `_compute_per_zone_geometry`. With the u1
placeholder, all three artifacts (heights_px, debug_zones, per_zone)
stay at length 2 and the geometry helper succeeds.
"""
zones = [_zone("top", 1.0), _placeholder_zone("bottom")]
layout_css = build_layout_css("horizontal-2", zones)
debug_zones = [{"position": "top"}, {"position": "bottom"}]
# heights_px length is locked to R=2 (parsed from preset css_areas).
assert layout_css["areas"] == '"top" "bottom"'
assert len(layout_css["heights_px"]) == 2
# widths_px length is locked to C=1.
assert len(layout_css["widths_px"]) == 1
# Placeholder zone (score=0) gets its min_height_px (100) and the
# surviving normal zone absorbs the remaining body height after gap.
assert layout_css["heights_px"][1] == 100
assert (
layout_css["heights_px"][0] + layout_css["heights_px"][1] + GRID_GAP
== SLIDE_BODY_HEIGHT
)
per_zone = _compute_per_zone_geometry(layout_css, debug_zones, GRID_GAP)
# per_zone cardinality matches debug_zones (which matches R=2).
assert len(per_zone) == 2
assert [pz["position"] for pz in per_zone] == ["top", "bottom"]
assert per_zone[0]["zone_height_px"] == layout_css["heights_px"][0]
assert per_zone[1]["zone_height_px"] == layout_css["heights_px"][1]
# Both zones share the single column => width == SLIDE_BODY_WIDTH.
assert per_zone[0]["zone_width_px"] == SLIDE_BODY_WIDTH
assert per_zone[1]["zone_width_px"] == SLIDE_BODY_WIDTH

View File

@@ -0,0 +1,232 @@
"""IMP-86 u4 — integration: mdx03 reject override reaches Step 12 AI audit
without heights_px ValueError; default-path regression.
Locks the Stage 2 guardrails from the IMP-86 issue body. Pre-u1, the mapper
``FitError`` handler appended only to ``adapter_needed_units`` and skipped
``zones_data`` / ``debug_zones`` — leaving them at ``len=1`` while
``build_layout_css("horizontal-2", ...)`` still returned ``R=2``. The
``_compute_per_zone_geometry`` invariant then raised
``ValueError("heights_px length 1 != grid rows R=2")`` BEFORE Step 12
fired, so the AI router was never reached and ``step12_ai_repair.json``
was never written. Post u1+u2 :
* u1 placeholder zone keeps cardinality at R=2;
* u2 pre-build invariant guard surfaces drift before
``_compute_per_zone_geometry`` raises;
* Step 12 AI router is reached and the audit artifact is produced;
* ``debug.json`` carries the adapter_needed marker (existing channel —
u5 will add finer per-record telemetry).
AI router is patched to a deterministic ``None``-returning stub so the
integration coverage proves the wiring without any network / API / model
dependency (``feedback_ai_isolation_contract`` — AI = fallback only).
The pair ``(ai_called=False, skip_reason='router_short_circuit')`` is
the gather-side surface (``src/phase_z2_ai_fallback/step12.py:210-213``)
for ``route_ai_fallback → None`` and is the explicit "router reached and
returned None" signal — distinct from earlier-gate skips
(``not_provisional`` / ``route_not_ai_adaptation:<hint>``) which would
indicate the router was NOT reached.
"""
from __future__ import annotations
import json
from pathlib import Path
from unittest.mock import MagicMock
import pytest
import src.phase_z2_ai_fallback.step12 as step12_mod
import src.phase_z2_pipeline as pz2
_REPO_ROOT = Path(__file__).resolve().parents[2]
_SAMPLE_MDX_PATH = _REPO_ROOT / "samples" / "mdx_batch" / "03.mdx"
def _stub_router_short_circuit() -> MagicMock:
"""AI-router stand-in returning ``None``.
Gather records ``(ai_called=False, skip_reason='router_short_circuit')``
per ``src/phase_z2_ai_fallback/step12.py:210-213`` when the router
returns ``None``. Asserting that pair on the reject record proves the
router was actually invoked — vs. the earlier ``not_provisional`` or
``route_not_ai_adaptation:<hint>`` skips which would indicate the
router was NEVER reached (the IMP-86 pre-fix failure mode, which
manifested as a ``ValueError`` crash before Step 12 even fired).
"""
return MagicMock(return_value=None)
@pytest.mark.integration
def test_integration_reject_override_reaches_step12_without_value_error(
tmp_path, monkeypatch
):
"""mdx03 + ``--override-frame 03-2=bim_dx_comparison_table`` must reach
Step 12 AI router without raising ``heights_px`` ValueError, and the
Step 12 audit artifact must reflect router reach on the provisional
reject unit.
"""
if not _SAMPLE_MDX_PATH.is_file():
pytest.skip(f"sample MDX not present: {_SAMPLE_MDX_PATH}")
monkeypatch.setattr(pz2, "RUNS_DIR", tmp_path / "runs")
router = _stub_router_short_circuit()
monkeypatch.setattr(step12_mod, "route_ai_fallback", router)
run_id = "imp86_u4_reject_override_integration"
pz2.run_phase_z2_mvp1(
_SAMPLE_MDX_PATH,
run_id=run_id,
override_frames={"03-2": "bim_dx_comparison_table"},
)
run_dir = tmp_path / "runs" / run_id / "phase_z2"
# Guardrail 1 — step12_ai_repair.json present. Pre-IMP-86, the
# pipeline crashed at _compute_per_zone_geometry before Step 12 fired
# so this artifact was MISSING under reject override (issue body
# primary symptom: "AI 호출 0 / step12_ai_repair.json 미생성").
step12_path = run_dir / "steps" / "step12_ai_repair.json"
assert step12_path.is_file(), (
f"step12_ai_repair.json missing at {step12_path} — pre-IMP-86 "
"pipeline crashed at _compute_per_zone_geometry ValueError "
"before Step 12 fired."
)
step12_data = json.loads(step12_path.read_text(encoding="utf-8"))
per_unit = step12_data["data"]["per_unit"]
assert len(per_unit) >= 1, "step12 per_unit must contain at least one record"
# Guardrail 2 — provisional reject unit reached the AI router.
# _apply_frame_override_to_unit (src/phase_z2_pipeline.py:1199-1208)
# promotes the unit to provisional + label='reject' when the override
# target matches a V4 reject judgment; the IMP-47B u1 route map then
# sends reject → 'ai_adaptation_required'.
reject_records = [
r for r in per_unit if r.get("source_section_ids") == ["03-2"]
]
assert len(reject_records) == 1, (
f"expected exactly one per_unit record for 03-2; got {reject_records}"
)
reject = reject_records[0]
assert reject["provisional"] is True, (
"03-2 unit must be provisional after reject override per "
"_apply_frame_override_to_unit reject-judgment promotion"
)
assert reject["route_hint"] == "ai_adaptation_required", (
f"03-2 route_hint must be 'ai_adaptation_required' "
f"(IMP-47B u1 reject→AI map); got {reject['route_hint']}"
)
# The stub router returned None → gather records router_short_circuit.
# This pair is the explicit "router reached" surface; the alternative
# skips (not_provisional / route_not_ai_adaptation:*) would mean the
# router was NEVER called — the IMP-86 pre-fix failure mode.
assert reject["skip_reason"] == "router_short_circuit", (
f"03-2 must reach the (stubbed) AI router and record "
f"router_short_circuit; got skip_reason={reject['skip_reason']}, "
f"error={reject.get('error')}"
)
assert reject["ai_called"] is False
assert reject["error"] is None
router.assert_called()
# Guardrail 3 — pipeline reached Step 20 (no heights_px ValueError).
# The pre-IMP-86 crash happened BEFORE the Step 7 layout artifact, so
# the Step 20 final-status artifact is the strongest "pipeline did
# not crash mid-flight" signal.
step20_path = run_dir / "steps" / "step20_slide_status.json"
assert step20_path.is_file(), (
f"step20_slide_status.json missing at {step20_path} — pipeline "
"did not reach final step (likely crashed mid-pipeline)."
)
# Guardrail 4 — debug.json adapter_needed marker. u1 placeholder
# writes a parallel zones_data + debug_zone record, but
# adapter_needed_units stays the authoritative adapter signal for
# debug.json consumers. u5 adds finer per-placeholder telemetry
# (adapter_needed=True / mapper_fit_error) on the placeholder records
# themselves (asserted in Guardrail 5 below).
# write_debug_json (src/phase_z2_pipeline.py:3254-3292) nests the
# slide_status payload from compute_slide_status (src/phase_z2_pipeline.py:2939-3128)
# under the top-level "slide_status" key, so adapter_needed_count and
# adapter_needed_units are addressed via debug["slide_status"][...]
# — not at the top level.
debug_path = run_dir / "debug.json"
assert debug_path.is_file(), f"debug.json missing at {debug_path}"
debug = json.loads(debug_path.read_text(encoding="utf-8"))
slide_status = debug.get("slide_status") or {}
assert slide_status.get("adapter_needed_count", 0) >= 1, (
f"debug.json slide_status.adapter_needed_count must reflect the "
f"FitError on 03-2; got {slide_status.get('adapter_needed_count')}"
)
adapter_units = slide_status.get("adapter_needed_units") or []
assert any(
"03-2" in (a.get("source_section_ids") or []) for a in adapter_units
), (
f"slide_status.adapter_needed_units must include the 03-2 reject "
f"override unit; got {adapter_units}"
)
# Guardrail 5 (IMP-86 u5) — per-placeholder telemetry on debug.json
# zones[] entry. Source: src/phase_z2_pipeline.py FitError handler
# appends adapter_needed=True + mapper_fit_error=<str(e)> + provisional
# on the placeholder debug_zone record so consumers reading the zones
# array alone (without joining against slide_status) can identify the
# adapter contract surface. adapter_needed_units stays the
# authoritative per-slide list (preserved by Guardrail 4 above).
debug_zones = debug.get("zones") or []
reject_zones = [
z for z in debug_zones if list(z.get("source_section_ids") or []) == ["03-2"]
]
assert len(reject_zones) == 1, (
f"expected exactly one debug zones[] record for 03-2 (placeholder "
f"from FitError handler); got {reject_zones}"
)
reject_zone = reject_zones[0]
assert reject_zone.get("adapter_needed") is True, (
f"debug zones[] placeholder for 03-2 must set adapter_needed=True; "
f"got {reject_zone.get('adapter_needed')!r}"
)
fit_err_msg = reject_zone.get("mapper_fit_error")
assert isinstance(fit_err_msg, str) and fit_err_msg, (
f"debug zones[] placeholder for 03-2 must carry a non-empty "
f"mapper_fit_error string (FitError raised by map_mdx_to_slots); "
f"got {fit_err_msg!r}"
)
assert reject_zone.get("provisional") is True, (
f"debug zones[] placeholder for 03-2 must mirror the unit "
f"provisional state (reject override → provisional=True); got "
f"{reject_zone.get('provisional')!r}"
)
@pytest.mark.integration
def test_integration_default_path_no_override_no_regression(
tmp_path, monkeypatch
):
"""mdx03 default path (no override) must still run end-to-end. The u1
placeholder + u2 invariant guard must be no-ops when the mapper
succeeds for every unit (default mdx03 path; the FitError branch is
not entered).
"""
if not _SAMPLE_MDX_PATH.is_file():
pytest.skip(f"sample MDX not present: {_SAMPLE_MDX_PATH}")
monkeypatch.setattr(pz2, "RUNS_DIR", tmp_path / "runs")
monkeypatch.setattr(
step12_mod, "route_ai_fallback", _stub_router_short_circuit()
)
run_id = "imp86_u4_default_path_regression"
pz2.run_phase_z2_mvp1(_SAMPLE_MDX_PATH, run_id=run_id)
run_dir = tmp_path / "runs" / run_id / "phase_z2"
# Pre-Step 12 crash would surface as both artifacts missing; default
# path must produce both.
assert (run_dir / "steps" / "step12_ai_repair.json").is_file(), (
"step12_ai_repair.json missing on default mdx03 path — u1/u2 "
"must be no-ops when mapper succeeds for every unit."
)
assert (run_dir / "steps" / "step20_slide_status.json").is_file(), (
"step20_slide_status.json missing on default mdx03 path — "
"pipeline did not complete."
)