From c53722ad0b31c0200d7a59c0ed934e977efbc111 Mon Sep 17 00:00:00 2001 From: kyeongmin Date: Sat, 23 May 2026 18:25:14 +0900 Subject: [PATCH] feat(#86): IMP-86 u1~u5 placeholder zones_data + invariant guard MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mapper FitError handler now appends a __empty__ placeholder to zones_data and a matching debug_zone so the surviving cardinality stays in sync with the active layout preset's grid rows. A pre-build_layout_css invariant guard fails fast with preset/positions/count diagnostics if drift recurs. Per-record telemetry (adapter_needed, mapper_fit_error, provisional) is exposed on both placeholder records; authoritative slide_status.adapter_ needed_units schema is unchanged. Closes mdx03 reject override regression: Step 12 AI router now reachable without heights_px ValueError; default-path behavior unaffected. u1 — FitError placeholder zones_data + debug_zone (src/phase_z2_pipeline.py) u2 — pre-build_layout_css invariant guard (src/phase_z2_pipeline.py) u3 — horizontal-2 normal+placeholder helper unit (test_compute_per_zone_geometry.py) u4 — mdx03 reject override → Step 12 integration + default regression u5 — placeholder telemetry surface (adapter_needed/mapper_fit_error/provisional) Tests: - u3 helper: 7 passed (0.06s) - u4+u5 integration: 2 passed (7.87s) - Phase Z2 + AI fallback regression: 544 passed (66.28s) - Broader sweep (excl. matching/pipeline heavy): 1066 passed (96.12s) Co-Authored-By: Claude Opus 4.7 (1M context) --- src/phase_z2_pipeline.py | 106 +++++++- .../test_compute_per_zone_geometry.py | 60 +++++ ...test_reject_override_geometry_invariant.py | 232 ++++++++++++++++++ 3 files changed, 397 insertions(+), 1 deletion(-) create mode 100644 tests/phase_z2_ai_fallback/test_reject_override_geometry_invariant.py diff --git a/src/phase_z2_pipeline.py b/src/phase_z2_pipeline.py index 98133d2..20460ab 100644 --- a/src/phase_z2_pipeline.py +++ b/src/phase_z2_pipeline.py @@ -4434,15 +4434,87 @@ def run_phase_z2_mvp1( try: slot_payload = map_mdx_to_slots(synth_section, unit.frame_template_id) except FitError as e: + _fit_error_str = str(e) + _unit_provisional = bool(getattr(unit, "provisional", False)) adapter_record = { "position": position, "source_section_ids": unit.source_section_ids, "merge_type": unit.merge_type, "template_id": unit.frame_template_id, "reason": "fit_error", - "fit_error": str(e), + "fit_error": _fit_error_str, } adapter_needed_units.append(adapter_record) + # IMP-86 u1 — placeholder zones_data + debug_zone keep the failed + # unit's preset position so downstream build_layout_css / + # _compute_per_zone_geometry observe len(zones_data) == active + # layout preset's css_areas rows (R). Without this both arrays + # drift relative to R, raising + # ValueError("heights_px length N != grid rows R=M") at + # _compute_per_zone_geometry. Mirrors the IMP-30 empty-shell + # pattern: zones_data uses template_id="__empty__" so render_slide + # short-circuits to empty partial_html; debug_zone keeps the + # original V4 evidence and adapter_needed_units stays the + # authoritative adapter signal. + # IMP-86 u5 — per-record telemetry. adapter_needed=True + + # mapper_fit_error= + provisional mirror the + # adapter signal directly on each placeholder record so + # debug.json / final.html consumers can identify the adapter + # contract surface from the zones array alone, without joining + # against slide_status.adapter_needed_units. adapter_needed_units + # itself is unchanged (still the authoritative per-slide list). + _placeholder_popup = compose_zone_popup_payload(unit, 0) + zones_data.append({ + "position": position, + "template_id": "__empty__", + "slot_payload": {}, + "content_weight": {"score": 0}, + "min_height_px": min_height_px, + "assignment_source": "imp86_u1_adapter_needed", + "section_assignment_override": False, + "provisional": _unit_provisional, + "adapter_needed": True, + "mapper_fit_error": _fit_error_str, + **_placeholder_popup, + }) + debug_zones.append({ + "position": position, + "source_section_ids": list(unit.source_section_ids), + "merge_type": unit.merge_type, + "title": unit.title, + "v4_rank1_frame_id": unit.frame_id, + "v4_rank1_frame_number": unit.frame_number, + "v4_template_id": unit.frame_template_id, + "v4_label": unit.label, + "v4_confidence": float(unit.confidence or 0.0), + "v4_selected_rank": unit.v4_rank, + "selection_path": unit.selection_path, + "fallback_reason": unit.fallback_reason, + "fallback_used": bool(unit.selection_path and "fallback" in unit.selection_path), + "phase_z_status": unit.phase_z_status, + "composition_score": float(unit.score or 0.0), + "composition_rationale": dict(unit.rationale or {}), + "composition_notes": list(unit.notes), + "mapper_type": "adapter_needed", + "contract_id": unit.frame_template_id, + "contract_frame_id": contract_frame_id, + "builder": builder_name, + "min_height_px": min_height_px, + "slot_payload_keys": [], + "content_truncated_count": None, + "assets_dir": None, + "content_weight": {"score": 0}, + "placement_trace": placement_trace, + "assignment_source": "imp86_u1_adapter_needed", + "section_assignment_override": False, + "replaced_auto_unit": None, + "skipped_collided_auto_units": [], + "uncovered_section_ids": [], + "skipped_reason": "imp86_u1_adapter_needed_mapper_fit_error", + "provisional": _unit_provisional, + "adapter_needed": True, + "mapper_fit_error": _fit_error_str, + }) print(f" adapter : zone--{position} {unit.source_section_ids} → " f"{unit.frame_template_id} FitError → adapter_needed (skip render)") continue @@ -4935,6 +5007,38 @@ def run_phase_z2_mvp1( # 6. Build layout CSS — horizontal-2 = dynamic heights (regression preserve), 그 외 = fr default. # Step D-ext : override_zone_geometries 가 들어오면 layout_css 강제. + # IMP-86 u2 — pre-build layout invariant guard. zones_data / debug_zones + # MUST be cardinality- and position-aligned with the active layout + # preset's css_areas tokens before build_layout_css derives heights_px + # / widths_px and _compute_per_zone_geometry validates them against R/C. + # If a mapper FitError path (IMP-86 u1) — or any future zone-drop path — + # forgets to append a placeholder, the resulting shape drift would + # surface as a confusing `heights_px length N != grid rows R=M` + # ValueError deep inside the geometry helper. This guard fails fast at + # the pipeline boundary with preset / expected positions / actual + # positions / count diagnostics so the root cause is obvious from the + # log line (factual_verification: value + path + upstream). + _active_preset = LAYOUT_PRESETS[layout_preset] + _expected_positions = _parse_css_areas(_active_preset["css_areas"])[1] + _actual_positions = [zd["position"] for zd in zones_data] + _debug_positions = [dz["position"] for dz in debug_zones] + if ( + len(zones_data) != len(_expected_positions) + or len(debug_zones) != len(_expected_positions) + or sorted(_actual_positions) != sorted(_expected_positions) + or sorted(_debug_positions) != sorted(_expected_positions) + ): + raise ValueError( + "phase_z2_pipeline pre-build layout invariant violation: " + f"layout_preset={layout_preset!r} " + f"css_areas={_active_preset['css_areas']!r} " + f"expected_positions={_expected_positions!r} " + f"zones_data_positions={_actual_positions!r} " + f"debug_zones_positions={_debug_positions!r} " + f"zones_count={len(zones_data)} " + f"debug_count={len(debug_zones)} " + f"expected_count={len(_expected_positions)}" + ) layout_css = build_layout_css( layout_preset, zones_data, override_zone_geometries=override_zone_geometries ) diff --git a/tests/phase_z2/test_compute_per_zone_geometry.py b/tests/phase_z2/test_compute_per_zone_geometry.py index f14709d..5fcdb82 100644 --- a/tests/phase_z2/test_compute_per_zone_geometry.py +++ b/tests/phase_z2/test_compute_per_zone_geometry.py @@ -99,3 +99,63 @@ def test_fr_default_single_returns_full_body(): per_zone = _compute_per_zone_geometry(layout_css, debug_zones, GRID_GAP) assert per_zone[0]["zone_height_px"] == SLIDE_BODY_HEIGHT assert per_zone[0]["zone_width_px"] == SLIDE_BODY_WIDTH + + +def _placeholder_zone(position: str) -> dict: + # IMP-86 u3 — mirror the u1 mapper-FitError placeholder zones_data + # record shape (`src/phase_z2_pipeline.py:4459-4469`) used to keep the + # failed unit's preset position in zones_data so build_layout_css / + # _compute_per_zone_geometry observe len(zones_data) == active preset's + # css_areas rows (R). content_weight.score == 0 ensures the placeholder + # does not steal weight from the surviving normal zone. + return { + "position": position, + "template_id": "__empty__", + "slot_payload": {}, + "content_weight": {"score": 0}, + "min_height_px": 100, + "assignment_source": "imp86_u1_adapter_needed", + "section_assignment_override": False, + "provisional": False, + } + + +def test_horizontal_2_normal_plus_placeholder_preserves_R2_cardinality(): + """IMP-86 u3 — horizontal-2 with one mapper-success zone + one + mapper-FitError placeholder (per IMP-86 u1) must keep heights_px / + debug_zones / per_zone cardinality locked at R=2. + + Reproduces the bug scenario in the issue body (mdx03 reject override + where 03-2 hits adapter_needed) at the helper level: if the FitError + path forgets to append a placeholder, heights_px length 1 vs R=2 + raises ValueError at `_compute_per_zone_geometry`. With the u1 + placeholder, all three artifacts (heights_px, debug_zones, per_zone) + stay at length 2 and the geometry helper succeeds. + """ + zones = [_zone("top", 1.0), _placeholder_zone("bottom")] + layout_css = build_layout_css("horizontal-2", zones) + debug_zones = [{"position": "top"}, {"position": "bottom"}] + + # heights_px length is locked to R=2 (parsed from preset css_areas). + assert layout_css["areas"] == '"top" "bottom"' + assert len(layout_css["heights_px"]) == 2 + # widths_px length is locked to C=1. + assert len(layout_css["widths_px"]) == 1 + + # Placeholder zone (score=0) gets its min_height_px (100) and the + # surviving normal zone absorbs the remaining body height after gap. + assert layout_css["heights_px"][1] == 100 + assert ( + layout_css["heights_px"][0] + layout_css["heights_px"][1] + GRID_GAP + == SLIDE_BODY_HEIGHT + ) + + per_zone = _compute_per_zone_geometry(layout_css, debug_zones, GRID_GAP) + # per_zone cardinality matches debug_zones (which matches R=2). + assert len(per_zone) == 2 + assert [pz["position"] for pz in per_zone] == ["top", "bottom"] + assert per_zone[0]["zone_height_px"] == layout_css["heights_px"][0] + assert per_zone[1]["zone_height_px"] == layout_css["heights_px"][1] + # Both zones share the single column => width == SLIDE_BODY_WIDTH. + assert per_zone[0]["zone_width_px"] == SLIDE_BODY_WIDTH + assert per_zone[1]["zone_width_px"] == SLIDE_BODY_WIDTH diff --git a/tests/phase_z2_ai_fallback/test_reject_override_geometry_invariant.py b/tests/phase_z2_ai_fallback/test_reject_override_geometry_invariant.py new file mode 100644 index 0000000..a6b8d88 --- /dev/null +++ b/tests/phase_z2_ai_fallback/test_reject_override_geometry_invariant.py @@ -0,0 +1,232 @@ +"""IMP-86 u4 — integration: mdx03 reject override reaches Step 12 AI audit +without heights_px ValueError; default-path regression. + +Locks the Stage 2 guardrails from the IMP-86 issue body. Pre-u1, the mapper +``FitError`` handler appended only to ``adapter_needed_units`` and skipped +``zones_data`` / ``debug_zones`` — leaving them at ``len=1`` while +``build_layout_css("horizontal-2", ...)`` still returned ``R=2``. The +``_compute_per_zone_geometry`` invariant then raised +``ValueError("heights_px length 1 != grid rows R=2")`` BEFORE Step 12 +fired, so the AI router was never reached and ``step12_ai_repair.json`` +was never written. Post u1+u2 : + + * u1 placeholder zone keeps cardinality at R=2; + * u2 pre-build invariant guard surfaces drift before + ``_compute_per_zone_geometry`` raises; + * Step 12 AI router is reached and the audit artifact is produced; + * ``debug.json`` carries the adapter_needed marker (existing channel — + u5 will add finer per-record telemetry). + +AI router is patched to a deterministic ``None``-returning stub so the +integration coverage proves the wiring without any network / API / model +dependency (``feedback_ai_isolation_contract`` — AI = fallback only). +The pair ``(ai_called=False, skip_reason='router_short_circuit')`` is +the gather-side surface (``src/phase_z2_ai_fallback/step12.py:210-213``) +for ``route_ai_fallback → None`` and is the explicit "router reached and +returned None" signal — distinct from earlier-gate skips +(``not_provisional`` / ``route_not_ai_adaptation:``) which would +indicate the router was NOT reached. +""" +from __future__ import annotations + +import json +from pathlib import Path +from unittest.mock import MagicMock + +import pytest + +import src.phase_z2_ai_fallback.step12 as step12_mod +import src.phase_z2_pipeline as pz2 + + +_REPO_ROOT = Path(__file__).resolve().parents[2] +_SAMPLE_MDX_PATH = _REPO_ROOT / "samples" / "mdx_batch" / "03.mdx" + + +def _stub_router_short_circuit() -> MagicMock: + """AI-router stand-in returning ``None``. + + Gather records ``(ai_called=False, skip_reason='router_short_circuit')`` + per ``src/phase_z2_ai_fallback/step12.py:210-213`` when the router + returns ``None``. Asserting that pair on the reject record proves the + router was actually invoked — vs. the earlier ``not_provisional`` or + ``route_not_ai_adaptation:`` skips which would indicate the + router was NEVER reached (the IMP-86 pre-fix failure mode, which + manifested as a ``ValueError`` crash before Step 12 even fired). + """ + return MagicMock(return_value=None) + + +@pytest.mark.integration +def test_integration_reject_override_reaches_step12_without_value_error( + tmp_path, monkeypatch +): + """mdx03 + ``--override-frame 03-2=bim_dx_comparison_table`` must reach + Step 12 AI router without raising ``heights_px`` ValueError, and the + Step 12 audit artifact must reflect router reach on the provisional + reject unit. + """ + if not _SAMPLE_MDX_PATH.is_file(): + pytest.skip(f"sample MDX not present: {_SAMPLE_MDX_PATH}") + + monkeypatch.setattr(pz2, "RUNS_DIR", tmp_path / "runs") + router = _stub_router_short_circuit() + monkeypatch.setattr(step12_mod, "route_ai_fallback", router) + + run_id = "imp86_u4_reject_override_integration" + pz2.run_phase_z2_mvp1( + _SAMPLE_MDX_PATH, + run_id=run_id, + override_frames={"03-2": "bim_dx_comparison_table"}, + ) + + run_dir = tmp_path / "runs" / run_id / "phase_z2" + + # Guardrail 1 — step12_ai_repair.json present. Pre-IMP-86, the + # pipeline crashed at _compute_per_zone_geometry before Step 12 fired + # so this artifact was MISSING under reject override (issue body + # primary symptom: "AI 호출 0 / step12_ai_repair.json 미생성"). + step12_path = run_dir / "steps" / "step12_ai_repair.json" + assert step12_path.is_file(), ( + f"step12_ai_repair.json missing at {step12_path} — pre-IMP-86 " + "pipeline crashed at _compute_per_zone_geometry ValueError " + "before Step 12 fired." + ) + step12_data = json.loads(step12_path.read_text(encoding="utf-8")) + per_unit = step12_data["data"]["per_unit"] + assert len(per_unit) >= 1, "step12 per_unit must contain at least one record" + + # Guardrail 2 — provisional reject unit reached the AI router. + # _apply_frame_override_to_unit (src/phase_z2_pipeline.py:1199-1208) + # promotes the unit to provisional + label='reject' when the override + # target matches a V4 reject judgment; the IMP-47B u1 route map then + # sends reject → 'ai_adaptation_required'. + reject_records = [ + r for r in per_unit if r.get("source_section_ids") == ["03-2"] + ] + assert len(reject_records) == 1, ( + f"expected exactly one per_unit record for 03-2; got {reject_records}" + ) + reject = reject_records[0] + assert reject["provisional"] is True, ( + "03-2 unit must be provisional after reject override per " + "_apply_frame_override_to_unit reject-judgment promotion" + ) + assert reject["route_hint"] == "ai_adaptation_required", ( + f"03-2 route_hint must be 'ai_adaptation_required' " + f"(IMP-47B u1 reject→AI map); got {reject['route_hint']}" + ) + # The stub router returned None → gather records router_short_circuit. + # This pair is the explicit "router reached" surface; the alternative + # skips (not_provisional / route_not_ai_adaptation:*) would mean the + # router was NEVER called — the IMP-86 pre-fix failure mode. + assert reject["skip_reason"] == "router_short_circuit", ( + f"03-2 must reach the (stubbed) AI router and record " + f"router_short_circuit; got skip_reason={reject['skip_reason']}, " + f"error={reject.get('error')}" + ) + assert reject["ai_called"] is False + assert reject["error"] is None + router.assert_called() + + # Guardrail 3 — pipeline reached Step 20 (no heights_px ValueError). + # The pre-IMP-86 crash happened BEFORE the Step 7 layout artifact, so + # the Step 20 final-status artifact is the strongest "pipeline did + # not crash mid-flight" signal. + step20_path = run_dir / "steps" / "step20_slide_status.json" + assert step20_path.is_file(), ( + f"step20_slide_status.json missing at {step20_path} — pipeline " + "did not reach final step (likely crashed mid-pipeline)." + ) + + # Guardrail 4 — debug.json adapter_needed marker. u1 placeholder + # writes a parallel zones_data + debug_zone record, but + # adapter_needed_units stays the authoritative adapter signal for + # debug.json consumers. u5 adds finer per-placeholder telemetry + # (adapter_needed=True / mapper_fit_error) on the placeholder records + # themselves (asserted in Guardrail 5 below). + # write_debug_json (src/phase_z2_pipeline.py:3254-3292) nests the + # slide_status payload from compute_slide_status (src/phase_z2_pipeline.py:2939-3128) + # under the top-level "slide_status" key, so adapter_needed_count and + # adapter_needed_units are addressed via debug["slide_status"][...] + # — not at the top level. + debug_path = run_dir / "debug.json" + assert debug_path.is_file(), f"debug.json missing at {debug_path}" + debug = json.loads(debug_path.read_text(encoding="utf-8")) + slide_status = debug.get("slide_status") or {} + assert slide_status.get("adapter_needed_count", 0) >= 1, ( + f"debug.json slide_status.adapter_needed_count must reflect the " + f"FitError on 03-2; got {slide_status.get('adapter_needed_count')}" + ) + adapter_units = slide_status.get("adapter_needed_units") or [] + assert any( + "03-2" in (a.get("source_section_ids") or []) for a in adapter_units + ), ( + f"slide_status.adapter_needed_units must include the 03-2 reject " + f"override unit; got {adapter_units}" + ) + + # Guardrail 5 (IMP-86 u5) — per-placeholder telemetry on debug.json + # zones[] entry. Source: src/phase_z2_pipeline.py FitError handler + # appends adapter_needed=True + mapper_fit_error= + provisional + # on the placeholder debug_zone record so consumers reading the zones + # array alone (without joining against slide_status) can identify the + # adapter contract surface. adapter_needed_units stays the + # authoritative per-slide list (preserved by Guardrail 4 above). + debug_zones = debug.get("zones") or [] + reject_zones = [ + z for z in debug_zones if list(z.get("source_section_ids") or []) == ["03-2"] + ] + assert len(reject_zones) == 1, ( + f"expected exactly one debug zones[] record for 03-2 (placeholder " + f"from FitError handler); got {reject_zones}" + ) + reject_zone = reject_zones[0] + assert reject_zone.get("adapter_needed") is True, ( + f"debug zones[] placeholder for 03-2 must set adapter_needed=True; " + f"got {reject_zone.get('adapter_needed')!r}" + ) + fit_err_msg = reject_zone.get("mapper_fit_error") + assert isinstance(fit_err_msg, str) and fit_err_msg, ( + f"debug zones[] placeholder for 03-2 must carry a non-empty " + f"mapper_fit_error string (FitError raised by map_mdx_to_slots); " + f"got {fit_err_msg!r}" + ) + assert reject_zone.get("provisional") is True, ( + f"debug zones[] placeholder for 03-2 must mirror the unit " + f"provisional state (reject override → provisional=True); got " + f"{reject_zone.get('provisional')!r}" + ) + + +@pytest.mark.integration +def test_integration_default_path_no_override_no_regression( + tmp_path, monkeypatch +): + """mdx03 default path (no override) must still run end-to-end. The u1 + placeholder + u2 invariant guard must be no-ops when the mapper + succeeds for every unit (default mdx03 path; the FitError branch is + not entered). + """ + if not _SAMPLE_MDX_PATH.is_file(): + pytest.skip(f"sample MDX not present: {_SAMPLE_MDX_PATH}") + + monkeypatch.setattr(pz2, "RUNS_DIR", tmp_path / "runs") + monkeypatch.setattr( + step12_mod, "route_ai_fallback", _stub_router_short_circuit() + ) + + run_id = "imp86_u4_default_path_regression" + pz2.run_phase_z2_mvp1(_SAMPLE_MDX_PATH, run_id=run_id) + + run_dir = tmp_path / "runs" / run_id / "phase_z2" + # Pre-Step 12 crash would surface as both artifacts missing; default + # path must produce both. + assert (run_dir / "steps" / "step12_ai_repair.json").is_file(), ( + "step12_ai_repair.json missing on default mdx03 path — u1/u2 " + "must be no-ops when mapper succeeds for every unit." + ) + assert (run_dir / "steps" / "step20_slide_status.json").is_file(), ( + "step20_slide_status.json missing on default mdx03 path — " + "pipeline did not complete." + )