# Commit summary:
#   - u1~u9: AI fallback infrastructure (router/prompts/schema/validator) + Step 12 hook
#   - u10: e2e reject chain (writes final.html with AI-repaired slot, full coverage)
#   - u11: frontend wiring deferred to follow-up commit (split from IMP-41 hunks)
#   - u12: coverage_invariant guard
#   - u13: cache save gate (visual_check PASS + user_approved/auto_cache) — Codex #22 verified
# Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
# File view metadata: 270 lines, 12 KiB, Python
"""IMP-47B u10 — End-to-end reject smoke (mocked client + full chain + render).
|
|
|
|
Scope (this slice):
|
|
E2E chain proving the IMP-47B reject route activates, preserves
|
|
full coverage, and propagates the AI-repaired ``slot_payload``
|
|
into the rendered ``final.html`` artifact when the AI fallback
|
|
client returns a deterministic PARTIAL_OVERRIDES proposal. Wires
|
|
together the four pipeline helpers introduced by u4 / u5 / u7 / u8
|
|
plus the Step 13 render step:
|
|
|
|
gather → apply → coverage_invariant → ai_repair_status surfacing
|
|
→ render_slide → final.html
|
|
|
|
The chain mirrors the ``run_phase_z2_mvp1`` call sequence between
|
|
the Step 12 slot_payload write and the Step 20 ``slide_status``
|
|
attach (src/phase_z2_pipeline.py — u4 call site, u5 apply, u6
|
|
artifact, u7 invariant, u8 surface). The Step 13 render path
|
|
(``render_slide`` at src/phase_z2_pipeline.py:2319, called from the
|
|
production write site at src/phase_z2_pipeline.py:5107-5111)
|
|
consumes ``zones_data[i]["slot_payload"]`` verbatim, so this test
|
|
drives that exact production seam: it calls ``render_slide`` on
|
|
the post-apply ``zones_data`` and writes the resulting HTML to a
|
|
``final.html`` file inside ``tmp_path``, then asserts the AI
|
|
proposal text appears in the on-disk artifact. A heavy
|
|
``run_phase_z2_mvp1`` integration variant with Selenium overflow
|
|
check remains deferred — this smoke test stops at the rendered
|
|
HTML.
|
|
|
|
Guardrails proven by this test (IMP-47B policy bullets):
|
|
* AI 호출 = fallback path only → master flag default OFF preserved
|
|
(test enables for itself only, restores after).
|
|
* MDX 원문 100% 보존 → coverage_invariant.status == "ok",
|
|
source_section_ids identical before/after AI.
|
|
* 자동 frame swap 금지 → frame_template_id unchanged.
|
|
* frame visual 임의 변경 금지 → frame_contract / partial untouched
|
|
(apply only merges proposal.payload.slots into slot_payload).
|
|
* dropped 절대 룰 → slot_payload AI keys merged on top
|
|
of deterministic keys; pre-existing meta keys survive.
|
|
"""
|
|
from __future__ import annotations

from dataclasses import dataclass, field

import src.phase_z2_ai_fallback.step12 as step12_mod
from src.phase_z2_ai_fallback.schema import AiFallbackProposal, ProposalKind
from src.phase_z2_pipeline import (
    _apply_ai_repair_proposals_to_zones,
    _check_post_ai_coverage_invariant,
    _run_step12_ai_repair,
    _summarize_ai_repair_status,
    build_layout_css,
    render_slide,
)


@dataclass
|
|
class _StubUnit:
|
|
"""Synthetic CompositionUnit stand-in (subset of fields gather reads)."""
|
|
label: str | None = "reject"
|
|
provisional: bool = True
|
|
frame_template_id: str = "MOCK_T_reject"
|
|
frame_id: str = "MOCK_F_reject"
|
|
source_section_ids: list[str] = field(default_factory=lambda: ["MOCK_S1"])
|
|
raw_content: str = "MOCK MDX paragraph that must survive AI repair."
|
|
v4_rank: int | None = 1
|
|
cardinality: int | None = None
|
|
layout_preset: str = "two_zone_vertical"
|
|
zone_position: str = "top"
|
|
source_shape: str = "paragraph"
|
|
h3_count: int = 0
|
|
char_count: int = 48
|
|
|
|
|
|
def _patched_route_ai_fallback(**kwargs):
    """Deterministic stand-in for ``route_ai_fallback``.

    Returns a PARTIAL_OVERRIDES proposal that mirrors the declared frame
    slots. The validator (src/phase_z2_ai_fallback/validate.py:61-74) is
    not re-invoked here because this helper bypasses the router; the
    structural slot completeness is asserted by the apply step + the
    coverage invariant downstream.

    Args:
        **kwargs: Accepted for call-compatibility and ignored; the
            returned proposal does not depend on the inputs.

    Returns:
        An ``AiFallbackProposal`` whose ``payload["slots"]`` carries a
        fixed ``title`` and two fixed ``bullets``.
    """
    return AiFallbackProposal(
        proposal_kind=ProposalKind.PARTIAL_OVERRIDES,
        payload={
            "slots": {
                "title": "AI repaired title",
                "bullets": ["AI repaired bullet 1", "AI repaired bullet 2"],
            }
        },
        rationale="E2E smoke proposal — deterministic.",
    )


def test_e2e_reject_chain_applies_proposal_and_preserves_coverage(monkeypatch) -> None:
    """End-to-end reject smoke (synthetic chain, mocked client).

    Drives the four IMP-47B u4/u5/u7/u8 helpers in pipeline order with
    a single reject+provisional unit. Asserts every guardrail listed
    in the module docstring + the four E2E invariants
    (final.html-bound slot_payload / full coverage / no text loss /
    human_review NOT required on the success path).
    """
    # IMP-47B u4 wiring — patch the router seam in src/phase_z2_ai_fallback/step12.py
    # so the gather call returns a deterministic PARTIAL_OVERRIDES proposal
    # without touching the master flag / network / cache layers.
    monkeypatch.setattr(step12_mod, "route_ai_fallback", _patched_route_ai_fallback)

    unit = _StubUnit()
    units = [unit]

    # Step 12 gather (u4) — eligible reject reaches the patched router.
    records = _run_step12_ai_repair(units)
    assert len(records) == 1
    assert records[0]["route_hint"] == "ai_adaptation_required"
    assert records[0]["ai_called"] is True
    assert records[0]["skip_reason"] is None
    assert records[0]["proposal"]["proposal_kind"] == "partial_overrides"
    assert records[0]["source_section_ids"] == ["MOCK_S1"]

    # Step 12 apply (u5) — PARTIAL_OVERRIDES merged into the matching zone.
    # zones_data[0]["slot_payload"] is exactly what render_slide consumes
    # to emit final.html (src/phase_z2_pipeline.py:5107) — asserting it
    # here proves the reject route now flows into the rendered HTML.
    zones = [{
        "position": "top",
        "template_id": "MOCK_T_reject",
        "slot_payload": {
            "title": "deterministic title",
            "bullets": ["deterministic bullet"],
            "_truncated_count": 0,
        },
    }]
    _apply_ai_repair_proposals_to_zones(records, ["top"], zones)
    assert records[0]["apply_status"] == "applied:partial_overrides"
    # final.html-bound slot_payload carries AI proposal values
    assert zones[0]["slot_payload"]["title"] == "AI repaired title"
    assert zones[0]["slot_payload"]["bullets"] == [
        "AI repaired bullet 1",
        "AI repaired bullet 2",
    ]
    # frame visual / pre-existing meta keys survive (no silent shrink).
    assert zones[0]["template_id"] == "MOCK_T_reject"
    assert zones[0]["slot_payload"]["_truncated_count"] == 0
    # frame_template_id on the unit is byte-identical (no auto frame swap).
    assert unit.frame_template_id == "MOCK_T_reject"

    # Step 12 coverage invariant (u7) — full coverage, no text loss.
    coverage = _check_post_ai_coverage_invariant(units, records)
    assert coverage["status"] == "ok"
    assert coverage["pre_ai_section_ids"] == ["MOCK_S1"]
    assert coverage["post_ai_section_ids"] == ["MOCK_S1"]
    assert coverage["dropped_section_ids"] == []

    # Step 20 ai_repair_status surfacing (u8) — applied without human review.
    status = _summarize_ai_repair_status(records, coverage)
    assert status["status"] == "applied"
    assert status["counts"]["applied"] == 1
    assert status["counts"]["error"] == 0
    assert status["counts"]["unsupported_kind"] == 0
    assert status["coverage_status"] == "ok"
    # Key may be absent or falsy on the success path; only True is a failure.
    assert status.get("human_review_required") is not True


def test_e2e_reject_chain_writes_final_html_with_ai_repaired_slot(monkeypatch, tmp_path) -> None:
    """End-to-end reject smoke (real render path → final.html on disk).

    Drives the full Stage-2 u10 chain INCLUDING ``render_slide``: the
    AI-repaired ``slot_payload`` is fed through the same Jinja2
    rendering seam the production pipeline uses
    (src/phase_z2_pipeline.py:5107-5111), the resulting HTML is
    written to ``tmp_path / "final.html"``, and the on-disk artifact
    is then asserted to carry the AI proposal value. Uses
    ``bim_dx_comparison_table`` — a real registered frame partial
    (templates/phase_z2/families/bim_dx_comparison_table.html) whose
    template emits ``{{ slot_payload.title }}`` verbatim, so a
    proposal-overridden title surfaces literally in the HTML output.
    """
    # Route the gather step to the deterministic proposal; monkeypatch
    # restores the real router when the test finishes.
    monkeypatch.setattr(step12_mod, "route_ai_fallback", _patched_route_ai_fallback)

    unit = _StubUnit(
        frame_template_id="bim_dx_comparison_table",
        zone_position="primary",
        layout_preset="single",
    )

    # Step 12 gather + apply. Deterministic non-overridden slots
    # (col_a_label, col_b_label, rows[*]) are seeded BEFORE apply so the
    # post-render assertions below can prove u5 merge semantics
    # (dict.update — not dict-replace) survive the render seam. The
    # router proposal only carries ``{title, bullets}`` — every other
    # slot must reach final.html untouched.
    records = _run_step12_ai_repair([unit])
    zones = [{
        "position": "primary",
        "template_id": "bim_dx_comparison_table",
        "slot_payload": {
            "title": "deterministic frame title",
            "col_a_label": "DETERMINISTIC_COL_A_LABEL",
            "col_b_label": "DETERMINISTIC_COL_B_LABEL",
            "rows": [
                {"label": "DET_ROW_LABEL", "col_a": "DET_ROW_A", "col_b": "DET_ROW_B"},
            ],
        },
    }]
    _apply_ai_repair_proposals_to_zones(records, ["primary"], zones)
    assert records[0]["apply_status"] == "applied:partial_overrides"

    # Step 13 render — production seam (src/phase_z2_pipeline.py:5107-5111).
    layout_css = build_layout_css("single", zones)
    html = render_slide("IMP-47B E2E reject smoke", None, zones, "single", layout_css)
    final_html_path = tmp_path / "final.html"
    final_html_path.write_text(html, encoding="utf-8")

    # final.html artifact exists on disk and is non-empty.
    assert final_html_path.is_file()
    assert final_html_path.stat().st_size > 0
    # Assert against what was actually written, not the in-memory string.
    rendered = final_html_path.read_text(encoding="utf-8")

    # AI-repaired slot content appears in the rendered HTML.
    assert "AI repaired title" in rendered
    # Deterministic pre-apply title was overridden in the HTML output
    # (no silent merge that leaves both values visible).
    assert "deterministic frame title" not in rendered
    # Non-overridden deterministic slots survive merge → render (u5
    # dict.update semantics, not dict-replace; the absolute "nothing
    # dropped" rule is honoured at the render seam, not just in
    # slot_payload memory).
    assert "DETERMINISTIC_COL_A_LABEL" in rendered
    assert "DETERMINISTIC_COL_B_LABEL" in rendered
    assert "DET_ROW_LABEL" in rendered
    assert "DET_ROW_A" in rendered
    assert "DET_ROW_B" in rendered
    # Frame template id is preserved end-to-end (no auto frame swap).
    assert 'data-template-id="bim_dx_comparison_table"' in rendered
    assert unit.frame_template_id == "bim_dx_comparison_table"

    # MDX source text preserved 100% — coverage invariant + status surfacing.
    coverage = _check_post_ai_coverage_invariant([unit], records)
    assert coverage["status"] == "ok"
    assert coverage["dropped_section_ids"] == []
    status = _summarize_ai_repair_status(records, coverage)
    assert status["status"] == "applied"
    assert status.get("human_review_required") is not True


def test_e2e_reject_chain_no_text_loss_on_multi_section_unit(monkeypatch) -> None:
    """Multi-section reject unit keeps every section id through the chain.

    Every section id flows through gather, apply, coverage invariant,
    and ai_repair_status surfacing without a drop. Locks the "MDX source
    text preserved 100%" guardrail at unit-multiplicity granularity
    (gather copies the list via ``list(...)`` at
    src/phase_z2_ai_fallback/step12.py:124 so apply mutations cannot
    silently drop it).
    """
    # Route the gather step to the deterministic proposal; monkeypatch
    # restores the real router when the test finishes.
    monkeypatch.setattr(step12_mod, "route_ai_fallback", _patched_route_ai_fallback)

    unit = _StubUnit(source_section_ids=["MOCK_S1", "MOCK_S2", "MOCK_S3"])
    records = _run_step12_ai_repair([unit])
    zones = [{
        "position": "top",
        "template_id": "MOCK_T_reject",
        "slot_payload": {"title": "det", "bullets": ["det"]},
    }]
    _apply_ai_repair_proposals_to_zones(records, ["top"], zones)

    # All three section ids must be present, in order, before and after AI.
    coverage = _check_post_ai_coverage_invariant([unit], records)
    assert coverage["pre_ai_section_ids"] == ["MOCK_S1", "MOCK_S2", "MOCK_S3"]
    assert coverage["post_ai_section_ids"] == ["MOCK_S1", "MOCK_S2", "MOCK_S3"]
    assert coverage["dropped_section_ids"] == []

    status = _summarize_ai_repair_status(records, coverage)
    assert status["status"] == "applied"
    assert status.get("human_review_required") is not True