Files
C.E.L_Slide_test2/tests/test_imp47b_end_to_end.py
kyeongmin 1186ad8ae2 feat(#76): IMP-47B reject-as-AI-adaptation activation (u1~u13 backend + tests)
- u1~u9: AI fallback infrastructure (router/prompts/schema/validator) + Step 12 hook
- u10: e2e reject chain (writes final.html with AI-repaired slot, full coverage)
- u11: frontend wiring deferred to follow-up commit (split from IMP-41 hunks)
- u12: coverage_invariant guard
- u13: cache save gate (visual_check PASS + user_approved/auto_cache) — Codex #22 verified

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-22 00:19:10 +09:00

270 lines
12 KiB
Python

"""IMP-47B u10 — End-to-end reject smoke (mocked client + full chain + render).
Scope (this slice):
E2E chain proving the IMP-47B reject route activates, preserves
full coverage, and propagates the AI-repaired ``slot_payload``
into the rendered ``final.html`` artifact when the AI fallback
client returns a deterministic PARTIAL_OVERRIDES proposal. Wires
together the four pipeline helpers introduced by u4 / u5 / u7 / u8
plus the Step 13 render step:
gather → apply → coverage_invariant → ai_repair_status surfacing
→ render_slide → final.html
The chain mirrors the ``run_phase_z2_mvp1`` call sequence between
the Step 12 slot_payload write and the Step 20 ``slide_status``
attach (src/phase_z2_pipeline.py — u4 call site, u5 apply, u6
artifact, u7 invariant, u8 surface). The Step 13 render path
(``render_slide`` at src/phase_z2_pipeline.py:2319, called from the
production write site at src/phase_z2_pipeline.py:5107-5111)
consumes ``zones_data[i]["slot_payload"]`` verbatim, so this test
drives that exact production seam: it calls ``render_slide`` on
the post-apply ``zones_data`` and writes the resulting HTML to a
``final.html`` file inside ``tmp_path``, then asserts the AI
proposal text appears in the on-disk artifact. A heavy
``run_phase_z2_mvp1`` integration variant with Selenium overflow
check remains deferred — this smoke test stops at the rendered
HTML.
Guardrails proven by these tests (IMP-47B policy bullets):
* AI is called on the fallback path only → master flag default OFF
  preserved (the tests never enable the flag; they monkeypatch the router
  seam on the step12 module, which pytest undoes at teardown).
* MDX source text 100% preserved → coverage_invariant.status == "ok",
  source_section_ids identical before/after AI.
* No automatic frame swap → frame_template_id unchanged.
* No arbitrary change to frame visuals → frame_contract / partial untouched
  (apply only merges proposal.payload.slots into slot_payload).
* Absolute no-drop rule → slot_payload AI keys merged on top of
  deterministic keys; pre-existing meta keys survive.
"""
from __future__ import annotations
from dataclasses import dataclass, field
from src.phase_z2_ai_fallback.schema import AiFallbackProposal, ProposalKind
from src.phase_z2_pipeline import (
_apply_ai_repair_proposals_to_zones,
_check_post_ai_coverage_invariant,
_run_step12_ai_repair,
_summarize_ai_repair_status,
)
@dataclass
class _StubUnit:
"""Synthetic CompositionUnit stand-in (subset of fields gather reads)."""
label: str | None = "reject"
provisional: bool = True
frame_template_id: str = "MOCK_T_reject"
frame_id: str = "MOCK_F_reject"
source_section_ids: list[str] = field(default_factory=lambda: ["MOCK_S1"])
raw_content: str = "MOCK MDX paragraph that must survive AI repair."
v4_rank: int | None = 1
cardinality: int | None = None
layout_preset: str = "two_zone_vertical"
zone_position: str = "top"
source_shape: str = "paragraph"
h3_count: int = 0
char_count: int = 48
def _patched_route_ai_fallback(**kwargs):
    """Return a fixed PARTIAL_OVERRIDES proposal in place of ``route_ai_fallback``.

    The tests monkeypatch this function over the ``route_ai_fallback``
    attribute of the step12 module. Any keyword arguments are accepted and
    ignored; each call builds the proposal from the same literal values: a
    ``title`` and a two-item ``bullets`` list under ``payload["slots"]``.

    Per the original author's note, the validator
    (src/phase_z2_ai_fallback/validate.py:61-74) is not re-invoked because
    this stand-in bypasses the router; structural slot completeness is
    covered instead by the apply step and the coverage invariant that the
    tests assert downstream.
    """
    repaired_slots = {
        "title": "AI repaired title",
        "bullets": ["AI repaired bullet 1", "AI repaired bullet 2"],
    }
    return AiFallbackProposal(
        proposal_kind=ProposalKind.PARTIAL_OVERRIDES,
        payload={"slots": repaired_slots},
        rationale="E2E smoke proposal — deterministic.",
    )
def test_e2e_reject_chain_applies_proposal_and_preserves_coverage(monkeypatch):
    """Drive gather → apply → coverage invariant → status summary in memory.

    A single default ``_StubUnit`` (labelled "reject", provisional) goes
    through the four IMP-47B u4/u5/u7/u8 helpers in pipeline order, with the
    router replaced by ``_patched_route_ai_fallback``. The assertions pin the
    record produced by gather, the merged ``slot_payload`` bound for
    final.html, untouched frame identity, a clean coverage report (no text
    loss), and an "applied" summary that does not require human review.
    """
    # u4 wiring: swap the router attribute on the step12 module so gather
    # receives the canned PARTIAL_OVERRIDES proposal without touching the
    # master flag / network / cache layers. monkeypatch reverts the attribute
    # when the test ends.
    import src.phase_z2_ai_fallback.step12 as step12_mod

    monkeypatch.setattr(step12_mod, "route_ai_fallback", _patched_route_ai_fallback)

    expected_section_ids = ["MOCK_S1"]
    ai_bullets = ["AI repaired bullet 1", "AI repaired bullet 2"]

    stub = _StubUnit()
    stub_units = [stub]

    # Step 12 gather (u4): the eligible reject unit reaches the patched router.
    repair_records = _run_step12_ai_repair(stub_units)
    assert len(repair_records) == 1
    gathered = repair_records[0]
    assert gathered["route_hint"] == "ai_adaptation_required"
    assert gathered["ai_called"] is True
    assert gathered["skip_reason"] is None
    assert gathered["proposal"]["proposal_kind"] == "partial_overrides"
    assert gathered["source_section_ids"] == expected_section_ids

    # Step 12 apply (u5): PARTIAL_OVERRIDES is merged into the matching zone.
    # The zone's slot_payload is what render_slide consumes to emit final.html
    # (src/phase_z2_pipeline.py:5107), so checking it here shows the reject
    # route reaches the rendered output.
    seeded_payload = {
        "title": "deterministic title",
        "bullets": ["deterministic bullet"],
        "_truncated_count": 0,
    }
    zones_data = [
        {
            "position": "top",
            "template_id": "MOCK_T_reject",
            "slot_payload": seeded_payload,
        }
    ]
    _apply_ai_repair_proposals_to_zones(repair_records, ["top"], zones_data)
    # Re-read the record and the zone from their containers after apply rather
    # than reusing references taken before the call.
    assert repair_records[0]["apply_status"] == "applied:partial_overrides"
    top_zone = zones_data[0]
    merged_payload = top_zone["slot_payload"]

    # The final.html-bound slot_payload carries the AI proposal values.
    assert merged_payload["title"] == "AI repaired title"
    assert merged_payload["bullets"] == ai_bullets

    # Frame identity and pre-existing meta keys survive (no silent shrink).
    assert top_zone["template_id"] == "MOCK_T_reject"
    assert merged_payload["_truncated_count"] == 0

    # frame_template_id on the unit is unchanged (no automatic frame swap).
    assert stub.frame_template_id == "MOCK_T_reject"

    # Step 12 coverage invariant (u7): full coverage, no text loss.
    coverage = _check_post_ai_coverage_invariant(stub_units, repair_records)
    assert coverage["status"] == "ok"
    for ids_key in ("pre_ai_section_ids", "post_ai_section_ids"):
        assert coverage[ids_key] == expected_section_ids
    assert coverage["dropped_section_ids"] == []

    # Step 20 ai_repair_status surfacing (u8): applied without human review.
    summary = _summarize_ai_repair_status(repair_records, coverage)
    assert summary["status"] == "applied"
    counts = summary["counts"]
    assert counts["applied"] == 1
    for zero_bucket in ("error", "unsupported_kind"):
        assert counts[zero_bucket] == 0
    assert summary["coverage_status"] == "ok"
    assert summary.get("human_review_required") is not True
def test_e2e_reject_chain_writes_final_html_with_ai_repaired_slot(monkeypatch, tmp_path):
    """Render the AI-repaired zone and check the ``final.html`` written to disk.

    Runs the u10 chain including ``render_slide``: the post-apply
    ``slot_payload`` goes through the rendering seam the production pipeline
    uses (src/phase_z2_pipeline.py:5107-5111), the HTML is written to
    ``tmp_path / "final.html"``, and the file is read back for the
    assertions. The frame is ``bim_dx_comparison_table``, which the original
    author describes as a real registered partial
    (templates/phase_z2/families/bim_dx_comparison_table.html) that emits
    ``{{ slot_payload.title }}`` verbatim, so an overridden title appears
    literally in the output.
    """
    import src.phase_z2_ai_fallback.step12 as step12_mod

    monkeypatch.setattr(step12_mod, "route_ai_fallback", _patched_route_ai_fallback)
    from src.phase_z2_pipeline import build_layout_css, render_slide

    frame_name = "bim_dx_comparison_table"
    stub = _StubUnit(
        frame_template_id=frame_name,
        zone_position="primary",
        layout_preset="single",
    )

    # Step 12 gather + apply. Slots the proposal does not override
    # (col_a_label, col_b_label, rows[*]) are seeded before apply so the
    # post-render checks can show the u5 merge updates rather than replaces
    # the payload. The canned proposal carries only {title, bullets}; every
    # other slot must reach final.html untouched.
    repair_records = _run_step12_ai_repair([stub])
    seeded_payload = {
        "title": "deterministic frame title",
        "col_a_label": "DETERMINISTIC_COL_A_LABEL",
        "col_b_label": "DETERMINISTIC_COL_B_LABEL",
        "rows": [
            {"label": "DET_ROW_LABEL", "col_a": "DET_ROW_A", "col_b": "DET_ROW_B"},
        ],
    }
    zones_data = [
        {
            "position": "primary",
            "template_id": frame_name,
            "slot_payload": seeded_payload,
        }
    ]
    _apply_ai_repair_proposals_to_zones(repair_records, ["primary"], zones_data)
    assert repair_records[0]["apply_status"] == "applied:partial_overrides"

    # Step 13 render: production seam (src/phase_z2_pipeline.py:5107-5111).
    layout_css = build_layout_css("single", zones_data)
    markup = render_slide(
        "IMP-47B E2E reject smoke", None, zones_data, "single", layout_css
    )
    artifact = tmp_path / "final.html"
    artifact.write_text(markup, encoding="utf-8")

    # The final.html artifact exists on disk and is non-empty.
    assert artifact.is_file()
    assert artifact.stat().st_size > 0
    rendered = artifact.read_text(encoding="utf-8")

    # AI-repaired slot content appears in the rendered HTML.
    assert "AI repaired title" in rendered
    # The pre-apply title was overridden, not left visible alongside it.
    assert "deterministic frame title" not in rendered

    # Non-overridden deterministic slots survive merge → render, so the
    # no-drop rule holds at the render seam and not only in memory.
    surviving_markers = (
        "DETERMINISTIC_COL_A_LABEL",
        "DETERMINISTIC_COL_B_LABEL",
        "DET_ROW_LABEL",
        "DET_ROW_A",
        "DET_ROW_B",
    )
    for marker in surviving_markers:
        assert marker in rendered

    # Frame template id is preserved end-to-end (no automatic frame swap).
    assert 'data-template-id="bim_dx_comparison_table"' in rendered
    assert stub.frame_template_id == "bim_dx_comparison_table"

    # MDX source text fully preserved: coverage invariant + status surfacing.
    coverage = _check_post_ai_coverage_invariant([stub], repair_records)
    assert coverage["status"] == "ok"
    assert coverage["dropped_section_ids"] == []
    summary = _summarize_ai_repair_status(repair_records, coverage)
    assert summary["status"] == "applied"
    assert summary.get("human_review_required") is not True
def test_e2e_reject_chain_no_text_loss_on_multi_section_unit(monkeypatch):
    """Check that a three-section reject unit loses no section id in the chain.

    Every section id must flow through gather, apply, the coverage invariant
    and the ai_repair_status summary without a drop. This locks the "MDX
    source text 100% preserved" guardrail at unit-multiplicity granularity.
    The original author notes that gather copies the id list via
    ``list(...)`` at src/phase_z2_ai_fallback/step12.py:124, so apply
    mutations cannot silently drop it.

    Each stage named above is asserted explicitly: the gather record, the
    apply status, the unit's own id list after apply, the coverage report
    (including its overall status) and the final summary.
    """
    import src.phase_z2_ai_fallback.step12 as step12_mod

    monkeypatch.setattr(step12_mod, "route_ai_fallback", _patched_route_ai_fallback)

    expected_ids = ["MOCK_S1", "MOCK_S2", "MOCK_S3"]
    # Pass a copy so later comparisons against expected_ids would detect an
    # in-place mutation of the unit's list.
    unit = _StubUnit(source_section_ids=list(expected_ids))

    # Gather: exactly one record, carrying all three ids in order.
    records = _run_step12_ai_repair([unit])
    assert len(records) == 1
    assert records[0]["source_section_ids"] == expected_ids

    # Apply: the proposal is merged and neither the record nor the unit loses
    # a section id as a side effect.
    zones = [{
        "position": "top",
        "template_id": "MOCK_T_reject",
        "slot_payload": {"title": "det", "bullets": ["det"]},
    }]
    _apply_ai_repair_proposals_to_zones(records, ["top"], zones)
    assert records[0]["apply_status"] == "applied:partial_overrides"
    assert records[0]["source_section_ids"] == expected_ids
    assert unit.source_section_ids == expected_ids

    # Coverage invariant: identical id lists before/after AI, nothing dropped.
    coverage = _check_post_ai_coverage_invariant([unit], records)
    assert coverage["status"] == "ok"
    assert coverage["pre_ai_section_ids"] == expected_ids
    assert coverage["post_ai_section_ids"] == expected_ids
    assert coverage["dropped_section_ids"] == []

    # Status surfacing: applied, with no human review demanded.
    status = _summarize_ai_repair_status(records, coverage)
    assert status["status"] == "applied"
    assert status.get("human_review_required") is not True