feat(#87): IMP-87 u1~u5 empty_shell honesty gate + BLOCKED exit
EMPTY_SHELL_NO_CONTENT overall enum + 3-marker detection (frame_template_id="__empty__" OR label="empty_shell" OR merge_type="empty_shell") routes empty-placeholder-only slides to BLOCKED CLI exit 1 + red final_status.html, blocking fake PASS reports (feedback_artifact_status_naming). Coverage accounting split: legacy covered_section_ids preserved + new content_rendered_section_ids / empty_shell_section_ids. mdx05 Case B (zero V4 evidence) honestly classified instead of synthesizing fabricated rank-1 reject frames. IMP-30 u6/u7 stale empty-shell PASS assertions inverted (29 tests). IMP-85 smoke parametrize: mdx05 removed from exit-0 list + dedicated BLOCKED exit test added (4 tests). No production behavior change for chain_exhausted Case A; no AI route activation; no mdx-id hardcoding. 53 targeted + 76 adjacent Phase Z tests PASS. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -2936,6 +2936,71 @@ def write_overflow_error(run_dir: Path, overflow: dict) -> Path:
|
|||||||
|
|
||||||
# ─── Debug.json (single slide + zones[]) ───────────────────────
|
# ─── Debug.json (single slide + zones[]) ───────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
def _is_empty_shell_unit(u: CompositionUnit) -> bool:
    """IMP-87 u1 — report whether ``u`` is an IMP-30 u4 empty-shell placeholder.

    A unit counts as an empty shell when ANY one of these attributes carries
    its placeholder marker:

    * ``frame_template_id == "__empty__"``
    * ``label == "empty_shell"``
    * ``merge_type == "empty_shell"``

    ``compute_slide_status`` uses this predicate to keep empty-shell units
    out of the content-rendered coverage axis: such a unit attaches section
    ids to an ``__empty__`` frame for layout purposes, but the slide surface
    carries no MDX content for those sections, so counting it as coverage
    would misreport the actual content state
    (feedback_artifact_status_naming).

    The markers are evaluated independently (logical OR) and in the order
    listed above, so a unit that sets only one of them is still classified
    as an empty shell. Attributes that are absent are read as ``None`` and
    therefore never match.
    """
    # (attribute name, placeholder marker) pairs, checked in this order.
    marker_by_attr = (
        ("frame_template_id", "__empty__"),
        ("label", "empty_shell"),
        ("merge_type", "empty_shell"),
    )
    # ``any`` over a generator stops at the first matching marker, so later
    # attributes are not read once an earlier one has matched.
    return any(
        getattr(u, attr_name, None) == marker
        for attr_name, marker in marker_by_attr
    )
|
||||||
|
|
||||||
|
|
||||||
|
def _final_status_html_class(overall: str) -> str:
    """IMP-87 u3 — translate an ``overall`` enum string into the CSS class
    used by the step20 final_status.html dashboard.

    Resolution order (first match wins):

    1. ``overall`` is exactly ``"EMPTY_SHELL_NO_CONTENT"`` → ``"fail"``.
    2. ``overall`` contains ``"PASS"`` → ``"pass"``.
    3. ``overall`` contains ``"FAIL"`` or ``"REGRESSION"`` → ``"fail"``.
    4. anything else → ``"partial"``.

    The exact-match check must come first: the literal
    ``"EMPTY_SHELL_NO_CONTENT"`` contains none of the substrings tested in
    steps 2-3, so without step 1 it would fall through to ``"partial"``
    instead of the red ``"fail"`` band (Stage 2 axis A5 lock). Steps 2-4 are
    the pre-IMP-87 substring mapping, so every other enum keeps the class it
    had before.
    """
    css_class = "partial"  # default band when no rule below matches
    if overall == "EMPTY_SHELL_NO_CONTENT":
        css_class = "fail"
    elif "PASS" in overall:
        css_class = "pass"
    elif any(token in overall for token in ("FAIL", "REGRESSION")):
        css_class = "fail"
    return css_class
|
||||||
|
|
||||||
|
|
||||||
|
def _is_blocked_overall(overall: str) -> bool:
    """IMP-87 u3 — report whether ``overall`` calls for a BLOCKED CLI exit
    (returncode 1), regardless of the visual_check / full_mdx_coverage axes.

    Only ``"EMPTY_SHELL_NO_CONTENT"`` is blocked at present; every other
    value yields ``False``. The CLI calls this helper ahead of its legacy
    visual-fail branch so that a content-empty placeholder slide — which can
    pass overflow checks simply because it has nothing to overflow — still
    produces an exit signal (Stage 2 axis A4 lock).
    """
    blocked_enum = "EMPTY_SHELL_NO_CONTENT"  # the single blocked value today
    return overall == blocked_enum
|
||||||
|
|
||||||
|
|
||||||
def compute_slide_status(sections: list[MdxSection],
|
def compute_slide_status(sections: list[MdxSection],
|
||||||
units: list[CompositionUnit],
|
units: list[CompositionUnit],
|
||||||
comp_debug: dict,
|
comp_debug: dict,
|
||||||
@@ -2955,6 +3020,13 @@ def compute_slide_status(sections: list[MdxSection],
|
|||||||
u4 empty-shell — needs user/AI adaptation 신호)
|
u4 empty-shell — needs user/AI adaptation 신호)
|
||||||
|
|
||||||
overall enum :
|
overall enum :
|
||||||
|
EMPTY_SHELL_NO_CONTENT — IMP-87 u2 : every selected unit is an
|
||||||
|
IMP-30 u4 empty-shell placeholder
|
||||||
|
(no content-rendered units). Takes
|
||||||
|
precedence over the 4-way ladder below
|
||||||
|
because such a slide can technically
|
||||||
|
pass Selenium overflow checks but
|
||||||
|
carries no MDX content.
|
||||||
PASS — visual OK + full coverage + adapter_needed=0
|
PASS — visual OK + full coverage + adapter_needed=0
|
||||||
RENDERED_WITH_VISUAL_REGRESSION — full coverage 이지만 visual fail
|
RENDERED_WITH_VISUAL_REGRESSION — full coverage 이지만 visual fail
|
||||||
PARTIAL_COVERAGE — 일부 section 필터됨, 렌더된 부분만 visual OK
|
PARTIAL_COVERAGE — 일부 section 필터됨, 렌더된 부분만 visual OK
|
||||||
@@ -2964,13 +3036,43 @@ def compute_slide_status(sections: list[MdxSection],
|
|||||||
Stage 1 Q3 + Codex #10 D4 lock.)
|
Stage 1 Q3 + Codex #10 D4 lock.)
|
||||||
"""
|
"""
|
||||||
aligned_ids = [s.section_id for s in sections]
|
aligned_ids = [s.section_id for s in sections]
|
||||||
covered = set()
|
# IMP-87 u1 — split coverage into legacy display vs honesty axis.
|
||||||
|
# ``covered`` (legacy, preserved for downstream display / IMP-05 / IMP-06
|
||||||
|
# readers) still tracks every section attached to any selected unit.
|
||||||
|
# ``content_covered`` is the new honesty axis: only non-empty-shell units
|
||||||
|
# count as "rendered with content". Sections attached solely to an
|
||||||
|
# empty-shell placeholder (IMP-30 u4 frame_template_id="__empty__")
|
||||||
|
# are routed into ``filtered_section_ids`` so an EMPTY-SHELL-only slide
|
||||||
|
# cannot inherit full_mdx_coverage=True (Case B honesty defect lock,
|
||||||
|
# Stage 1 anchor c53722ad).
|
||||||
|
covered: set = set()
|
||||||
|
content_covered: set = set()
|
||||||
for u in units:
|
for u in units:
|
||||||
covered.update(u.source_section_ids)
|
covered.update(u.source_section_ids)
|
||||||
filtered_ids = sorted(set(aligned_ids) - covered)
|
if not _is_empty_shell_unit(u):
|
||||||
|
content_covered.update(u.source_section_ids)
|
||||||
|
filtered_ids = sorted(set(aligned_ids) - content_covered)
|
||||||
full_coverage = len(filtered_ids) == 0
|
full_coverage = len(filtered_ids) == 0
|
||||||
visual_passed = bool(overflow.get("passed", False))
|
visual_passed = bool(overflow.get("passed", False))
|
||||||
|
|
||||||
|
# IMP-87 u2 — Additive empty/content accounting (used by the overall enum
|
||||||
|
# precedence block below and surfaced on the return dict for downstream
|
||||||
|
# introspection). ``content_rendered_section_ids`` mirrors the new honesty
|
||||||
|
# axis (content_covered) at the surface level so consumers can ask "how
|
||||||
|
# much real MDX content actually rendered" without re-running the helper.
|
||||||
|
# ``empty_shell_section_ids`` exposes the placeholder counterpart for the
|
||||||
|
# same reason. Both pairs are purely additive — no existing field is
|
||||||
|
# removed or repurposed (legacy ``covered_section_ids`` display semantics
|
||||||
|
# locked by u1).
|
||||||
|
empty_shell_units_list = [u for u in units if _is_empty_shell_unit(u)]
|
||||||
|
content_units_list = [u for u in units if not _is_empty_shell_unit(u)]
|
||||||
|
empty_shell_section_ids = sorted({
|
||||||
|
sid
|
||||||
|
for u in empty_shell_units_list
|
||||||
|
for sid in (u.source_section_ids or [])
|
||||||
|
})
|
||||||
|
content_rendered_section_ids = sorted(content_covered)
|
||||||
|
|
||||||
adapter_needed_units = list(adapter_needed_units or [])
|
adapter_needed_units = list(adapter_needed_units or [])
|
||||||
content_truncated = []
|
content_truncated = []
|
||||||
fallback_selections = []
|
fallback_selections = []
|
||||||
@@ -3057,7 +3159,24 @@ def compute_slide_status(sections: list[MdxSection],
|
|||||||
"position": source_position,
|
"position": source_position,
|
||||||
})
|
})
|
||||||
|
|
||||||
if full_coverage and visual_passed:
|
# IMP-87 u2 — EMPTY_SHELL_NO_CONTENT precedence over the legacy 4-way
|
||||||
|
# ladder (Stage 2 axis A3). If the slide has aligned MDX sections but
|
||||||
|
# every selected unit is an IMP-30 u4 empty-shell placeholder (zero
|
||||||
|
# content units selected, at least one empty-shell unit selected), the
|
||||||
|
# visible artifact carries no real content — ``overall`` MUST report
|
||||||
|
# EMPTY_SHELL_NO_CONTENT so the u3 CLI exit / final_status.html styling
|
||||||
|
# can branch on it. The check runs before the visual/coverage ladder
|
||||||
|
# because a content-empty placeholder slide can technically pass Selenium
|
||||||
|
# overflow checks (no content → no overflow); without this precedence the
|
||||||
|
# ladder would mislabel it PASS (Stage 1 mdx05 Case B honesty defect).
|
||||||
|
is_empty_shell_only = (
|
||||||
|
len(aligned_ids) > 0
|
||||||
|
and len(empty_shell_units_list) > 0
|
||||||
|
and len(content_units_list) == 0
|
||||||
|
)
|
||||||
|
if is_empty_shell_only:
|
||||||
|
overall = "EMPTY_SHELL_NO_CONTENT"
|
||||||
|
elif full_coverage and visual_passed:
|
||||||
overall = "PASS"
|
overall = "PASS"
|
||||||
elif full_coverage and not visual_passed:
|
elif full_coverage and not visual_passed:
|
||||||
overall = "RENDERED_WITH_VISUAL_REGRESSION"
|
overall = "RENDERED_WITH_VISUAL_REGRESSION"
|
||||||
@@ -3102,6 +3221,11 @@ def compute_slide_status(sections: list[MdxSection],
|
|||||||
"full_mdx_coverage": full_coverage,
|
"full_mdx_coverage": full_coverage,
|
||||||
"aligned_section_ids": aligned_ids,
|
"aligned_section_ids": aligned_ids,
|
||||||
"covered_section_ids": sorted(covered),
|
"covered_section_ids": sorted(covered),
|
||||||
|
# IMP-87 u2 — additive empty/content accounting (overall enum precedence above).
|
||||||
|
"content_rendered_section_ids": content_rendered_section_ids,
|
||||||
|
"content_rendered_unit_count": len(content_units_list),
|
||||||
|
"empty_shell_section_ids": empty_shell_section_ids,
|
||||||
|
"empty_shell_unit_count": len(empty_shell_units_list),
|
||||||
"filtered_section_ids": filtered_ids,
|
"filtered_section_ids": filtered_ids,
|
||||||
"filtered_section_reasons": filtered_section_reasons,
|
"filtered_section_reasons": filtered_section_reasons,
|
||||||
"selection_path": "fallback_used" if fallback_selections else "rank_1",
|
"selection_path": "fallback_used" if fallback_selections else "rank_1",
|
||||||
@@ -5929,7 +6053,11 @@ def run_phase_z2_mvp1(
|
|||||||
)
|
)
|
||||||
# Step 20 HTML — 최종 판정 시각 보고
|
# Step 20 HTML — 최종 판정 시각 보고
|
||||||
_overall = slide_status.get("overall", "?")
|
_overall = slide_status.get("overall", "?")
|
||||||
_ov_class = "pass" if "PASS" in _overall else "fail" if "FAIL" in _overall or "REGRESSION" in _overall else "partial"
|
# IMP-87 u3 — route the dashboard CSS class through _final_status_html_class
|
||||||
|
# so EMPTY_SHELL_NO_CONTENT renders as "fail" (red) instead of the legacy
|
||||||
|
# substring-default "partial" amber. Other enums keep their pre-IMP-87
|
||||||
|
# colour (Stage 2 axis A5 lock).
|
||||||
|
_ov_class = _final_status_html_class(_overall)
|
||||||
_vfs = slide_status.get("visual_fail_reasons") or []
|
_vfs = slide_status.get("visual_fail_reasons") or []
|
||||||
_vfs_html = (
|
_vfs_html = (
|
||||||
"<ul>" + "".join(f"<li>{v}</li>" for v in _vfs) + "</ul>"
|
"<ul>" + "".join(f"<li>{v}</li>" for v in _vfs) + "</ul>"
|
||||||
@@ -6048,6 +6176,36 @@ def run_phase_z2_mvp1(
|
|||||||
f"(impl_status={nap.get('next_action_implementation_status')})")
|
f"(impl_status={nap.get('next_action_implementation_status')})")
|
||||||
|
|
||||||
# 13. Exit 정책 — visual fail 은 abort, partial coverage 는 abort 안 하지만 PASS 도 아님
|
# 13. Exit 정책 — visual fail 은 abort, partial coverage 는 abort 안 하지만 PASS 도 아님
|
||||||
|
# IMP-87 u3 — BLOCKED exit on EMPTY_SHELL_NO_CONTENT precedes the legacy
|
||||||
|
# visual_fail / partial_coverage branches (Stage 2 axis A4). A slide whose
|
||||||
|
# every selected unit is an IMP-30 u4 empty-shell placeholder carries no
|
||||||
|
# MDX content; without this branch a Case B run could pass Selenium
|
||||||
|
# overflow checks (nothing to overflow) and silently return overall
|
||||||
|
# EMPTY_SHELL_NO_CONTENT without any CLI exit signal, violating
|
||||||
|
# feedback_artifact_status_naming.
|
||||||
|
if _is_blocked_overall(overall):
|
||||||
|
_aligned = slide_status.get("aligned_section_ids") or []
|
||||||
|
_empty_ids = slide_status.get("empty_shell_section_ids") or []
|
||||||
|
_empty_count = slide_status.get("empty_shell_unit_count", 0)
|
||||||
|
_content_count = slide_status.get("content_rendered_unit_count", 0)
|
||||||
|
print(
|
||||||
|
f"\n[Phase Z-2 IMP-87 u3] BLOCKED @ empty_shell_no_content ({overall})",
|
||||||
|
file=sys.stderr,
|
||||||
|
)
|
||||||
|
print(
|
||||||
|
" reason : every selected unit is IMP-30 u4 empty-shell placeholder "
|
||||||
|
"(no MDX content rendered)",
|
||||||
|
file=sys.stderr,
|
||||||
|
)
|
||||||
|
print(f" aligned : {_aligned}", file=sys.stderr)
|
||||||
|
print(f" shell : {_empty_ids}", file=sys.stderr)
|
||||||
|
print(
|
||||||
|
f" units : empty_shell_unit_count={_empty_count} "
|
||||||
|
f"content_rendered_unit_count={_content_count}",
|
||||||
|
file=sys.stderr,
|
||||||
|
)
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
if not slide_status["visual_check_passed"]:
|
if not slide_status["visual_check_passed"]:
|
||||||
err_path = write_overflow_error(run_dir, overflow)
|
err_path = write_overflow_error(run_dir, overflow)
|
||||||
print(f"\n[Phase Z-2 MVP-1.5b] FAIL @ visual_runtime_check ({overall})", file=sys.stderr)
|
print(f"\n[Phase Z-2 MVP-1.5b] FAIL @ visual_runtime_check ({overall})", file=sys.stderr)
|
||||||
|
|||||||
@@ -1012,11 +1012,15 @@ def test_u6_empty_shell_unit_listed_with_empty_identifiers():
|
|||||||
assert entry["selection_path"] == "empty_shell"
|
assert entry["selection_path"] == "empty_shell"
|
||||||
assert entry["fallback_reason"] == "no_v4_rank_1_for_any_section"
|
assert entry["fallback_reason"] == "no_v4_rank_1_for_any_section"
|
||||||
assert entry["v4_rank"] is None
|
assert entry["v4_rank"] is None
|
||||||
# full_mdx_coverage holds because shell.source_section_ids covers every
|
# IMP-87 u4 — honesty defect inversion. The shell.source_section_ids
|
||||||
# aligned section id — u4 deliberately sets this so coverage stays True
|
# still feeds legacy covered_section_ids for display, but the content-
|
||||||
# under the terminal first-render invariant.
|
# rendered axis (u1) excludes empty-shell units, so full_mdx_coverage
|
||||||
assert status["full_mdx_coverage"] is True
|
# MUST flip to False. Overall (u2) MUST elevate to
|
||||||
assert status["overall"] == "PASS"
|
# EMPTY_SHELL_NO_CONTENT before the legacy ladder, otherwise a slide
|
||||||
|
# whose sole rendered unit is __empty__ would be reported as PASS —
|
||||||
|
# the exact Stage 1 mdx05 honesty defect this issue exists to fix.
|
||||||
|
assert status["full_mdx_coverage"] is False
|
||||||
|
assert status["overall"] == "EMPTY_SHELL_NO_CONTENT"
|
||||||
|
|
||||||
|
|
||||||
# ─── u6 case 4 : mixed selection — provisional + normal units coexist ──
|
# ─── u6 case 4 : mixed selection — provisional + normal units coexist ──
|
||||||
@@ -1357,10 +1361,14 @@ def test_u7_e2e_zero_v4_empty_shell_status_surface(u7_patch_selector_deps):
|
|||||||
assert shell_entry["phase_z_status"] == "empty_shell"
|
assert shell_entry["phase_z_status"] == "empty_shell"
|
||||||
assert shell_entry["frame_template_id"] == "__empty__"
|
assert shell_entry["frame_template_id"] == "__empty__"
|
||||||
assert shell_entry["source_section_ids"] == ["S1", "S2"]
|
assert shell_entry["source_section_ids"] == ["S1", "S2"]
|
||||||
# Coverage check — both sections counted as covered by the shell unit
|
# IMP-87 u4 — honesty defect inversion. The shell unit still attaches
|
||||||
# (rendered=True path; PASS enum unchanged by provisional qualifier).
|
# both sections to legacy covered_section_ids (display preserved), but
|
||||||
assert status["full_mdx_coverage"] is True
|
# the content-rendered axis (u1) excludes empty-shell units, so
|
||||||
assert status["overall"] == "PASS"
|
# full_mdx_coverage MUST flip to False. Overall (u2) MUST elevate to
|
||||||
|
# EMPTY_SHELL_NO_CONTENT before the legacy 4-way ladder, so a zero-V4
|
||||||
|
# slide cannot disguise itself as PASS through visual-overflow alone.
|
||||||
|
assert status["full_mdx_coverage"] is False
|
||||||
|
assert status["overall"] == "EMPTY_SHELL_NO_CONTENT"
|
||||||
|
|
||||||
|
|
||||||
# ─── u7 case 3 : e2e normal path unchanged when opt-in flags both on ─────
|
# ─── u7 case 3 : e2e normal path unchanged when opt-in flags both on ─────
|
||||||
|
|||||||
460
tests/test_phase_z2_imp87_empty_shell_honesty.py
Normal file
460
tests/test_phase_z2_imp87_empty_shell_honesty.py
Normal file
@@ -0,0 +1,460 @@
|
|||||||
|
"""IMP-87 empty-shell honesty regression tests — u1+u2+u3 scope.
|
||||||
|
|
||||||
|
Anchors the Stage 1 contract that an empty-shell-only run MUST NOT report
|
||||||
|
full_mdx_coverage=True (u1), MUST surface a distinct ``overall`` enum plus
|
||||||
|
additive empty/content accounting (u2), AND MUST resolve to a BLOCKED CLI
|
||||||
|
exit (returncode 1) with a red ``"fail"`` CSS class on the step20
|
||||||
|
final_status.html dashboard (u3).
|
||||||
|
|
||||||
|
u1 (baseline):
|
||||||
|
1) ``_is_empty_shell_unit`` helper — three independent OR-branches over
|
||||||
|
``frame_template_id == "__empty__"`` / ``label == "empty_shell"`` /
|
||||||
|
``merge_type == "empty_shell"``. Any IMP-30 u4 phase A / phase B / e2e
|
||||||
|
synthesised placeholder is classified consistently.
|
||||||
|
2) ``compute_slide_status`` coverage split — the legacy ``covered``
|
||||||
|
accumulator (display semantics, preserved for IMP-05 / IMP-06 readers)
|
||||||
|
still tracks every section attached to any selected unit, while the new
|
||||||
|
``content_covered`` axis counts only non-empty-shell units. The
|
||||||
|
``filtered_section_ids`` / ``full_mdx_coverage`` axes derive from
|
||||||
|
``content_covered`` so an EMPTY-SHELL-only slide cannot inherit
|
||||||
|
full_mdx_coverage=True (Case B honesty defect lock, Stage 1 anchor
|
||||||
|
c53722ad).
|
||||||
|
|
||||||
|
u2:
|
||||||
|
3) ``overall == "EMPTY_SHELL_NO_CONTENT"`` precedence over the legacy
|
||||||
|
4-way ladder when every selected unit is an empty-shell placeholder
|
||||||
|
(zero content units selected, at least one empty-shell unit selected,
|
||||||
|
at least one aligned section). The precedence runs BEFORE the
|
||||||
|
visual/coverage ladder so a content-empty slide that happens to pass
|
||||||
|
Selenium overflow checks cannot be mislabelled PASS.
|
||||||
|
4) Additive return-dict fields ``content_rendered_section_ids`` /
|
||||||
|
``content_rendered_unit_count`` / ``empty_shell_section_ids`` /
|
||||||
|
``empty_shell_unit_count``. No legacy field is removed or repurposed.
|
||||||
|
|
||||||
|
u3 (added by this revision):
|
||||||
|
5) ``_final_status_html_class`` helper — EMPTY_SHELL_NO_CONTENT resolves
|
||||||
|
to ``"fail"`` (red) so the step20 final_status.html dashboard surfaces
|
||||||
|
the Case B honesty defect in the same colour band as visual failures
|
||||||
|
and regressions, not the legacy ``"partial"`` amber band. Existing
|
||||||
|
PASS / RENDERED_WITH_VISUAL_REGRESSION / PARTIAL_COVERAGE / etc.
|
||||||
|
enums keep their pre-IMP-87 substring-based colour (Stage 2 axis A5).
|
||||||
|
6) ``_is_blocked_overall`` helper — true iff ``overall`` warrants a
|
||||||
|
BLOCKED CLI exit (returncode 1) independent of the visual_check /
|
||||||
|
full_mdx_coverage axes. Currently only EMPTY_SHELL_NO_CONTENT is
|
||||||
|
blocked; every other enum returns False so the legacy CLI ladder
|
||||||
|
(visual_fail → exit 1, partial_coverage → return, PASS → return) is
|
||||||
|
preserved bit-for-bit (Stage 2 axis A4).
|
||||||
|
|
||||||
|
Per feedback_scope_qualified_verification, each test declares which case it
|
||||||
|
covers (A=content only, B=empty-shell only, C=mixed) and whether the
|
||||||
|
assertion is the u1 surface (helper/coverage), the u2 surface (overall enum
|
||||||
|
/ accounting fields), or the u3 surface (CLI exit helper / HTML class).
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from src.phase_z2_composition import CompositionUnit
|
||||||
|
from src.phase_z2_pipeline import (
|
||||||
|
MdxSection,
|
||||||
|
_final_status_html_class,
|
||||||
|
_is_blocked_overall,
|
||||||
|
_is_empty_shell_unit,
|
||||||
|
compute_slide_status,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _mk_section(section_id: str) -> MdxSection:
    """Build a minimal ``MdxSection`` fixture for ``section_id``.

    The numeric part is taken by stripping leading ``"S"`` characters from
    the id (``"S2"`` → ``2``); an id that is empty after stripping maps to
    ``0``. Title and raw content are derived from the id so assertions can
    recognise which section a value came from.
    """
    digits = section_id.lstrip("S")
    section_num = int(digits or "0")
    return MdxSection(
        section_id=section_id,
        section_num=section_num,
        title=f"Section {section_id}",
        raw_content=f"raw {section_id}",
    )
|
||||||
|
|
||||||
|
|
||||||
|
def _mk_content_unit(*, section_ids: list[str], **overrides) -> CompositionUnit:
    """Build a normal (non-empty-shell) ``CompositionUnit`` fixture.

    The defaults describe a rank-1 ``use_as_is`` unit attached to
    ``section_ids``. Any keyword in ``overrides`` replaces the default of
    the same name, which lets a test flip exactly one field (for example a
    single empty-shell marker) while leaving the rest looking like content.
    """
    defaults = {
        "source_section_ids": list(section_ids),  # copy; caller's list stays untouched
        "merge_type": "single",
        "frame_template_id": "MOCK_template_direct_a",
        "frame_id": "MOCK_frame_001",
        "frame_number": 1,
        "confidence": 0.9,
        "label": "use_as_is",
        "phase_z_status": "matched_zone",
        "raw_content": "alpha",
        "title": "MOCK content",
        "v4_rank": 1,
        "selection_path": "rank_1",
        "fallback_reason": None,
        "score": 1.0,
        "provisional": False,
    }
    # Later mapping wins, so overrides take precedence over the defaults.
    fields = {**defaults, **overrides}
    return CompositionUnit(**fields)
|
||||||
|
|
||||||
|
|
||||||
|
def _mk_empty_shell_unit(*, section_ids: list[str]) -> CompositionUnit:
    """Build an empty-shell ``CompositionUnit`` fixture mirroring the
    IMP-30 u4 phase B empty-shell synthesis.

    All three marker fields (``merge_type``, ``frame_template_id``,
    ``label``) are set, so ``_is_empty_shell_unit`` would return True through
    any of its branches. The per-marker tests use ``_mk_content_unit`` with a
    single override instead, to exercise each branch in isolation.
    """
    joined_raw = "\n\n".join(f"raw {sid}" for sid in section_ids)
    joined_title = " / ".join(f"Section {sid}" for sid in section_ids)
    return CompositionUnit(
        source_section_ids=list(section_ids),
        merge_type="empty_shell",
        frame_template_id="__empty__",
        frame_id="__empty__",
        frame_number=0,
        confidence=0.0,
        label="empty_shell",
        phase_z_status="empty_shell",
        raw_content=joined_raw,
        title=joined_title,
        v4_rank=None,
        selection_path="empty_shell",
        fallback_reason="no_v4_rank_1_for_any_section",
        score=0.0,
        provisional=True,
    )
|
||||||
|
|
||||||
|
|
||||||
|
# ════════════════════════════════════════════════════════════════════════
|
||||||
|
# Helper unit tests — _is_empty_shell_unit detection (3-marker OR)
|
||||||
|
# ════════════════════════════════════════════════════════════════════════
|
||||||
|
|
||||||
|
|
||||||
|
def test_helper_detects_unit_via_frame_template_id_marker_only():
    """The classifier fires on ``frame_template_id="__empty__"`` alone.

    ``label`` and ``merge_type`` keep their normal content-unit defaults, so
    this guards a code path that sets only the frame_template_id field.
    """
    unit = _mk_content_unit(section_ids=["S1"], frame_template_id="__empty__")
    verdict = _is_empty_shell_unit(unit)
    assert verdict is True
|
||||||
|
|
||||||
|
|
||||||
|
def test_helper_detects_unit_via_label_marker_only():
    """The classifier fires on ``label="empty_shell"`` alone."""
    unit = _mk_content_unit(section_ids=["S1"], label="empty_shell")
    verdict = _is_empty_shell_unit(unit)
    assert verdict is True
|
||||||
|
|
||||||
|
|
||||||
|
def test_helper_detects_unit_via_merge_type_marker_only():
    """The classifier fires on ``merge_type="empty_shell"`` alone."""
    unit = _mk_content_unit(section_ids=["S1"], merge_type="empty_shell")
    verdict = _is_empty_shell_unit(unit)
    assert verdict is True
|
||||||
|
|
||||||
|
|
||||||
|
def test_helper_rejects_normal_content_unit():
    """A default rank-1 ``use_as_is`` unit is NOT classified as an empty shell."""
    unit = _mk_content_unit(section_ids=["S1"])
    verdict = _is_empty_shell_unit(unit)
    assert verdict is False
|
||||||
|
|
||||||
|
|
||||||
|
# ════════════════════════════════════════════════════════════════════════
|
||||||
|
# Case B — empty-shell only (mdx05 Stage 1 reproduction, u1 surface axes)
|
||||||
|
# ════════════════════════════════════════════════════════════════════════
|
||||||
|
|
||||||
|
|
||||||
|
def test_case_b_empty_shell_only_breaks_full_mdx_coverage():
    """Case B (empty-shell only), u1 surface — Stage 1 mdx05 reproduction.

    One empty-shell unit is attached to every aligned section. Because the
    content-covered axis excludes empty-shell units, ``full_mdx_coverage``
    must be False and every aligned section must appear in
    ``filtered_section_ids``. The legacy ``covered_section_ids`` field still
    lists the shell-attached sections (display semantics preserved for
    IMP-05 / IMP-06 readers).
    """
    all_ids = ["S1", "S2", "S3"]
    sections = [_mk_section(sid) for sid in all_ids]
    units = [_mk_empty_shell_unit(section_ids=["S1", "S2", "S3"])]
    comp_debug = {"candidates_summary": []}
    overflow_pass = {"passed": True, "fail_reasons": []}

    status = compute_slide_status(
        sections,
        units,
        comp_debug,
        overflow_pass,
        adapter_needed_units=None,
        debug_zones=None,
    )

    assert status["full_mdx_coverage"] is False
    assert status["filtered_section_ids"] == ["S1", "S2", "S3"]
    # The legacy covered axis is unchanged by IMP-87 u1; only the honesty
    # axis (filtered / full coverage) is redefined.
    assert status["covered_section_ids"] == ["S1", "S2", "S3"]
|
||||||
|
|
||||||
|
|
||||||
|
# ════════════════════════════════════════════════════════════════════════
|
||||||
|
# Case C — mixed empty + content units (partial coverage realism)
|
||||||
|
# ════════════════════════════════════════════════════════════════════════
|
||||||
|
|
||||||
|
|
||||||
|
def test_case_c_mixed_empty_and_content_filters_shell_sections_only():
    """Case C (mixed), u1 surface — content unit on S1, empty shell on S2.

    Only S2 may surface as filtered, because the shell does not count as
    content coverage. The legacy ``covered_section_ids`` field still lists
    both sections (display semantics unchanged by u1).
    """
    sections = [_mk_section(sid) for sid in ("S1", "S2")]
    content_unit = _mk_content_unit(section_ids=["S1"])
    shell_unit = _mk_empty_shell_unit(section_ids=["S2"])
    comp_debug = {"candidates_summary": []}
    overflow_pass = {"passed": True, "fail_reasons": []}

    status = compute_slide_status(
        sections,
        [content_unit, shell_unit],
        comp_debug,
        overflow_pass,
        adapter_needed_units=None,
        debug_zones=None,
    )

    assert status["full_mdx_coverage"] is False
    assert status["filtered_section_ids"] == ["S2"]
    assert status["covered_section_ids"] == ["S1", "S2"]
|
||||||
|
|
||||||
|
|
||||||
|
# ════════════════════════════════════════════════════════════════════════
|
||||||
|
# Case A — normal content-only PASS preserved
|
||||||
|
# ════════════════════════════════════════════════════════════════════════
|
||||||
|
|
||||||
|
|
||||||
|
def test_case_a_normal_content_only_preserves_full_coverage():
    """Case A (content only), u1 surface — IMP-05 / IMP-30 regression guard.

    When every selected unit is a non-empty-shell content unit, the slide
    must keep reporting ``full_mdx_coverage`` True with an empty
    ``filtered_section_ids`` — no behavioural change versus the pre-IMP-87
    baseline.
    """
    sections = [_mk_section(sid) for sid in ("S1", "S2")]
    first_unit = _mk_content_unit(section_ids=["S1"])
    second_unit = _mk_content_unit(section_ids=["S2"], frame_id="MOCK_frame_002")
    comp_debug = {"candidates_summary": []}
    overflow_pass = {"passed": True, "fail_reasons": []}

    status = compute_slide_status(
        sections,
        [first_unit, second_unit],
        comp_debug,
        overflow_pass,
        adapter_needed_units=None,
        debug_zones=None,
    )

    assert status["full_mdx_coverage"] is True
    assert status["filtered_section_ids"] == []
    assert status["covered_section_ids"] == ["S1", "S2"]
|
||||||
|
|
||||||
|
|
||||||
|
# ════════════════════════════════════════════════════════════════════════
|
||||||
|
# IMP-87 u2 — EMPTY_SHELL_NO_CONTENT overall + additive accounting fields
|
||||||
|
# ════════════════════════════════════════════════════════════════════════
|
||||||
|
#
|
||||||
|
# These tests assert the u2 surface ONLY:
|
||||||
|
# - ``overall`` enum precedence (EMPTY_SHELL_NO_CONTENT before the 4-way
|
||||||
|
# ladder) for Case B; legacy ladder preserved for Cases A and C.
|
||||||
|
# - Additive return-dict fields populate correctly across the three cases.
|
||||||
|
# CLI exit codes and final_status.html styling are u3 scope and remain
|
||||||
|
# unasserted here.
|
||||||
|
|
||||||
|
|
||||||
|
def test_case_b_u2_overall_is_empty_shell_no_content_even_when_visual_passes():
    """Case B (empty-shell only), u2 surface — Stage 1 mdx05 reproduction.

    The overflow result reports ``passed: True`` (a content-empty slide has
    nothing to overflow). The empty-shell precedence check must still win
    over the visual/coverage ladder and set ``overall`` to
    EMPTY_SHELL_NO_CONTENT rather than PASS.
    """
    sections = [_mk_section(sid) for sid in ("S1", "S2", "S3")]
    units = [_mk_empty_shell_unit(section_ids=["S1", "S2", "S3"])]
    comp_debug = {"candidates_summary": []}
    overflow_pass = {"passed": True, "fail_reasons": []}

    status = compute_slide_status(
        sections,
        units,
        comp_debug,
        overflow_pass,
        adapter_needed_units=None,
        debug_zones=None,
    )

    assert status["overall"] == "EMPTY_SHELL_NO_CONTENT"
|
||||||
|
|
||||||
|
|
||||||
|
def test_case_b_u2_overall_remains_empty_shell_no_content_when_visual_fails():
    """Case B: the empty-shell precedence does not depend on the visual result.

    A failing overflow check must not turn EMPTY_SHELL_NO_CONTENT into one
    of the ladder enums; the slide carries no MDX content to rescue, so the
    empty-shell signal dominates regardless of overflow status.
    """
    section_ids = ["S1", "S2"]
    aligned_sections = [_mk_section(sid) for sid in section_ids]
    placeholder = _mk_empty_shell_unit(section_ids=list(section_ids))
    failing_overflow = {"passed": False, "fail_reasons": ["mock_overflow"]}

    result = compute_slide_status(
        aligned_sections,
        [placeholder],
        {"candidates_summary": []},
        failing_overflow,
        adapter_needed_units=None,
        debug_zones=None,
    )

    assert result["overall"] == "EMPTY_SHELL_NO_CONTENT"
|
||||||
|
|
||||||
|
|
||||||
|
def test_case_b_u2_accounting_fields_populate_for_empty_shell_only():
    """Case B: the additive accounting fields describe an all-shell slide.

    Every aligned section is reported under ``empty_shell_section_ids``,
    none under ``content_rendered_section_ids``, and the unit counts mirror
    the partition (one shell unit, zero content units).
    """
    aligned_sections = [_mk_section(sid) for sid in ("S1", "S2", "S3")]
    placeholder = _mk_empty_shell_unit(section_ids=["S1", "S2", "S3"])

    result = compute_slide_status(
        aligned_sections,
        [placeholder],
        {"candidates_summary": []},
        {"passed": True, "fail_reasons": []},
        adapter_needed_units=None,
        debug_zones=None,
    )

    # Checked in a fixed order so the first mismatch reported is stable.
    expected_fields = {
        "empty_shell_section_ids": ["S1", "S2", "S3"],
        "content_rendered_section_ids": [],
        "empty_shell_unit_count": 1,
        "content_rendered_unit_count": 0,
    }
    for field, want in expected_fields.items():
        assert result[field] == want
|
||||||
|
|
||||||
|
|
||||||
|
def test_case_a_u2_overall_pass_preserved_for_content_only():
    """Case A (content-only slide): the legacy ladder still reports PASS.

    With zero empty-shell units the u2 precedence must not fire, so a
    passing visual check plus full coverage yields PASS. Regression guard
    against the precedence check over-firing.
    """
    aligned_sections = [_mk_section("S1"), _mk_section("S2")]
    first_unit = _mk_content_unit(section_ids=["S1"])
    second_unit = _mk_content_unit(section_ids=["S2"], frame_id="MOCK_frame_002")

    result = compute_slide_status(
        aligned_sections,
        [first_unit, second_unit],
        {"candidates_summary": []},
        {"passed": True, "fail_reasons": []},
        adapter_needed_units=None,
        debug_zones=None,
    )

    # Checked in a fixed order so the first mismatch reported is stable.
    expected_fields = {
        "overall": "PASS",
        "empty_shell_section_ids": [],
        "content_rendered_section_ids": ["S1", "S2"],
        "empty_shell_unit_count": 0,
        "content_rendered_unit_count": 2,
    }
    for field, want in expected_fields.items():
        assert result[field] == want
|
||||||
|
|
||||||
|
|
||||||
|
def test_case_c_u2_mixed_falls_to_legacy_partial_coverage_ladder():
    """Case C (mixed content + empty shell): the legacy ladder decides.

    One content unit exists, so the u2 precedence must not fire. Coverage
    is not full while the visual check passes, which the ladder reports as
    PARTIAL_COVERAGE. The accounting fields split the units cleanly
    (S1 = content, S2 = shell).
    """
    aligned_sections = [_mk_section("S1"), _mk_section("S2")]
    content_unit = _mk_content_unit(section_ids=["S1"])
    shell_unit = _mk_empty_shell_unit(section_ids=["S2"])

    result = compute_slide_status(
        aligned_sections,
        [content_unit, shell_unit],
        {"candidates_summary": []},
        {"passed": True, "fail_reasons": []},
        adapter_needed_units=None,
        debug_zones=None,
    )

    # Checked in a fixed order so the first mismatch reported is stable.
    expected_fields = {
        "overall": "PARTIAL_COVERAGE",
        "empty_shell_section_ids": ["S2"],
        "content_rendered_section_ids": ["S1"],
        "empty_shell_unit_count": 1,
        "content_rendered_unit_count": 1,
    }
    for field, want in expected_fields.items():
        assert result[field] == want
|
||||||
|
|
||||||
|
|
||||||
|
# ════════════════════════════════════════════════════════════════════════
|
||||||
|
# IMP-87 u3 — _final_status_html_class CSS class mapping
|
||||||
|
# ════════════════════════════════════════════════════════════════════════
|
||||||
|
#
|
||||||
|
# These tests assert the u3 dashboard CSS-class surface ONLY. CLI exit
|
||||||
|
# decisions are covered by the _is_blocked_overall tests further down. End-
|
||||||
|
# to-end pipeline integration (full run → BLOCKED returncode, red HTML in
|
||||||
|
# the actual artifact file) is intentionally left to the smoke layer in
|
||||||
|
# u5; here the helper-level guarantee is enough because the call site
|
||||||
|
# (``_ov_class = _final_status_html_class(_overall)``) is a direct
|
||||||
|
# substitution of the previous inline expression.
|
||||||
|
|
||||||
|
|
||||||
|
def test_u3_html_class_empty_shell_no_content_is_fail_red():
    """Case B: EMPTY_SHELL_NO_CONTENT maps to the red ``"fail"`` class.

    The literal contains none of ``"PASS"`` / ``"FAIL"`` / ``"REGRESSION"``,
    so without an explicit branch the helper would fall back to the amber
    ``"partial"`` class, which is the mislabel u3 corrects (Stage 2 axis A5
    lock).
    """
    css_class = _final_status_html_class("EMPTY_SHELL_NO_CONTENT")
    assert css_class == "fail"
|
||||||
|
|
||||||
|
|
||||||
|
def test_u3_html_class_pass_preserves_legacy_substring_mapping():
    """Case A: ``"PASS"`` keeps mapping to the green ``"pass"`` class.

    Guards the legacy substring rule so existing dashboards keep the green
    band for healthy runs after the u3 explicit branch was added.
    """
    css_class = _final_status_html_class("PASS")
    assert css_class == "pass"
|
||||||
|
|
||||||
|
|
||||||
|
def test_u3_html_class_regression_and_fail_substrings_preserved():
    """The visual-regression enums keep mapping to the red ``"fail"`` class.

    Guards the legacy ``"FAIL"`` / ``"REGRESSION"`` substring rule, which
    is kept in the helper's fallback branch, so dashboards stay red for
    overflow / regression failures.
    """
    regression_enums = (
        "RENDERED_WITH_VISUAL_REGRESSION",
        "PARTIAL_COVERAGE_WITH_VISUAL_REGRESSION",
    )
    for overall in regression_enums:
        assert _final_status_html_class(overall) == "fail"
|
||||||
|
|
||||||
|
|
||||||
|
def test_u3_html_class_partial_coverage_remains_amber_partial():
    """``"PARTIAL_COVERAGE"`` stays in the amber ``"partial"`` class.

    Unlike Case B it carries some rendered content, so it must not be
    pulled into the red band by the EMPTY_SHELL branch widening.
    """
    css_class = _final_status_html_class("PARTIAL_COVERAGE")
    assert css_class == "partial"
|
||||||
|
|
||||||
|
|
||||||
|
def test_u3_html_class_unknown_enum_falls_back_to_partial():
    """Unrecognised enum strings keep the legacy amber ``"partial"`` default.

    Covers the ``"?"`` sentinel produced by the call site's
    ``slide_status.get("overall", "?")`` fallback as well as an enum the
    helper has never heard of, so adding a new enum cannot silently shift
    colour bands.
    """
    for unrecognised in ("?", "UNKNOWN_FUTURE_ENUM"):
        assert _final_status_html_class(unrecognised) == "partial"
|
||||||
|
|
||||||
|
|
||||||
|
# ════════════════════════════════════════════════════════════════════════
|
||||||
|
# IMP-87 u3 — _is_blocked_overall CLI exit gating
|
||||||
|
# ════════════════════════════════════════════════════════════════════════
|
||||||
|
|
||||||
|
|
||||||
|
def test_u3_blocked_overall_true_for_empty_shell_no_content():
    """Case B: EMPTY_SHELL_NO_CONTENT is flagged for the BLOCKED CLI exit.

    The CLI gates ``sys.exit(1)`` on this helper before the legacy
    visual_fail / partial_coverage branches, so a content-empty slide that
    passes the Selenium overflow checks cannot return without an exit
    signal (Stage 2 axis A4 lock, feedback_artifact_status_naming
    guardrail).
    """
    blocked = _is_blocked_overall("EMPTY_SHELL_NO_CONTENT")
    assert blocked is True
|
||||||
|
|
||||||
|
|
||||||
|
def test_u3_blocked_overall_false_for_pass_and_legacy_failure_enums():
    """Case A: no pre-IMP-87 enum triggers the BLOCKED exit.

    Keeps the legacy CLI ladder intact (visual_fail -> exit 1,
    partial_coverage -> return without exit, PASS -> return without exit)
    and guards against the BLOCKED branch widening beyond the Case B
    honesty defect.
    """
    legacy_enums = (
        "PASS",
        "RENDERED_WITH_VISUAL_REGRESSION",
        "PARTIAL_COVERAGE",
        "PARTIAL_COVERAGE_WITH_VISUAL_REGRESSION",
    )
    for overall in legacy_enums:
        blocked = _is_blocked_overall(overall)
        assert blocked is False, (
            f"{overall} must NOT trigger the IMP-87 u3 BLOCKED exit; only "
            "EMPTY_SHELL_NO_CONTENT is gated."
        )
|
||||||
|
|
||||||
|
|
||||||
|
def test_u3_blocked_overall_false_for_defensive_unknown_enum():
    """Unknown / sentinel enum strings stay out of the BLOCKED branch.

    A future enum must not enable exit-1 behaviour without an explicit
    opt-in in the helper (Stage 2 axis A4 narrow allow-list lock).
    """
    for unrecognised in ("?", "UNKNOWN_FUTURE_ENUM"):
        assert _is_blocked_overall(unrecognised) is False
|
||||||
@@ -14,13 +14,29 @@ invariant + runtime VP gate end-to-end against real MDX inputs:
|
|||||||
*downstream* of that routing (e.g. layout_css zone aggregation
|
*downstream* of that routing (e.g. layout_css zone aggregation
|
||||||
when all live zones are adapter_needed) is a separate axis and
|
when all live zones are adapter_needed) is a separate axis and
|
||||||
out of scope for this issue (see follow_up_issue_candidates).
|
out of scope for this issue (see follow_up_issue_candidates).
|
||||||
* mdx05 — non-VP rank-1 path stays clean (exit 0).
|
* mdx05 — IMP-#87 u5 inversion. mdx05 has ZERO V4 evidence for any
|
||||||
|
section (``judgments_full32 = 0``, Case B per IMP-#87 Stage 1),
|
||||||
|
so the composition planner emits an IMP-#30 u4 EMPTY-SHELL
|
||||||
|
placeholder for the whole slide. Before IMP-#87 the pipeline
|
||||||
|
reported ``overall=PASS`` + ``full_mdx_coverage=True`` for this
|
||||||
|
state — the honesty defect this issue fixes. After IMP-#87 u2/u3
|
||||||
|
the same run elevates ``overall`` to
|
||||||
|
``EMPTY_SHELL_NO_CONTENT`` and the CLI exits 1 (BLOCKED). The old
|
||||||
|
exit-0 mdx05 smoke is therefore stale; this module now (a) keeps
|
||||||
|
mdx03 in the exit-0 non-VP parametrization, (b) adds a dedicated
|
||||||
|
mdx05 blocked-exit assertion that verifies the new
|
||||||
|
``EMPTY_SHELL_NO_CONTENT`` status surface, and (c) preserves the
|
||||||
|
IMP-#85 crash-marker guard on the mdx05 path so future
|
||||||
|
regressions cannot re-introduce the original uncaught
|
||||||
|
``BuilderMissingError`` propagation under cover of the blocked
|
||||||
|
exit.
|
||||||
|
|
||||||
Each subprocess gets a unique run_id so the runs do not collide on
|
Each subprocess gets a unique run_id so the runs do not collide on
|
||||||
disk when pytest is invoked concurrently or with -x retry.
|
disk when pytest is invoked concurrently or with -x retry.
|
||||||
"""
|
"""
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
import subprocess
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
import uuid
|
import uuid
|
||||||
@@ -30,6 +46,7 @@ import pytest
|
|||||||
|
|
||||||
REPO_ROOT = Path(__file__).resolve().parents[1]
|
REPO_ROOT = Path(__file__).resolve().parents[1]
|
||||||
SAMPLES_DIR = REPO_ROOT / "samples" / "mdx_batch"
|
SAMPLES_DIR = REPO_ROOT / "samples" / "mdx_batch"
|
||||||
|
RUNS_DIR = REPO_ROOT / "data" / "runs"
|
||||||
|
|
||||||
# Original IMP-#85 crash signature (issue body verbatim). u1 converted
|
# Original IMP-#85 crash signature (issue body verbatim). u1 converted
|
||||||
# the uncaught ``ValueError`` raised from the mapper's missing-builder
|
# the uncaught ``ValueError`` raised from the mapper's missing-builder
|
||||||
@@ -65,15 +82,22 @@ def _unique_run_id(prefix: str) -> str:
|
|||||||
"mdx_name,prefix",
|
"mdx_name,prefix",
|
||||||
[
|
[
|
||||||
("03.mdx", "mdx03"),
|
("03.mdx", "mdx03"),
|
||||||
("05.mdx", "mdx05"),
|
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
def test_non_vp_smoke_runs_clean(mdx_name: str, prefix: str) -> None:
|
def test_non_vp_smoke_runs_clean(mdx_name: str, prefix: str) -> None:
|
||||||
"""mdx03 / mdx05 hit non-VP rank-1 frames; the pipeline runs to exit 0.
|
"""mdx03 hits non-VP rank-1 frames; the pipeline runs to exit 0.
|
||||||
|
|
||||||
Non-VP rank-1 selection is the normal Phase Z path and the
|
Non-VP rank-1 selection is the normal Phase Z path and the
|
||||||
primary regression guard that u1-u6 do not perturb mapper /
|
primary regression guard that IMP-#85 u1-u6 do not perturb
|
||||||
pipeline behaviour for non-VP routes.
|
mapper / pipeline behaviour for non-VP routes.
|
||||||
|
|
||||||
|
IMP-#87 u5 — mdx05 was removed from this parametrization because
|
||||||
|
its V4 evidence is empty for every aligned section (Case B,
|
||||||
|
Stage 1 lock). The IMP-#87 u2 ``EMPTY_SHELL_NO_CONTENT`` enum
|
||||||
|
+ u3 BLOCKED CLI exit make the post-IMP-#87 mdx05 run exit 1,
|
||||||
|
not 0, so an exit-0 parametrization would now be stale. The
|
||||||
|
dedicated mdx05 blocked-exit coverage lives in
|
||||||
|
``test_mdx05_blocked_exit_empty_shell_no_content`` below.
|
||||||
"""
|
"""
|
||||||
cp = _run_pipeline(mdx_name, _unique_run_id(prefix))
|
cp = _run_pipeline(mdx_name, _unique_run_id(prefix))
|
||||||
assert cp.returncode == 0, (
|
assert cp.returncode == 0, (
|
||||||
@@ -83,6 +107,69 @@ def test_non_vp_smoke_runs_clean(mdx_name: str, prefix: str) -> None:
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_mdx05_blocked_exit_empty_shell_no_content() -> None:
    """mdx05 must exit 1 (BLOCKED) with ``overall=EMPTY_SHELL_NO_CONTENT``.

    IMP-#87 u5: mdx05 is the canonical Case B fixture (zero V4 evidence
    for any aligned section per Stage 1; ``judgments_full32 = 0`` in
    step05). Before IMP-#87 the pipeline reported ``overall=PASS`` and
    ``full_mdx_coverage=True`` for it, because the only rendered unit was
    an IMP-#30 u4 EMPTY-SHELL placeholder
    (``frame_template_id="__empty__"``) that trivially passes the Selenium
    overflow check.

    Contract pinned on the real mdx05 pipeline run:

    * subprocess returncode == 1 (BLOCKED, u3 axis A4).
    * the IMP-#85 crash marker (``PAYLOAD_BUILDERS has no such entry``)
      is absent from stdout and stderr, so the crash guard still covers
      the mdx05 path even though the run no longer exits 0.
    * ``step20_slide_status.json`` has ``overall`` ==
      ``"EMPTY_SHELL_NO_CONTENT"`` (u2 axis A3 precedence over the legacy
      4-way ladder).
    * ``step20_slide_status.json`` has ``full_mdx_coverage`` False (u1
      axis A2 content-rendered coverage split).
    """
    run_id = _unique_run_id("mdx05")
    proc = _run_pipeline("05.mdx", run_id)

    # Shared diagnostic suffix for every failure message that shows output.
    output_tails = (
        f"--- stderr tail ---\n{proc.stderr[-1500:]}\n"
        f"--- stdout tail ---\n{proc.stdout[-1500:]}"
    )

    assert proc.returncode == 1, (
        f"mdx05 expected BLOCKED exit 1, got {proc.returncode}\n" + output_tails
    )

    all_output = proc.stdout + proc.stderr
    assert IMP85_OLD_CRASH_MARKER not in all_output, (
        "IMP-#85 original crash signature regressed on mdx05 path:\n"
        + output_tails
    )

    status_path = RUNS_DIR.joinpath(
        run_id, "phase_z2", "steps", "step20_slide_status.json"
    )
    assert status_path.is_file(), (
        f"mdx05 step20_slide_status.json not found at {status_path}\n"
        + output_tails
    )

    payload = json.loads(status_path.read_text(encoding="utf-8"))
    # The status fields are read from the "data" key; a missing or empty
    # value is treated as an empty mapping so the asserts below fail cleanly.
    data = payload.get("data") or {}

    overall = data.get("overall")
    assert overall == "EMPTY_SHELL_NO_CONTENT", (
        f"mdx05 overall expected EMPTY_SHELL_NO_CONTENT, got {overall!r}"
    )

    full_coverage = data.get("full_mdx_coverage")
    assert full_coverage is False, (
        f"mdx05 full_mdx_coverage expected False, got {full_coverage!r}"
    )
|
||||||
|
|
||||||
|
|
||||||
def test_mdx04_no_longer_emits_imp85_crash_signature() -> None:
|
def test_mdx04_no_longer_emits_imp85_crash_signature() -> None:
|
||||||
"""mdx04 must no longer surface the IMP-#85 uncaught crash marker.
|
"""mdx04 must no longer surface the IMP-#85 uncaught crash marker.
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user