feat(#87): IMP-87 u1~u5 empty_shell honesty gate + BLOCKED exit
EMPTY_SHELL_NO_CONTENT overall enum + 3-marker detection (frame_template_id="__empty__" OR label="empty_shell" OR merge_type="empty_shell") routes empty-placeholder-only slides to BLOCKED CLI exit 1 + red final_status.html, blocking fake PASS reports (feedback_artifact_status_naming).

Coverage accounting split: legacy covered_section_ids preserved + new content_rendered_section_ids / empty_shell_section_ids.

mdx05 Case B (zero V4 evidence) honestly classified instead of synthesizing fabricated rank-1 reject frames.

IMP-30 u6/u7 stale empty-shell PASS assertions inverted (29 tests). IMP-85 smoke parametrize: mdx05 removed from exit-0 list + dedicated BLOCKED exit test added (4 tests).

No production behavior change for chain_exhausted Case A; no AI route activation; no mdx-id hardcoding. 53 targeted + 76 adjacent Phase Z tests PASS.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -2936,6 +2936,71 @@ def write_overflow_error(run_dir: Path, overflow: dict) -> Path:
|
||||
|
||||
# ─── Debug.json (single slide + zones[]) ───────────────────────
|
||||
|
||||
|
||||
def _is_empty_shell_unit(u: CompositionUnit) -> bool:
|
||||
"""IMP-87 u1 — true when a CompositionUnit is the IMP-30 u4 empty-shell
|
||||
placeholder (frame_template_id="__empty__" / label="empty_shell" /
|
||||
merge_type="empty_shell").
|
||||
|
||||
Used by compute_slide_status to redefine `full_mdx_coverage` over
|
||||
rendered content units only: an empty-shell unit attaches the aligned
|
||||
section_ids to a ``__empty__`` frame for layout purposes, but the slide
|
||||
surface carries no MDX content for those sections. Counting it as
|
||||
coverage would violate feedback_artifact_status_naming (overall /
|
||||
coverage must reflect actual content state, not pipeline completion).
|
||||
|
||||
The three markers are checked as independent OR-branches so a
|
||||
CompositionUnit synthesised by any of the IMP-30 u4 entry points
|
||||
(phase A / phase B / e2e) is classified consistently.
|
||||
"""
|
||||
if getattr(u, "frame_template_id", None) == "__empty__":
|
||||
return True
|
||||
if getattr(u, "label", None) == "empty_shell":
|
||||
return True
|
||||
if getattr(u, "merge_type", None) == "empty_shell":
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def _final_status_html_class(overall: str) -> str:
|
||||
"""IMP-87 u3 — map ``overall`` enum string → CSS class for the step20
|
||||
final_status.html dashboard.
|
||||
|
||||
EMPTY_SHELL_NO_CONTENT MUST resolve to ``"fail"`` (red) so the Case B
|
||||
honesty defect (Stage 1 mdx05) surfaces in the same colour band as visual
|
||||
failures and regressions, not the legacy ``"partial"`` amber band. The
|
||||
explicit check runs BEFORE the legacy substring-based mapping because
|
||||
the literal ``"EMPTY_SHELL_NO_CONTENT"`` contains neither ``"PASS"`` nor
|
||||
``"FAIL"`` / ``"REGRESSION"`` and would otherwise default to
|
||||
``"partial"`` (Stage 2 axis A5 lock).
|
||||
|
||||
All other enums preserve pre-IMP-87 substring semantics so the legacy
|
||||
PASS / RENDERED_WITH_VISUAL_REGRESSION / PARTIAL_COVERAGE / etc. paths
|
||||
keep the colour they had before u3.
|
||||
"""
|
||||
if overall == "EMPTY_SHELL_NO_CONTENT":
|
||||
return "fail"
|
||||
if "PASS" in overall:
|
||||
return "pass"
|
||||
if "FAIL" in overall or "REGRESSION" in overall:
|
||||
return "fail"
|
||||
return "partial"
|
||||
|
||||
|
||||
def _is_blocked_overall(overall: str) -> bool:
|
||||
"""IMP-87 u3 — true iff ``overall`` warrants a BLOCKED CLI exit
|
||||
(returncode 1) independent of the visual_check / full_mdx_coverage axes.
|
||||
|
||||
Currently the single blocked enum is EMPTY_SHELL_NO_CONTENT (Stage 1
|
||||
mdx05 Case B). The CLI consults this helper BEFORE the legacy
|
||||
visual_fail / partial_coverage branches so a content-empty placeholder
|
||||
slide that happens to pass Selenium overflow checks (no content → no
|
||||
overflow) cannot silently return a content-empty artifact without an
|
||||
exit signal (Stage 2 axis A4 lock).
|
||||
"""
|
||||
return overall == "EMPTY_SHELL_NO_CONTENT"
|
||||
|
||||
|
||||
def compute_slide_status(sections: list[MdxSection],
|
||||
units: list[CompositionUnit],
|
||||
comp_debug: dict,
|
||||
@@ -2955,6 +3020,13 @@ def compute_slide_status(sections: list[MdxSection],
|
||||
u4 empty-shell — needs user/AI adaptation 신호)
|
||||
|
||||
overall enum :
|
||||
EMPTY_SHELL_NO_CONTENT — IMP-87 u2 : every selected unit is an
|
||||
IMP-30 u4 empty-shell placeholder
|
||||
(no content-rendered units). Takes
|
||||
precedence over the 4-way ladder below
|
||||
because such a slide can technically
|
||||
pass Selenium overflow checks but
|
||||
carries no MDX content.
|
||||
PASS — visual OK + full coverage + adapter_needed=0
|
||||
RENDERED_WITH_VISUAL_REGRESSION — full coverage 이지만 visual fail
|
||||
PARTIAL_COVERAGE — 일부 section 필터됨, 렌더된 부분만 visual OK
|
||||
@@ -2964,13 +3036,43 @@ def compute_slide_status(sections: list[MdxSection],
|
||||
Stage 1 Q3 + Codex #10 D4 lock.)
|
||||
"""
|
||||
aligned_ids = [s.section_id for s in sections]
|
||||
covered = set()
|
||||
# IMP-87 u1 — split coverage into legacy display vs honesty axis.
|
||||
# ``covered`` (legacy, preserved for downstream display / IMP-05 / IMP-06
|
||||
# readers) still tracks every section attached to any selected unit.
|
||||
# ``content_covered`` is the new honesty axis: only non-empty-shell units
|
||||
# count as "rendered with content". Sections attached solely to an
|
||||
# empty-shell placeholder (IMP-30 u4 frame_template_id="__empty__")
|
||||
# are routed into ``filtered_section_ids`` so an EMPTY-SHELL-only slide
|
||||
# cannot inherit full_mdx_coverage=True (Case B honesty defect lock,
|
||||
# Stage 1 anchor c53722ad).
|
||||
covered: set = set()
|
||||
content_covered: set = set()
|
||||
for u in units:
|
||||
covered.update(u.source_section_ids)
|
||||
filtered_ids = sorted(set(aligned_ids) - covered)
|
||||
if not _is_empty_shell_unit(u):
|
||||
content_covered.update(u.source_section_ids)
|
||||
filtered_ids = sorted(set(aligned_ids) - content_covered)
|
||||
full_coverage = len(filtered_ids) == 0
|
||||
visual_passed = bool(overflow.get("passed", False))
|
||||
|
||||
# IMP-87 u2 — Additive empty/content accounting (used by the overall enum
|
||||
# precedence block below and surfaced on the return dict for downstream
|
||||
# introspection). ``content_rendered_section_ids`` mirrors the new honesty
|
||||
# axis (content_covered) at the surface level so consumers can ask "how
|
||||
# much real MDX content actually rendered" without re-running the helper.
|
||||
# ``empty_shell_section_ids`` exposes the placeholder counterpart for the
|
||||
# same reason. Both pairs are purely additive — no existing field is
|
||||
# removed or repurposed (legacy ``covered_section_ids`` display semantics
|
||||
# locked by u1).
|
||||
empty_shell_units_list = [u for u in units if _is_empty_shell_unit(u)]
|
||||
content_units_list = [u for u in units if not _is_empty_shell_unit(u)]
|
||||
empty_shell_section_ids = sorted({
|
||||
sid
|
||||
for u in empty_shell_units_list
|
||||
for sid in (u.source_section_ids or [])
|
||||
})
|
||||
content_rendered_section_ids = sorted(content_covered)
|
||||
|
||||
adapter_needed_units = list(adapter_needed_units or [])
|
||||
content_truncated = []
|
||||
fallback_selections = []
|
||||
@@ -3057,7 +3159,24 @@ def compute_slide_status(sections: list[MdxSection],
|
||||
"position": source_position,
|
||||
})
|
||||
|
||||
if full_coverage and visual_passed:
|
||||
# IMP-87 u2 — EMPTY_SHELL_NO_CONTENT precedence over the legacy 4-way
|
||||
# ladder (Stage 2 axis A3). If the slide has aligned MDX sections but
|
||||
# every selected unit is an IMP-30 u4 empty-shell placeholder (zero
|
||||
# content units selected, at least one empty-shell unit selected), the
|
||||
# visible artifact carries no real content — ``overall`` MUST report
|
||||
# EMPTY_SHELL_NO_CONTENT so the u3 CLI exit / final_status.html styling
|
||||
# can branch on it. The check runs before the visual/coverage ladder
|
||||
# because a content-empty placeholder slide can technically pass Selenium
|
||||
# overflow checks (no content → no overflow); without this precedence the
|
||||
# ladder would mislabel it PASS (Stage 1 mdx05 Case B honesty defect).
|
||||
is_empty_shell_only = (
|
||||
len(aligned_ids) > 0
|
||||
and len(empty_shell_units_list) > 0
|
||||
and len(content_units_list) == 0
|
||||
)
|
||||
if is_empty_shell_only:
|
||||
overall = "EMPTY_SHELL_NO_CONTENT"
|
||||
elif full_coverage and visual_passed:
|
||||
overall = "PASS"
|
||||
elif full_coverage and not visual_passed:
|
||||
overall = "RENDERED_WITH_VISUAL_REGRESSION"
|
||||
@@ -3102,6 +3221,11 @@ def compute_slide_status(sections: list[MdxSection],
|
||||
"full_mdx_coverage": full_coverage,
|
||||
"aligned_section_ids": aligned_ids,
|
||||
"covered_section_ids": sorted(covered),
|
||||
# IMP-87 u2 — additive empty/content accounting (overall enum precedence above).
|
||||
"content_rendered_section_ids": content_rendered_section_ids,
|
||||
"content_rendered_unit_count": len(content_units_list),
|
||||
"empty_shell_section_ids": empty_shell_section_ids,
|
||||
"empty_shell_unit_count": len(empty_shell_units_list),
|
||||
"filtered_section_ids": filtered_ids,
|
||||
"filtered_section_reasons": filtered_section_reasons,
|
||||
"selection_path": "fallback_used" if fallback_selections else "rank_1",
|
||||
@@ -5929,7 +6053,11 @@ def run_phase_z2_mvp1(
|
||||
)
|
||||
# Step 20 HTML — 최종 판정 시각 보고
|
||||
_overall = slide_status.get("overall", "?")
|
||||
_ov_class = "pass" if "PASS" in _overall else "fail" if "FAIL" in _overall or "REGRESSION" in _overall else "partial"
|
||||
# IMP-87 u3 — route the dashboard CSS class through _final_status_html_class
|
||||
# so EMPTY_SHELL_NO_CONTENT renders as "fail" (red) instead of the legacy
|
||||
# substring-default "partial" amber. Other enums keep their pre-IMP-87
|
||||
# colour (Stage 2 axis A5 lock).
|
||||
_ov_class = _final_status_html_class(_overall)
|
||||
_vfs = slide_status.get("visual_fail_reasons") or []
|
||||
_vfs_html = (
|
||||
"<ul>" + "".join(f"<li>{v}</li>" for v in _vfs) + "</ul>"
|
||||
@@ -6048,6 +6176,36 @@ def run_phase_z2_mvp1(
|
||||
f"(impl_status={nap.get('next_action_implementation_status')})")
|
||||
|
||||
# 13. Exit 정책 — visual fail 은 abort, partial coverage 는 abort 안 하지만 PASS 도 아님
|
||||
# IMP-87 u3 — BLOCKED exit on EMPTY_SHELL_NO_CONTENT precedes the legacy
|
||||
# visual_fail / partial_coverage branches (Stage 2 axis A4). A slide whose
|
||||
# every selected unit is an IMP-30 u4 empty-shell placeholder carries no
|
||||
# MDX content; without this branch a Case B run could pass Selenium
|
||||
# overflow checks (nothing to overflow) and silently return overall
|
||||
# EMPTY_SHELL_NO_CONTENT without any CLI exit signal, violating
|
||||
# feedback_artifact_status_naming.
|
||||
if _is_blocked_overall(overall):
|
||||
_aligned = slide_status.get("aligned_section_ids") or []
|
||||
_empty_ids = slide_status.get("empty_shell_section_ids") or []
|
||||
_empty_count = slide_status.get("empty_shell_unit_count", 0)
|
||||
_content_count = slide_status.get("content_rendered_unit_count", 0)
|
||||
print(
|
||||
f"\n[Phase Z-2 IMP-87 u3] BLOCKED @ empty_shell_no_content ({overall})",
|
||||
file=sys.stderr,
|
||||
)
|
||||
print(
|
||||
" reason : every selected unit is IMP-30 u4 empty-shell placeholder "
|
||||
"(no MDX content rendered)",
|
||||
file=sys.stderr,
|
||||
)
|
||||
print(f" aligned : {_aligned}", file=sys.stderr)
|
||||
print(f" shell : {_empty_ids}", file=sys.stderr)
|
||||
print(
|
||||
f" units : empty_shell_unit_count={_empty_count} "
|
||||
f"content_rendered_unit_count={_content_count}",
|
||||
file=sys.stderr,
|
||||
)
|
||||
sys.exit(1)
|
||||
|
||||
if not slide_status["visual_check_passed"]:
|
||||
err_path = write_overflow_error(run_dir, overflow)
|
||||
print(f"\n[Phase Z-2 MVP-1.5b] FAIL @ visual_runtime_check ({overall})", file=sys.stderr)
|
||||
|
||||
Reference in New Issue
Block a user