feat(#71): IMP-42 u1~u5 silent fail chain diagnostics (assert + invalid-char detector + DIAG log)
Some checks failed
Multi-MDX Regression (IMP-91) / multi-mdx-regression (push) Failing after 24s

Stage 4 binding scope — diagnostic-only, fail-loud, sample-agnostic
(RULE 0 / AI-isolation contract). No production behavior change beyond
fail-loud raises on previously-silent failure classes.

u1 src/phase_z2_pipeline.py:2747-2772 — render_slide precondition check
   (raises TypeError unless template_id is a non-empty str and slot_payload
   is a dict), placed after the `__empty__` short-circuit at 2740 to
   preserve empty-zone grid behavior.
u2 src/phase_z2_pipeline.py:2681-2710 — _scan_rendered_html_for_invalid_path_chars
   helper covering src / href / url(...) values for a literal backslash and
   the autoescape entities `&amp;` / `&#39;` (escaped `&` and `'`).
   Invoked on partial render (2778) and slide_base assembly (2798).
u3 src/phase_z2_pipeline.py:2638-2676,2733,5509 — _emit_diag_zones_shape
   shape-only [DIAG] JSON at Step 12 slot_payload emit and Step 13
   render_slide entry. No env gate — silence is the bug.
u4 Front/client/src/pages/Home.tsx:388-392 — unconditional [DIAG raw overrides]
   console.log on handleGenerate boundary, after flushUserOverrides() and
   immediately before runPipeline.
u5 tests/phase_z2/test_phase_z2_diag_smoke_general.py — 32-frame general
   smoke driven by load_frame_contracts() registry (not literal MDX 03/04/05),
   parametrizes u1/u2/u3 across the full frame_contracts.yaml top-level.

Tests (Stage 4 verification PASS):
- u1 8 passed, u2 14 passed, u3 12 passed, u4 5 passed, u5 97 passed.
- Backend full regression tests/phase_z2/ 499 passed in 110.84s.
- Frontend full regression 182 passed in 1.10s.

Out of scope (separate axes):
- Path normalization / as_posix migration.
- Autoescape policy change.
- build_layout_css refactor (Stage 1 category-error rejection).
- Recovery / auto-fix on detected invalid path.
- MDX content / frame-selection / zone-composition change.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-24 08:28:54 +09:00
parent c59864eb9a
commit 5deeb97cf6
7 changed files with 818 additions and 4 deletions

View File

@@ -2632,6 +2632,84 @@ def _remeasure_after_frame_reselect(
}
# ─── IMP-42 u3 (#71) — unconditional Step 12 / Step 13 DIAG log helper ──
def _emit_diag_zones_shape(stage_label: str, zones_data: list[dict], **extra_fields) -> None:
    """IMP-42 u3 (#71) — print a shape-only ``[DIAG]`` line for ``zones_data``.

    One line is written to stdout (flushed immediately) in the form
    ``[DIAG] phase_z2 <stage_label> <json>``. The JSON object carries
    ``zones_count`` and, per zone, its index ``i``, ``position``,
    ``template_id`` and the sorted ``slot_keys`` of its ``slot_payload``
    (``None`` when ``slot_payload`` is absent or not a dict). Only keys are
    logged, never slot values. There is no env gate: the line is emitted on
    every call.

    The helper is deliberately tolerant of malformed input, because its job
    is to make a bad handoff visible rather than to fail on it:

    - a zone that is not a dict is reported with an extra ``zone_type`` field
      instead of raising ``AttributeError``;
    - slot keys of mixed, non-comparable types are ordered by ``repr``;
    - values that JSON cannot encode are rendered with ``repr``.

    Args:
        stage_label: Free-form label identifying the emit site.
        zones_data: Sequence of zone dicts to describe.
        **extra_fields: Additional top-level fields merged into the JSON
            object (they override ``zones`` / ``zones_count`` on key clash).
    """
    zone_shapes = []
    for index, zone in enumerate(zones_data):
        if not isinstance(zone, dict):
            # Malformed entry: still report it so the log shows where it sits.
            zone_shapes.append(
                {
                    "i": index,
                    "position": None,
                    "template_id": None,
                    "slot_keys": None,
                    "zone_type": type(zone).__name__,
                }
            )
            continue

        slot_payload = zone.get("slot_payload")
        if isinstance(slot_payload, dict):
            try:
                slot_keys = sorted(slot_payload)
            except TypeError:
                # Mixed key types cannot be compared; fall back to a stable
                # textual ordering rather than crashing the diagnostic.
                slot_keys = sorted(slot_payload, key=repr)
        else:
            slot_keys = None

        zone_shapes.append(
            {
                "i": index,
                "position": zone.get("position"),
                "template_id": zone.get("template_id"),
                "slot_keys": slot_keys,
            }
        )

    payload = {"zones_count": len(zones_data), "zones": zone_shapes}
    payload.update(extra_fields)

    # default=repr is intentional: a non-serializable template_id / extra
    # field is exactly the kind of anomaly this log should display.
    encoded = json.dumps(payload, ensure_ascii=False, sort_keys=True, default=repr)
    print(f"[DIAG] phase_z2 {stage_label} " + encoded, flush=True)
# ─── IMP-42 u2 (#71) — post-render HTML invalid path char detector ──
# Matches asset references in rendered HTML:
#   * src="..." / href='...'  — the value runs to the MATCHING closing quote,
#     so a value containing the other quote character is captured in full.
#   * url("...") / url('...') — quoted values may contain whitespace.
#   * url(bare) and the lenient optional-quote form — kept as a fallback so
#     unquoted / loosely quoted references are still inspected.
_INVALID_PATH_ATTR_RE = re.compile(
    r"""(?P<attr>src|href)\s*=\s*(?:"(?P<attr_dq>[^"]*)"|'(?P<attr_sq>[^']*)')"""
    r"""|url\(\s*(?:"(?P<url_dq>[^"]*)"|'(?P<url_sq>[^']*)'"""
    r"""|["']?(?P<url_bare>[^)'"\s]+)["']?)\s*\)""",
    re.IGNORECASE,
)
# Substrings that mark a broken asset path in rendered output: a literal
# backslash, and the HTML entities produced when '&' or "'" is autoescaped.
_INVALID_PATH_CHARS = ("\\", "&amp;", "&#39;")


def _scan_rendered_html_for_invalid_path_chars(html: str, context: str) -> None:
    """IMP-42 u2 (#71) — fail loud on invalid path chars in asset references.

    Scans every ``src`` / ``href`` attribute value and every ``url(...)``
    value found in ``html`` and raises on the first value that contains one
    of ``_INVALID_PATH_CHARS``:

    - a backslash (e.g. ``assets\\img.png`` from ``str(Path)`` on Windows);
    - ``&amp;`` (an autoescaped ``&`` in the path source string);
    - ``&#39;`` (an autoescaped ``'`` in the path source string).

    Quoted values are read up to their matching closing quote, so paths that
    contain whitespace (inside ``url('...')``) or the other quote character
    are inspected in full rather than skipped or truncated.

    Args:
        html: Rendered HTML text to inspect.
        context: Short description of where ``html`` came from; included in
            the error message.

    Returns:
        None when no offending value is found.

    Raises:
        ValueError: On the first offending value. The message cites
            ``context``, the reference kind (``src`` / ``href`` /
            ``url(...)``), the offending token, and the value (truncated to
            120 characters).
    """
    for match in _INVALID_PATH_ATTR_RE.finditer(html):
        found = match.groupdict()
        attr_name = found["attr"]
        if attr_name is not None:
            attr_label = attr_name.lower()
            candidates = (found["attr_dq"], found["attr_sq"])
        else:
            attr_label = "url(...)"
            candidates = (found["url_dq"], found["url_sq"], found["url_bare"])
        # Exactly one alternative participated in the match; an empty string
        # is a legitimate (clean) value, so test against None, not falsiness.
        value = next(v for v in candidates if v is not None)

        for bad in _INVALID_PATH_CHARS:
            if bad in value:
                snippet = value if len(value) <= 120 else value[:117] + "..."
                raise ValueError(
                    f"render_slide: {context} — invalid path char {bad!r} in "
                    f"{attr_label} value (value={snippet}). "
                    "Likely upstream: Windows backslash from str(Path) or "
                    "autoescape of '&' / \"'\" in raw path string."
                )
def render_slide(slide_title: str, slide_footer: Optional[str],
zones_data: list[dict], layout_preset: str,
layout_css: dict, gap_px: int = GRID_GAP,
@@ -2651,22 +2729,60 @@ def render_slide(slide_title: str, slide_footer: Optional[str],
f"render_slide: invalid embedded_mode={embedded_mode!r}; "
"expected one of 'auto', 'embedded', 'standalone'"
)
# IMP-42 u3 (#71) — unconditional Step 13 entry DIAG log.
_emit_diag_zones_shape(
"Step 13 render_slide entry",
zones_data,
layout_preset=layout_preset,
embedded_mode=embedded_mode,
)
env = Environment(
loader=FileSystemLoader(str(TEMPLATE_DIR)),
autoescape=select_autoescape(["html"]),
)
for zone in zones_data:
for zone_index, zone in enumerate(zones_data):
# Stage 4 Part 2 (Codex #10 Catch N) — empty zone produced by section
# assignment override has no partial template; render an empty string so
# the slide_base zones loop preserves grid identity without TemplateNotFound.
if zone.get("template_id") == "__empty__":
zone["partial_html"] = ""
continue
partial = env.get_template(f"families/{zone['template_id']}.html")
zone["partial_html"] = partial.render(slot_payload=zone["slot_payload"])
# IMP-42 u1 (#71) — fail-loud precondition for Step 13 partial render.
# Catches the silent fail vector where Step 12 emits a zone dict missing
# `template_id` / `slot_payload`. Error message cites zone_index +
# missing key so the diag is actionable (vs Jinja TemplateNotFound /
# KeyError surfacing far from the upstream emit site).
template_id = zone.get("template_id")
if not isinstance(template_id, str) or not template_id:
raise TypeError(
f"render_slide: zones_data[{zone_index}] precondition failed — "
f"`template_id` must be a non-empty str, got {type(template_id).__name__}={template_id!r}"
)
if "slot_payload" not in zone:
raise TypeError(
f"render_slide: zones_data[{zone_index}] precondition failed — "
f"`slot_payload` key missing (template_id={template_id!r})"
)
slot_payload = zone["slot_payload"]
if not isinstance(slot_payload, dict):
raise TypeError(
f"render_slide: zones_data[{zone_index}] precondition failed — "
f"`slot_payload` must be a dict, got {type(slot_payload).__name__} "
f"(template_id={template_id!r})"
)
partial = env.get_template(f"families/{template_id}.html")
rendered_partial = partial.render(slot_payload=slot_payload)
# IMP-42 u2 (#71) — fail loud on invalid path chars in rendered HTML
# asset refs (src / href / url(...)). Catches Windows backslash and
# autoescape entity vectors before they reach the browser as 404.
_scan_rendered_html_for_invalid_path_chars(
rendered_partial,
f"zones_data[{zone_index}] template_id={template_id!r}",
)
zone["partial_html"] = rendered_partial
base = env.get_template("slide_base.html")
return base.render(
rendered_base = base.render(
slide_title=slide_title,
slide_footer=slide_footer,
zones=zones_data,
@@ -2676,6 +2792,11 @@ def render_slide(slide_title: str, slide_footer: Optional[str],
token_css=_read_token_css(),
embedded_mode=embedded_mode,
)
# IMP-42 u2 (#71) — also scan the assembled slide_base output to cover
# asset refs introduced by the slide-base shell itself (title / footer /
# popup slots) outside the per-zone partial scope.
_scan_rendered_html_for_invalid_path_chars(rendered_base, "slide_base")
return rendered_base
# ─── Selenium check (single slide + per-zone) ──────────────────
@@ -5384,6 +5505,8 @@ def run_phase_z2_mvp1(
outputs=["step12_slot_payload.json"],
note="map_with_contract 결과 — actual slot_payload 값 그대로 (key 만 X).",
)
# IMP-42 u3 (#71) — unconditional Step 12 slot_payload emit DIAG log.
_emit_diag_zones_shape("Step 12 slot_payload emit", zones_data)
# 6. Build layout CSS — horizontal-2 = dynamic heights (regression preserve), 그 외 = fr default.
# Step D-ext : override_zone_geometries 가 들어오면 layout_css 강제.