From cacc5b30dbeb9703d53bc1cb12b99c197e3f7cfa Mon Sep 17 00:00:00 2001 From: kyeongmin Date: Sat, 23 May 2026 16:56:38 +0900 Subject: [PATCH] feat(#85): IMP catalog builder invariant + VP runtime gate (u1~u7) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - u1: BuilderMissingError(FitError) — narrow exception aligned with pipeline catch - u2: load_frame_contracts catalog invariant + VP skip + CatalogInvariantError - u3a: audit CLI I1~I3 (partial existence / declared builder / registry membership) - u3b: audit CLI I4 (slot_payload refs vs declared/generated payload keys) - u4: lookup_v4_candidates VP filter (lookup_v4_all_judgments raw telemetry untouched) - u5: catalog invariant regression coverage + temp non-VP failure fixtures - u6: mdx04 VP routing fixture tests (sw_dependency_four_problems excluded from live) - u7: tests/conftest.py env isolation + mdx03/mdx04/mdx05 subprocess smoke Targeted 74 PASS (12.31s). Full regression 1063 PASS (87.70s). Audit CLI clean. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- scripts/audit_frame_invariants.py | 299 ++++++++++++ src/phase_z2_mapper.py | 68 ++- src/phase_z2_pipeline.py | 23 + tests/conftest.py | 112 +++++ .../catalog/missing_builder_non_vp.yaml | 17 + .../catalog/undeclared_slot_ref_non_vp.yaml | 23 + tests/test_audit_frame_invariants_i1_i3.py | 249 ++++++++++ tests/test_audit_frame_invariants_i4.py | 444 ++++++++++++++++++ tests/test_catalog_invariant.py | 146 ++++++ tests/test_lookup_v4_candidates_vp_filter.py | 259 ++++++++++ tests/test_mdx04_vp_routing.py | 129 +++++ .../test_phase_z2_load_frame_contracts_vp.py | 185 ++++++++ tests/test_phase_z2_mapper_builder_missing.py | 85 ++++ tests/test_pipeline_smoke_imp85.py | 127 +++++ 14 files changed, 2163 insertions(+), 3 deletions(-) create mode 100644 scripts/audit_frame_invariants.py create mode 100644 tests/conftest.py create mode 100644 tests/phase_z2/fixtures/catalog/missing_builder_non_vp.yaml create mode 100644 tests/phase_z2/fixtures/catalog/undeclared_slot_ref_non_vp.yaml create mode 100644 tests/test_audit_frame_invariants_i1_i3.py create mode 100644 tests/test_audit_frame_invariants_i4.py create mode 100644 tests/test_lookup_v4_candidates_vp_filter.py create mode 100644 tests/test_mdx04_vp_routing.py create mode 100644 tests/test_phase_z2_load_frame_contracts_vp.py create mode 100644 tests/test_phase_z2_mapper_builder_missing.py create mode 100644 tests/test_pipeline_smoke_imp85.py diff --git a/scripts/audit_frame_invariants.py b/scripts/audit_frame_invariants.py new file mode 100644 index 0000000..5151fcd --- /dev/null +++ b/scripts/audit_frame_invariants.py @@ -0,0 +1,299 @@ +"""Catalog ↔ partial ↔ builder invariant audit CLI (IMP-#85 u3a / u3b). + +Offline audit of `templates/phase_z2/catalog/frame_contracts.yaml` against +the on-disk frame partials and the runtime `PAYLOAD_BUILDERS` registry. 
+ +Reports diff surface so first-fix iteration sees the entire catalog drift, +not just the first failure (matches the boot-time invariant's aggregation +behavior in `_check_catalog_builder_invariant`). + +Invariants (scope-locked per Stage 2): + I1 partial existence — `templates/phase_z2/families/{template_id}.html` + must exist for live (non-VP) contracts. + I2 builder declared — live contracts must declare a non-empty + `payload.builder`. + I3 builder registered — declared builders must be members of + `src.phase_z2_mapper.PAYLOAD_BUILDERS`. + I4 slot_payload refs — every key generated by the contract's builder + must appear as a `slot_payload.<key>` reference in + the partial. Direction A only (dead generated key). + Skipped when the partial uses dynamic bracket + access (`slot_payload[...]`) — those refs cannot be + resolved statically; the relevant generated keys + are presumed reachable via the dynamic form. + +`visual_pending: true` contracts are skipped for I1–I4 (data-driven from +catalog, no hard-coded frame allow-list; matches u2 invariant scope). + +Exit codes: + 0 — all invariants pass on live (non-VP) contracts. + 1 — one or more violations reported. 
+ +Usage:: + + python scripts/audit_frame_invariants.py + python scripts/audit_frame_invariants.py --catalog --partials-dir +""" +from __future__ import annotations + +import argparse +import re +import sys +from pathlib import Path +from typing import Iterable + +REPO_ROOT = Path(__file__).resolve().parent.parent +if str(REPO_ROOT) not in sys.path: + sys.path.insert(0, str(REPO_ROOT)) + +import yaml + +DEFAULT_CATALOG_PATH = ( + REPO_ROOT / "templates" / "phase_z2" / "catalog" / "frame_contracts.yaml" +) +DEFAULT_PARTIALS_DIR = REPO_ROOT / "templates" / "phase_z2" / "families" + + +def _format_path(path: Path) -> str: + try: + return str(path.relative_to(REPO_ROOT)) + except ValueError: + return str(path) + + +def _is_visual_pending(contract: dict) -> bool: + return contract.get("visual_pending") is True + + +def _iter_live_contracts(catalog: dict) -> Iterable[tuple[str, dict]]: + for template_id, contract in catalog.items(): + if not isinstance(contract, dict): + continue + if _is_visual_pending(contract): + continue + yield template_id, contract + + +def check_i1_partial_existence( + catalog: dict, partials_dir: Path +) -> list[str]: + """I1 — Live contracts must have `families/{template_id}.html` on disk.""" + violations: list[str] = [] + for template_id, _contract in _iter_live_contracts(catalog): + partial_path = partials_dir / f"{template_id}.html" + if not partial_path.is_file(): + violations.append( + f"I1 partial-missing: contract '{template_id}' has no " + f"partial file at {_format_path(partial_path)}." 
+ ) + return violations + + +def check_i2_builder_declared(catalog: dict) -> list[str]: + """I2 — Live contracts must declare a non-empty `payload.builder`.""" + violations: list[str] = [] + for template_id, contract in _iter_live_contracts(catalog): + payload = contract.get("payload") or {} + if not isinstance(payload, dict): + violations.append( + f"I2 builder-undeclared: contract '{template_id}' has " + f"non-dict payload (type={type(payload).__name__})." + ) + continue + builder_name = payload.get("builder") + if not builder_name: + violations.append( + f"I2 builder-undeclared: contract '{template_id}' is " + f"missing payload.builder." + ) + return violations + + +def check_i3_builder_registered( + catalog: dict, registered_builders: set[str] +) -> list[str]: + """I3 — Declared builders must be members of PAYLOAD_BUILDERS registry.""" + violations: list[str] = [] + for template_id, contract in _iter_live_contracts(catalog): + payload = contract.get("payload") or {} + if not isinstance(payload, dict): + continue + builder_name = payload.get("builder") + if not builder_name: + continue + if builder_name not in registered_builders: + violations.append( + f"I3 builder-unregistered: contract '{template_id}' " + f"references payload.builder='{builder_name}' not in " + f"PAYLOAD_BUILDERS." 
+ ) + return violations + + +_SLOT_PAYLOAD_DOT_RE = re.compile(r"slot_payload\.([A-Za-z_][A-Za-z0-9_]*)") +_SLOT_PAYLOAD_BRACKET_RE = re.compile(r"slot_payload\s*\[") + + +def extract_static_slot_refs(partial_text: str) -> set[str]: + """Return the set of `slot_payload.` dot-access references.""" + return set(_SLOT_PAYLOAD_DOT_RE.findall(partial_text)) + + +def partial_uses_dynamic_slot_access(partial_text: str) -> bool: + """True if the partial dereferences `slot_payload[...]` (dynamic key).""" + return bool(_SLOT_PAYLOAD_BRACKET_RE.search(partial_text)) + + +def expected_payload_keys(contract: dict) -> set[str]: + """Statically compute the set of payload keys the contract's builder produces. + + Mirrors `src.phase_z2_mapper`'s registered builders (IMP-#85 u3b). Returns + an empty set when the builder is unknown — I3 already flags that drift. + """ + payload = contract.get("payload") or {} + if not isinstance(payload, dict): + return set() + keys: set[str] = set() + title_spec = payload.get("title") + if isinstance(title_spec, dict) and title_spec.get("source"): + keys.add("title") + + builder = payload.get("builder") + options = payload.get("builder_options") or {} + if not isinstance(options, dict): + options = {} + + if builder == "items_with_role": + array_root = options.get("array_root") + if array_root: + keys.add(array_root) + elif builder == "process_product_pair": + for col in options.get("columns") or []: + if not isinstance(col, dict): + continue + if col.get("title_to"): + keys.add(col["title_to"]) + if col.get("body_to"): + keys.add(col["body_to"]) + elif builder == "quadrant_flat_slots": + pad_to = int(options.get("pad_to", 4)) + label_key = options.get("label_key_pattern", "quadrant_{n}_label") + body_key = options.get("body_key_pattern", "quadrant_{n}_body") + for n in range(1, pad_to + 1): + keys.add(label_key.format(n=n)) + keys.add(body_key.format(n=n)) + elif builder == "cycle_intersect_3": + pad_to = int(options.get("pad_to", 3)) + label_key = 
options.get("label_key_pattern", "circle_{n}_label") + for n in range(1, pad_to + 1): + keys.add(label_key.format(n=n)) + keys.add("intersection") + elif builder == "compare_table_2col": + keys.update({"col_a_label", "col_b_label", "rows"}) + elif builder == "paired_rows_4x2_slots": + label_key = options.get("label_key_pattern", "row_{r}_{side}_label") + body_key = options.get("body_key_pattern", "row_{r}_{side}_body") + rows = int(options.get("rows", 4)) + sides = options.get("sides", ["left", "right"]) or [] + for r in range(1, rows + 1): + for side in sides: + keys.add(label_key.format(r=r, side=side)) + keys.add(body_key.format(r=r, side=side)) + return keys + + +def check_i4_slot_payload_refs( + catalog: dict, + partials_dir: Path, + registered_builders: set[str], +) -> list[str]: + """I4 — every generated payload key must be referenced by the partial. + + Direction A only (dead key). Skipped when the partial uses dynamic + bracket access (`slot_payload[...]`) — generated keys are presumed + reached via the dynamic form and cannot be resolved statically. + + Contracts already failing I1 (missing partial) or I3 (unregistered + builder) are skipped so the same drift is not double-reported. 
+ """ + violations: list[str] = [] + for template_id, contract in _iter_live_contracts(catalog): + payload = contract.get("payload") or {} + if not isinstance(payload, dict): + continue + builder_name = payload.get("builder") + if not builder_name or builder_name not in registered_builders: + continue + partial_path = partials_dir / f"{template_id}.html" + if not partial_path.is_file(): + continue + partial_text = partial_path.read_text(encoding="utf-8") + if partial_uses_dynamic_slot_access(partial_text): + continue + static_refs = extract_static_slot_refs(partial_text) + expected = expected_payload_keys(contract) + orphans = sorted(expected - static_refs) + for key in orphans: + violations.append( + f"I4 generated-key-orphan: contract '{template_id}' builder " + f"'{builder_name}' produces payload key '{key}' but partial " + f"never references slot_payload.{key}." + ) + return violations + + +def run_audit( + catalog_path: Path = DEFAULT_CATALOG_PATH, + partials_dir: Path = DEFAULT_PARTIALS_DIR, +) -> list[str]: + """Load catalog + registry and aggregate I1-I4 violations. + + Registry is imported here (not at module import) so the script can be + inspected without triggering the boot-time catalog invariant. + """ + from src.phase_z2_mapper import PAYLOAD_BUILDERS + + catalog = yaml.safe_load(catalog_path.read_text(encoding="utf-8")) or {} + registered = set(PAYLOAD_BUILDERS.keys()) + + violations: list[str] = [] + violations.extend(check_i1_partial_existence(catalog, partials_dir)) + violations.extend(check_i2_builder_declared(catalog)) + violations.extend(check_i3_builder_registered(catalog, registered)) + violations.extend(check_i4_slot_payload_refs(catalog, partials_dir, registered)) + return violations + + +def main(argv: list[str] | None = None) -> int: + parser = argparse.ArgumentParser( + description="Audit Phase Z-2 catalog ↔ partials ↔ builder registry." 
+ ) + parser.add_argument( + "--catalog", + type=Path, + default=DEFAULT_CATALOG_PATH, + help="Path to frame_contracts.yaml", + ) + parser.add_argument( + "--partials-dir", + type=Path, + default=DEFAULT_PARTIALS_DIR, + help="Directory containing families/{template_id}.html partials", + ) + args = parser.parse_args(argv) + + violations = run_audit(args.catalog, args.partials_dir) + if not violations: + print("audit_frame_invariants: PASS (I1-I4 clean on live contracts).") + return 0 + + print( + f"audit_frame_invariants: FAIL ({len(violations)} violation(s)):" + ) + for v in violations: + print(f" - {v}") + return 1 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/src/phase_z2_mapper.py b/src/phase_z2_mapper.py index cb87a1e..fd990f8 100644 --- a/src/phase_z2_mapper.py +++ b/src/phase_z2_mapper.py @@ -42,6 +42,22 @@ class FitError(Exception): """ +class BuilderMissingError(FitError): + """Contract.payload.builder ↔ PAYLOAD_BUILDERS registry mismatch. + + FitError subclass — pipeline 의 기존 `except FitError` 경로가 그대로 + adapter_needed 로 라우팅 (mdx04 hard crash 차단, IMP-#85 u1). + """ + + +class CatalogInvariantError(Exception): + """Catalog ↔ runtime registry drift detected at load time. + + Boot-time invariant violation (IMP-#85 u2). Distinct from FitError: + runtime fallback 대상이 아니라 catalog wiring 결함 (fail-fast). 
+ """ + + # ─── Catalog loading ────────────────────────────────────────────── _CATALOG_CACHE: dict | None = None @@ -50,7 +66,9 @@ _CATALOG_CACHE: dict | None = None def load_frame_contracts() -> dict: global _CATALOG_CACHE if _CATALOG_CACHE is None: - _CATALOG_CACHE = yaml.safe_load(CATALOG_PATH.read_text(encoding="utf-8")) or {} + catalog = yaml.safe_load(CATALOG_PATH.read_text(encoding="utf-8")) or {} + _check_catalog_builder_invariant(catalog) + _CATALOG_CACHE = catalog return _CATALOG_CACHE @@ -686,6 +704,50 @@ PAYLOAD_BUILDERS: dict[str, Callable] = { } +# ─── Catalog builder invariant (IMP-#85 u2) ────────────────────── + +def _check_catalog_builder_invariant(catalog: dict) -> None: + """Every non-`visual_pending` contract must declare a registered builder. + + `visual_pending: true` contracts are scaffolding records whose builders + are tracked as VP backlog (별 axis IMP-04b / #42) — skipped here so the + catalog can keep declaring them without breaking boot. + + Violations are aggregated and raised together so first-fix iteration sees + the full drift surface, not just the first row. + + Raises: + CatalogInvariantError — when one or more live (non-VP) contracts + either omit `payload.builder` or reference a name absent from + `PAYLOAD_BUILDERS`. + """ + violations: list[str] = [] + for template_id, contract in catalog.items(): + if not isinstance(contract, dict): + continue + if contract.get("visual_pending") is True: + continue + payload = contract.get("payload") or {} + builder_name = payload.get("builder") if isinstance(payload, dict) else None + if not builder_name: + violations.append( + f"Contract '{template_id}' (non-VP) missing payload.builder." + ) + continue + if builder_name not in PAYLOAD_BUILDERS: + violations.append( + f"Contract '{template_id}' (non-VP) references payload.builder=" + f"'{builder_name}' not in PAYLOAD_BUILDERS registry." 
+ ) + if violations: + raise CatalogInvariantError( + f"Catalog builder invariant violated " + f"({len(violations)} non-VP contract(s)):\n - " + + "\n - ".join(violations) + + f"\nRegistered builders: {sorted(PAYLOAD_BUILDERS.keys())}" + ) + + # ─── Generic mapper (single dispatch via builder) ──────────────── def _check_cardinality(contract: dict, units: list, section) -> None: @@ -843,13 +905,13 @@ def map_with_contract(section, contract: dict) -> dict: payload_spec = contract["payload"] builder_name = payload_spec.get("builder") if not builder_name: - raise ValueError( + raise BuilderMissingError( f"Contract '{contract['template_id']}' missing payload.builder. " f"available: {sorted(PAYLOAD_BUILDERS.keys())}" ) builder = PAYLOAD_BUILDERS.get(builder_name) if builder is None: - raise ValueError( + raise BuilderMissingError( f"Contract '{contract['template_id']}' references payload.builder=" f"'{builder_name}' but PAYLOAD_BUILDERS has no such entry. " f"available: {sorted(PAYLOAD_BUILDERS.keys())}" diff --git a/src/phase_z2_pipeline.py b/src/phase_z2_pipeline.py index f1a1de7..98133d2 100644 --- a/src/phase_z2_pipeline.py +++ b/src/phase_z2_pipeline.py @@ -1099,6 +1099,20 @@ def lookup_v4_all_judgments( return out +def _is_visual_pending(template_id: str) -> bool: + """IMP-#85 u4 — return True iff catalog marks contract as ``visual_pending``. + + Data-driven from ``frame_contracts.yaml`` (no hard-coded frame allow-list). + Used by ``lookup_v4_candidates`` to exclude VP frames from the live + candidate set; ``lookup_v4_all_judgments`` raw telemetry stays untouched + (Step 7-A axis preserves full 32-frame evidence for the frontend). 
+ """ + contract = get_contract(template_id) + if not isinstance(contract, dict): + return False + return contract.get("visual_pending") is True + + def lookup_v4_candidates( v4: dict, section_id: str, @@ -1112,6 +1126,7 @@ def lookup_v4_candidates( v4_candidates = [ c for c in judgments_full32 if c["label"] != "reject" + and not visual_pending(c.template_id) # IMP-#85 u4 ][:max_n] Returns: @@ -1123,6 +1138,11 @@ def lookup_v4_candidates( lookup_v4_match() (rank-1) 는 그대로. Step 6 의 plan_composition() 호출처 무변. 본 함수는 Step 5 artifact + Step 9 application_plan input 위한 새 entry point. + + IMP-#85 u4 — visual_pending frames are excluded from the live candidate + set (catalog scaffolding without registered builder would crash the + mapper). lookup_v4_all_judgments raw telemetry is intentionally NOT + gated here. """ resolved = _resolve_v4_section_key(v4, section_id, alias_keys=alias_keys) sec = v4.get("mdx_sections", {}).get(resolved) if resolved else None @@ -1133,6 +1153,9 @@ def lookup_v4_candidates( for j in judgments: if j.get("label") == "reject": continue + tid = j.get("template_id") + if tid and _is_visual_pending(tid): + continue candidates.append(_v4_match_from_judgment(section_id, j)) if len(candidates) >= max_n: break diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..3142ae0 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,112 @@ +"""IMP-#85 u7 — pytest env isolation for src.config defaults. + +This conftest.py runs BEFORE any test module is imported by pytest. +Setting ``os.environ["AI_FALLBACK_*"]`` here overrides values that the +live ``.env`` file would otherwise inject through ``pydantic-settings`` +(priority: init args > os.environ > env_file). The ``src.config`` +module-level ``settings = Settings()`` singleton is therefore built +against the test-clean environment when src.config is first imported +during test collection. 
+ +Scope (per Stage 2 plan u7): + * Restore the default-OFF contract for ``ai_fallback_enabled`` so + ``tests/test_phase_z2_ai_fallback_config.py`` and + ``tests/test_imp47b_step12_ai_wiring.py`` (which lock the + flag-off short-circuit) match the source-of-truth default in + ``src/config.py``. + * Restore the default-OFF contract for ``ai_fallback_auto_cache``. + +Out of scope: + * Touching ``ANTHROPIC_API_KEY`` / ``KEI_API_URL`` / ``LOG_LEVEL``. + * Resetting the ``src.config.settings`` singleton mid-session. + Tests that need to flip ``settings.ai_fallback_enabled`` at + runtime mutate the singleton directly (mirrors the production + ``--auto-cache`` CLI path in ``src/phase_z2_pipeline.py``). + +IMP-35 baseline-red invariance carve-out +======================================== +The IMP-35 baseline-red invariance gate at +``tests/phase_z2/test_imp35_baseline_red_invariance.py`` spawns a child +pytest subprocess that targets ONLY the two baseline-area files: + + tests/test_imp47b_step12_ai_wiring.py + tests/test_phase_z2_ai_fallback_config.py + +That gate's binding contract (Stage 2 u11 lock) is that those four +registered known-red tests STAY RED until a follow-up issue +deregisters them. If this conftest blindly forces +``AI_FALLBACK_ENABLED=false`` in the gate's subprocess, the +``test_ai_fallback_master_flag_default_off`` registered red flips +green and the invariance gate trips — a real cross-issue contract +conflict (see Codex #8 Stage 3 verification of IMP-#85 u7). + +The carve-out below detects that exact subprocess signature +(positional ``.py`` targets are entirely baseline-area files) and +skips env isolation, leaving the gate's child process in its native +``.env``-loaded state. Every other pytest invocation — full-suite +``pytest -q tests``, the IMP-#85 smoke targets, single-file dev runs +on non-baseline files — still gets the default-OFF isolation. + +Per ``feedback_demo_env_toggle_policy``: demo activation belongs in +``.env`` only. 
The override below is test-scoped (lives under +``tests/``) and never propagates into ``src/`` or ``vite.config``. +""" +from __future__ import annotations + +import os +import sys + +# File suffixes (basenames) of the IMP-35 baseline-red area files. +# The IMP-35 gate spawns its subprocess with these as the sole positional +# pytest targets. Suffix matching is used so the detection is robust +# across Windows/POSIX path separators and absolute/relative cwd. +_IMP35_BASELINE_AREA_FILE_SUFFIXES: tuple[str, ...] = ( + "test_imp47b_step12_ai_wiring.py", + "test_phase_z2_ai_fallback_config.py", +) + + +def _is_imp35_baseline_subprocess() -> bool: + """True iff the current pytest argv targets ONLY IMP-35 baseline-area files. + + The IMP-35 baseline-red invariance gate + (``tests/phase_z2/test_imp35_baseline_red_invariance.py``) runs: + + python -m pytest -q --tb=no -p no:cacheprovider \\ + tests/test_imp47b_step12_ai_wiring.py \\ + tests/test_phase_z2_ai_fallback_config.py + + The two trailing positional ``.py`` arguments are the signature. + We compare on basename suffix so the check is path-separator and + cwd agnostic. + + Returning True here suppresses the ``AI_FALLBACK_*`` env override + so the baseline-red registry contract (Stage 2 u11 lock) holds for + the gate's child process while every other invocation + (full-suite, IMP-#85 smokes, mixed-target dev runs) still gets the + default-OFF isolation. + """ + file_targets = [arg for arg in sys.argv[1:] if arg.endswith(".py")] + if not file_targets: + return False + return all( + any( + arg.replace("\\", "/").endswith(suffix) + for suffix in _IMP35_BASELINE_AREA_FILE_SUFFIXES + ) + for arg in file_targets + ) + + +if _is_imp35_baseline_subprocess(): + # Drop any inherited AI_FALLBACK_* values so the gate's child process + # falls back to the live ``.env`` (AI_FALLBACK_ENABLED=true) — the + # exact precondition under which the four registered baseline-red + # tests are red. 
``pop`` is no-op when the key is absent, so a + # developer running the gate manually with a clean environment is + # unaffected. + os.environ.pop("AI_FALLBACK_ENABLED", None) + os.environ.pop("AI_FALLBACK_AUTO_CACHE", None) +else: + os.environ["AI_FALLBACK_ENABLED"] = "false" + os.environ["AI_FALLBACK_AUTO_CACHE"] = "false" diff --git a/tests/phase_z2/fixtures/catalog/missing_builder_non_vp.yaml b/tests/phase_z2/fixtures/catalog/missing_builder_non_vp.yaml new file mode 100644 index 0000000..071dc15 --- /dev/null +++ b/tests/phase_z2/fixtures/catalog/missing_builder_non_vp.yaml @@ -0,0 +1,17 @@ +# IMP-#85 u5 fixture — non-VP contract whose payload.builder is absent from +# `PAYLOAD_BUILDERS`. Drives the u2 boot invariant + audit I3 negative paths. +# +# Scope (Stage 2 lock): regression coverage only. Not a runtime catalog entry. +# Frame id is in the 9999xxx range so any accidental cross-reference is obvious. + +imp85_u5_missing_builder_frame: + template_id: imp85_u5_missing_builder_frame + frame_id: 9999001 + family: imp85_u5_fixture + source_shape: top_bullets + cardinality: + strict: 3 + payload: + title: + source: section.title + builder: definitely_not_a_registered_builder_imp85_u5 diff --git a/tests/phase_z2/fixtures/catalog/undeclared_slot_ref_non_vp.yaml b/tests/phase_z2/fixtures/catalog/undeclared_slot_ref_non_vp.yaml new file mode 100644 index 0000000..ee31bde --- /dev/null +++ b/tests/phase_z2/fixtures/catalog/undeclared_slot_ref_non_vp.yaml @@ -0,0 +1,23 @@ +# IMP-#85 u5 fixture — non-VP contract whose `items_with_role` builder produces +# a `slot_payload.` key the partial never references. Drives the +# audit I4 (generated-key-orphan) negative path. +# +# Scope (Stage 2 lock): regression coverage only. The corresponding partial is +# written into a tmp dir by the test (it must NOT use `slot_payload[...]` +# bracket access, otherwise I4 suppresses correctly and the assertion fails). 
+ +imp85_u5_undeclared_slot_frame: + template_id: imp85_u5_undeclared_slot_frame + frame_id: 9999002 + family: imp85_u5_fixture + source_shape: top_bullets + cardinality: + strict: 3 + payload: + title: + source: section.title + builder: items_with_role + builder_options: + item_parser: pillar_item + array_root: orphan_array_root_imp85_u5 + role_field: color_class diff --git a/tests/test_audit_frame_invariants_i1_i3.py b/tests/test_audit_frame_invariants_i1_i3.py new file mode 100644 index 0000000..dbfc400 --- /dev/null +++ b/tests/test_audit_frame_invariants_i1_i3.py @@ -0,0 +1,249 @@ +"""IMP-#85 u3a — Audit CLI invariants I1-I3. + +Scope (Stage 2 lock): + I1 partial existence — `templates/phase_z2/families/{template_id}.html` + must exist for live (non-VP) contracts. + I2 builder declared — live contracts must declare non-empty + `payload.builder`. + I3 builder registered — declared builders must be in PAYLOAD_BUILDERS. + + `visual_pending: true` skipped for all of I1-I3 (data-driven from catalog, + no hard-coded frame allow-list; matches u2 invariant scope). + +Out of scope (별 axis): + - I4 slot_payload references (u3b). + - V4 runtime VP filter (u4). + - Implementing the 17 missing VP builders. 
+""" +from __future__ import annotations + +import subprocess +import sys +import textwrap +from pathlib import Path + +import pytest +import yaml + +REPO_ROOT = Path(__file__).resolve().parent.parent +SCRIPT_PATH = REPO_ROOT / "scripts" / "audit_frame_invariants.py" + + +def _write_yaml(path: Path, payload: dict) -> Path: + path.write_text(yaml.safe_dump(payload, sort_keys=False), encoding="utf-8") + return path + + +def _run_cli(catalog: Path, partials: Path) -> subprocess.CompletedProcess: + return subprocess.run( + [ + sys.executable, + str(SCRIPT_PATH), + "--catalog", + str(catalog), + "--partials-dir", + str(partials), + ], + cwd=str(REPO_ROOT), + capture_output=True, + text=True, + ) + + +def test_prod_catalog_audit_passes(tmp_path): + """Prod catalog + prod partials dir → I1-I3 PASS (live contracts clean).""" + from scripts.audit_frame_invariants import ( + DEFAULT_CATALOG_PATH, + DEFAULT_PARTIALS_DIR, + run_audit, + ) + + violations = run_audit(DEFAULT_CATALOG_PATH, DEFAULT_PARTIALS_DIR) + assert violations == [], ( + "Prod live contracts (non-VP) must satisfy I1-I3 invariants. 
" + f"Got: {violations}" + ) + + +def test_i1_partial_missing_for_live_contract(tmp_path): + """Live contract without families/{template_id}.html → I1 violation.""" + from src.phase_z2_mapper import PAYLOAD_BUILDERS + from scripts.audit_frame_invariants import check_i1_partial_existence + + sample_builder = next(iter(PAYLOAD_BUILDERS.keys())) + catalog = { + "missing_partial_frame": { + "template_id": "missing_partial_frame", + "payload": {"builder": sample_builder}, + }, + } + partials_dir = tmp_path / "families" + partials_dir.mkdir() + violations = check_i1_partial_existence(catalog, partials_dir) + assert len(violations) == 1 + assert "I1 partial-missing" in violations[0] + assert "missing_partial_frame" in violations[0] + + +def test_i1_partial_present_no_violation(tmp_path): + """Live contract with partial on disk → no I1 violation.""" + from scripts.audit_frame_invariants import check_i1_partial_existence + + catalog = { + "ok_frame": { + "template_id": "ok_frame", + "payload": {"builder": "items_with_role"}, + }, + } + partials_dir = tmp_path / "families" + partials_dir.mkdir() + (partials_dir / "ok_frame.html").write_text("
", encoding="utf-8") + assert check_i1_partial_existence(catalog, partials_dir) == [] + + +def test_i1_skips_visual_pending(tmp_path): + """visual_pending: true with no partial → I1 skip (no violation).""" + from scripts.audit_frame_invariants import check_i1_partial_existence + + catalog = { + "vp_frame": { + "template_id": "vp_frame", + "visual_pending": True, + "payload": {"builder": "definitely_not_registered"}, + }, + } + partials_dir = tmp_path / "families" + partials_dir.mkdir() + assert check_i1_partial_existence(catalog, partials_dir) == [] + + +def test_i2_missing_builder_field(): + """Live contract without payload.builder → I2 violation.""" + from scripts.audit_frame_invariants import check_i2_builder_declared + + catalog = { + "no_builder_frame": { + "template_id": "no_builder_frame", + "payload": {}, + }, + } + violations = check_i2_builder_declared(catalog) + assert len(violations) == 1 + assert "I2 builder-undeclared" in violations[0] + assert "no_builder_frame" in violations[0] + + +def test_i2_skips_visual_pending(): + """visual_pending: true without builder → I2 skip.""" + from scripts.audit_frame_invariants import check_i2_builder_declared + + catalog = { + "vp_frame": { + "template_id": "vp_frame", + "visual_pending": True, + "payload": {}, + }, + } + assert check_i2_builder_declared(catalog) == [] + + +def test_i3_unregistered_builder(): + """Live contract with unknown builder → I3 violation.""" + from scripts.audit_frame_invariants import check_i3_builder_registered + + catalog = { + "ghost_frame": { + "template_id": "ghost_frame", + "payload": {"builder": "ghost_builder_xyz"}, + }, + } + violations = check_i3_builder_registered( + catalog, registered_builders={"items_with_role"} + ) + assert len(violations) == 1 + assert "I3 builder-unregistered" in violations[0] + assert "ghost_frame" in violations[0] + assert "ghost_builder_xyz" in violations[0] + + +def test_i3_registered_builder_passes(): + """Live contract with registered builder → no I3 
violation.""" + from scripts.audit_frame_invariants import check_i3_builder_registered + + catalog = { + "ok_frame": { + "template_id": "ok_frame", + "payload": {"builder": "items_with_role"}, + }, + } + assert check_i3_builder_registered( + catalog, registered_builders={"items_with_role"} + ) == [] + + +def test_i3_skips_visual_pending(): + """visual_pending: true with unregistered builder → I3 skip.""" + from scripts.audit_frame_invariants import check_i3_builder_registered + + catalog = { + "vp_frame": { + "template_id": "vp_frame", + "visual_pending": True, + "payload": {"builder": "vp_only_builder"}, + }, + } + assert check_i3_builder_registered( + catalog, registered_builders={"items_with_role"} + ) == [] + + +def test_cli_exit_zero_on_clean_catalog(tmp_path): + """CLI exit code 0 + PASS line on clean (live) catalog.""" + catalog_path = tmp_path / "catalog.yaml" + partials_dir = tmp_path / "families" + partials_dir.mkdir() + (partials_dir / "ok_frame.html").write_text("
", encoding="utf-8") + _write_yaml( + catalog_path, + { + "ok_frame": { + "template_id": "ok_frame", + "payload": {"builder": "items_with_role"}, + }, + "vp_frame": { + "template_id": "vp_frame", + "visual_pending": True, + "payload": {"builder": "unregistered_xyz"}, + }, + }, + ) + result = _run_cli(catalog_path, partials_dir) + assert result.returncode == 0, result.stdout + result.stderr + assert "PASS" in result.stdout + + +def test_cli_exit_one_on_violations(tmp_path): + """CLI exit code 1 + aggregated violations listed on drift.""" + catalog_path = tmp_path / "catalog.yaml" + partials_dir = tmp_path / "families" + partials_dir.mkdir() + _write_yaml( + catalog_path, + { + "frame_a": { + "template_id": "frame_a", + "payload": {"builder": "ghost_builder"}, + }, + "frame_b": { + "template_id": "frame_b", + "payload": {}, + }, + }, + ) + result = _run_cli(catalog_path, partials_dir) + assert result.returncode == 1, result.stdout + result.stderr + assert "FAIL" in result.stdout + assert "frame_a" in result.stdout + assert "frame_b" in result.stdout + assert "I1" in result.stdout + assert "I2" in result.stdout or "I3" in result.stdout diff --git a/tests/test_audit_frame_invariants_i4.py b/tests/test_audit_frame_invariants_i4.py new file mode 100644 index 0000000..e54e897 --- /dev/null +++ b/tests/test_audit_frame_invariants_i4.py @@ -0,0 +1,444 @@ +"""IMP-#85 u3b — Audit CLI invariant I4 (slot_payload ↔ builder generated keys). + +Scope (Stage 2 lock): + I4 slot_payload refs — every key generated by the contract's builder must + appear as a `slot_payload.` reference in the + partial. Direction A only (dead generated key). + Skipped when the partial uses dynamic bracket + access (`slot_payload[...]`). + + `visual_pending: true` skipped (data-driven from catalog, matches u2/u3a + invariant scope; no hard-coded frame allow-list). + +Out of scope (별 axis): + - V4 runtime VP filter (u4). + - Catalog regression coverage suite (u5). 
+ - Implementing the 17 missing VP builders. +""" +from __future__ import annotations + +import subprocess +import sys +from pathlib import Path + +import yaml + +REPO_ROOT = Path(__file__).resolve().parent.parent +SCRIPT_PATH = REPO_ROOT / "scripts" / "audit_frame_invariants.py" + + +def _write_yaml(path: Path, payload: dict) -> Path: + path.write_text(yaml.safe_dump(payload, sort_keys=False), encoding="utf-8") + return path + + +def _run_cli(catalog: Path, partials: Path) -> subprocess.CompletedProcess: + return subprocess.run( + [ + sys.executable, + str(SCRIPT_PATH), + "--catalog", + str(catalog), + "--partials-dir", + str(partials), + ], + cwd=str(REPO_ROOT), + capture_output=True, + text=True, + ) + + +def test_prod_catalog_audit_passes_i4(): + """Prod catalog + prod partials dir → no I4 violations on live contracts.""" + from scripts.audit_frame_invariants import ( + DEFAULT_CATALOG_PATH, + DEFAULT_PARTIALS_DIR, + check_i4_slot_payload_refs, + ) + from src.phase_z2_mapper import PAYLOAD_BUILDERS + + catalog = yaml.safe_load( + DEFAULT_CATALOG_PATH.read_text(encoding="utf-8") + ) or {} + registered = set(PAYLOAD_BUILDERS.keys()) + violations = check_i4_slot_payload_refs( + catalog, DEFAULT_PARTIALS_DIR, registered + ) + assert violations == [], ( + "Prod live contracts must satisfy I4 (every generated key is " + "referenced by the partial, or partial uses dynamic access). 
" + f"Got: {violations}" + ) + + +def test_extract_static_slot_refs_finds_dot_access(): + from scripts.audit_frame_invariants import extract_static_slot_refs + + partial = ( + "{{ slot_payload.title }}\n" + "{% if slot_payload.foo %}{{ slot_payload.foo }}{% endif %}\n" + "{% for x in slot_payload.bar %}{{ x }}{% endfor %}\n" + ) + refs = extract_static_slot_refs(partial) + assert refs == {"title", "foo", "bar"} + + +def test_extract_static_slot_refs_ignores_dynamic_bracket(): + from scripts.audit_frame_invariants import extract_static_slot_refs + + partial = "{{ slot_payload['pill_' ~ n ~ '_label'] }}" + # Dynamic access does NOT contribute dot-access refs. + assert extract_static_slot_refs(partial) == set() + + +def test_partial_uses_dynamic_slot_access_detects_bracket(): + from scripts.audit_frame_invariants import partial_uses_dynamic_slot_access + + dynamic = "{{ slot_payload['pill_' ~ n ~ '_label'] }}" + static = "{{ slot_payload.title }} and {{ slot_payload.body }}" + assert partial_uses_dynamic_slot_access(dynamic) is True + assert partial_uses_dynamic_slot_access(static) is False + + +def test_expected_keys_quadrant_flat_slots_default_pattern(): + from scripts.audit_frame_invariants import expected_payload_keys + + contract = { + "payload": { + "title": {"source": "section.title"}, + "builder": "quadrant_flat_slots", + "builder_options": { + "item_parser": "quadrant_item", + "pad_to": 4, + "label_key_pattern": "quadrant_{n}_label", + "body_key_pattern": "quadrant_{n}_body", + }, + } + } + keys = expected_payload_keys(contract) + assert "title" in keys + for n in range(1, 5): + assert f"quadrant_{n}_label" in keys + assert f"quadrant_{n}_body" in keys + + +def test_expected_keys_quadrant_flat_slots_custom_pattern(): + from scripts.audit_frame_invariants import expected_payload_keys + + contract = { + "payload": { + "title": {"source": "section.title"}, + "builder": "quadrant_flat_slots", + "builder_options": { + "item_parser": "quadrant_item", + "pad_to": 3, 
+ "label_key_pattern": "category_{n}_label", + "body_key_pattern": "category_{n}_body", + }, + } + } + keys = expected_payload_keys(contract) + assert keys == { + "title", + "category_1_label", "category_2_label", "category_3_label", + "category_1_body", "category_2_body", "category_3_body", + } + + +def test_expected_keys_cycle_intersect_3(): + from scripts.audit_frame_invariants import expected_payload_keys + + contract = { + "payload": { + "title": {"source": "section.title"}, + "builder": "cycle_intersect_3", + "builder_options": { + "item_parser": "quadrant_item", + "pad_to": 3, + "label_key_pattern": "circle_{n}_label", + }, + } + } + keys = expected_payload_keys(contract) + assert keys == { + "title", "circle_1_label", "circle_2_label", "circle_3_label", + "intersection", + } + + +def test_expected_keys_compare_table_2col(): + from scripts.audit_frame_invariants import expected_payload_keys + + contract = { + "payload": { + "title": {"source": "section.title"}, + "builder": "compare_table_2col", + "builder_options": {"item_parser": "compare_row_2col_item"}, + } + } + keys = expected_payload_keys(contract) + assert keys == {"title", "col_a_label", "col_b_label", "rows"} + + +def test_expected_keys_paired_rows_4x2_slots(): + from scripts.audit_frame_invariants import expected_payload_keys + + contract = { + "payload": { + "title": {"source": "section.title"}, + "builder": "paired_rows_4x2_slots", + "builder_options": { + "item_parser": "quadrant_item", + "label_key_pattern": "row_{r}_{side}_label", + "body_key_pattern": "row_{r}_{side}_body", + "rows": 4, + "sides": ["left", "right"], + }, + } + } + keys = expected_payload_keys(contract) + assert "title" in keys + for r in range(1, 5): + for side in ("left", "right"): + assert f"row_{r}_{side}_label" in keys + assert f"row_{r}_{side}_body" in keys + + +def test_expected_keys_process_product_pair(): + from scripts.audit_frame_invariants import expected_payload_keys + + contract = { + "payload": { + "title": 
{"source": "section.title"}, + "builder": "process_product_pair", + "builder_options": { + "pad_sections_to": 3, + "columns": [ + {"title_to": "banner_left", "body_to": "process", + "body_parser": "column_with_transform"}, + {"title_to": "banner_right", "body_to": "product", + "body_parser": "column_with_transform"}, + ], + }, + } + } + keys = expected_payload_keys(contract) + assert keys == {"title", "banner_left", "process", "banner_right", "product"} + + +def test_expected_keys_items_with_role(): + from scripts.audit_frame_invariants import expected_payload_keys + + contract = { + "payload": { + "title": {"source": "section.title"}, + "builder": "items_with_role", + "builder_options": { + "item_parser": "pillar_item", + "array_root": "pillars", + }, + } + } + keys = expected_payload_keys(contract) + assert keys == {"title", "pillars"} + + +def test_i4_dead_generated_key_flagged(tmp_path): + """Builder produces key X, partial doesn't reference it → I4 violation.""" + from scripts.audit_frame_invariants import check_i4_slot_payload_refs + + partials_dir = tmp_path / "families" + partials_dir.mkdir() + # Partial references `title` and the category_1 keys only — category_2_label / category_2_body are never referenced. + (partials_dir / "drift_frame.html").write_text( + "
{{ slot_payload.title }}
" + "
{{ slot_payload.category_1_label }}
" + "
{{ slot_payload.category_1_body }}
", + encoding="utf-8", + ) + catalog = { + "drift_frame": { + "template_id": "drift_frame", + "payload": { + "title": {"source": "section.title"}, + "builder": "quadrant_flat_slots", + "builder_options": { + "item_parser": "quadrant_item", + "pad_to": 2, + "label_key_pattern": "category_{n}_label", + "body_key_pattern": "category_{n}_body", + }, + }, + }, + } + violations = check_i4_slot_payload_refs( + catalog, partials_dir, registered_builders={"quadrant_flat_slots"} + ) + msgs = "\n".join(violations) + assert "I4 generated-key-orphan" in msgs + assert "drift_frame" in msgs + assert "category_2_label" in msgs + assert "category_2_body" in msgs + # category_1 keys ARE referenced — must NOT be flagged. + assert "slot_payload.category_1_label." not in msgs + assert "slot_payload.category_1_body." not in msgs + + +def test_i4_skips_partial_with_dynamic_bracket_access(tmp_path): + """Dynamic bracket access in partial → I4 skipped (cannot resolve statically).""" + from scripts.audit_frame_invariants import check_i4_slot_payload_refs + + partials_dir = tmp_path / "families" + partials_dir.mkdir() + (partials_dir / "dynamic_frame.html").write_text( + "{{ slot_payload.title }}\n" + "{% for n in range(1, 6) %}" + "{{ slot_payload['pill_' ~ n ~ '_label'] }}" + "{{ slot_payload['pill_' ~ n ~ '_body'] }}" + "{% endfor %}", + encoding="utf-8", + ) + catalog = { + "dynamic_frame": { + "template_id": "dynamic_frame", + "payload": { + "title": {"source": "section.title"}, + "builder": "quadrant_flat_slots", + "builder_options": { + "item_parser": "quadrant_item", + "pad_to": 5, + "label_key_pattern": "pill_{n}_label", + "body_key_pattern": "pill_{n}_body", + }, + }, + }, + } + violations = check_i4_slot_payload_refs( + catalog, partials_dir, registered_builders={"quadrant_flat_slots"} + ) + assert violations == [], ( + "Dynamic bracket access must suppress I4 (cannot resolve statically); " + f"got: {violations}" + ) + + +def test_i4_skips_visual_pending(tmp_path): + """VP 
contract with drift → I4 skip (no violation).""" + from scripts.audit_frame_invariants import check_i4_slot_payload_refs + + partials_dir = tmp_path / "families" + partials_dir.mkdir() + (partials_dir / "vp_frame.html").write_text( + "
nothing
", encoding="utf-8" + ) + catalog = { + "vp_frame": { + "template_id": "vp_frame", + "visual_pending": True, + "payload": { + "title": {"source": "section.title"}, + "builder": "quadrant_flat_slots", + "builder_options": { + "item_parser": "quadrant_item", "pad_to": 4, + }, + }, + }, + } + violations = check_i4_slot_payload_refs( + catalog, partials_dir, registered_builders={"quadrant_flat_slots"} + ) + assert violations == [] + + +def test_i4_skips_unregistered_builder(tmp_path): + """Unregistered builder (already an I3 hit) → I4 silent on same contract.""" + from scripts.audit_frame_invariants import check_i4_slot_payload_refs + + partials_dir = tmp_path / "families" + partials_dir.mkdir() + (partials_dir / "ghost_frame.html").write_text( + "{{ slot_payload.title }}", encoding="utf-8" + ) + catalog = { + "ghost_frame": { + "template_id": "ghost_frame", + "payload": { + "title": {"source": "section.title"}, + "builder": "ghost_builder_not_in_registry", + }, + }, + } + violations = check_i4_slot_payload_refs( + catalog, partials_dir, registered_builders={"quadrant_flat_slots"} + ) + assert violations == [], ( + "Unregistered builder is already flagged by I3 — I4 must stay silent " + f"on the same contract; got: {violations}" + ) + + +def test_i4_skips_missing_partial(tmp_path): + """Missing partial (already I1 hit) → I4 silent on same contract.""" + from scripts.audit_frame_invariants import check_i4_slot_payload_refs + + partials_dir = tmp_path / "families" + partials_dir.mkdir() + # No partial file written. 
+ catalog = { + "missing_partial_frame": { + "template_id": "missing_partial_frame", + "payload": { + "title": {"source": "section.title"}, + "builder": "quadrant_flat_slots", + "builder_options": { + "item_parser": "quadrant_item", "pad_to": 4, + }, + }, + }, + } + violations = check_i4_slot_payload_refs( + catalog, partials_dir, registered_builders={"quadrant_flat_slots"} + ) + assert violations == [] + + +def test_cli_pass_on_prod_paths(tmp_path): + """End-to-end CLI on prod paths reports PASS with I1-I4 wording.""" + result = subprocess.run( + [sys.executable, str(SCRIPT_PATH)], + cwd=str(REPO_ROOT), + capture_output=True, + text=True, + ) + assert result.returncode == 0, result.stdout + result.stderr + assert "PASS (I1-I4 clean" in result.stdout + + +def test_cli_fail_on_synthetic_i4_drift(tmp_path): + """CLI exits 1 + emits I4 violation when a non-VP contract has dead keys.""" + partials_dir = tmp_path / "families" + partials_dir.mkdir() + (partials_dir / "drift_frame.html").write_text( + "{{ slot_payload.title }}", encoding="utf-8" + ) + catalog_path = _write_yaml( + tmp_path / "frame_contracts.yaml", + { + "drift_frame": { + "template_id": "drift_frame", + "payload": { + "title": {"source": "section.title"}, + "builder": "quadrant_flat_slots", + "builder_options": { + "item_parser": "quadrant_item", "pad_to": 2, + "label_key_pattern": "category_{n}_label", + "body_key_pattern": "category_{n}_body", + }, + }, + }, + }, + ) + result = _run_cli(catalog_path, partials_dir) + assert result.returncode == 1, result.stdout + result.stderr + assert "I4 generated-key-orphan" in result.stdout + assert "category_1_label" in result.stdout diff --git a/tests/test_catalog_invariant.py b/tests/test_catalog_invariant.py index f5b8a90..7f09c0b 100644 --- a/tests/test_catalog_invariant.py +++ b/tests/test_catalog_invariant.py @@ -79,3 +79,149 @@ def test_catalog_entry_count_matches_frame_count(): f"catalog shape inconsistent: entries={entry_count} " 
f"templates={template_count} frames={frame_count}" ) + + +# ──────────────────────── IMP-#85 u5 regression coverage ──────────────────────── +# +# Scope (Stage 2 lock): +# - Prod catalog passes the audit CLI (run_audit) end-to-end. +# - Non-VP fixture catalogs reproduce the boot invariant (u2) + audit (u3a/u3b) +# negative paths: missing payload.builder, missing partial, undeclared +# slot_payload reference (I4 generated-key-orphan). +# - Same fixtures with `visual_pending: true` MUST be silently skipped — the +# data-driven VP scope guard from u2/u3a/u3b must not regress. +# +# Out of scope: +# - Implementing the 17 missing VP builders (별 P0 / IMP-04b backlog). +# - Visual rendering of fixture frames. +# +# Path-convention note (tests/CLAUDE.md §F-5): +# Stage 2 plan named `tests/fixtures/catalog/` but the project convention +# reserves the root `tests/fixtures/` for non-Phase-Z fixtures (creation +# requires a separate issue). Phase-Z YAML fixtures live under +# `tests/phase_z2/fixtures/`. The u5 fixtures therefore live at +# `tests/phase_z2/fixtures/catalog/`. 
+ +import yaml + +from scripts.audit_frame_invariants import ( + DEFAULT_CATALOG_PATH, + DEFAULT_PARTIALS_DIR, + run_audit, +) +from src import phase_z2_mapper +from src.phase_z2_mapper import ( + CatalogInvariantError, + PAYLOAD_BUILDERS, + _check_catalog_builder_invariant, +) + +_IMP85_FIXTURES_DIR = Path(__file__).parent / "phase_z2" / "fixtures" / "catalog" +_MISSING_BUILDER_FIXTURE = _IMP85_FIXTURES_DIR / "missing_builder_non_vp.yaml" +_UNDECLARED_SLOT_FIXTURE = _IMP85_FIXTURES_DIR / "undeclared_slot_ref_non_vp.yaml" + + +def _load_fixture_catalog(path: Path) -> dict: + with path.open(encoding="utf-8") as f: + return yaml.safe_load(f) + + +@pytest.fixture +def _reset_catalog_cache_for_imp85(): + """Some tests below load fixture YAMLs into the boot invariant; ensure the + prod cache is untouched on entry/exit so other tests stay deterministic.""" + phase_z2_mapper._CATALOG_CACHE = None + yield + phase_z2_mapper._CATALOG_CACHE = None + + +def test_prod_catalog_audit_clean(): + """IMP-#85 u5 — prod catalog + prod partials dir pass audit (I1-I4 clean).""" + violations = run_audit(DEFAULT_CATALOG_PATH, DEFAULT_PARTIALS_DIR) + assert violations == [], ( + f"Prod catalog audit reported {len(violations)} violation(s):\n - " + + "\n - ".join(violations) + ) + + +def test_missing_builder_fixture_raises_catalog_invariant( + _reset_catalog_cache_for_imp85, +): + """Fixture: non-VP contract with unregistered builder → u2 invariant raise.""" + catalog = _load_fixture_catalog(_MISSING_BUILDER_FIXTURE) + with pytest.raises(CatalogInvariantError) as exc: + _check_catalog_builder_invariant(catalog) + msg = str(exc.value) + assert "imp85_u5_missing_builder_frame" in msg + assert "definitely_not_a_registered_builder_imp85_u5" in msg + + +def test_missing_builder_fixture_audit_reports_i3(tmp_path): + """Fixture: non-VP contract with unregistered builder → audit I3 + I1. + + The fixture frame's template_id has no partial on disk (tmp_path is empty), + so I1 fires as well. 
I3 is the primary assertion target; I1 surfacing is + expected and asserted to lock both audit paths together. + """ + violations = run_audit(_MISSING_BUILDER_FIXTURE, tmp_path) + joined = "\n".join(violations) + assert any( + v.startswith("I3 builder-unregistered:") + and "imp85_u5_missing_builder_frame" in v + for v in violations + ), f"expected I3 builder-unregistered violation, got:\n{joined}" + assert any( + v.startswith("I1 partial-missing:") + and "imp85_u5_missing_builder_frame" in v + for v in violations + ), f"expected I1 partial-missing violation, got:\n{joined}" + + +def test_undeclared_slot_fixture_audit_reports_i4(tmp_path): + """Fixture: non-VP contract with valid builder but orphan generated key. + + `items_with_role` + `array_root: orphan_array_root_imp85_u5` produces + `slot_payload.orphan_array_root_imp85_u5`. The temp partial below contains + `slot_payload.title` only (no bracket access), so I4 must fire on the + orphan array_root key. + """ + partials_dir = tmp_path / "families" + partials_dir.mkdir() + partial = partials_dir / "imp85_u5_undeclared_slot_frame.html" + partial.write_text( + "
{{ slot_payload.title }}
", + encoding="utf-8", + ) + + violations = run_audit(_UNDECLARED_SLOT_FIXTURE, partials_dir) + joined = "\n".join(violations) + assert any( + v.startswith("I4 generated-key-orphan:") + and "imp85_u5_undeclared_slot_frame" in v + and "orphan_array_root_imp85_u5" in v + for v in violations + ), f"expected I4 generated-key-orphan violation, got:\n{joined}" + + +def test_fixtures_with_visual_pending_true_are_skipped( + tmp_path, _reset_catalog_cache_for_imp85, +): + """VP scope guard — flipping `visual_pending: true` on fixture frames must + silence both the boot invariant (u2) and the audit CLI (I1-I4).""" + missing = _load_fixture_catalog(_MISSING_BUILDER_FIXTURE) + undeclared = _load_fixture_catalog(_UNDECLARED_SLOT_FIXTURE) + for entry in (*missing.values(), *undeclared.values()): + entry["visual_pending"] = True + + _check_catalog_builder_invariant(missing) + _check_catalog_builder_invariant(undeclared) + + vp_yaml = tmp_path / "vp_only.yaml" + vp_yaml.write_text(yaml.safe_dump({**missing, **undeclared}), encoding="utf-8") + partials_dir = tmp_path / "families" + partials_dir.mkdir() + violations = run_audit(vp_yaml, partials_dir) + assert violations == [], ( + f"VP frames must be silently skipped, got:\n - " + + "\n - ".join(violations) + ) diff --git a/tests/test_lookup_v4_candidates_vp_filter.py b/tests/test_lookup_v4_candidates_vp_filter.py new file mode 100644 index 0000000..361fd67 --- /dev/null +++ b/tests/test_lookup_v4_candidates_vp_filter.py @@ -0,0 +1,259 @@ +"""IMP-#85 u4 — lookup_v4_candidates visual_pending filter regression tests. + +Scope (Stage 2 lock): + - ``visual_pending: true`` frames are excluded from the live candidate set + returned by ``lookup_v4_candidates`` (mdx04 hard-crash path closure). + - Filter is data-driven from catalog ``visual_pending`` field (no hard-coded + frame allow-list, per Stage 2 guardrail + ``feedback_no_hardcoding``). 
+ - ``lookup_v4_all_judgments`` raw telemetry MUST remain untouched — full 32 + judgments (reject + VP inclusive) preserved for frontend Step 7-A axis. + - Existing ``label == "reject"`` filter and ``max_n`` cap behavior unchanged. + +Out of scope (other IMP-#85 units / future axes): + - Implementing the 17 missing VP builders (별 P0 backlog, IMP-04b / #42). + - VP semantics redefinition / VP frame removal from V4 evidence. + - Adapter pipeline redesign. + +Synthetic naming convention (per ``test_phase_z2_v4_fallback.py`` E1 lock): + ``MOCK_`` prefix mandatory. ``_a`` / ``_b`` suffixes = enumeration, not + ordering / priority. Rank expressed by ``v4_full_rank``, never by suffix. +""" +from __future__ import annotations + +import pytest + +from src import phase_z2_pipeline +from src.phase_z2_pipeline import ( + _is_visual_pending, + lookup_v4_all_judgments, + lookup_v4_candidates, +) + + +# ─── Synthetic catalog stub ────────────────────────────────────── +# Maps template_id → contract dict (None means catalog-unregistered). + +_MOCK_CATALOG: dict[str, object] = { + "MOCK_template_live_a": {"visual_pending": False}, + "MOCK_template_live_b": {"visual_pending": False}, + "MOCK_template_live_no_vp": {}, # no visual_pending key at all → treated as live + "MOCK_template_vp_a": {"visual_pending": True}, + "MOCK_template_vp_b": {"visual_pending": True}, + # MOCK_template_missing_contract intentionally absent (get_contract → None) +} + + +def _mock_get_contract(template_id: str): + return _MOCK_CATALOG.get(template_id) + + +@pytest.fixture +def patch_catalog(monkeypatch): + """Monkeypatch module-level ``get_contract`` so ``_is_visual_pending`` + reads from ``_MOCK_CATALOG`` without touching prod ``frame_contracts.yaml``. 
+ """ + monkeypatch.setattr( + "src.phase_z2_pipeline.get_contract", _mock_get_contract + ) + + +def _make_v4(judgments: list[dict], section_id: str = "S1") -> dict: + return {"mdx_sections": {section_id: {"judgments_full32": judgments}}} + + +def _j(rank: int, template_id: str, frame_id: str, label: str = "use_as_is", + confidence: float = 0.9) -> dict: + return { + "frame_id": frame_id, + "frame_number": rank, + "template_id": template_id, + "confidence": confidence, + "label": label, + "v4_full_rank": rank, + } + + +# ─── _is_visual_pending helper ────────────────────────────────── + + +def test_is_visual_pending_true_for_vp_contract(patch_catalog): + """VP-flagged contract → True.""" + assert _is_visual_pending("MOCK_template_vp_a") is True + + +def test_is_visual_pending_false_for_live_contract(patch_catalog): + """Live (explicit visual_pending=False) contract → False.""" + assert _is_visual_pending("MOCK_template_live_a") is False + + +def test_is_visual_pending_false_when_key_absent(patch_catalog): + """Contract without ``visual_pending`` field → False (default = live).""" + assert _is_visual_pending("MOCK_template_live_no_vp") is False + + +def test_is_visual_pending_false_for_unregistered_contract(patch_catalog): + """``get_contract`` → None → False (no spurious gating on unknown ids). + + Catalog drift (unregistered template_id) is caught by catalog invariant + (u2 boot + u3 audit), not by this runtime helper. + """ + assert _is_visual_pending("MOCK_template_missing_contract") is False + + +# ─── lookup_v4_candidates VP filter ───────────────────────────── + + +def test_vp_rank_1_excluded_live_rank_2_promoted(patch_catalog): + """mdx04 crash-path shape — rank-1 VP frame is skipped, live rank-2 wins. + + Mirrors the production 04-2.x case where ``sw_dependency_four_problems`` + (VP, builder = ``cards_4_grid`` absent from registry) appeared at high + rank and crashed the mapper. 
With the u4 filter, the VP candidate is + skipped and a live candidate is returned instead. + """ + v4 = _make_v4([ + _j(1, "MOCK_template_vp_a", "MOCK_frame_001", "restructure"), + _j(2, "MOCK_template_live_a", "MOCK_frame_002", "use_as_is"), + ]) + + candidates = lookup_v4_candidates(v4, "S1", max_n=6) + + assert [c.template_id for c in candidates] == ["MOCK_template_live_a"] + + +def test_all_vp_yields_empty_candidates(patch_catalog): + """All candidates VP → empty list (Step 9 fallback signal). + + 0-length output remains the documented ``no_non_reject_v4_candidate`` + signal for the Step 9 fallback path; VP exclusion preserves this contract. + """ + v4 = _make_v4([ + _j(1, "MOCK_template_vp_a", "MOCK_frame_001", "use_as_is"), + _j(2, "MOCK_template_vp_b", "MOCK_frame_002", "light_edit"), + ]) + + candidates = lookup_v4_candidates(v4, "S1", max_n=6) + + assert candidates == [] + + +def test_vp_and_reject_both_filtered(patch_catalog): + """VP and reject co-occur — both filtered; only live non-reject survive.""" + v4 = _make_v4([ + _j(1, "MOCK_template_vp_a", "MOCK_frame_001", "use_as_is"), + _j(2, "MOCK_template_live_a", "MOCK_frame_002", "reject"), + _j(3, "MOCK_template_live_b", "MOCK_frame_003", "use_as_is"), + ]) + + candidates = lookup_v4_candidates(v4, "S1", max_n=6) + + assert [c.template_id for c in candidates] == ["MOCK_template_live_b"] + + +def test_unregistered_contract_not_filtered_by_vp(patch_catalog): + """Unregistered template_id (get_contract → None) is NOT VP-filtered. + + VP gating only applies when catalog declares ``visual_pending: true``. + Catalog drift (template_id absent from catalog entirely) is a separate + failure mode covered by catalog invariant (u2) and audit (u3a) — runtime + VP filter stays silent on that axis. 
+ """ + v4 = _make_v4([ + _j(1, "MOCK_template_missing_contract", "MOCK_frame_001", "use_as_is"), + ]) + + candidates = lookup_v4_candidates(v4, "S1", max_n=6) + + assert [c.template_id for c in candidates] == ["MOCK_template_missing_contract"] + + +def test_max_n_applies_after_vp_filter(patch_catalog): + """``max_n`` caps the live-eligible list after VP and reject filtering. + + Three live candidates + ``max_n=2`` → first two live frames are returned. + """ + v4 = _make_v4([ + _j(1, "MOCK_template_vp_a", "MOCK_frame_001", "use_as_is"), + _j(2, "MOCK_template_live_a", "MOCK_frame_002", "use_as_is"), + _j(3, "MOCK_template_live_b", "MOCK_frame_003", "use_as_is"), + _j(4, "MOCK_template_live_no_vp", "MOCK_frame_004", "use_as_is"), + ]) + + candidates = lookup_v4_candidates(v4, "S1", max_n=2) + + assert [c.template_id for c in candidates] == [ + "MOCK_template_live_a", + "MOCK_template_live_b", + ] + + +def test_only_live_candidates_pass_unchanged(patch_catalog): + """No VP / no reject → behavior identical to pre-u4 (regression guard).""" + v4 = _make_v4([ + _j(1, "MOCK_template_live_a", "MOCK_frame_001", "use_as_is"), + _j(2, "MOCK_template_live_b", "MOCK_frame_002", "light_edit"), + ]) + + candidates = lookup_v4_candidates(v4, "S1", max_n=6) + + assert [c.template_id for c in candidates] == [ + "MOCK_template_live_a", + "MOCK_template_live_b", + ] + + +# ─── lookup_v4_all_judgments untouched (Step 7-A axis preservation) ─── + + +def test_all_judgments_includes_vp_frames(patch_catalog): + """Raw 32-judgment telemetry MUST include VP frames (not gated). + + Stage 2 explicit guardrail — frontend Step 7-A axis needs full 32-frame + PNG evidence including VP scaffolding. The u4 filter applies ONLY to the + live candidate path, not the raw judgments path. 
+ """ + v4 = _make_v4([ + _j(1, "MOCK_template_vp_a", "MOCK_frame_001", "restructure"), + _j(2, "MOCK_template_live_a", "MOCK_frame_002", "use_as_is"), + _j(3, "MOCK_template_vp_b", "MOCK_frame_003", "light_edit"), + ]) + + all_judgments = lookup_v4_all_judgments(v4, "S1") + + assert [j.template_id for j in all_judgments] == [ + "MOCK_template_vp_a", + "MOCK_template_live_a", + "MOCK_template_vp_b", + ] + + +def test_all_judgments_includes_reject_and_vp(patch_catalog): + """Raw judgments preserves BOTH reject AND VP — confirms u4 narrowed scope.""" + v4 = _make_v4([ + _j(1, "MOCK_template_vp_a", "MOCK_frame_001", "restructure"), + _j(2, "MOCK_template_live_a", "MOCK_frame_002", "reject"), + _j(3, "MOCK_template_live_b", "MOCK_frame_003", "use_as_is"), + ]) + + all_judgments = lookup_v4_all_judgments(v4, "S1") + candidates = lookup_v4_candidates(v4, "S1", max_n=6) + + # raw telemetry: 3 (all preserved) + assert len(all_judgments) == 3 + # live candidates: 1 (vp + reject filtered) + assert [c.template_id for c in candidates] == ["MOCK_template_live_b"] + + +# ─── Empty section / missing v4 ───────────────────────────────── + + +def test_empty_judgments_returns_empty(patch_catalog): + """No judgments → empty list (unchanged from pre-u4).""" + v4 = _make_v4([]) + assert lookup_v4_candidates(v4, "S1", max_n=6) == [] + + +def test_unknown_section_returns_empty(patch_catalog): + """Section_id not in V4 → empty list (unchanged from pre-u4).""" + v4 = _make_v4([_j(1, "MOCK_template_live_a", "MOCK_frame_001")]) + assert lookup_v4_candidates(v4, "SECTION_NOT_PRESENT", max_n=6) == [] diff --git a/tests/test_mdx04_vp_routing.py b/tests/test_mdx04_vp_routing.py new file mode 100644 index 0000000..e9c5cfc --- /dev/null +++ b/tests/test_mdx04_vp_routing.py @@ -0,0 +1,129 @@ +"""IMP-#85 u6 — mdx04 VP routing regression against the real V4 evidence. 
+ +Scope (Stage 2 lock): + - Use the production ``tests/matching/v4_full32_result.yaml`` + the production + ``templates/phase_z2/catalog/frame_contracts.yaml`` (no fixtures, no mocks). + - Prove that ``sw_dependency_four_problems`` (VP rank-1 on ``04-2.1``, VP + rank-2 on ``04-2.2``) is excluded from ``lookup_v4_candidates`` after u4, + while ``lookup_v4_all_judgments`` retains it as Step 7-A raw telemetry. + - Guard mdx03 dynamically — the actual rank-1 winners on ``03-1`` / ``03-2`` + must be non-VP per catalog AND must survive into live candidates. + - VP gating is asserted data-driven (catalog ``visual_pending: true`` flag), + never hard-coded — matches Stage 1/2 ``feedback_no_hardcoding`` guardrail. + +Out of scope: + - Implementing the 17 missing VP builders (별 P0 backlog, IMP-04b / #42). + - VP semantics redefinition or VP frame removal from V4 evidence. + - Adapter pipeline redesign. +""" +from __future__ import annotations + +from src.phase_z2_mapper import get_contract +from src.phase_z2_pipeline import ( + load_v4_result, + lookup_v4_all_judgments, + lookup_v4_candidates, +) + +CRASH_TEMPLATE_ID = "sw_dependency_four_problems" + + +def _rank1_template_id(v4: dict, section_id: str) -> str: + judgments = v4["mdx_sections"][section_id]["judgments_full32"] + return judgments[0]["template_id"] + + +# ─── Dynamic catalog proof — VP flag is data-driven ───────────── + + +def test_crash_template_is_visual_pending_in_catalog(): + """Catalog declares ``sw_dependency_four_problems.visual_pending: true``. + + Locks the data-driven contract — the entire u4 / u6 chain rests on this + YAML flag, not a hard-coded frame allow-list. If the catalog ever drops + the flag without registering the ``cards_4_grid`` builder, this assertion + surfaces the regression before mdx04 crashes the mapper. 
+    """
+    contract = get_contract(CRASH_TEMPLATE_ID)
+    assert isinstance(contract, dict), CRASH_TEMPLATE_ID
+    assert contract.get("visual_pending") is True
+
+
+# ─── mdx04-2.1 — VP frame at rank 1 ─────────────────────────────
+
+
+def test_mdx04_2_1_excludes_vp_rank_1_from_live_candidates():
+    """``04-2.1`` rank-1 is the VP crash frame — must NOT appear in live set.
+
+    Every surviving live candidate (if any) must itself be non-VP per catalog;
+    the section may legitimately produce an empty list (all remaining entries
+    are reject), which is the documented ``no_non_reject_v4_candidate`` signal
+    routed to the Step 9 fallback path.
+    """
+    v4 = load_v4_result()
+    assert _rank1_template_id(v4, "04-2.1") == CRASH_TEMPLATE_ID
+
+    candidates = lookup_v4_candidates(v4, "04-2.1", max_n=6)
+    tids = [c.template_id for c in candidates]
+
+    assert CRASH_TEMPLATE_ID not in tids
+    for tid in tids:
+        contract = get_contract(tid) or {}
+        assert contract.get("visual_pending") is not True, (
+            f"04-2.1: surviving live candidate {tid} is VP"
+        )
+
+
+def test_mdx04_2_1_retains_vp_frame_in_raw_judgments():
+    """Step 7-A axis preservation — raw 32-entry telemetry still carries VP."""
+    v4 = load_v4_result()
+    all_tids = [j.template_id for j in lookup_v4_all_judgments(v4, "04-2.1")]
+    assert CRASH_TEMPLATE_ID in all_tids
+
+
+# ─── mdx04-2.2 — VP frame at rank 2 ─────────────────────────────
+
+
+def test_mdx04_2_2_excludes_vp_rank_2_from_live_candidates():
+    """``04-2.2`` rank-2 is the VP crash frame — rank-1 live frame must win."""
+    v4 = load_v4_result()
+    rank_1 = _rank1_template_id(v4, "04-2.2")
+    rank_1_contract = get_contract(rank_1) or {}
+    # Pre-condition for this regression: rank-1 on 04-2.2 is non-VP.
+    assert rank_1_contract.get("visual_pending") is not True
+
+    candidates = lookup_v4_candidates(v4, "04-2.2", max_n=6)
+    tids = [c.template_id for c in candidates]
+
+    assert CRASH_TEMPLATE_ID not in tids
+    assert tids and tids[0] == rank_1, f"04-2.2: expected rank-1 ({rank_1}) live, got {tids}"
+
+
+def test_mdx04_2_2_retains_vp_frame_in_raw_judgments():
+    """Raw judgments path preserves VP frame regardless of its rank."""
+    v4 = load_v4_result()
+    all_tids = [j.template_id for j in lookup_v4_all_judgments(v4, "04-2.2")]
+    assert CRASH_TEMPLATE_ID in all_tids
+
+
+# ─── mdx03 dynamic guard — non-VP rank-1 survives ───────────────
+
+
+def test_mdx03_rank_1_non_vp_survives_live_candidates():
+    """Non-VP rank-1 winners on mdx03 sections must still win after u4.
+
+    Dynamic check — pulls rank-1 from the V4 yaml + catalog VP flag at runtime.
+    No hard-coded template_id list; only the regression contract is asserted.
+    """
+    v4 = load_v4_result()
+    for section_id in ("03-1", "03-2"):
+        rank_1 = _rank1_template_id(v4, section_id)
+        contract = get_contract(rank_1) or {}
+        assert contract.get("visual_pending") is not True, (
+            f"{section_id} rank-1 ({rank_1}) unexpectedly VP — guard precondition broken"
+        )
+        candidates = lookup_v4_candidates(v4, section_id, max_n=6)
+        tids = [c.template_id for c in candidates]
+        assert tids and tids[0] == rank_1, (
+            f"{section_id}: expected rank-1 ({rank_1}) live, got {tids}"
+        )
diff --git a/tests/test_phase_z2_load_frame_contracts_vp.py b/tests/test_phase_z2_load_frame_contracts_vp.py
new file mode 100644
index 0000000..5603858
--- /dev/null
+++ b/tests/test_phase_z2_load_frame_contracts_vp.py
@@ -0,0 +1,188 @@
+"""IMP-#85 u2 — load_frame_contracts catalog builder invariant.
+
+Scope (Stage 2 lock):
+  - Prod `frame_contracts.yaml` (32 frames) passes invariant on load.
+  - `visual_pending: true` contracts are skipped — separate backlog axis (IMP-04b / #42).
+  - Non-VP contracts with missing or unknown `payload.builder` raise
+    `CatalogInvariantError` (boot-time fail-fast).
+  - Failed invariant must NOT populate `_CATALOG_CACHE` (retry-able).
+
+Out of scope:
+  - Implementing the 17 missing VP builders (separate P0 / IMP-04b backlog).
+  - Audit CLI invariants I1–I4 (u3a / u3b).
+  - Lookup-side VP filter (u4).
+  - Catalog regression fixtures via tests/fixtures/catalog/ (u5).
+"""
+from __future__ import annotations
+
+import pytest
+
+from src import phase_z2_mapper
+from src.phase_z2_mapper import (
+    CatalogInvariantError,
+    PAYLOAD_BUILDERS,
+    _check_catalog_builder_invariant,
+    load_frame_contracts,
+)
+
+
+@pytest.fixture(autouse=True)
+def _reset_catalog_cache():
+    """Reset ``phase_z2_mapper._CATALOG_CACHE`` to None before and after every test."""
+    phase_z2_mapper._CATALOG_CACHE = None
+    yield
+    phase_z2_mapper._CATALOG_CACHE = None
+
+
+def test_prod_catalog_passes_invariant():
+    """Loading the prod frame_contracts.yaml raises no invariant violation (32 frames)."""
+    catalog = load_frame_contracts()
+    assert isinstance(catalog, dict)
+    assert len(catalog) >= 30
+
+
+def test_invariant_skips_visual_pending_contract_with_unknown_builder():
+    """A visual_pending: true contract is skipped even if its builder is unknown."""
+    catalog = {
+        "vp_frame": {
+            "template_id": "vp_frame",
+            "visual_pending": True,
+            "payload": {"builder": "definitely_not_a_registered_builder"},
+        },
+    }
+    _check_catalog_builder_invariant(catalog)
+
+
+def test_invariant_skips_vp_contract_missing_builder_field():
+    """A visual_pending: true contract is skipped even if payload has no builder field."""
+    catalog = {
+        "vp_frame": {
+            "template_id": "vp_frame",
+            "visual_pending": True,
+            "payload": {},
+        },
+    }
+    _check_catalog_builder_invariant(catalog)
+
+
+def test_invariant_raises_on_non_vp_missing_builder_field():
+    """A contract with visual_pending absent or false and no payload.builder raises."""
+    catalog = {
+        "live_frame": {
+            "template_id": "live_frame",
+            "payload": {},
+        },
+    }
+    with pytest.raises(CatalogInvariantError) as exc:
+        _check_catalog_builder_invariant(catalog)
+    msg = str(exc.value)
+    assert "live_frame" in msg
+    assert "missing payload.builder" in msg
+
+
+def test_invariant_raises_on_non_vp_unknown_builder():
+    """A non-VP contract whose payload.builder is not in PAYLOAD_BUILDERS raises."""
+    catalog = {
+        "live_frame": {
+            "template_id": "live_frame",
+            "payload": {"builder": "definitely_not_a_registered_builder"},
+        },
+    }
+    with pytest.raises(CatalogInvariantError) as exc:
+        _check_catalog_builder_invariant(catalog)
+    msg = str(exc.value)
+    assert "live_frame" in msg
+    assert "definitely_not_a_registered_builder" in msg
+
+
+def test_invariant_passes_on_non_vp_registered_builder():
+    """A non-VP contract pointing at a registered builder passes."""
+    sample_builder = next(iter(PAYLOAD_BUILDERS.keys()))
+    catalog = {
+        "live_frame": {
+            "template_id": "live_frame",
+            "payload": {"builder": sample_builder},
+        },
+    }
+    _check_catalog_builder_invariant(catalog)
+
+
+def test_invariant_aggregates_multiple_violations_excluding_vp():
+    """All non-VP violations are reported in the message; VP frames are excluded."""
+    catalog = {
+        "frame_a": {
+            "template_id": "frame_a",
+            "payload": {"builder": "missing_x"},
+        },
+        "frame_b": {
+            "template_id": "frame_b",
+            "payload": {},
+        },
+        "vp_frame": {
+            "template_id": "vp_frame",
+            "visual_pending": True,
+            "payload": {"builder": "missing_y"},
+        },
+    }
+    with pytest.raises(CatalogInvariantError) as exc:
+        _check_catalog_builder_invariant(catalog)
+    msg = str(exc.value)
+    assert "frame_a" in msg
+    assert "frame_b" in msg
+    assert "vp_frame" not in msg
+    assert "missing_x" in msg
+    assert "missing_y" not in msg
+
+
+def test_invariant_treats_visual_pending_false_as_live():
+    """An explicit visual_pending: false is validated exactly like a live frame."""
+    catalog = {
+        "live_frame": {
+            "template_id": "live_frame",
+            "visual_pending": False,
+            "payload": {"builder": "missing_x"},
+        },
+    }
+    with pytest.raises(CatalogInvariantError):
+        _check_catalog_builder_invariant(catalog)
+
+
+def test_load_frame_contracts_failure_does_not_populate_cache(
+    monkeypatch, tmp_path
+):
+    """A failed invariant leaves _CATALOG_CACHE unpopulated (so a retry is possible)."""
+    bad_yaml = tmp_path / "bad.yaml"
+    # YAML nesting is significant: ``builder`` must sit under ``payload`` (payload.builder).
+    bad_yaml.write_text(
+        "live_frame:\n"
+        "  template_id: live_frame\n"
+        "  payload:\n"
+        "    builder: nonexistent_builder_xyz\n",
+        encoding="utf-8",
+    )
+    monkeypatch.setattr(phase_z2_mapper, "CATALOG_PATH", bad_yaml)
+    monkeypatch.setattr(phase_z2_mapper, "_CATALOG_CACHE", None)
+
+    with pytest.raises(CatalogInvariantError):
+        load_frame_contracts()
+    assert phase_z2_mapper._CATALOG_CACHE is None
+
+
+def test_load_frame_contracts_success_populates_cache(monkeypatch, tmp_path):
+    """A passing invariant populates _CATALOG_CACHE; the second call returns the same dict."""
+    sample_builder = next(iter(PAYLOAD_BUILDERS.keys()))
+    good_yaml = tmp_path / "good.yaml"
+    # YAML nesting is significant: ``builder`` must sit under ``payload`` (payload.builder).
+    good_yaml.write_text(
+        "live_frame:\n"
+        "  template_id: live_frame\n"
+        f"  payload:\n    builder: {sample_builder}\n",
+        encoding="utf-8",
+    )
+    monkeypatch.setattr(phase_z2_mapper, "CATALOG_PATH", good_yaml)
+    monkeypatch.setattr(phase_z2_mapper, "_CATALOG_CACHE", None)
+
+    first = load_frame_contracts()
+    second = load_frame_contracts()
+    assert first is second
+    assert "live_frame" in first
diff --git a/tests/test_phase_z2_mapper_builder_missing.py b/tests/test_phase_z2_mapper_builder_missing.py
new file mode 100644
index 0000000..4cd108e
--- /dev/null
+++ b/tests/test_phase_z2_mapper_builder_missing.py
@@ -0,0 +1,89 @@
+"""IMP-#85 u1 — mapper missing-builder dispatch raises BuilderMissingError.
+
+Scope (Stage 2 lock):
+  - `BuilderMissingError` exists and is a subclass of `FitError`.
+  - `map_with_contract` raises `BuilderMissingError` when
+    `contract.payload.builder` references an unknown registry entry, OR
+    when `payload.builder` is empty/missing.
+  - Because it subclasses `FitError`, the existing pipeline
+    `except FitError` route in `src/phase_z2_pipeline.py` continues to
+    catch the failure and emit an `adapter_needed` record instead of a
+    hard crash (mdx04 `sw_dependency_four_problems` / `cards_4_grid`
+    regression evidence).
+"""
+from __future__ import annotations
+
+from types import SimpleNamespace
+
+import pytest
+
+from src.phase_z2_mapper import (
+    BuilderMissingError,
+    FitError,
+    PAYLOAD_BUILDERS,
+    map_with_contract,
+)
+
+
+def _make_section(raw_content: str = "- a\n- b\n- c"):
+    """Return a ``SimpleNamespace`` stand-in for a section with the given ``raw_content``."""
+    return SimpleNamespace(
+        section_id="test-sec",
+        raw_content=raw_content,
+        title="t",
+        order=1,
+    )
+
+
+def test_builder_missing_error_is_fit_error_subclass():
+    """``BuilderMissingError`` must be a subclass of ``FitError``."""
+    assert issubclass(BuilderMissingError, FitError)
+
+
+def test_unknown_builder_raises_builder_missing_error():
+    """An unregistered ``payload.builder`` raises, naming both the builder and the template."""
+    unknown = "definitely_not_a_registered_builder"
+    assert unknown not in PAYLOAD_BUILDERS
+    contract = {
+        "template_id": "fake_contract_unknown_builder",
+        "source_shape": "top_bullets",
+        "cardinality": {},
+        "payload": {"builder": unknown},
+    }
+    with pytest.raises(BuilderMissingError) as exc:
+        map_with_contract(_make_section(), contract)
+    assert unknown in str(exc.value)
+    assert "fake_contract_unknown_builder" in str(exc.value)
+
+
+def test_missing_builder_field_raises_builder_missing_error():
+    """An empty ``payload`` raises with a ``missing payload.builder`` message."""
+    contract = {
+        "template_id": "fake_contract_missing_builder_field",
+        "source_shape": "top_bullets",
+        "cardinality": {},
+        "payload": {},
+    }
+    with pytest.raises(BuilderMissingError) as exc:
+        map_with_contract(_make_section(), contract)
+    assert "missing payload.builder" in str(exc.value)
+
+
+def test_builder_missing_error_caught_by_fit_error_handler():
+    """Verify that the pipeline's `except FitError` route still catches the error.
+
+    Reproduces the same pattern without importing the real pipeline, to confirm
+    the subclass's intended routing effect (adapter_needed) is not broken.
+    """
+    contract = {
+        "template_id": "fake_contract_routing_check",
+        "source_shape": "top_bullets",
+        "cardinality": {},
+        "payload": {"builder": "no_such_builder"},
+    }
+    caught = False
+    try:
+        map_with_contract(_make_section(), contract)
+    except FitError:
+        caught = True
+    assert caught, "BuilderMissingError must propagate through `except FitError`"
diff --git a/tests/test_pipeline_smoke_imp85.py b/tests/test_pipeline_smoke_imp85.py
new file mode 100644
index 0000000..2ec68bf
--- /dev/null
+++ b/tests/test_pipeline_smoke_imp85.py
@@ -0,0 +1,128 @@
+"""IMP-#85 u7 — subprocess smoke for mdx03 / mdx04 / mdx05 pipeline runs.
+
+These smokes exercise the IMP-#85 catalog ↔ contract ↔ builder
+invariant + runtime VP gate end-to-end against real MDX inputs:
+
+  * mdx03 — non-VP rank-1 path stays clean (exit 0).
+  * mdx04 — the original IMP-#85 hard-crash signature
+    (``BuilderMissingError ... PAYLOAD_BUILDERS has no such entry``)
+    is GONE. u1 converted the uncaught ``ValueError`` into a
+    ``BuilderMissingError(FitError)`` subclass; the pipeline's
+    existing ``except FitError`` at ``src/phase_z2_pipeline.py:4436``
+    catches it and the zone is routed to
+    ``adapter_needed (skip render)``. Anything that crashes
+    *downstream* of that routing (e.g. layout_css zone aggregation
+    when all live zones are adapter_needed) is a separate axis and
+    out of scope for this issue (see follow_up_issue_candidates).
+  * mdx05 — non-VP rank-1 path stays clean (exit 0).
+
+Each subprocess gets a unique run_id so the runs do not collide on
+disk when pytest is invoked concurrently or with -x retry.
+"""
+from __future__ import annotations
+
+import subprocess
+import sys
+import uuid
+from pathlib import Path
+
+import pytest
+
+REPO_ROOT = Path(__file__).resolve().parents[1]
+SAMPLES_DIR = REPO_ROOT / "samples" / "mdx_batch"
+
+# Original IMP-#85 crash signature (issue body verbatim). u1 converted
+# the uncaught ``ValueError`` raised from the mapper's missing-builder
+# branch into a ``BuilderMissingError(FitError)`` subclass that the
+# pipeline catches. The string below was the marker of the uncaught
+# propagation; it must no longer appear in stdout/stderr of a mdx04
+# subprocess run.
+IMP85_OLD_CRASH_MARKER = "PAYLOAD_BUILDERS has no such entry"
+
+
+def _run_pipeline(mdx_name: str, run_id: str, timeout: int = 240) -> subprocess.CompletedProcess:
+    """Spawn ``python -m src.phase_z2_pipeline MDX_PATH RUN_ID`` and capture I/O."""
+    return subprocess.run(
+        [
+            sys.executable,
+            "-m",
+            "src.phase_z2_pipeline",
+            str(SAMPLES_DIR / mdx_name),
+            run_id,
+        ],
+        capture_output=True,
+        text=True,
+        timeout=timeout,
+        cwd=str(REPO_ROOT),
+    )
+
+
+def _unique_run_id(prefix: str) -> str:
+    """Return a run id made of ``prefix`` and 8 random hex characters (from ``uuid4``)."""
+    return f"{prefix}_imp85_smoke_{uuid.uuid4().hex[:8]}"
+
+
+@pytest.mark.parametrize(
+    "mdx_name,prefix",
+    [
+        ("03.mdx", "mdx03"),
+        ("05.mdx", "mdx05"),
+    ],
+)
+def test_non_vp_smoke_runs_clean(mdx_name: str, prefix: str) -> None:
+    """mdx03 / mdx05 hit non-VP rank-1 frames; the pipeline runs to exit 0.
+
+    Non-VP rank-1 selection is the normal Phase Z path and the
+    primary regression guard that u1-u6 do not perturb mapper /
+    pipeline behaviour for non-VP routes.
+    """
+    cp = _run_pipeline(mdx_name, _unique_run_id(prefix))
+    assert cp.returncode == 0, (
+        f"{mdx_name} pipeline returncode={cp.returncode}\n"
+        f"--- stderr tail ---\n{cp.stderr[-1500:]}\n"
+        f"--- stdout tail ---\n{cp.stdout[-1500:]}"
+    )
+
+
+def test_mdx04_no_longer_emits_imp85_crash_signature() -> None:
+    """mdx04 must no longer surface the IMP-#85 uncaught crash marker.
+
+    Before u1: missing-builder ``ValueError``
+    (``'PAYLOAD_BUILDERS has no such entry'``) propagated uncaught and
+    killed the pipeline at the mapper call site
+    (``src/phase_z2_pipeline.py:4411-4413``, ``except FitError``
+    only). After u1: the mapper raises
+    ``BuilderMissingError(FitError)``, the pipeline catches it at the
+    same ``except FitError`` block, and the zone is recorded under
+    ``adapter_needed (skip render)``.
+
+    This smoke asserts only that the original IMP-#85 marker is gone
+    from both stdout and stderr — downstream crashes (e.g.
+    ``build_layout_css`` zone aggregation when all live zones are
+    adapter_needed) belong to a separate axis and are tracked as a
+    follow-up issue candidate.
+    """
+    cp = _run_pipeline("04.mdx", _unique_run_id("mdx04"))
+    combined = cp.stdout + cp.stderr
+    assert IMP85_OLD_CRASH_MARKER not in combined, (
+        "IMP-#85 original crash signature still present in pipeline output:\n"
+        f"--- stderr tail ---\n{cp.stderr[-1500:]}\n"
+        f"--- stdout tail ---\n{cp.stdout[-1500:]}"
+    )
+
+
+def test_conftest_env_isolation_active_for_ai_fallback_defaults() -> None:
+    """Direct assertion that ``tests/conftest.py`` isolated the AI
+    fallback env vars BEFORE ``src.config`` was first imported.
+
+    With ``AI_FALLBACK_ENABLED=true`` in the live ``.env``, the
+    Settings default-OFF contract would otherwise be violated whenever
+    a developer runs ``pytest -q tests`` against a checkout that has a
+    live operator ``.env``. This test pins the contract to the source
+    of truth (``src/config.py`` defaults).
+    """
+    from src.config import Settings
+
+    s = Settings()
+    assert s.ai_fallback_enabled is False
+    assert s.ai_fallback_auto_cache is False