diff --git a/Front/client/src/services/designAgentApi.ts b/Front/client/src/services/designAgentApi.ts index 104b2c8..78b46d8 100644 --- a/Front/client/src/services/designAgentApi.ts +++ b/Front/client/src/services/designAgentApi.ts @@ -345,13 +345,25 @@ export interface PipelineOverrides { export async function runPipeline( file: File, - overrides?: PipelineOverrides + overrides?: PipelineOverrides, + // IMP-43 (#72) u6 — optional prev RUN_ID for incremental rerun. When set, + // the vite plugin forwards `--reuse-from ` to the backend + // and the pipeline resumes at Step 7 (Step 0/1/2/5/6 artifacts copied + // from the prior run). When omitted / empty, the POST body is + // byte-identical to pre-u6 (no reuseFromRunId key → no flag forwarded). + reuseFromRunId?: string, ): Promise { const content = await file.text(); + const body: Record = { + filename: file.name, + content, + overrides, + }; + if (reuseFromRunId) body.reuseFromRunId = reuseFromRunId; const res = await fetch("/api/run", { method: "POST", headers: { "Content-Type": "application/json" }, - body: JSON.stringify({ filename: file.name, content, overrides }), + body: JSON.stringify(body), }); const data = (await res.json()) as RunPipelineResult; if (!res.ok && !data.run_id) { diff --git a/Front/client/tests/run_pipeline_reuse_from.test.ts b/Front/client/tests/run_pipeline_reuse_from.test.ts new file mode 100644 index 0000000..f2b464b --- /dev/null +++ b/Front/client/tests/run_pipeline_reuse_from.test.ts @@ -0,0 +1,250 @@ +// IMP-43 (#72) u6 — /api/run reuseFromRunId forwarding coverage. +// +// Stage 2 unit scope: +// 1) Front/client/src/services/designAgentApi.ts `runPipeline`: +// • accepts an optional 3rd arg `reuseFromRunId: string`. +// • includes `reuseFromRunId` in the POST body when truthy. +// • OMITS `reuseFromRunId` from the body when absent / empty / undefined +// → byte-identical to the pre-u6 POST contract (absent flag = full +// pipeline; backend u1 guard never sees an empty PREV_RUN_ID). 
+// • leaves `filename`, `content`, and `overrides` untouched alongside +// the new field (no payload-shape regression). +// 2) Front/vite.config.ts `/api/run` handler: +// • declares `reuseFromRunId?: string` in the payload type so a typed +// client cannot send a payload the server silently drops. +// • destructures `reuseFromRunId` from `payload` (sibling of +// `overrides`, NOT nested under it — the backend u1 post-merge +// guard treats reuse as a pipeline mode, not an override). +// • forwards `--reuse-from <PREV_RUN_ID>` to spawn cliArgs guarded by +// a truthy check (empty string / undefined ⇒ no flag, per Stage 2 +// contract: invalid CLI args must never reach argparse). +// • places the forward block AFTER the `--override-section-assignment` +// loop so the spawn argv preserves backend argparse's no-positional- +// before-flag expectation and so `--override-frame` (still allowed +// by the u1 guard) is positioned ahead of `--reuse-from`. +// +// runPipeline is exercised with a duck-typed `File` plus a `vi.stubGlobal` +// fetch mock — mirrors the user_overrides_service.test.ts pattern. The +// vite handler is source-sliced (mirrors handle_generate_diag.test.ts) +// because the handler spawns python and a real /api/run round-trip is +// out of unit-test scope. + +import { afterEach, beforeEach, describe, expect, it, vi, type Mock } from "vitest"; +import { readFileSync } from "node:fs"; +import { resolve } from "node:path"; +import { runPipeline } from "../src/services/designAgentApi"; + +// --------------------------------------------------------------------------- +// vite.config.ts source — read once for the handler source-slice assertions. +// Path: Front/client/tests/ → Front/vite.config.ts (two levels up). 
+// --------------------------------------------------------------------------- +const VITE_CONFIG_PATH = resolve(__dirname, "..", "..", "vite.config.ts"); +const VITE_CONFIG_SOURCE = readFileSync(VITE_CONFIG_PATH, "utf-8"); + +// --------------------------------------------------------------------------- +// fetch mock — minimal Response stub mirroring runPipeline's `.ok` + `.json()` +// + `.status` surface. Same shape as the user_overrides_service.test.ts +// helper so the two test files stay drift-free. +// --------------------------------------------------------------------------- +type MockResponse = { + ok: boolean; + status: number; + json: () => Promise; +}; + +function mockResponse(body: unknown, ok = true, status = 200): MockResponse { + return { ok, status, json: async () => body }; +} + +const SUCCESS_BODY = { + success: true, + run_id: "test_run_id_20260524", + exit_code: 0, + final_html_exists: true, + preview_exists: true, + stdout: "", + stderr: "", +}; + +// Duck-typed File — runPipeline reads only `.name` and `.text()`. Avoids a +// hard dependency on the global File constructor (varies across node / +// jsdom / happy-dom test environments). 
+function makeFakeFile(name: string, content: string): File { + return { + name, + text: async () => content, + } as unknown as File; +} + +let fetchMock: Mock; + +beforeEach(() => { + fetchMock = vi.fn(); + vi.stubGlobal("fetch", fetchMock); +}); + +afterEach(() => { + vi.unstubAllGlobals(); +}); + +function lastPostBody(): Record { + const lastCall = fetchMock.mock.calls.at(-1); + if (!lastCall) throw new Error("fetch was not called"); + const init = lastCall[1] as RequestInit | undefined; + if (!init?.body) throw new Error("fetch was called without a body"); + return JSON.parse(String(init.body)); +} + +// ============================================================================ +// runPipeline (designAgentApi.ts) — forwarding/omission coverage +// ============================================================================ + +describe("runPipeline reuseFromRunId forwarding (IMP-43 #72 u6)", () => { + it("posts to /api/run via POST with JSON content-type", async () => { + fetchMock.mockResolvedValueOnce(mockResponse(SUCCESS_BODY)); + await runPipeline(makeFakeFile("03.mdx", "# title")); + expect(fetchMock).toHaveBeenCalledTimes(1); + const [url, init] = fetchMock.mock.calls[0]; + expect(url).toBe("/api/run"); + expect((init as RequestInit).method).toBe("POST"); + expect((init as RequestInit).headers).toMatchObject({ + "Content-Type": "application/json", + }); + }); + + it("includes reuseFromRunId in the POST body when provided", async () => { + fetchMock.mockResolvedValueOnce(mockResponse(SUCCESS_BODY)); + await runPipeline( + makeFakeFile("03.mdx", "# title"), + undefined, + "mdx03_20260524080000", + ); + const body = lastPostBody(); + expect(body.reuseFromRunId).toBe("mdx03_20260524080000"); + expect(body.filename).toBe("03.mdx"); + expect(body.content).toBe("# title"); + }); + + it("omits reuseFromRunId when 3rd arg is undefined (pre-u6 byte-identical)", async () => { + fetchMock.mockResolvedValueOnce(mockResponse(SUCCESS_BODY)); + await 
runPipeline(makeFakeFile("03.mdx", "# title")); + const body = lastPostBody(); + expect("reuseFromRunId" in body).toBe(false); + // Pre-u6 contract: filename/content are the only keys when overrides + // is undefined (JSON.stringify drops undefined values; pre-u6 emitted + // `JSON.stringify({filename, content, overrides})` with the same + // drop-undefined behaviour, so the wire body is byte-identical). + expect(Object.keys(body).sort()).toEqual(["content", "filename"]); + }); + + it("omits reuseFromRunId but keeps overrides when only overrides provided", async () => { + fetchMock.mockResolvedValueOnce(mockResponse(SUCCESS_BODY)); + await runPipeline(makeFakeFile("03.mdx", "# title"), { + frames: { "03-1": "frame_07" }, + }); + const body = lastPostBody(); + expect("reuseFromRunId" in body).toBe(false); + expect(Object.keys(body).sort()).toEqual([ + "content", + "filename", + "overrides", + ]); + expect(body.overrides).toEqual({ frames: { "03-1": "frame_07" } }); + }); + + it("omits reuseFromRunId when passed an empty string (truthy guard)", async () => { + fetchMock.mockResolvedValueOnce(mockResponse(SUCCESS_BODY)); + await runPipeline(makeFakeFile("03.mdx", "# title"), undefined, ""); + const body = lastPostBody(); + expect("reuseFromRunId" in body).toBe(false); + }); + + it("forwards reuseFromRunId alongside frame overrides (the only u1-permitted combo)", async () => { + fetchMock.mockResolvedValueOnce(mockResponse(SUCCESS_BODY)); + await runPipeline( + makeFakeFile("03.mdx", "# title"), + { frames: { "03-1+03-2": "frame_07" } }, + "mdx03_20260524080000", + ); + const body = lastPostBody(); + expect(body.overrides).toEqual({ frames: { "03-1+03-2": "frame_07" } }); + expect(body.reuseFromRunId).toBe("mdx03_20260524080000"); + }); + + it("returns the parsed RunPipelineResult on success", async () => { + fetchMock.mockResolvedValueOnce(mockResponse(SUCCESS_BODY)); + const res = await runPipeline( + makeFakeFile("03.mdx", "# title"), + undefined, + 
"mdx03_20260524080000", + ); + expect(res.success).toBe(true); + expect(res.run_id).toBe("test_run_id_20260524"); + }); +}); + +// ============================================================================ +// /api/run handler (vite.config.ts) — source-slice forwarding contract +// ============================================================================ + +describe("/api/run handler reuseFromRunId source-slice (IMP-43 #72 u6)", () => { + it("declares reuseFromRunId?: string on the /api/run payload type", () => { + // Payload type at the top of the /api/run handler body. The + // optional-string declaration is the single source-of-truth for what + // shape the handler accepts; a typed frontend client (u5 saveUserOverrides + // sibling pattern) cannot silently send a payload the server drops. + expect(VITE_CONFIG_SOURCE).toMatch(/reuseFromRunId\?:\s*string\s*;/); + }); + + it("destructures reuseFromRunId from payload alongside filename/content/overrides", () => { + expect(VITE_CONFIG_SOURCE).toMatch( + /const\s*\{\s*filename\s*,\s*content\s*,\s*overrides\s*,\s*reuseFromRunId\s*\}\s*=\s*payload\s*;/, + ); + }); + + it("forwards --reuse-from after the override-section-assignment loop", () => { + // Stage 2 contract: reuse_from is a pipeline mode, not an override. + // The forward block must sit AFTER the last override loop so the spawn + // argv preserves the order documented in the u1 backend post-merge + // guard (overrides parsed first; reuse_from precondition runs against + // the merged overrides view). 
+ const reuseFromIdx = VITE_CONFIG_SOURCE.indexOf('"--reuse-from"'); + const zoneSectionsIdx = VITE_CONFIG_SOURCE.indexOf( + '"--override-section-assignment"', + ); + expect(reuseFromIdx).toBeGreaterThan(-1); + expect(zoneSectionsIdx).toBeGreaterThan(-1); + expect(reuseFromIdx).toBeGreaterThan(zoneSectionsIdx); + }); + + it("guards the forward with a truthy check on reuseFromRunId", () => { + // Empty string / undefined ⇒ no flag pushed (Stage 2 contract: invalid + // CLI args must never reach argparse — the backend u1 guard would + // fail-closed with `reuse_artifact_missing` on the empty PREV_RUN_ID). + const reuseFromIdx = VITE_CONFIG_SOURCE.indexOf('"--reuse-from"'); + expect(reuseFromIdx).toBeGreaterThan(-1); + const preface = VITE_CONFIG_SOURCE.slice( + Math.max(0, reuseFromIdx - 200), + reuseFromIdx, + ); + expect(preface).toMatch(/if\s*\(\s*reuseFromRunId/); + expect(preface).toMatch(/typeof\s+reuseFromRunId\s*===\s*"string"/); + }); + + it("pushes reuseFromRunId as the --reuse-from argument value (no string interpolation)", () => { + // The CLI value must be the raw PREV_RUN_ID — no `=` join, no quoting + // (spawn is shell:false). Mirrors the `--override-layout` shape. + const reuseFromIdx = VITE_CONFIG_SOURCE.indexOf('"--reuse-from"'); + expect(reuseFromIdx).toBeGreaterThan(-1); + // Window spans both before (`cliArgs.push(`) and after + // (`reuseFromRunId)`) the literal so the full push expression is + // captured. + const window = VITE_CONFIG_SOURCE.slice( + Math.max(0, reuseFromIdx - 100), + reuseFromIdx + 200, + ); + expect(window).toMatch( + /cliArgs\.push\(\s*"--reuse-from"\s*,\s*reuseFromRunId\s*\)/, + ); + }); +}); diff --git a/Front/vite.config.ts b/Front/vite.config.ts index fed65e0..b1663ef 100644 --- a/Front/vite.config.ts +++ b/Front/vite.config.ts @@ -543,6 +543,13 @@ function vitePluginPhaseZApi(): Plugin { // (e.g., "top": ["03-1-sub-1"]). Forwarded as --override-section-assignment. 
zoneSections?: Record; }; + // IMP-43 (#72) u6 — optional PREV_RUN_ID to reuse Step 0/1/2/5/6 + // artifacts from a prior run and resume execution at Step 7. + // Lives at the payload root (NOT under `overrides`) because the + // backend u1 post-merge guard rejects most override axes when + // --reuse-from is supplied. Absent / empty = full pipeline + // (byte-identical to pre-u6 spawn). + reuseFromRunId?: string; }; try { payload = JSON.parse(body); @@ -554,7 +561,7 @@ function vitePluginPhaseZApi(): Plugin { return; } - const { filename, content, overrides } = payload; + const { filename, content, overrides, reuseFromRunId } = payload; if (!filename || typeof content !== "string") { res.writeHead(400, { "Content-Type": "application/json" }); res.end( @@ -638,6 +645,19 @@ function vitePluginPhaseZApi(): Plugin { ); } } + // IMP-43 (#72) u6 — --reuse-from <PREV_RUN_ID> forward. Backend + // (u1) parses this flag, validates the snapshot, copies Step + // 0/1/2/5/6 artifacts from data/runs/<PREV_RUN_ID>/phase_z2 into + // the new run_dir, and resumes execution at Step 7. The post-merge + // guard at the same site rejects --override-layout / + // --override-zone-geometry / --override-section-assignment / + // --override-image with axis-named fail-closed exit; only + // --override-frame (above) is preserved. Truthy check excludes + // empty string + undefined so an invalid argument never reaches + // argparse. + if (reuseFromRunId && typeof reuseFromRunId === "string") { + cliArgs.push("--reuse-from", reuseFromRunId); + } console.log( `[phase-z-api] spawn pipeline: run_id=${runId}, mdx=${mdxPath}, args=${JSON.stringify(cliArgs.slice(2))}` ); diff --git a/docs/architecture/PHASE-Z-PIPELINE-STATUS-BOARD.md b/docs/architecture/PHASE-Z-PIPELINE-STATUS-BOARD.md index 22aab12..cca41d5 100644 --- a/docs/architecture/PHASE-Z-PIPELINE-STATUS-BOARD.md +++ b/docs/architecture/PHASE-Z-PIPELINE-STATUS-BOARD.md @@ -182,6 +182,27 @@ Step 0 (사전 준비) 의 Figma → HTML 변환은 *precondition phase 의 작 --- +## 8. 
IMP-43 (#72) `--reuse-from` measured savings + +> Stage 2 §u8 binding contract: the issue-body 50–70% / 10–20s → 3–8s claim is **unverified** and is **not** mirrored here. Numbers below come from `scripts/measure_reuse_savings.py` on the project reference host; until that script is run and the values committed, every cell stays `TBD`. + +| axis | value | +|---|---| +| measurement script | `scripts/measure_reuse_savings.py` | +| reuse boundary (Stage 1 lock) | Step 0 / 1 / 2 / 5 / 6 only; Step 7+ re-executes | +| full rerun seconds (p50) | TBD | +| full rerun seconds (p95) | TBD | +| reuse seconds (p50) | TBD | +| reuse seconds (p95) | TBD | +| reuse / full ratio (p50) | TBD | +| last measured | TBD (date / host / mdx / iterations) | + +Run protocol (per iteration): `(A)` seed → `(B)` full rerun with one self-discovered `--override-frame` pin → `(C)` `--reuse-from <seed_run_id>` with the same pin. The `(A)` seed time is reported separately and **not** included in the B-vs-C comparison — the reuse path's whole point is that the seed already exists from a prior interactive run. + +Invocation: `python -m scripts.measure_reuse_savings samples/mdx_batch/02.mdx --iterations 5` (mdx is argv-driven; the script does not pin a sample internally). + +--- + ## 사용 방법 - 새 작업 들어오면 → 본 board 의 *어느 step* 의 status 를 바꾸는 작업인지 식별 diff --git a/pyproject.toml b/pyproject.toml index 494a5cf..78648a6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,4 +34,5 @@ target-version = "py310" asyncio_mode = "auto" markers = [ "integration: end-to-end pipeline integration tests (heavy; invoke Selenium)", + "sweep: opt-in heavyweight sweep tests (IMP-43 u7b: 3 layouts × 3 mdx × frame-pin coverage). 
Invoke explicitly via `pytest -m sweep`; default CI must use `-m 'not sweep'`.", ] diff --git a/scripts/measure_reuse_savings.py b/scripts/measure_reuse_savings.py new file mode 100644 index 0000000..848f6f7 --- /dev/null +++ b/scripts/measure_reuse_savings.py @@ -0,0 +1,178 @@ +"""IMP-43 (#72) u8 — measure ``--reuse-from`` wall-clock savings. + +Argv-driven measurement helper for the Stage 2 §u8 binding contract: +re-derive a realistic savings target instead of mirroring the +unverified issue-body 50–70% / 10–20s → 3–8s claim. + +Per-iteration measurement protocol (mirrors the u7a equivalence +harness, ``tests/test_phase_z2_reuse_from_equivalence_unit.py``): + + (A) baseline full run, no overrides — reuse seed + (B) full rerun full run + one --override-frame pin — control path + (C) reuse --reuse-from + same pin — reuse path + +Wall-clock = ``time.perf_counter()`` around the subprocess.run call. +The (A) seed run time is reported separately and NOT included in the +B-vs-C comparison (the reuse path's whole point is that the seed +already exists from a prior interactive run). + +For each iteration the frame pin is self-discovered from the seed +run's ``step06_composition_plan.json``: the first unit's +``frame_template_id`` is re-pinned to itself, exercising the +``--override-frame`` CLI surface end-to-end without changing the +semantic frame assignment (same approach the u7a/u7b equivalence +tests already lock). + +Output: a JSON document to stdout with per-iteration timings, +B/C p50 + p95, and the ratio C/B. Stderr carries the subprocess +stdout/stderr tails on non-zero exits. 
+ +Guardrails (Stage 2): + * argv-driven, no hardcoded mdx — caller picks the sample + * no hardcoded savings target — TBD until measured + * value + path + upstream provenance lives in the printed JSON + * does NOT mutate prev_run_dir; new runs land under fresh run_ids +""" +from __future__ import annotations + +import argparse +import json +import statistics +import subprocess +import sys +import time +import uuid +from pathlib import Path + +REPO_ROOT = Path(__file__).resolve().parents[1] +RUNS_DIR = REPO_ROOT / "data" / "runs" + + +def _unique_run_id(prefix: str) -> str: + return f"{prefix}_imp43_u8_{uuid.uuid4().hex[:8]}" + + +def _spawn(extra_args: list[str], timeout: int) -> tuple[subprocess.CompletedProcess, float]: + start = time.perf_counter() + cp = subprocess.run( + [sys.executable, "-m", "src.phase_z2_pipeline", *extra_args], + capture_output=True, + text=True, + timeout=timeout, + cwd=str(REPO_ROOT), + ) + return cp, time.perf_counter() - start + + +def _assert_ok(label: str, cp: subprocess.CompletedProcess) -> None: + if cp.returncode != 0: + sys.stderr.write( + f"[measure_reuse_savings] {label} failed rc={cp.returncode}\n" + f"--- stderr tail ---\n{cp.stderr[-2000:]}\n" + f"--- stdout tail ---\n{cp.stdout[-2000:]}\n" + ) + raise SystemExit(2) + + +def _discover_first_frame_pin(seed_run_id: str) -> tuple[str, str]: + p = RUNS_DIR / seed_run_id / "phase_z2" / "steps" / "step06_composition_plan.json" + payload = json.loads(p.read_text(encoding="utf-8")) + for u in payload.get("data", {}).get("selected_units") or []: + sids = u.get("source_section_ids") or [] + tpl = u.get("frame_template_id") + if isinstance(sids, list) and sids and isinstance(tpl, str) and tpl: + return ("+".join(str(s) for s in sids), tpl) + raise SystemExit( + f"[measure_reuse_savings] seed {seed_run_id} step06 has no pinnable " + f"(unit_id, frame_template_id); path={p}" + ) + + +def _percentile(values: list[float], pct: float) -> float: + if not values: + return float("nan") + if 
len(values) == 1: + return values[0] + s = sorted(values) + k = (len(s) - 1) * pct + lo = int(k) + hi = min(lo + 1, len(s) - 1) + return s[lo] + (s[hi] - s[lo]) * (k - lo) + + +def main() -> int: + ap = argparse.ArgumentParser( + prog="python -m scripts.measure_reuse_savings", + description="Measure IMP-43 --reuse-from wall-clock savings.", + ) + ap.add_argument("mdx_path", type=Path, help="MDX sample to measure against") + ap.add_argument("--iterations", type=int, default=3, help="trials (default 3)") + ap.add_argument("--timeout", type=int, default=900, help="per-run timeout seconds") + args = ap.parse_args() + + if not args.mdx_path.is_file(): + sys.stderr.write(f"[measure_reuse_savings] mdx not found: {args.mdx_path}\n") + return 2 + + iterations: list[dict] = [] + for i in range(args.iterations): + seed_id = _unique_run_id(f"seed{i}") + cp_a, t_a = _spawn([str(args.mdx_path), seed_id], args.timeout) + _assert_ok(f"(A) seed iter={i}", cp_a) + + unit_id, tpl_id = _discover_first_frame_pin(seed_id) + override = ["--override-frame", f"{unit_id}={tpl_id}"] + + full_id = _unique_run_id(f"full{i}") + cp_b, t_b = _spawn([str(args.mdx_path), full_id, *override], args.timeout) + _assert_ok(f"(B) full rerun iter={i}", cp_b) + + reuse_id = _unique_run_id(f"reuse{i}") + cp_c, t_c = _spawn( + [str(args.mdx_path), reuse_id, "--reuse-from", seed_id, *override], + args.timeout, + ) + _assert_ok(f"(C) reuse iter={i}", cp_c) + + iterations.append({ + "iter": i, + "seed_run_id": seed_id, + "full_run_id": full_id, + "reuse_run_id": reuse_id, + "override_frame": f"{unit_id}={tpl_id}", + "seed_seconds": t_a, + "full_rerun_seconds": t_b, + "reuse_seconds": t_c, + }) + + full_times = [it["full_rerun_seconds"] for it in iterations] + reuse_times = [it["reuse_seconds"] for it in iterations] + + summary = { + "mdx_path": str(args.mdx_path), + "iterations_count": len(iterations), + "full_rerun_seconds_p50": _percentile(full_times, 0.50), + "full_rerun_seconds_p95": _percentile(full_times, 
0.95), + "reuse_seconds_p50": _percentile(reuse_times, 0.50), + "reuse_seconds_p95": _percentile(reuse_times, 0.95), + "reuse_over_full_ratio_p50": ( + _percentile(reuse_times, 0.50) / _percentile(full_times, 0.50) + if full_times and statistics.median(full_times) > 0 + else float("nan") + ), + "iterations": iterations, + "note": ( + "IMP-43 (#72) u8 measurement. Issue-body 50–70% / 10–20s → 3–8s " + "claim is NOT honored here — actual numbers depend on host, " + "Selenium cold-start, and AI cache state. Update " + "docs/architecture/PHASE-Z-PIPELINE-STATUS-BOARD.md §8 with the " + "p50/p95 reported here when run on the project's reference host." + ), + } + sys.stdout.write(json.dumps(summary, ensure_ascii=False, indent=2)) + sys.stdout.write("\n") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/src/phase_z2_pipeline.py b/src/phase_z2_pipeline.py index 029ac1f..d5f0251 100644 --- a/src/phase_z2_pipeline.py +++ b/src/phase_z2_pipeline.py @@ -25,6 +25,7 @@ MVP-1.5b spec : - mvp1.5b_test* : 본 모듈, 원래 설계 라인 합류 """ +import hashlib import json import os import re @@ -33,7 +34,7 @@ import sys import time from dataclasses import asdict, dataclass, field from pathlib import Path -from typing import Optional +from typing import Any, Optional import yaml from jinja2 import Environment, FileSystemLoader, select_autoescape @@ -99,6 +100,15 @@ from src.phase_z2_ai_fallback.step12 import gather_step12_ai_repair_proposals # idempotent ``has_popup`` marker onto retry_trace per unit. No AI call. from src.phase_z2_ai_fallback.step17 import run_step17_popup_gate +# IMP-43 (#72) u3 — Step 6 reuse snapshot sidecar (JSON-only). Schema + +# serializers + validator live in u2 (``src.phase_z2_reuse_snapshot``); +# this module's call site at the Step 6 boundary writes the sidecar +# alongside ``steps/step06_composition_plan.json`` so that future +# ``--reuse-from`` runs (u4) can resume at Step 7 without re-deriving +# Step 0/1/2/5/6 state. 
``--reuse-from`` is u4 scope; here we only +# WRITE the snapshot — restore wiring lands in u4. +from src.phase_z2_reuse_snapshot import build_snapshot, SNAPSHOT_FILENAME + # ─── Constants ────────────────────────────────────────────────── @@ -3853,6 +3863,564 @@ def _write_step_artifact( return fpath +# IMP-43 (#72) u3 — Step 6 reuse snapshot sidecar writer. +# +# Scope (u3 only — Stage 2 unit split): +# * Writes ``run_dir/_reuse_snapshot.json`` *after* the Step 6 artifact. +# * JSON-only (per Stage 2 guardrail — pickle forbidden); schema + +# ``build_snapshot`` live in u2 (``src.phase_z2_reuse_snapshot``). +# * Write failure WARNS and CONTINUES — the snapshot is an OPTIONAL +# sidecar; absence means future ``--reuse-from`` (u4) will fail +# closed when it cannot find / load the file. The main pipeline +# run must not abort on snapshot write failure. +# * Returns the run_dir-relative path (``"_reuse_snapshot.json"``) on +# success, ``None`` on failure. The caller stamps the returned value +# (or the constant when known ahead of time) into the Step 6 artifact. 
+def _write_reuse_snapshot( + run_dir: Path, + *, + mdx_source_text: str, + slide_title: Optional[str], + slide_footer: Optional[str], + sections: list, + stage0_adapter_diagnostics: Optional[dict], + stage0_normalized_assets: Optional[dict], + v4_evidence: list, + layout_preset_pre_override: Optional[str], + units: list, + comp_debug: Optional[dict], + v4_fallback_traces: Optional[dict], + ai_preflight: Optional[dict], +) -> Optional[str]: + try: + mdx_sha256 = hashlib.sha256(mdx_source_text.encode("utf-8")).hexdigest() + snapshot = build_snapshot( + mdx_sha256=mdx_sha256, + slide_title=slide_title, + slide_footer=slide_footer, + sections=sections, + stage0_adapter_diagnostics=stage0_adapter_diagnostics, + stage0_normalized_assets=stage0_normalized_assets, + v4_evidence=v4_evidence, + layout_preset_pre_override=layout_preset_pre_override, + units=units, + comp_debug=comp_debug, + v4_fallback_traces=v4_fallback_traces, + ai_preflight=ai_preflight, + ) + fpath = run_dir / SNAPSHOT_FILENAME + fpath.write_text( + json.dumps(snapshot, ensure_ascii=False, indent=2), + encoding="utf-8", + ) + return SNAPSHOT_FILENAME + except Exception as exc: + print( + f" [reuse-snapshot] WARN — failed to write {SNAPSHOT_FILENAME} " + f"(reason={type(exc).__name__}: {exc}); --reuse-from will not be " + f"available from this run.", + file=sys.stderr, + ) + return None + + +# IMP-43 (#72) u4 — --reuse-from copy + restore + entry helpers. +# +# Scope (u4 only — Stage 2 unit split): +# * Pure path resolution / file copy / snapshot load+validate / +# section + unit rehydration / marker writing. +# * NO edits to ``run_phase_z2_mvp1`` body — the kwarg threading and +# the entry-point branch that invokes these helpers land in u5. +# * NO sys.exit(2) translation — helpers RAISE +# (FileNotFoundError / SnapshotValidationError / OSError); u4b adds +# the stderr + exit-code-2 wrapper, the prev_run_dir == new_run_dir +# accidental-write guard, and the mdx_sha256 mismatch surface +# fingerprint. 
+# +# Restore contract (Stage 2 boundary): Step 0/1/2/5/6 artifacts + +# ``_reuse_snapshot.json``. Step numbers 3 / 4 are deliberately absent +# — the pipeline DOES write ``step03_content_objects.json`` and +# ``step04_internal_composition.json`` AFTER the Step 6 artifact and +# BEFORE the Step 7 artifact (see ``_write_step_artifact`` call sites +# for ``run_dir, 3`` and ``run_dir, 4`` above the ``run_dir, 7`` call +# in this file), but both are emitted with +# ``step_status="trace-only"`` and ``pipeline_path_connected=False``: +# they are diagnostic projections derived from the Step 6 +# ``debug_zones`` snapshot, not deterministic inputs that Step 7+ +# consume. Restoring them is unnecessary because downstream code +# reads ``debug_zones`` directly (rehydrated from the snapshot), and +# copying trace-only files would muddle the boundary audit. Stage 2 +# boundary lock = pipeline-path-connected pre-Step 7 artifacts only. + +_REUSE_STEP_ARTIFACTS: tuple[str, ...] = ( + "step00_preconditions.json", + "step01_mdx_upload.json", + "step01_mdx_source.md", + "step02_normalized.json", + "step05_v4_evidence.json", + "step06_composition_plan.json", +) + +REUSE_MARKER_FILENAME = "_reuse_marker.json" + + +def _resolve_reuse_from_prev_run_dir(reuse_from: str) -> Path: + """Resolve ``--reuse-from PREV_RUN_ID`` to its ``phase_z2`` run_dir. + + Pure path computation — does NOT check existence. u4b adds the + fail-closed prev-run-missing translation around this helper. + """ + return RUNS_DIR / reuse_from / "phase_z2" + + +def _copy_reuse_artifacts_from_prev_run( + prev_run_dir: Path, new_run_dir: Path +) -> dict[str, str]: + """Copy Step 0/1/2/5/6 artifacts + ``_reuse_snapshot.json`` into new_run_dir. + + Returns ``{artifact_name: new_run_dir-relative_path}`` for all + copied files. Raises ``FileNotFoundError`` when any required + artifact is missing in ``prev_run_dir`` (u4b translates to exit 2). 
+ """ + new_steps = new_run_dir / "steps" + new_steps.mkdir(parents=True, exist_ok=True) + copied: dict[str, str] = {} + for fname in _REUSE_STEP_ARTIFACTS: + src = prev_run_dir / "steps" / fname + if not src.exists(): + raise FileNotFoundError( + f"reuse artifact missing in prev_run_dir: steps/{fname} " + f"(expected at {src})" + ) + shutil.copyfile(src, new_steps / fname) + copied[fname] = f"steps/{fname}" + snap_src = prev_run_dir / SNAPSHOT_FILENAME + if not snap_src.exists(): + raise FileNotFoundError( + f"reuse snapshot missing in prev_run_dir: {SNAPSHOT_FILENAME} " + f"(expected at {snap_src})" + ) + shutil.copyfile(snap_src, new_run_dir / SNAPSHOT_FILENAME) + copied[SNAPSHOT_FILENAME] = SNAPSHOT_FILENAME + return copied + + +def _load_and_validate_reuse_snapshot( + new_run_dir: Path, *, mdx_source_text: str +) -> dict: + """Load + validate the reuse snapshot already copied into ``new_run_dir``. + + Computes the expected ``mdx_sha256`` from ``mdx_source_text`` UTF-8 + bytes — same derivation as ``_write_reuse_snapshot`` so the + integrity check is symmetric. Delegates structural validation to + u2's ``validate_snapshot``; that raises + ``SnapshotValidationError`` (subclass of ``ValueError``) on + schema_version mismatch, mdx_sha256 mismatch, missing required + keys, or malformed wrappers — u4b catches and translates. + """ + from src.phase_z2_reuse_snapshot import validate_snapshot + + snap_path = new_run_dir / SNAPSHOT_FILENAME + snapshot = json.loads(snap_path.read_text(encoding="utf-8")) + expected_sha = hashlib.sha256(mdx_source_text.encode("utf-8")).hexdigest() + validate_snapshot(snapshot, expected_mdx_sha256=expected_sha) + return snapshot + + +@dataclass +class _RehydratedV4Candidate: + """V4Match-shape duck type restored from snapshot ``v4_candidates``. 
+ + Exposes the 6-attribute contract that the reuse path's downstream + consumers read off ``unit.v4_candidates`` entries: + * template_id / frame_id / frame_number / confidence / label — + read by ``_apply_frame_override_to_unit`` (frame swap). + * v4_rank — read by ``_build_application_plan_unit`` (Step 9 + payload, ``data.application_plan.zones[i].v4_candidates[j]``). + Default ``None`` keeps the dataclass safe to construct from + legacy snapshots that pre-date the u4 fix where the snapshot + serializer did not persist per-candidate rank. + Kept local — circular-dep-free; the production ``V4Match`` dataclass + additionally carries section_id / selection_path / fallback_reason / + provisional that the reuse boundary deliberately does not require. + """ + template_id: str + frame_id: str + frame_number: int + confidence: float + label: str + v4_rank: Optional[int] = None + + +def _rehydrate_mdx_sections_from_snapshot(snapshot: dict) -> list: + """Rebuild ``list[MdxSection]`` from snapshot ``sections`` wrapper. + + Mirrors the ``serialize_section`` field list (u2 source of truth). + Returns a Python list of ``MdxSection`` dataclass instances so the + Step 7+ pipeline code that does ``[s.section_id for s in sections]`` + keeps byte-for-byte behavior. + """ + entries = snapshot["sections"]["value"] + return [ + MdxSection( + section_id=e["section_id"], + section_num=e["section_num"], + title=e["title"], + raw_content=e["raw_content"], + heading_number=e.get("heading_number"), + v4_alias_keys=list(e.get("v4_alias_keys") or []), + sub_sections=list(e.get("sub_sections") or []), + ) + for e in entries + ] + + +def _rehydrate_composition_units_from_snapshot(snapshot: dict) -> list: + """Rebuild ``list[CompositionUnit]`` from snapshot ``units`` wrapper. + + ``v4_candidates`` entries are restored as ``_RehydratedV4Candidate`` + instances so attribute access (``cand.template_id`` etc.) 
works + end-to-end through ``_apply_frame_override_to_unit`` without + serializing the production ``V4Match`` dataclass shape. + + Uses the ``src.phase_z2_composition`` import path to match + line 4976 / 5125's local re-imports — the module is loaded under + both ``phase_z2_composition`` and ``src.phase_z2_composition`` due + to historical sys.path duality, so a top-level CompositionUnit + reference would create a class-identity mismatch against tests and + downstream code that imports via the ``src.`` path. + """ + from src.phase_z2_composition import CompositionUnit as _CompositionUnit + + entries = snapshot["units"]["value"] + units: list = [] + for e in entries: + cands = [ + _RehydratedV4Candidate( + template_id=c["template_id"], + frame_id=c["frame_id"], + frame_number=int(c["frame_number"]), + confidence=float(c["confidence"]), + label=c["label"], + v4_rank=( + int(c["v4_rank"]) + if c.get("v4_rank") is not None + else None + ), + ) + for c in (e.get("v4_candidates") or []) + ] + units.append(_CompositionUnit( + source_section_ids=list(e["source_section_ids"]), + merge_type=e["merge_type"], + frame_template_id=e["frame_template_id"], + frame_id=e["frame_id"], + frame_number=int(e["frame_number"]), + confidence=float(e["confidence"]), + label=e["label"], + phase_z_status=e["phase_z_status"], + raw_content=e["raw_content"], + title=e["title"], + v4_rank=e.get("v4_rank"), + selection_path=e.get("selection_path") or "rank_1", + fallback_reason=e.get("fallback_reason"), + score=float(e.get("score") or 0.0), + rationale=dict(e.get("rationale") or {}), + auto_selectable=bool(e.get("auto_selectable", True)), + filter_reasons=list(e.get("filter_reasons") or []), + notes=list(e.get("notes") or []), + v4_candidates=cands, + provisional=bool(e.get("provisional", False)), + )) + return units + + +REUSE_MARKER_SCHEMA_VERSION = 1 + + +def _write_reuse_marker( + new_run_dir: Path, + *, + prev_run_id: str, + copied_artifacts: dict[str, str], +) -> Path: + """Write 
``_reuse_marker.json`` to ``new_run_dir`` for audit trail. + + Records prev_run_id, copied artifact map, the locked Step 0/1/2/5/6 + boundary, and ``resume_at_step=7``. Informational sidecar — absence + does not break the reused run; presence lets operators trace which + prev_run_id the reuse path was sourced from. u5 invokes this after + a successful copy + restore. + """ + marker = { + "schema_version": REUSE_MARKER_SCHEMA_VERSION, + "reuse_from_prev_run_id": prev_run_id, + "snapshot_filename": SNAPSHOT_FILENAME, + "copied_artifacts": dict(copied_artifacts), + "boundary_steps": list(_REUSE_STEP_ARTIFACTS), + "resume_at_step": 7, + "note": ( + "IMP-43 (#72) u4 — this run was sourced from prev_run_id via " + "--reuse-from. Steps 0/1/2/5/6 artifacts copied; Step 7+ " + "re-executed in this run_dir." + ), + } + fpath = new_run_dir / REUSE_MARKER_FILENAME + fpath.write_text( + json.dumps(marker, ensure_ascii=False, indent=2), + encoding="utf-8", + ) + return fpath + + +# IMP-43 (#72) u4b — fail-closed wrapper around the u4 helpers. +# +# Scope (u4b only — Stage 2 unit split): +# * Translate the u4 raises (FileNotFoundError, SnapshotValidationError, +# json.JSONDecodeError, OSError) into the CLI fail-closed contract: +# stderr message + ``sys.exit(2)``. +# * Add the prev_run_dir == new_run_dir accidental-write guard BEFORE +# any copy attempt — otherwise ``_copy_reuse_artifacts_from_prev_run`` +# would overwrite prev_run_dir's own step files with itself and +# mutate the "read-only" reuse source. +# * Add the missing-prev-run-dir surface so the user gets a clean +# "run id not found" message instead of the raw FileNotFoundError +# stack from inside _copy_reuse_artifacts_from_prev_run. +# * Surface the mdx_sha256 mismatch as its OWN axis (distinct from +# generic snapshot validation failures) so the operator can tell +# "wrong --mdx-path for this prev_run_id" apart from "snapshot file +# is broken". 
+# +# Out of scope: signature threading into ``run_phase_z2_mvp1`` (u5), +# the actual call site dispatch into Step 7+ (u5). +# +# Diagnostic format (factual-verification guardrail): +# [error] --reuse-from fail-closed: {axis} +# value: {value!r} +# path: {path} +# upstream: {upstream} +# reason: {type(exc).__name__}: {exc} (only when exc is not None) +# +# axis vocabulary (closed enum — tests pin this set): +# * prev_run_dir_missing +# * prev_run_dir_equals_new_run_dir +# * reuse_artifact_missing +# * reuse_copy_os_error # OSError != FileNotFoundError during copy +# # (PermissionError, IsADirectoryError, +# # OSError(errno.EXDEV), full-disk, etc.) +# * snapshot_missing_after_copy +# * snapshot_corrupt_json +# * snapshot_read_os_error # OSError != FileNotFoundError during +# # snapshot read (permission denied, +# # path-became-dir, lower-level IO) +# * mdx_sha256_mismatch +# * snapshot_validation_failed + + +REUSE_FAIL_CLOSED_AXES: frozenset[str] = frozenset({ + "prev_run_dir_missing", + "prev_run_dir_equals_new_run_dir", + "reuse_artifact_missing", + "reuse_copy_os_error", + "snapshot_missing_after_copy", + "snapshot_corrupt_json", + "snapshot_read_os_error", + "mdx_sha256_mismatch", + "snapshot_validation_failed", +}) + + +def _abort_reuse_from( + *, + axis: str, + value: Any, + path: str, + upstream: str, + exc: Optional[BaseException] = None, +) -> "NoReturn": + """Print provenance-tagged stderr message and ``sys.exit(2)``. + + All four `value+path+upstream+axis` fields are mandatory so the + operator can pinpoint the failed precondition without grepping the + pipeline source. ``exc`` (when supplied) adds the underlying type + + message — useful for ``json.JSONDecodeError`` line/col info or + OSError errno.
+ """ + if axis not in REUSE_FAIL_CLOSED_AXES: + raise AssertionError( + f"_abort_reuse_from: unknown axis {axis!r} " + f"(expected one of {sorted(REUSE_FAIL_CLOSED_AXES)})" + ) + lines = [ + f"[error] --reuse-from fail-closed: {axis}", + f" value: {value!r}", + f" path: {path}", + f" upstream: {upstream}", + ] + if exc is not None: + lines.append(f" reason: {type(exc).__name__}: {exc}") + print("\n".join(lines), file=sys.stderr) + sys.exit(2) + + +def _paths_equivalent(a: Path, b: Path) -> bool: + """Return True when ``a`` and ``b`` resolve to the same filesystem + location. ``Path.resolve(strict=False)`` is used so a path that does + not exist yet is still normalized (case and separators on Windows, + ``..`` segments collapsed); plain ``Path`` equality is the fallback + only when resolution raises ``OSError`` or ``RuntimeError``. + """ + try: + return a.resolve(strict=False) == b.resolve(strict=False) + except (OSError, RuntimeError): + return a == b + + +def execute_reuse_from_or_fail_closed( + *, + reuse_from: str, + new_run_dir: Path, + mdx_source_text: str, +) -> tuple[Path, dict[str, str], dict]: + """Orchestrate u4 helpers under the u4b fail-closed contract. + + Returns ``(prev_run_dir, copied_artifacts, snapshot)`` on success. + Calls ``sys.exit(2)`` on any of the nine fail-closed axes listed in + ``REUSE_FAIL_CLOSED_AXES``; does NOT return in that case. + + The caller (u5, into ``run_phase_z2_mvp1``) does NOT need to wrap + this in its own try/except — every reachable failure inside this + function terminates the process directly. + """ + from src.phase_z2_reuse_snapshot import SnapshotValidationError + + prev_run_dir = _resolve_reuse_from_prev_run_dir(reuse_from) + + # Guard 1: prev_run_dir must exist. + if not prev_run_dir.exists(): + _abort_reuse_from( + axis="prev_run_dir_missing", + value=reuse_from, + path=str(prev_run_dir), + upstream="--reuse-from CLI argument", + ) + + # Guard 2: prev_run_dir must NOT be the same as new_run_dir. + # Without this, the copy step would overwrite prev_run_dir's own + # files with themselves and break the RO guarantee on the reuse + # source.
The check resolves both sides so a relative-vs-absolute + # or symlinked collision still trips it. + if _paths_equivalent(prev_run_dir, new_run_dir): + _abort_reuse_from( + axis="prev_run_dir_equals_new_run_dir", + value=reuse_from, + path=str(prev_run_dir), + upstream=( + "_resolve_reuse_from_prev_run_dir(reuse_from) == new_run_dir " + "(would overwrite prev_run_dir during copy)" + ), + ) + + # Copy step 0/1/2/5/6 + snapshot from prev_run_dir → new_run_dir. + # FileNotFoundError MUST be caught before the bare OSError handler — + # it is a subclass of OSError and the missing-artifact case has its + # own dedicated axis. + try: + copied = _copy_reuse_artifacts_from_prev_run(prev_run_dir, new_run_dir) + except FileNotFoundError as exc: + _abort_reuse_from( + axis="reuse_artifact_missing", + value=str(exc), + path=str(prev_run_dir), + upstream=( + "Step 0/1/2/5/6 deterministic artifacts + " + f"{SNAPSHOT_FILENAME} under prev_run_dir/steps/" + ), + exc=exc, + ) + except OSError as exc: + # PermissionError, IsADirectoryError, OSError(errno.EXDEV) when + # crossing filesystems with shutil.copyfile, disk-full, etc. + # Without this branch the raw traceback would escape the wrapper + # and contradict the docstring contract ("every reachable + # failure inside this function terminates the process directly"). + _abort_reuse_from( + axis="reuse_copy_os_error", + value=str(exc), + path=str(prev_run_dir), + upstream=( + "_copy_reuse_artifacts_from_prev_run " + "(OSError != FileNotFoundError; shutil.copyfile or " + "Path.mkdir surface)" + ), + exc=exc, + ) + + # Load + validate snapshot. Exception fan-out below mirrors the + # u4 helper raise surface; each fail-closed axis is reported + # separately so operators can tell the cases apart. + # FileNotFoundError MUST be caught before the bare OSError handler. 
+ try: + snapshot = _load_and_validate_reuse_snapshot( + new_run_dir, mdx_source_text=mdx_source_text, + ) + except FileNotFoundError as exc: + # Should not happen — copy step would have failed first — but + # left explicit to make the contract symmetric. + _abort_reuse_from( + axis="snapshot_missing_after_copy", + value=str(exc), + path=str(new_run_dir / SNAPSHOT_FILENAME), + upstream="_copy_reuse_artifacts_from_prev_run side effect", + exc=exc, + ) + except json.JSONDecodeError as exc: + _abort_reuse_from( + axis="snapshot_corrupt_json", + value=str(exc), + path=str(new_run_dir / SNAPSHOT_FILENAME), + upstream=f"json.loads({SNAPSHOT_FILENAME})", + exc=exc, + ) + except OSError as exc: + # Permission denied on the copied snapshot, snap_path turned out + # to be a directory, lower-level IO error. JSONDecodeError is + # ValueError (independent of OSError) so order with that branch + # does not matter; this branch only needs to follow FNF. + _abort_reuse_from( + axis="snapshot_read_os_error", + value=str(exc), + path=str(new_run_dir / SNAPSHOT_FILENAME), + upstream=( + "_load_and_validate_reuse_snapshot " + "(OSError != FileNotFoundError; Path.read_text surface)" + ), + exc=exc, + ) + except SnapshotValidationError as exc: + msg = str(exc) + if "mdx_sha256 mismatch" in msg: + _abort_reuse_from( + axis="mdx_sha256_mismatch", + value=msg, + path=str(new_run_dir / SNAPSHOT_FILENAME), + upstream=( + "sha256(mdx_source_text) vs " + f"{SNAPSHOT_FILENAME}#/mdx_sha256" + ), + exc=exc, + ) + else: + _abort_reuse_from( + axis="snapshot_validation_failed", + value=msg, + path=str(new_run_dir / SNAPSHOT_FILENAME), + upstream="src.phase_z2_reuse_snapshot.validate_snapshot", + exc=exc, + ) + + return prev_run_dir, copied, snapshot + + def _write_step_html( run_dir: Path, step_num: int, @@ -4284,6 +4852,7 @@ def run_phase_z2_mvp1( override_zone_geometries: Optional[dict[str, dict]] = None, override_section_assignments: Optional[dict[str, list[str]]] = None, 
override_image_overrides: Optional[dict[str, dict]] = None, + reuse_from: Optional[str] = None, ) -> Path: """MVP-1.5b entry — single slide + composition planner v0 + 8 preset vocabulary. @@ -4306,6 +4875,22 @@ def run_phase_z2_mvp1( backend contract (KNOWN_AXES u1 + Vite allowlist u2 + typed client u3 + stamper u4) end-to-end addressable from CLI without diverging the function signature. + + Incremental rerun (IMP-43 #72, u5) : + reuse_from : Optional PREV_RUN_ID. When set, Steps 0/1/2/5/6 artifacts + are copied from ``RUNS_DIR / PREV_RUN_ID / phase_z2`` + and the in-memory state (sections, units, layout_preset, + comp_debug, v4_fallback_traces, slide_title/footer, + stage0_*, v4_evidence, ai_preflight) is rehydrated + from ``_reuse_snapshot.json`` via the u4 helpers, + wrapped by u4b's fail-closed contract. Step 7+ then + re-executes against ``override_frames`` in this new + run_dir. ``None`` preserves the legacy single-pass + behaviour (Steps 0-6 derive state from scratch). + The post-merge u1 guard at the CLI surface rejects + any layout / zone_geometry / zone_section / image + override under ``--reuse-from`` so only frame + overrides reach this kwarg's reuse branch. """ mdx_path = Path(mdx_path) if run_id is None: @@ -4315,693 +4900,817 @@ def run_phase_z2_mvp1( print(f"[Phase Z-2 MVP-1.5b] start — mdx={mdx_path.name}, run_id={run_id}") - # ─── Step 0: 사전 준비 (precondition snapshot) ─── - # IMP-92 u4 — boot-time AI fallback preflight (gated on - # settings.ai_fallback_enabled; default OFF = skipped, no API call). - # Persistent setup errors raise Step0PreflightError before Step 1. 
- ai_preflight = _run_step0_ai_preflight() - _write_step_artifact( - run_dir, 0, "preconditions", - data={ - "v4_source": str(V4_RESULT_PATH.relative_to(PROJECT_ROOT)), - "templates_dir": str(TEMPLATE_DIR.relative_to(PROJECT_ROOT)), - "assets_source_base": str(ASSETS_SOURCE_BASE.relative_to(PROJECT_ROOT)), - "frame_contracts_loaded": len(load_frame_contracts()), - "frame_contracts_template_ids": sorted(load_frame_contracts().keys()), - "v4_label_to_phase_z_status": V4_LABEL_TO_PHASE_Z_STATUS, - "mvp1_allowed_statuses": sorted(MVP1_ALLOWED_STATUSES), - "ai_preflight": ai_preflight, - }, - step_status="partial", - pipeline_path_connected=True, - inputs=[ - "templates/phase_z2/catalog/frame_contracts.yaml", - "tests/matching/v4_full32_result.yaml", - "templates/phase_z2/families/*.html", - "figma_to_html_agent/blocks/", - ], - outputs=["step00_preconditions.json"], - note=( - "frame_contracts.yaml 에 등록된 frame 만 mapping 가능. " - "V4 결과의 32 frame 중 다수 (F11/F14/F18 등) 미등록 — Step 0 ⚠ partial." - ), - ) - - # ─── Step 1: MDX 업로드 ─── - mdx_source_text = mdx_path.read_text(encoding="utf-8") - (run_dir / "steps").mkdir(exist_ok=True) - (run_dir / "steps" / "step01_mdx_source.md").write_text(mdx_source_text, encoding="utf-8") - _write_step_artifact( - run_dir, 1, "mdx_upload", - data={ - "mdx_path": str(mdx_path), - "run_id": run_id, - "run_dir": str(run_dir), - "mdx_source_size_bytes": len(mdx_source_text.encode("utf-8")), - "mdx_source_lines": mdx_source_text.count("\n") + 1, - }, - step_status="done", - pipeline_path_connected=True, - inputs=[str(mdx_path)], - outputs=["step01_mdx_upload.json", "step01_mdx_source.md"], - note="MDX 원본 그대로 step01_mdx_source.md 에 복사.", - ) - - # 1. Parse MDX (V4 무관) - legacy_slide_title, legacy_sections, legacy_footer = parse_mdx(mdx_path) - # IMP-02 — Stage 0 chained adapter dispatch (default OFF canary). 
- # When env PHASE_Z_STAGE0_ADAPTER_ENABLED=1/true/yes the adapter chain - # (mdx_normalizer + section_parser) replaces legacy parse_mdx output; - # on any contract failure or exception, falls back to legacy with - # fallback_reason recorded in stage0_adapter_diagnostics. - # IMP-03 — 5-tuple return adds stage0_normalized_assets (Step 3 handoff). - ( - slide_title, - sections, - slide_footer, - stage0_adapter_diagnostics, - stage0_normalized_assets, - ) = _stage0_chained_adapter( - mdx_path, legacy_slide_title, legacy_sections, legacy_footer, - ) - _adapter_tag = ( - "adapter-used" if stage0_adapter_diagnostics["used"] - else f"legacy({stage0_adapter_diagnostics['fallback_reason'] or 'disabled'})" - ) - print(f" parsed : title='{slide_title}', sections={len(sections)} " - f"({[s.section_id for s in sections]}), footer={'yes' if slide_footer else 'no'}, " - f"stage0={_adapter_tag}") - - # ─── Step 2: MDX 정규화 ─── - # orphans / details 필드는 schema lock — 빈 배열이라도 박혀야 - # "검사 안 함" vs "없음" 구분 가능 (사용자 직설 2026-05-07). - # 실제 orphan / details 감지 로직은 별 axis (Step 2 보강). - _write_step_artifact( - run_dir, 2, "normalized", - data={ - "slide_title": slide_title, - "slide_footer": slide_footer, - "sections_count": len(sections), - "sections": [ - { - "section_id": s.section_id, - "section_num": s.section_num, - "title": s.title, - "raw_content_length": len(s.raw_content), - "raw_content": s.raw_content, - } - for s in sections + # IMP-43 (#72) u5 — Steps 0/1/2/5/6 entry-point branch. + # ``reuse_from is None`` = normal pipeline (Steps 0-6 derive state). + # ``reuse_from is not None`` = restore Steps 0/1/2/5/6 state from + # prev_run snapshot via the u4 helpers wrapped by u4b's fail-closed + # contract, then fall through to the shared Step 7+ block below. 
+ # The post-merge u1 guard has already rejected any layout / + # zone_geometry / zone_section / image override on the reuse path, + # so only --override-frame (handled at the Step 7-A axis below the + # branch) survives into this code path. + if reuse_from is None: + # ─── Step 0: 사전 준비 (precondition snapshot) ─── + # IMP-92 u4 — boot-time AI fallback preflight (gated on + # settings.ai_fallback_enabled; default OFF = skipped, no API call). + # Persistent setup errors raise Step0PreflightError before Step 1. + ai_preflight = _run_step0_ai_preflight() + _write_step_artifact( + run_dir, 0, "preconditions", + data={ + "v4_source": str(V4_RESULT_PATH.relative_to(PROJECT_ROOT)), + "templates_dir": str(TEMPLATE_DIR.relative_to(PROJECT_ROOT)), + "assets_source_base": str(ASSETS_SOURCE_BASE.relative_to(PROJECT_ROOT)), + "frame_contracts_loaded": len(load_frame_contracts()), + "frame_contracts_template_ids": sorted(load_frame_contracts().keys()), + "v4_label_to_phase_z_status": V4_LABEL_TO_PHASE_Z_STATUS, + "mvp1_allowed_statuses": sorted(MVP1_ALLOWED_STATUSES), + "ai_preflight": ai_preflight, + }, + step_status="partial", + pipeline_path_connected=True, + inputs=[ + "templates/phase_z2/catalog/frame_contracts.yaml", + "tests/matching/v4_full32_result.yaml", + "templates/phase_z2/families/*.html", + "figma_to_html_agent/blocks/", ], - "orphans": [], # schema lock — 중목차에 안 속한 텍스트 (감지 미구현) - "details": [], # schema lock —
팝업 콘텐츠 (감지 미구현) - # IMP-02 — additive only. enabled/used/fallback_reason + id reconstruction - # trace + count diff. Out of scope: V4 / align / composition. - "stage0_adapter_diagnostics": stage0_adapter_diagnostics, - # IMP-03 — Step 3 handoff (slide-level rich asset list). - # env=OFF / fallback 시 모든 list 가 비어 있음. consumer = Step 3 - # rich extractor (PHASE_Z_STEP3_RICH_OBJECTS_ENABLED canary). - "stage0_normalized_assets": stage0_normalized_assets, - }, - step_status="partial", - pipeline_path_connected=True, - inputs=["step01_mdx_source.md"], - outputs=["step02_normalized.json"], - note=( - "parse_mdx 결과: title / sections / footer 분리 + raw_content 보존. " - "heading tree 미생성, orphan / details 감지 미완 (Step 2 ⚠ partial — 별 axis). " - "orphans / details 필드는 schema lock — 빈 배열이라도 'detection 미수행' marker. " - "stage0_adapter_diagnostics = IMP-02 chained adapter trace (default OFF canary). " - "stage0_normalized_assets = IMP-03 Step 3 slide-level handoff (popups/images/tables list)." - ), - ) + outputs=["step00_preconditions.json"], + note=( + "frame_contracts.yaml 에 등록된 frame 만 mapping 가능. " + "V4 결과의 32 frame 중 다수 (F11/F14/F18 등) 미등록 — Step 0 ⚠ partial." + ), + ) - # 2. Load V4 - v4 = load_v4_result() + # ─── Step 1: MDX 업로드 ─── + mdx_source_text = mdx_path.read_text(encoding="utf-8") + (run_dir / "steps").mkdir(exist_ok=True) + (run_dir / "steps" / "step01_mdx_source.md").write_text(mdx_source_text, encoding="utf-8") + _write_step_artifact( + run_dir, 1, "mdx_upload", + data={ + "mdx_path": str(mdx_path), + "run_id": run_id, + "run_dir": str(run_dir), + "mdx_source_size_bytes": len(mdx_source_text.encode("utf-8")), + "mdx_source_lines": mdx_source_text.count("\n") + 1, + }, + step_status="done", + pipeline_path_connected=True, + inputs=[str(mdx_path)], + outputs=["step01_mdx_upload.json", "step01_mdx_source.md"], + note="MDX 원본 그대로 step01_mdx_source.md 에 복사.", + ) - # 3. Align sections to V4 granularity (### drill if needed). 
- # IMP-08 B-3 / Stage 5 R2 : forward override target ids so sub-id - # drag/drop targets force-drill their parent section even when V4 - # carries the parent exact key (deterministic drag/drop addressing). - _override_target_sids: list[str] = [] - if override_section_assignments: - for _sids in override_section_assignments.values(): - for _sid in _sids: - if isinstance(_sid, str) and _sid: - _override_target_sids.append(_sid) - sections = align_sections_to_v4_granularity( - sections, - v4, - override_target_section_ids=_override_target_sids or None, - ) - print(f" aligned : sections={len(sections)} ({[s.section_id for s in sections]})") + # 1. Parse MDX (V4 무관) + legacy_slide_title, legacy_sections, legacy_footer = parse_mdx(mdx_path) + # IMP-02 — Stage 0 chained adapter dispatch (default OFF canary). + # When env PHASE_Z_STAGE0_ADAPTER_ENABLED=1/true/yes the adapter chain + # (mdx_normalizer + section_parser) replaces legacy parse_mdx output; + # on any contract failure or exception, falls back to legacy with + # fallback_reason recorded in stage0_adapter_diagnostics. + # IMP-03 — 5-tuple return adds stage0_normalized_assets (Step 3 handoff). 
+ ( + slide_title, + sections, + slide_footer, + stage0_adapter_diagnostics, + stage0_normalized_assets, + ) = _stage0_chained_adapter( + mdx_path, legacy_slide_title, legacy_sections, legacy_footer, + ) + _adapter_tag = ( + "adapter-used" if stage0_adapter_diagnostics["used"] + else f"legacy({stage0_adapter_diagnostics['fallback_reason'] or 'disabled'})" + ) + print(f" parsed : title='{slide_title}', sections={len(sections)} " + f"({[s.section_id for s in sections]}), footer={'yes' if slide_footer else 'no'}, " + f"stage0={_adapter_tag}") - # ─── Step 5: V4 매칭 evidence (non-reject max-6 후보 list — 사용자 lock 2026-05-08) ─── - v4_evidence_list = [] - for s in sections: - candidates = lookup_v4_candidates(v4, s.section_id) - v4_evidence_list.append({ - "section_id": s.section_id, - "v4_candidates": [ - { - "template_id": c.template_id, - "frame_id": c.frame_id, - "frame_number": c.frame_number, - "confidence": c.confidence, - "label": c.label, - } - for c in candidates - ], - "candidate_status": "ok" if candidates else "no_non_reject_v4_candidate", - }) - _write_step_artifact( - run_dir, 5, "v4_evidence", - data={ - "v4_source": str(V4_RESULT_PATH.relative_to(PROJECT_ROOT)), - "aligned_section_ids": [s.section_id for s in sections], - "evidence_per_section": v4_evidence_list, - }, - step_status="done", - pipeline_path_connected=True, - inputs=["step02_normalized.json", "tests/matching/v4_full32_result.yaml"], - outputs=["step05_v4_evidence.json"], - note=( - "V4 non-reject max-6 후보 list (Step 9 application_plan input). " - "raw 32 entry 는 tests/matching/v4_full32_result.yaml 에 영속. " - "candidate_status='ok' = 후보 1개 이상 / 'no_non_reject_v4_candidate' = " - "0개 (Step 9 fallback path 입력). " - "Step 6 plan_composition() 은 lookup_v4_match() (rank-1) 그대로 사용 — " - "backward compat (Step 6-A axis 까지)." - ), - ) + # ─── Step 2: MDX 정규화 ─── + # orphans / details 필드는 schema lock — 빈 배열이라도 박혀야 + # "검사 안 함" vs "없음" 구분 가능 (사용자 직설 2026-05-07). 
+ # 실제 orphan / details 감지 로직은 별 axis (Step 2 보강). + _write_step_artifact( + run_dir, 2, "normalized", + data={ + "slide_title": slide_title, + "slide_footer": slide_footer, + "sections_count": len(sections), + "sections": [ + { + "section_id": s.section_id, + "section_num": s.section_num, + "title": s.title, + "raw_content_length": len(s.raw_content), + "raw_content": s.raw_content, + } + for s in sections + ], + "orphans": [], # schema lock — 중목차에 안 속한 텍스트 (감지 미구현) + "details": [], # schema lock —
팝업 콘텐츠 (감지 미구현) + # IMP-02 — additive only. enabled/used/fallback_reason + id reconstruction + # trace + count diff. Out of scope: V4 / align / composition. + "stage0_adapter_diagnostics": stage0_adapter_diagnostics, + # IMP-03 — Step 3 handoff (slide-level rich asset list). + # env=OFF / fallback 시 모든 list 가 비어 있음. consumer = Step 3 + # rich extractor (PHASE_Z_STEP3_RICH_OBJECTS_ENABLED canary). + "stage0_normalized_assets": stage0_normalized_assets, + }, + step_status="partial", + pipeline_path_connected=True, + inputs=["step01_mdx_source.md"], + outputs=["step02_normalized.json"], + note=( + "parse_mdx 결과: title / sections / footer 분리 + raw_content 보존. " + "heading tree 미생성, orphan / details 감지 미완 (Step 2 ⚠ partial — 별 axis). " + "orphans / details 필드는 schema lock — 빈 배열이라도 'detection 미수행' marker. " + "stage0_adapter_diagnostics = IMP-02 chained adapter trace (default OFF canary). " + "stage0_normalized_assets = IMP-03 Step 3 slide-level handoff (popups/images/tables list)." + ), + ) - # 4. Composition planner v0 — replaces per-section + select_layout_preset. - # candidate (separate / parent_merged) → score → greedy non-overlapping select → - # layout preset (count-based v0). - section_content_by_id = {s.section_id: s.raw_content for s in sections} - # IMP-08 B-3 : sub-section ordinal id -> legacy V4 key aliases (e.g. "04-2.1"). - # Empty list for canonical (top-level) sections — U1 baseline path is exact-only. - section_alias_by_id: dict[str, list] = { - s.section_id: list(getattr(s, "v4_alias_keys", []) or []) for s in sections - } - v4_fallback_traces: dict[str, dict] = {} + # 2. Load V4 + v4 = load_v4_result() - def lookup_fn(sid: str) -> Optional[V4Match]: - match, trace = lookup_v4_match_with_fallback( + # 3. Align sections to V4 granularity (### drill if needed). 
+ # IMP-08 B-3 / Stage 5 R2 : forward override target ids so sub-id + # drag/drop targets force-drill their parent section even when V4 + # carries the parent exact key (deterministic drag/drop addressing). + _override_target_sids: list[str] = [] + if override_section_assignments: + for _sids in override_section_assignments.values(): + for _sid in _sids: + if isinstance(_sid, str) and _sid: + _override_target_sids.append(_sid) + sections = align_sections_to_v4_granularity( + sections, v4, - sid, - raw_content=section_content_by_id.get(sid), - alias_keys=section_alias_by_id.get(sid), + override_target_section_ids=_override_target_sids or None, ) - v4_fallback_traces[sid] = trace - return match + print(f" aligned : sections={len(sections)} ({[s.section_id for s in sections]})") - # Step 6-A axis (사용자 lock 2026-05-08) — V4 raw dict 흡수 fn. - # composition module 은 V4 yaml shape 모름. 본 fn 만 통해 후보 list 받음. - def candidates_lookup_fn(sid: str) -> list[V4Match]: - return lookup_v4_candidates(v4, sid, alias_keys=section_alias_by_id.get(sid)) + # ─── Step 5: V4 매칭 evidence (non-reject max-6 후보 list — 사용자 lock 2026-05-08) ─── + v4_evidence_list = [] + for s in sections: + candidates = lookup_v4_candidates(v4, s.section_id) + v4_evidence_list.append({ + "section_id": s.section_id, + "v4_candidates": [ + { + "template_id": c.template_id, + "frame_id": c.frame_id, + "frame_number": c.frame_number, + "confidence": c.confidence, + "label": c.label, + } + for c in candidates + ], + "candidate_status": "ok" if candidates else "no_non_reject_v4_candidate", + }) + _write_step_artifact( + run_dir, 5, "v4_evidence", + data={ + "v4_source": str(V4_RESULT_PATH.relative_to(PROJECT_ROOT)), + "aligned_section_ids": [s.section_id for s in sections], + "evidence_per_section": v4_evidence_list, + }, + step_status="done", + pipeline_path_connected=True, + inputs=["step02_normalized.json", "tests/matching/v4_full32_result.yaml"], + outputs=["step05_v4_evidence.json"], + note=( + "V4 non-reject 
max-6 후보 list (Step 9 application_plan input). " + "raw 32 entry 는 tests/matching/v4_full32_result.yaml 에 영속. " + "candidate_status='ok' = 후보 1개 이상 / 'no_non_reject_v4_candidate' = " + "0개 (Step 9 fallback path 입력). " + "Step 6 plan_composition() 은 lookup_v4_match() (rank-1) 그대로 사용 — " + "backward compat (Step 6-A axis 까지)." + ), + ) - units, layout_preset, comp_debug = plan_composition( - sections, lookup_fn, V4_LABEL_TO_PHASE_Z_STATUS, MVP1_ALLOWED_STATUSES, - capacity_fit_fn=compute_capacity_fit, - v4_candidates_lookup_fn=candidates_lookup_fn, - ) - comp_debug["v4_fallback_selections"] = list(v4_fallback_traces.values()) - # IMP-05 L3 (Codex #10 D4) — Step 20 qualifier fields (additive only, no top-level enum change). - # `fallback_selection_count` = number of sections where rank-2/3 was promoted. - # `selection_paths` = per-section selection_path summary (rank_1 / rank_N_fallback / chain_exhausted). - # Top-level slide status enum (PASS / PARTIAL_COVERAGE / ...) remains stable. - _imp05_selection_paths = [ - { - "section_id": sid, - "selection_path": t.get("selection_path"), - "selected_rank": t.get("selected_rank"), - "selected_template_id": t.get("selected_template_id"), - "fallback_trigger": t.get("fallback_reason") if t.get("fallback_used") else None, + # 4. Composition planner v0 — replaces per-section + select_layout_preset. + # candidate (separate / parent_merged) → score → greedy non-overlapping select → + # layout preset (count-based v0). + section_content_by_id = {s.section_id: s.raw_content for s in sections} + # IMP-08 B-3 : sub-section ordinal id -> legacy V4 key aliases (e.g. "04-2.1"). + # Empty list for canonical (top-level) sections — U1 baseline path is exact-only. 
+ section_alias_by_id: dict[str, list] = { + s.section_id: list(getattr(s, "v4_alias_keys", []) or []) for s in sections } - for sid, t in v4_fallback_traces.items() - ] - comp_debug["v4_fallback_summary"] = { - "fallback_used_count": sum(1 for t in v4_fallback_traces.values() if t.get("fallback_used")), - "fallback_selection_count": sum(1 for t in v4_fallback_traces.values() if t.get("fallback_used")), - "chain_exhausted_count": sum( - 1 for t in v4_fallback_traces.values() - if t.get("selection_path") == "chain_exhausted" - ), - "selection_paths": _imp05_selection_paths, - "policy": ( - "IMP-05: rank-1 is kept when usable; rank-2/3 may be promoted only when " - "the earlier rank is not auto-renderable, has no catalog contract, or fails " - "capacity precheck. calculate_fit is not used." - ), - } + v4_fallback_traces: dict[str, dict] = {} - # IMP-47B u12 — mixed direct+reject first-render admission. - # When initial plan_composition produces a viable layout but at least one - # section remains uncovered (typically chain_exhausted / reject), re-run - # with allow_provisional in the lookup + allow_provisional_fill=True so - # reject sections gain a provisional rank-1 V4Match and a last-resort - # provisional candidate fill. This admits the mixed direct+reject case - # to the AI repair path (IMP-47B u4/u5) on first render. Skipped under - # --override-section-assignments to preserve the operator's plan and - # mirror the IMP-30 u4 retry's section_assignment_plan gate. All-direct - # slides have no uncovered sections so this is a no-op. The all-reject - # case is still handled by the IMP-30 u4 retry block below (initial - # plan_composition returns units=[]). 
- if units and layout_preset is not None and not override_section_assignments: - _u12_covered_ids: set[str] = set() - for _u in units: - _u12_covered_ids.update(_u.source_section_ids) - _u12_uncovered_ids = [ - s.section_id for s in sections if s.section_id not in _u12_covered_ids + def lookup_fn(sid: str) -> Optional[V4Match]: + match, trace = lookup_v4_match_with_fallback( + v4, + sid, + raw_content=section_content_by_id.get(sid), + alias_keys=section_alias_by_id.get(sid), + ) + v4_fallback_traces[sid] = trace + return match + + # Step 6-A axis (사용자 lock 2026-05-08) — V4 raw dict 흡수 fn. + # composition module 은 V4 yaml shape 모름. 본 fn 만 통해 후보 list 받음. + def candidates_lookup_fn(sid: str) -> list[V4Match]: + return lookup_v4_candidates(v4, sid, alias_keys=section_alias_by_id.get(sid)) + + units, layout_preset, comp_debug = plan_composition( + sections, lookup_fn, V4_LABEL_TO_PHASE_Z_STATUS, MVP1_ALLOWED_STATUSES, + capacity_fit_fn=compute_capacity_fit, + v4_candidates_lookup_fn=candidates_lookup_fn, + ) + comp_debug["v4_fallback_selections"] = list(v4_fallback_traces.values()) + # IMP-05 L3 (Codex #10 D4) — Step 20 qualifier fields (additive only, no top-level enum change). + # `fallback_selection_count` = number of sections where rank-2/3 was promoted. + # `selection_paths` = per-section selection_path summary (rank_1 / rank_N_fallback / chain_exhausted). + # Top-level slide status enum (PASS / PARTIAL_COVERAGE / ...) remains stable. 
+ _imp05_selection_paths = [ + { + "section_id": sid, + "selection_path": t.get("selection_path"), + "selected_rank": t.get("selected_rank"), + "selected_template_id": t.get("selected_template_id"), + "fallback_trigger": t.get("fallback_reason") if t.get("fallback_used") else None, + } + for sid, t in v4_fallback_traces.items() ] - if _u12_uncovered_ids: - def _lookup_fn_mixed_admission(sid: str) -> Optional[V4Match]: - match, trace = lookup_v4_match_with_fallback( - v4, - sid, - raw_content=section_content_by_id.get(sid), - alias_keys=section_alias_by_id.get(sid), - allow_provisional=True, + comp_debug["v4_fallback_summary"] = { + "fallback_used_count": sum(1 for t in v4_fallback_traces.values() if t.get("fallback_used")), + "fallback_selection_count": sum(1 for t in v4_fallback_traces.values() if t.get("fallback_used")), + "chain_exhausted_count": sum( + 1 for t in v4_fallback_traces.values() + if t.get("selection_path") == "chain_exhausted" + ), + "selection_paths": _imp05_selection_paths, + "policy": ( + "IMP-05: rank-1 is kept when usable; rank-2/3 may be promoted only when " + "the earlier rank is not auto-renderable, has no catalog contract, or fails " + "capacity precheck. calculate_fit is not used." + ), + } + + # IMP-47B u12 — mixed direct+reject first-render admission. + # When initial plan_composition produces a viable layout but at least one + # section remains uncovered (typically chain_exhausted / reject), re-run + # with allow_provisional in the lookup + allow_provisional_fill=True so + # reject sections gain a provisional rank-1 V4Match and a last-resort + # provisional candidate fill. This admits the mixed direct+reject case + # to the AI repair path (IMP-47B u4/u5) on first render. Skipped under + # --override-section-assignments to preserve the operator's plan and + # mirror the IMP-30 u4 retry's section_assignment_plan gate. All-direct + # slides have no uncovered sections so this is a no-op. 
The all-reject + # case is still handled by the IMP-30 u4 retry block below (initial + # plan_composition returns units=[]). + if units and layout_preset is not None and not override_section_assignments: + _u12_covered_ids: set[str] = set() + for _u in units: + _u12_covered_ids.update(_u.source_section_ids) + _u12_uncovered_ids = [ + s.section_id for s in sections if s.section_id not in _u12_covered_ids + ] + if _u12_uncovered_ids: + def _lookup_fn_mixed_admission(sid: str) -> Optional[V4Match]: + match, trace = lookup_v4_match_with_fallback( + v4, + sid, + raw_content=section_content_by_id.get(sid), + alias_keys=section_alias_by_id.get(sid), + allow_provisional=True, + ) + v4_fallback_traces[sid] = trace + return match + + units_mixed, layout_preset_mixed, _comp_debug_mixed = plan_composition( + sections, + _lookup_fn_mixed_admission, + V4_LABEL_TO_PHASE_Z_STATUS, + MVP1_ALLOWED_STATUSES, + capacity_fit_fn=compute_capacity_fit, + v4_candidates_lookup_fn=candidates_lookup_fn, + allow_provisional_fill=True, ) - v4_fallback_traces[sid] = trace - return match + if units_mixed and layout_preset_mixed is not None: + units = units_mixed + layout_preset = layout_preset_mixed + comp_debug["v4_fallback_selections"] = list(v4_fallback_traces.values()) + comp_debug["imp47b_u12_mixed_admission"] = { + "applied": True, + "uncovered_before": _u12_uncovered_ids, + "result_unit_count": len(units_mixed), + "result_layout_preset": layout_preset_mixed, + } - units_mixed, layout_preset_mixed, _comp_debug_mixed = plan_composition( - sections, - _lookup_fn_mixed_admission, - V4_LABEL_TO_PHASE_Z_STATUS, - MVP1_ALLOWED_STATUSES, - capacity_fit_fn=compute_capacity_fit, - v4_candidates_lookup_fn=candidates_lookup_fn, - allow_provisional_fill=True, + # ── Step 7-A axis : layout override ── + # 사용자가 LayoutPanel 에서 다른 preset 을 선택했을 때 자동 결정값을 강제 변경. + # 길이 mismatch (positions count vs unit count) 는 zone loop 의 fallback (zone_{i}) + # 으로 처리됨. 알 수 없는 preset 이면 ValueError. 
+ auto_layout_preset = layout_preset + layout_override_applied = False + if override_layout is not None and override_layout != layout_preset: + if override_layout not in LAYOUT_PRESETS: + raise ValueError( + f"--override-layout '{override_layout}' is not a known preset. " + f"Available: {sorted(LAYOUT_PRESETS.keys())}" + ) + print( + f" [override] layout_preset: {layout_preset} → {override_layout}", + file=sys.stderr, ) - if units_mixed and layout_preset_mixed is not None: - units = units_mixed - layout_preset = layout_preset_mixed - comp_debug["v4_fallback_selections"] = list(v4_fallback_traces.values()) - comp_debug["imp47b_u12_mixed_admission"] = { - "applied": True, - "uncovered_before": _u12_uncovered_ids, - "result_unit_count": len(units_mixed), - "result_layout_preset": layout_preset_mixed, - } + layout_preset = override_layout + layout_override_applied = True - # ── Step 7-A axis : layout override ── - # 사용자가 LayoutPanel 에서 다른 preset 을 선택했을 때 자동 결정값을 강제 변경. - # 길이 mismatch (positions count vs unit count) 는 zone loop 의 fallback (zone_{i}) - # 으로 처리됨. 알 수 없는 preset 이면 ValueError. - auto_layout_preset = layout_preset - layout_override_applied = False - if override_layout is not None and override_layout != layout_preset: - if override_layout not in LAYOUT_PRESETS: - raise ValueError( - f"--override-layout '{override_layout}' is not a known preset. " - f"Available: {sorted(LAYOUT_PRESETS.keys())}" - ) - print( - f" [override] layout_preset: {layout_preset} → {override_layout}", - file=sys.stderr, - ) - layout_preset = override_layout - layout_override_applied = True - - # IMP-06 (#6 / Codex #6/#7/#10/#11/#12 lock) — zone-section assignment override. - # Applied AFTER final layout_preset resolution. ZONE_ID = layout positions. - # The helper validates unknown zone ids / unknown section ids and builds a - # `position_assignment_plan`. 
Immediately below, Stage 4 Part 2 rebuilds the - # `units` list aligned with that plan: cli_override entries synthesize a - # CompositionUnit, auto entries reuse the original planner unit, and empty/ - # collision-skipped entries become None placeholders. The downstream - # zones_data / debug_zones loop then handles None entries by emitting an - # explicit empty zone record (template_id="__empty__") so the slide grid - # preserves position identity without distorting layout allocation. - section_assignment_plan: Optional[list[dict]] = None - section_assignment_summary: Optional[dict] = None - if override_section_assignments and layout_preset is not None: - positions = list(LAYOUT_PRESETS[layout_preset]["positions"]) - # Validate ZONE_IDs against active layout positions (fail-fast). - unknown_zones = [z for z in override_section_assignments if z not in positions] - if unknown_zones: - raise ValueError( - f"--override-section-assignment unknown ZONE_ID(s) {unknown_zones} for " - f"layout '{layout_preset}'. Available positions: {positions}" - ) - # Validate section_ids against aligned sections (fail-fast). - aligned_section_ids = {s.section_id for s in sections} - sections_by_id = {s.section_id: s for s in sections} - unknown_sections: list[str] = [] - for zid, sids in override_section_assignments.items(): - for sid in sids: - if sid not in aligned_section_ids: - unknown_sections.append(sid) - if unknown_sections: - raise ValueError( - f"--override-section-assignment unknown section_id(s) {unknown_sections}. 
" - f"Aligned sections: {sorted(aligned_section_ids)}" - ) - section_assignment_plan, section_assignment_summary = _build_position_assignment_plan( - units=units, - positions=positions, - override_section_assignments=override_section_assignments, - sections_by_id=sections_by_id, - override_frames=override_frames, - v4=v4, - ) - comp_debug["section_assignment_plan"] = section_assignment_plan - comp_debug["section_assignment_summary"] = section_assignment_summary - print( - f" [override] section_assignment applied: " - f"{section_assignment_summary['applied_count']} position(s), " - f"{section_assignment_summary['skipped_count']} skipped, " - f"uncovered_sections={section_assignment_summary['uncovered_section_ids']}", - file=sys.stderr, - ) - - # Stage 4 blocker-fix (Codex #13/#14/#15/#16/#17) — rebuild units as a pure - # `list[CompositionUnit]` (renderable only, no None). Position-aware truth - # lives in `render_records` (built after frame_overrides apply) per Codex - # internal contract: units = canonical renderable list, render_records = - # canonical per-position view including empty/skipped entries. - from src.phase_z2_composition import CompositionUnit - plan_units: list = [] - # Maintain ordered alignment with section_assignment_plan for the - # render_records build step below: plan_unit_by_position[pos] = unit | None. - plan_unit_by_position: dict[str, object] = {} - for entry in section_assignment_plan: - assignment_source = entry["assignment_source"] - pos = entry["position"] - if assignment_source == "cli_override" and entry["template_id"] is not None: - sids = entry["source_section_ids"] - raw_content_parts = [] - title_parts = [] + # IMP-06 (#6 / Codex #6/#7/#10/#11/#12 lock) — zone-section assignment override. + # Applied AFTER final layout_preset resolution. ZONE_ID = layout positions. + # The helper validates unknown zone ids / unknown section ids and builds a + # `position_assignment_plan`. 
Immediately below, Stage 4 Part 2 rebuilds the + # `units` list aligned with that plan: cli_override entries synthesize a + # CompositionUnit, auto entries reuse the original planner unit, and empty/ + # collision-skipped entries become None placeholders. The downstream + # zones_data / debug_zones loop then handles None entries by emitting an + # explicit empty zone record (template_id="__empty__") so the slide grid + # preserves position identity without distorting layout allocation. + section_assignment_plan: Optional[list[dict]] = None + section_assignment_summary: Optional[dict] = None + if override_section_assignments and layout_preset is not None: + positions = list(LAYOUT_PRESETS[layout_preset]["positions"]) + # Validate ZONE_IDs against active layout positions (fail-fast). + unknown_zones = [z for z in override_section_assignments if z not in positions] + if unknown_zones: + raise ValueError( + f"--override-section-assignment unknown ZONE_ID(s) {unknown_zones} for " + f"layout '{layout_preset}'. Available positions: {positions}" + ) + # Validate section_ids against aligned sections (fail-fast). + aligned_section_ids = {s.section_id for s in sections} + sections_by_id = {s.section_id: s for s in sections} + unknown_sections: list[str] = [] + for zid, sids in override_section_assignments.items(): for sid in sids: - sect = sections_by_id.get(sid) - if sect is None: - continue - raw_content_parts.append(sect.raw_content or "") - if sect.title: - title_parts.append(sect.title) - contract = get_contract(entry["template_id"]) - contract_frame_id = (contract or {}).get("frame_id") or "" - override_unit = CompositionUnit( - source_section_ids=list(sids), - merge_type="cli_override", - frame_template_id=entry["template_id"], - frame_id=str(contract_frame_id), + if sid not in aligned_section_ids: + unknown_sections.append(sid) + if unknown_sections: + raise ValueError( + f"--override-section-assignment unknown section_id(s) {unknown_sections}. 
" + f"Aligned sections: {sorted(aligned_section_ids)}" + ) + section_assignment_plan, section_assignment_summary = _build_position_assignment_plan( + units=units, + positions=positions, + override_section_assignments=override_section_assignments, + sections_by_id=sections_by_id, + override_frames=override_frames, + v4=v4, + ) + comp_debug["section_assignment_plan"] = section_assignment_plan + comp_debug["section_assignment_summary"] = section_assignment_summary + print( + f" [override] section_assignment applied: " + f"{section_assignment_summary['applied_count']} position(s), " + f"{section_assignment_summary['skipped_count']} skipped, " + f"uncovered_sections={section_assignment_summary['uncovered_section_ids']}", + file=sys.stderr, + ) + + # Stage 4 blocker-fix (Codex #13/#14/#15/#16/#17) — rebuild units as a pure + # `list[CompositionUnit]` (renderable only, no None). Position-aware truth + # lives in `render_records` (built after frame_overrides apply) per Codex + # internal contract: units = canonical renderable list, render_records = + # canonical per-position view including empty/skipped entries. + from src.phase_z2_composition import CompositionUnit + plan_units: list = [] + # Maintain ordered alignment with section_assignment_plan for the + # render_records build step below: plan_unit_by_position[pos] = unit | None. 
+ plan_unit_by_position: dict[str, object] = {} + for entry in section_assignment_plan: + assignment_source = entry["assignment_source"] + pos = entry["position"] + if assignment_source == "cli_override" and entry["template_id"] is not None: + sids = entry["source_section_ids"] + raw_content_parts = [] + title_parts = [] + for sid in sids: + sect = sections_by_id.get(sid) + if sect is None: + continue + raw_content_parts.append(sect.raw_content or "") + if sect.title: + title_parts.append(sect.title) + contract = get_contract(entry["template_id"]) + contract_frame_id = (contract or {}).get("frame_id") or "" + override_unit = CompositionUnit( + source_section_ids=list(sids), + merge_type="cli_override", + frame_template_id=entry["template_id"], + frame_id=str(contract_frame_id), + frame_number=0, + confidence=0.0, + label="use_as_is", + phase_z_status="matched_zone", + raw_content="\n\n".join(raw_content_parts), + title=" / ".join(title_parts) if title_parts else "+".join(sids), + v4_rank=None, + selection_path="cli_override", + fallback_reason=None, + score=0.0, + rationale={ + "section_assignment_override": entry["section_assignment_override"], + "replaced_auto_unit": entry["replaced_auto_unit"], + }, + ) + plan_units.append(override_unit) + plan_unit_by_position[pos] = override_unit + elif assignment_source == "auto": + # Find original auto unit by source_section_ids. + matched = None + for u in units: + if list(u.source_section_ids) == entry["source_section_ids"]: + matched = u + break + if matched is not None: + plan_units.append(matched) + plan_unit_by_position[pos] = matched + else: + # Unexpected — auto plan entry without a matching original unit. + plan_unit_by_position[pos] = None + else: + # empty / collision-skipped — NO None in units list, but the position + # is preserved in plan_unit_by_position so render_records can emit + # an empty zone record below (after frame_overrides apply). 
+ plan_unit_by_position[pos] = None + units = plan_units + + if not units or layout_preset is None: + # IMP-30 u4 — first-render invariant. The pre-u4 path here was + # `sys.exit(1)` after writing error.json. That violated the invariant + # ("final.html + Step 20 slide_status MUST be written for every input + # where Step 0~5 succeed") whenever V4 evidence for any section was + # restructure/reject (chain_exhausted) or missing (no_v4_section / + # empty_v4_judgments). + # + # Recovery has two phases: + # Phase A — provisional retry (u1 + u3 opt-in). Re-run plan_composition + # with allow_provisional=True (in lookup_fn) and allow_provisional_fill + # =True. Synthesizes rank-1 provisional V4Match on chain_exhausted + # (u1) and last-resort-fills uncovered sections with provisional + # candidates (u3). Skipped when the CLI override path was used — + # re-running plan_composition there would discard the override. + # Phase B — terminal empty-shell. If retry still yields zero units + # (true "no rank-1 V4 anywhere" case, or override path with no + # resolvable assignments), synthesize a single placeholder + # CompositionUnit with frame_template_id="__empty__", layout_preset + # ="single". The per-unit loop's __empty__ guard emits a placeholder + # zones_data / debug_zones record; final.html renders the slide + # base shell (title + footer + empty zone) so the first-render + # invariant holds. Provisional flag = True surfaces the "needs + # adaptation" signal (u5 zone class + u6 status qualifier). 
+ provisional_recovered = False + if section_assignment_plan is None: + def _lookup_fn_provisional(sid: str) -> Optional[V4Match]: + match, trace = lookup_v4_match_with_fallback( + v4, + sid, + raw_content=section_content_by_id.get(sid), + alias_keys=section_alias_by_id.get(sid), + allow_provisional=True, + ) + v4_fallback_traces[sid] = trace + return match + + units_retry, layout_preset_retry, comp_debug_retry = plan_composition( + sections, + _lookup_fn_provisional, + V4_LABEL_TO_PHASE_Z_STATUS, + MVP1_ALLOWED_STATUSES, + capacity_fit_fn=compute_capacity_fit, + v4_candidates_lookup_fn=candidates_lookup_fn, + allow_provisional_fill=True, + ) + comp_debug["imp30_u4_provisional_retry"] = { + "applied": True, + "result_unit_count": len(units_retry), + "result_layout_preset": layout_preset_retry, + "candidates_summary": comp_debug_retry.get("candidates_summary"), + } + if units_retry and layout_preset_retry is not None: + units = units_retry + layout_preset = layout_preset_retry + provisional_recovered = True + # v4_fallback_traces was overwritten by _lookup_fn_provisional; + # refresh the IMP-05 selection_paths telemetry so Step 20 reflects + # the actual selection (provisional_rank_1) rather than the stale + # chain_exhausted state from the first attempt. 
+ _imp05_selection_paths_retry = [ + { + "section_id": sid, + "selection_path": t.get("selection_path"), + "selected_rank": t.get("selected_rank"), + "selected_template_id": t.get("selected_template_id"), + "fallback_trigger": ( + t.get("fallback_reason") if t.get("fallback_used") else None + ), + } + for sid, t in v4_fallback_traces.items() + ] + comp_debug["v4_fallback_selections"] = list(v4_fallback_traces.values()) + if "v4_fallback_summary" in comp_debug: + comp_debug["v4_fallback_summary"]["selection_paths"] = ( + _imp05_selection_paths_retry + ) + print( + f" [IMP-30 u4] provisional retry recovered {len(units)} unit(s) " + f"— first-render invariant preserved.", + file=sys.stderr, + ) + + if not provisional_recovered: + # Phase B — terminal empty-shell. No rank-1 V4 evidence for any + # section, or override path produced no renderable assignments. + from src.phase_z2_composition import CompositionUnit as _CompositionUnit + run_dir.mkdir(parents=True, exist_ok=True) + empty_shell_unit = _CompositionUnit( + source_section_ids=[s.section_id for s in sections], + merge_type="empty_shell", + frame_template_id="__empty__", + frame_id="__empty__", frame_number=0, confidence=0.0, - label="use_as_is", - phase_z_status="matched_zone", - raw_content="\n\n".join(raw_content_parts), - title=" / ".join(title_parts) if title_parts else "+".join(sids), + label="empty_shell", + phase_z_status="empty_shell", + raw_content="\n\n".join((s.raw_content or "") for s in sections), + title=" / ".join((s.title or "") for s in sections), v4_rank=None, - selection_path="cli_override", - fallback_reason=None, + selection_path="empty_shell", + fallback_reason="no_v4_rank_1_for_any_section", score=0.0, rationale={ - "section_assignment_override": entry["section_assignment_override"], - "replaced_auto_unit": entry["replaced_auto_unit"], - }, - ) - plan_units.append(override_unit) - plan_unit_by_position[pos] = override_unit - elif assignment_source == "auto": - # Find original auto unit by 
source_section_ids. - matched = None - for u in units: - if list(u.source_section_ids) == entry["source_section_ids"]: - matched = u - break - if matched is not None: - plan_units.append(matched) - plan_unit_by_position[pos] = matched - else: - # Unexpected — auto plan entry without a matching original unit. - plan_unit_by_position[pos] = None - else: - # empty / collision-skipped — NO None in units list, but the position - # is preserved in plan_unit_by_position so render_records can emit - # an empty zone record below (after frame_overrides apply). - plan_unit_by_position[pos] = None - units = plan_units - - if not units or layout_preset is None: - # IMP-30 u4 — first-render invariant. The pre-u4 path here was - # `sys.exit(1)` after writing error.json. That violated the invariant - # ("final.html + Step 20 slide_status MUST be written for every input - # where Step 0~5 succeed") whenever V4 evidence for any section was - # restructure/reject (chain_exhausted) or missing (no_v4_section / - # empty_v4_judgments). - # - # Recovery has two phases: - # Phase A — provisional retry (u1 + u3 opt-in). Re-run plan_composition - # with allow_provisional=True (in lookup_fn) and allow_provisional_fill - # =True. Synthesizes rank-1 provisional V4Match on chain_exhausted - # (u1) and last-resort-fills uncovered sections with provisional - # candidates (u3). Skipped when the CLI override path was used — - # re-running plan_composition there would discard the override. - # Phase B — terminal empty-shell. If retry still yields zero units - # (true "no rank-1 V4 anywhere" case, or override path with no - # resolvable assignments), synthesize a single placeholder - # CompositionUnit with frame_template_id="__empty__", layout_preset - # ="single". The per-unit loop's __empty__ guard emits a placeholder - # zones_data / debug_zones record; final.html renders the slide - # base shell (title + footer + empty zone) so the first-render - # invariant holds. 
Provisional flag = True surfaces the "needs - # adaptation" signal (u5 zone class + u6 status qualifier). - provisional_recovered = False - if section_assignment_plan is None: - def _lookup_fn_provisional(sid: str) -> Optional[V4Match]: - match, trace = lookup_v4_match_with_fallback( - v4, - sid, - raw_content=section_content_by_id.get(sid), - alias_keys=section_alias_by_id.get(sid), - allow_provisional=True, - ) - v4_fallback_traces[sid] = trace - return match - - units_retry, layout_preset_retry, comp_debug_retry = plan_composition( - sections, - _lookup_fn_provisional, - V4_LABEL_TO_PHASE_Z_STATUS, - MVP1_ALLOWED_STATUSES, - capacity_fit_fn=compute_capacity_fit, - v4_candidates_lookup_fn=candidates_lookup_fn, - allow_provisional_fill=True, - ) - comp_debug["imp30_u4_provisional_retry"] = { - "applied": True, - "result_unit_count": len(units_retry), - "result_layout_preset": layout_preset_retry, - "candidates_summary": comp_debug_retry.get("candidates_summary"), - } - if units_retry and layout_preset_retry is not None: - units = units_retry - layout_preset = layout_preset_retry - provisional_recovered = True - # v4_fallback_traces was overwritten by _lookup_fn_provisional; - # refresh the IMP-05 selection_paths telemetry so Step 20 reflects - # the actual selection (provisional_rank_1) rather than the stale - # chain_exhausted state from the first attempt. 
- _imp05_selection_paths_retry = [ - { - "section_id": sid, - "selection_path": t.get("selection_path"), - "selected_rank": t.get("selected_rank"), - "selected_template_id": t.get("selected_template_id"), - "fallback_trigger": ( - t.get("fallback_reason") if t.get("fallback_used") else None + "imp30_u4": "terminal_first_render_empty_shell", + "reason": ( + "no_rank_1_V4_evidence_in_any_section" + if section_assignment_plan is None + else "section_assignment_override_yielded_no_renderable_units" ), - } - for sid, t in v4_fallback_traces.items() - ] - comp_debug["v4_fallback_selections"] = list(v4_fallback_traces.values()) - if "v4_fallback_summary" in comp_debug: - comp_debug["v4_fallback_summary"]["selection_paths"] = ( - _imp05_selection_paths_retry - ) - print( - f" [IMP-30 u4] provisional retry recovered {len(units)} unit(s) " - f"— first-render invariant preserved.", - file=sys.stderr, + "aligned_section_ids": [s.section_id for s in sections], + }, + provisional=True, ) - - if not provisional_recovered: - # Phase B — terminal empty-shell. No rank-1 V4 evidence for any - # section, or override path produced no renderable assignments. 
- from src.phase_z2_composition import CompositionUnit as _CompositionUnit - run_dir.mkdir(parents=True, exist_ok=True) - empty_shell_unit = _CompositionUnit( - source_section_ids=[s.section_id for s in sections], - merge_type="empty_shell", - frame_template_id="__empty__", - frame_id="__empty__", - frame_number=0, - confidence=0.0, - label="empty_shell", - phase_z_status="empty_shell", - raw_content="\n\n".join((s.raw_content or "") for s in sections), - title=" / ".join((s.title or "") for s in sections), - v4_rank=None, - selection_path="empty_shell", - fallback_reason="no_v4_rank_1_for_any_section", - score=0.0, - rationale={ - "imp30_u4": "terminal_first_render_empty_shell", + units = [empty_shell_unit] + layout_preset = "single" + comp_debug["imp30_u4_empty_shell"] = { + "applied": True, "reason": ( - "no_rank_1_V4_evidence_in_any_section" + "no_rank_1_V4_for_any_section" if section_assignment_plan is None else "section_assignment_override_yielded_no_renderable_units" ), "aligned_section_ids": [s.section_id for s in sections], - }, - provisional=True, - ) - units = [empty_shell_unit] - layout_preset = "single" - comp_debug["imp30_u4_empty_shell"] = { - "applied": True, - "reason": ( - "no_rank_1_V4_for_any_section" - if section_assignment_plan is None - else "section_assignment_override_yielded_no_renderable_units" - ), - "aligned_section_ids": [s.section_id for s in sections], - } - print( - f"\n[Phase Z-2 IMP-30 u4] EMPTY-SHELL @ composition_planner", - file=sys.stderr, - ) - print( - f" reason : " - f"{'no rank-1 V4 evidence for any section' if section_assignment_plan is None else 'override produced no renderable units'}", - file=sys.stderr, - ) - print( - f" shell : 1 placeholder unit, preset='single' " - f"(sections={[s.section_id for s in sections]})", - file=sys.stderr, - ) - - # IMP-48 (#77) — re-split merged-reject units into per-section singles. - # One-shot, deterministic (AI=0) post-pass. 
Fires AFTER all Step 6 settling - # chains (initial plan_composition / u12 mixed admission / u4 provisional - # retry / empty-shell) and AFTER section_assignment_plan is known, but - # BEFORE the Step 6 artifact write below — so the artifact reflects the - # post-resplit unit list. SKIPS when --override-section-assignments is - # active (IMP-06 / #6 is the ground truth). Helper guardrails (coverage - # equality / beneficial split / layout cap ≤ 4) keep mdx03 byte-identical - # (no-op on use_as_is / light_edit slides). u5 re-derives layout_preset - # below using the audit payload. - units, _imp48_audit = resplit_all_reject_merges( - units, - sections, - lookup_fn, - V4_LABEL_TO_PHASE_Z_STATUS, - MVP1_ALLOWED_STATUSES, - capacity_fit_fn=compute_capacity_fit, - v4_candidates_lookup_fn=candidates_lookup_fn, - section_assignment_override=section_assignment_plan is not None, - ) - comp_debug["imp48_resplit"] = _imp48_audit - # u5 — re-derive layout_preset from helper audit (post-split count via - # select_layout_preset(out_units)). Helper guarantees post_split_unit_count - # ≤ 4 (layout cap abort), so the derived preset is always renderable by - # LAYOUT_PRESETS. Respect --override-layout when present (user's explicit - # choice wins over auto-redrive; mirrors the override gate above at L3697). 
- if _imp48_audit.get("applied"): - _imp48_post_preset = _imp48_audit.get("post_split_layout_preset") - if _imp48_post_preset and not layout_override_applied: - if _imp48_post_preset != layout_preset: + } print( - f" [IMP-48] layout_preset re-derived: {layout_preset} → " - f"{_imp48_post_preset} (post-split unit count=" - f"{_imp48_audit.get('post_split_unit_count')})", + f"\n[Phase Z-2 IMP-30 u4] EMPTY-SHELL @ composition_planner", file=sys.stderr, ) - layout_preset = _imp48_post_preset - print( - f" [IMP-48] re-split applied — " - f"split={len(_imp48_audit.get('split_units', []))} " - f"skipped={len(_imp48_audit.get('skipped_units', []))} " - f"post_count={_imp48_audit.get('post_split_unit_count')} " - f"post_preset={_imp48_audit.get('post_split_layout_preset')!r}", - file=sys.stderr, + print( + f" reason : " + f"{'no rank-1 V4 evidence for any section' if section_assignment_plan is None else 'override produced no renderable units'}", + file=sys.stderr, + ) + print( + f" shell : 1 placeholder unit, preset='single' " + f"(sections={[s.section_id for s in sections]})", + file=sys.stderr, + ) + + # IMP-48 (#77) — re-split merged-reject units into per-section singles. + # One-shot, deterministic (AI=0) post-pass. Fires AFTER all Step 6 settling + # chains (initial plan_composition / u12 mixed admission / u4 provisional + # retry / empty-shell) and AFTER section_assignment_plan is known, but + # BEFORE the Step 6 artifact write below — so the artifact reflects the + # post-resplit unit list. SKIPS when --override-section-assignments is + # active (IMP-06 / #6 is the ground truth). Helper guardrails (coverage + # equality / beneficial split / layout cap ≤ 4) keep mdx03 byte-identical + # (no-op on use_as_is / light_edit slides). u5 re-derives layout_preset + # below using the audit payload. 
+ units, _imp48_audit = resplit_all_reject_merges( + units, + sections, + lookup_fn, + V4_LABEL_TO_PHASE_Z_STATUS, + MVP1_ALLOWED_STATUSES, + capacity_fit_fn=compute_capacity_fit, + v4_candidates_lookup_fn=candidates_lookup_fn, + section_assignment_override=section_assignment_plan is not None, + ) + comp_debug["imp48_resplit"] = _imp48_audit + # u5 — re-derive layout_preset from helper audit (post-split count via + # select_layout_preset(out_units)). Helper guarantees post_split_unit_count + # ≤ 4 (layout cap abort), so the derived preset is always renderable by + # LAYOUT_PRESETS. Respect --override-layout when present (user's explicit + # choice wins over auto-redrive; mirrors the override gate above at L3697). + if _imp48_audit.get("applied"): + _imp48_post_preset = _imp48_audit.get("post_split_layout_preset") + if _imp48_post_preset and not layout_override_applied: + if _imp48_post_preset != layout_preset: + print( + f" [IMP-48] layout_preset re-derived: {layout_preset} → " + f"{_imp48_post_preset} (post-split unit count=" + f"{_imp48_audit.get('post_split_unit_count')})", + file=sys.stderr, + ) + layout_preset = _imp48_post_preset + print( + f" [IMP-48] re-split applied — " + f"split={len(_imp48_audit.get('split_units', []))} " + f"skipped={len(_imp48_audit.get('skipped_units', []))} " + f"post_count={_imp48_audit.get('post_split_unit_count')} " + f"post_preset={_imp48_audit.get('post_split_layout_preset')!r}", + file=sys.stderr, + ) + + print(f" preset : {layout_preset} ({len(units)} units, composition v0 count-based)") + for u in units: + print(f" unit : {u.source_section_ids} merge={u.merge_type} → " + f"frame {u.frame_number} ({u.frame_template_id}) " + f"label={u.label} score={u.score:.3f}") + + # ─── Step 6: Composition Planning ─── + _write_step_artifact( + run_dir, 6, "composition_plan", + data={ + "selected_units_count": len(units), + "layout_preset_decided": layout_preset, + "candidates_summary": comp_debug.get("candidates_summary"), + 
"candidates_total": comp_debug.get("candidates_total"), + "candidates_viable_auto": comp_debug.get("candidates_viable_auto"), + "selected_units": [ + { + "source_section_ids": u.source_section_ids, + "merge_type": u.merge_type, + "frame_id": u.frame_id, + "frame_number": u.frame_number, + "frame_template_id": u.frame_template_id, + "label": u.label, + "v4_rank": u.v4_rank, + "selection_path": u.selection_path, + "fallback_reason": u.fallback_reason, + "score": u.score, + "phase_z_status": u.phase_z_status, + "rationale": u.rationale, + "notes": list(u.notes), + # Step 6-A axis (사용자 lock 2026-05-08) — V4 후보 list. + # 단일 frame_* / label / confidence 와 일관 (candidates[0] = rank-1 non-reject). + "v4_candidates": [ + { + "template_id": c.template_id, + "frame_id": c.frame_id, + "frame_number": c.frame_number, + "confidence": c.confidence, + "label": c.label, + } + for c in u.v4_candidates + ], + } + for u in units + ], + # IMP-48 (#77) — re-split audit. Additive field. AI=0 deterministic + # one-shot post-pass on Step 6 settling result. applied=True means + # ≥1 parent_merged / parent_merged_inferred reject unit was split + # into per-section singles; selected_units already reflects the + # post-split list. Skipped reasons (incomplete_rebuild / + # no_beneficial_split / layout_cap_exceeded) keep the merged unit + # for IMP-47B (#76) AI handoff. section_assignment_override skip + # honors IMP-06 (#6) zoneSections ground truth. + "imp48_resplit": _imp48_audit, + # IMP-43 (#72) u3 — additive informational field recording the + # run_dir-relative location of the ``--reuse-from`` sidecar + # (written immediately after this artifact). Path is stamped + # unconditionally so that a future ``--reuse-from`` consumer + # (u4) can locate the expected sidecar even when its write + # failed (u4 then fail-closes on missing/invalid sidecar via + # u2's ``validate_snapshot``). 
+ "reuse_snapshot_path": SNAPSHOT_FILENAME, + }, + step_status="done", + pipeline_path_connected=True, + inputs=["step02_normalized.json", "step05_v4_evidence.json"], + outputs=["step06_composition_plan.json", SNAPSHOT_FILENAME], + note=( + "composition v0 count-based — sections → candidates → score → greedy select. " + "Step 6-A (사용자 lock 2026-05-08): selected_units[i].v4_candidates 추가 " + "(non-reject max-6 후보 list, candidates[0] = 단일 frame_* 와 일관). " + "IMP-48 (#77, 2026-05-22): merged-reject 자동 분리 post-pass — " + "parent_merged / parent_merged_inferred + label=reject + ≥2 sections " + "→ per-section singles (each own rank-1 V4 evidence + raw_content 보존). " + "guardrails: coverage equality / beneficial split (≥1 non-reject) / " + "layout cap (≤4 units). imp48_resplit audit additive. " + "logic 무변 — runtime 결과 동일. Step 9 application_plan input. " + "IMP-43 (#72) u3: _reuse_snapshot.json sidecar written next to " + "this artifact (run_dir level) for future --reuse-from (u4) " + "consumption. Optional sidecar — write failure warns + continues." 
+ ), ) - print(f" preset : {layout_preset} ({len(units)} units, composition v0 count-based)") - for u in units: - print(f" unit : {u.source_section_ids} merge={u.merge_type} → " - f"frame {u.frame_number} ({u.frame_template_id}) " - f"label={u.label} score={u.score:.3f}") - - # ─── Step 6: Composition Planning ─── - _write_step_artifact( - run_dir, 6, "composition_plan", - data={ - "selected_units_count": len(units), - "layout_preset_decided": layout_preset, - "candidates_summary": comp_debug.get("candidates_summary"), - "candidates_total": comp_debug.get("candidates_total"), - "candidates_viable_auto": comp_debug.get("candidates_viable_auto"), - "selected_units": [ - { - "source_section_ids": u.source_section_ids, - "merge_type": u.merge_type, - "frame_id": u.frame_id, - "frame_number": u.frame_number, - "frame_template_id": u.frame_template_id, - "label": u.label, - "v4_rank": u.v4_rank, - "selection_path": u.selection_path, - "fallback_reason": u.fallback_reason, - "score": u.score, - "phase_z_status": u.phase_z_status, - "rationale": u.rationale, - "notes": list(u.notes), - # Step 6-A axis (사용자 lock 2026-05-08) — V4 후보 list. - # 단일 frame_* / label / confidence 와 일관 (candidates[0] = rank-1 non-reject). - "v4_candidates": [ - { - "template_id": c.template_id, - "frame_id": c.frame_id, - "frame_number": c.frame_number, - "confidence": c.confidence, - "label": c.label, - } - for c in u.v4_candidates - ], - } - for u in units - ], - # IMP-48 (#77) — re-split audit. Additive field. AI=0 deterministic - # one-shot post-pass on Step 6 settling result. applied=True means - # ≥1 parent_merged / parent_merged_inferred reject unit was split - # into per-section singles; selected_units already reflects the - # post-split list. Skipped reasons (incomplete_rebuild / - # no_beneficial_split / layout_cap_exceeded) keep the merged unit - # for IMP-47B (#76) AI handoff. section_assignment_override skip - # honors IMP-06 (#6) zoneSections ground truth. 
- "imp48_resplit": _imp48_audit, - }, - step_status="done", - pipeline_path_connected=True, - inputs=["step02_normalized.json", "step05_v4_evidence.json"], - outputs=["step06_composition_plan.json"], - note=( - "composition v0 count-based — sections → candidates → score → greedy select. " - "Step 6-A (사용자 lock 2026-05-08): selected_units[i].v4_candidates 추가 " - "(non-reject max-6 후보 list, candidates[0] = 단일 frame_* 와 일관). " - "IMP-48 (#77, 2026-05-22): merged-reject 자동 분리 post-pass — " - "parent_merged / parent_merged_inferred + label=reject + ≥2 sections " - "→ per-section singles (each own rank-1 V4 evidence + raw_content 보존). " - "guardrails: coverage equality / beneficial split (≥1 non-reject) / " - "layout cap (≤4 units). imp48_resplit audit additive. " - "logic 무변 — runtime 결과 동일. Step 9 application_plan input." - ), - ) + # IMP-43 (#72) u3 — write Step 6 reuse snapshot sidecar AFTER the + # step06 artifact. The sidecar captures the in-memory state that + # downstream steps need but that the canonical step02 / step05 / + # step06 artifacts do not preserve in a deserialize-ready form (e.g. + # ``CompositionUnit`` instances, raw ``comp_debug``, untruncated + # ``v4_fallback_traces``, pre-override ``layout_preset``). Helper + # warns + returns ``None`` on failure — does NOT abort the run. + # Restore wiring (``--reuse-from``) lands in u4. + _write_reuse_snapshot( + run_dir, + mdx_source_text=mdx_source_text, + slide_title=slide_title, + slide_footer=slide_footer, + sections=sections, + stage0_adapter_diagnostics=stage0_adapter_diagnostics, + stage0_normalized_assets=stage0_normalized_assets, + v4_evidence=v4_evidence_list, + layout_preset_pre_override=layout_preset, + units=units, + comp_debug=comp_debug, + v4_fallback_traces=v4_fallback_traces, + ai_preflight=ai_preflight, + ) + else: + # IMP-43 (#72) u5 — reuse path: restore Steps 0/1/2/5/6 state + # from prev_run snapshot. 
u4b's execute_reuse_from_or_fail_closed + # handles all nine fail-closed axes (prev_run_dir_missing, + # snapshot_corrupt_json, mdx_sha256_mismatch, etc.) — on success + # it returns ``(prev_run_dir, copied_artifacts, snapshot)``; + # any reachable failure terminates the process before this branch + # binds a local. + # + # State variable shape matches the locals produced by Steps 0-6 + # above so the Step 7+ block reads them transparently: + # ai_preflight : Step 0 preflight dict + # slide_title / slide_footer : parse_mdx output + # sections : list[MdxSection], post-align + # stage0_adapter_diagnostics : Stage 0 adapter trace dict + # stage0_normalized_assets : Step 3 handoff dict (popups/...) + # v4_evidence_list : list[dict] (Step 5 artifact) + # layout_preset : Step 6 post-IMP-48 preset + # units : list[CompositionUnit] + # comp_debug : Step 6 debug dict + # v4_fallback_traces : dict[sid -> trace dict] + # + # NOT serialized (deterministic from external sources or restored + # sections — recomputed here): + # v4 : load_v4_result() — V4_RESULT_PATH on disk + # section_alias_by_id : derived from restored sections + # + # u1 guard ensures override_layout is None on the reuse path, so + # layout_override_applied / auto_layout_preset reflect the + # restored Step 6 preset for the Step 7 artifact. 
+ mdx_source_text = mdx_path.read_text(encoding="utf-8") + (run_dir / "steps").mkdir(exist_ok=True) + _prev_run_dir, _copied_artifacts, _snapshot = execute_reuse_from_or_fail_closed( + reuse_from=reuse_from, + new_run_dir=run_dir, + mdx_source_text=mdx_source_text, + ) + ai_preflight = _snapshot["ai_preflight"]["value"] + slide_title = _snapshot["slide_title"]["value"] + slide_footer = _snapshot["slide_footer"]["value"] + sections = _rehydrate_mdx_sections_from_snapshot(_snapshot) + stage0_adapter_diagnostics = _snapshot["stage0_adapter_diagnostics"]["value"] + stage0_normalized_assets = _snapshot["stage0_normalized_assets"]["value"] + v4_evidence_list = _snapshot["v4_evidence"]["value"] + layout_preset = _snapshot["layout_preset_pre_override"]["value"] + units = _rehydrate_composition_units_from_snapshot(_snapshot) + comp_debug = _snapshot["comp_debug"]["value"] + v4_fallback_traces = _snapshot["v4_fallback_traces"]["value"] + v4 = load_v4_result() + section_alias_by_id = { + s.section_id: list(getattr(s, "v4_alias_keys", []) or []) + for s in sections + } + auto_layout_preset = layout_preset + layout_override_applied = False + # IMP-43 (#72) u4 fix — shared Step 7+ block reads + # ``section_assignment_plan`` unconditionally at the render_records + # gate below, and ``section_assignment_summary`` is mirrored into + # comp_debug via the normal-path override branch. Both stay at + # their "no override applied" defaults on the reuse path because + # u1's fail-closed guard already rejected --override-section- + # assignment when --reuse-from is set. Without these explicit + # defaults the reuse branch falls through to ``if + # section_assignment_plan is not None:`` (line ~5754) with an + # unbound local and the run aborts with UnboundLocalError before + # Step 7 can begin (see Codex #14 rewind report). 
+ section_assignment_plan: Optional[list[dict]] = None + section_assignment_summary: Optional[dict] = None + _write_reuse_marker( + run_dir, + prev_run_id=reuse_from, + copied_artifacts=_copied_artifacts, + ) + print( + f" reuse : sections={len(sections)} " + f"({[s.section_id for s in sections]}), " + f"units={len(units)}, layout={layout_preset}, " + f"prev_run_id={reuse_from}" + ) # 5. Per-unit: synthesize MdxSection → mapper → assets → zone data # mapper FitError 는 catch — 자동 파이프라인은 다른 zone 계속 진행. abort X. @@ -7211,6 +7920,28 @@ if __name__ == "__main__": "settings.ai_fallback_auto_cache=True for this run." ), ) + # IMP-43 (#72) u1 — incremental rerun reuse pointer. Reuse target + # = Step 0/1/2/5/6 deterministic artifacts from a prior run; Step 7 + # onward re-executes against the new frame overrides. Only frame + # overrides preserve the reusable subset (Stage 2 boundary lock); + # layout/geometry/section/image overrides invalidate it and are + # rejected by the post-merge guard below. Signature threading + + # snapshot copy/restore land in u5 and u4 respectively; this unit + # only adds the CLI surface + fail-closed precondition guard. + parser.add_argument( + "--reuse-from", + dest="reuse_from", + default=None, + metavar="PREV_RUN_ID", + help=( + "Reuse Step 0/1/2/5/6 artifacts from a previous run id " + "(directory under data/runs//phase_z2) and resume " + "execution at Step 7. Only --override-frame is preserved; " + "--override-layout / --override-zone-geometry / " + "--override-section-assignment / --override-image invalidate " + "the reusable boundary and will be rejected." + ), + ) args = parser.parse_args() if args.auto_cache: @@ -7436,6 +8167,37 @@ if __name__ == "__main__": continue overrides_images = _accepted_img + # IMP-43 (#72) u1 — fail-closed reuse_from precondition guard. 
+ # Placed AFTER the user_overrides.json merge so persisted overrides + # are evaluated against the same reuse boundary as CLI overrides + # (Stage 2 lock: "fail-closed guard after user_overrides.json merge + # and before dispatch"). Reuse target = Step 0/1/2/5/6 deterministic + # artifacts; only frame overrides preserve that subset. layout / + # zone_geometry / zone_section / image overrides each invalidate at + # least one of Step 0/1/2/5/6 and must reject. Frame-only is allowed + # (no rejected axes → falls through to dispatch). Error stderr names + # every rejected axis so the user can either drop the rejected axes + # or rerun without --reuse-from. + if args.reuse_from is not None: + _rejected_axes: list[str] = [] + if _final_override_layout is not None: + _rejected_axes.append("layout") + if overrides_geoms: + _rejected_axes.append("zone_geometry") + if overrides_section_assignments: + _rejected_axes.append("zone_section") + if overrides_images: + _rejected_axes.append("image") + if _rejected_axes: + print( + f"[error] --reuse-from incompatible with override axes: " + f"{', '.join(_rejected_axes)}. Only --override-frame is " + f"preserved across Step 0/1/2/5/6 reuse; drop the rejected " + f"overrides or rerun without --reuse-from.", + file=sys.stderr, + ) + sys.exit(2) + run_phase_z2_mvp1( args.mdx_path, args.run_id, @@ -7444,4 +8206,5 @@ if __name__ == "__main__": override_zone_geometries=overrides_geoms or None, override_section_assignments=overrides_section_assignments or None, override_image_overrides=overrides_images or None, + reuse_from=args.reuse_from, ) diff --git a/src/phase_z2_reuse_snapshot.py b/src/phase_z2_reuse_snapshot.py new file mode 100644 index 0000000..aa4c892 --- /dev/null +++ b/src/phase_z2_reuse_snapshot.py @@ -0,0 +1,301 @@ +"""IMP-43 (#72) u2 — Step 6 reuse snapshot schema (JSON-only). 
+ +Stage 2 plan (locked) — ``--reuse-from PREV_RUN_ID`` reuses the +Step 0 / 1 / 2 / 5 / 6 deterministic artifact subset plus the +in-memory state that downstream steps need but that the existing +``step02_normalized.json`` / ``step05_v4_evidence.json`` / +``step06_composition_plan.json`` artifacts do not capture in a +deserialize-ready form (e.g. ``CompositionUnit`` instances, +``comp_debug``, ``v4_fallback_traces`` raw map, pre-override +``layout_preset``). This module owns the schema for the additional +``_reuse_snapshot.json`` sidecar written next to ``step06_composition_plan.json``. + +Scope (u2 only, Stage 2 unit split): + * Pure schema + serializers + validator. No file I/O. + * JSON-only — pickle is forbidden per Stage 2 guardrails. + * Provenance per top-level field: ``{value, source_path, upstream_step}``. + * ``mdx_sha256`` integrity key — ``--reuse-from`` must fail closed when + the prev run's MDX bytes don't match the current MDX bytes. + * ``schema_version`` — bumped on any non-additive shape change. + +Out of scope (deferred to later units): + * Writing the snapshot into the run_dir (u3). + * Copy / restore on ``--reuse-from`` (u4). + * Fail-closed snapshot/path errors at restore time (u4b). + * Threading ``reuse_from`` through ``run_phase_z2_mvp1`` (u5). +""" +from __future__ import annotations + +import json +from typing import Any, Optional + + +SNAPSHOT_VERSION = 1 +SNAPSHOT_FILENAME = "_reuse_snapshot.json" + + +# Required top-level keys. Bare scalars (no provenance wrapper): +# - schema_version (contract key) +# - mdx_sha256 (integrity key) +# All other keys are wrapped {value, source_path, upstream_step}. +REQUIRED_TOP_LEVEL_KEYS: tuple[str, ...] 
= ( + "schema_version", + "mdx_sha256", + "slide_title", + "slide_footer", + "sections", + "stage0_adapter_diagnostics", + "stage0_normalized_assets", + "v4_evidence", + "layout_preset_pre_override", + "units", + "comp_debug", + "v4_fallback_traces", + "ai_preflight", +) + +_BARE_KEYS: frozenset[str] = frozenset({"schema_version", "mdx_sha256"}) + + +def _wrap(value: Any, *, source_path: str, upstream_step: str) -> dict[str, Any]: + return { + "value": value, + "source_path": source_path, + "upstream_step": upstream_step, + } + + +def serialize_section(section: Any) -> dict[str, Any]: + """Serialize an ``MdxSection``-shaped object into a JSON-safe dict. + + Duck-typed: accepts the production ``MdxSection`` dataclass or any + object exposing the same attribute names. Preserves the subset of + fields needed to reconstruct downstream pipeline behavior on the + reuse path. + """ + return { + "section_id": section.section_id, + "section_num": section.section_num, + "title": section.title, + "raw_content": section.raw_content, + "heading_number": getattr(section, "heading_number", None), + "v4_alias_keys": list(getattr(section, "v4_alias_keys", []) or []), + "sub_sections": list(getattr(section, "sub_sections", []) or []), + } + + +def serialize_unit(unit: Any) -> dict[str, Any]: + """Serialize a ``CompositionUnit``-shaped object into a JSON-safe dict. + + ``v4_candidates`` entries are V4Match-duck-typed per the + CompositionUnit docstring; each is unwrapped to its 6 named + attributes so the snapshot file does not pin V4Match's dataclass + layout. ``v4_rank`` is included so the reuse path's Step 9 + application-plan payload (``_build_application_plan_unit``) + remains byte-equivalent to the full-rerun path — full rerun stamps + each candidate's rank via ``_v4_match_from_judgment`` (e.g. 1, 2, + 3, …) and Step 9 surfaces it under ``v4_candidates[i].v4_rank``. 
+ Persisting it here lets the rehydrated ``_RehydratedV4Candidate`` + expose the same attribute end-to-end and avoids None drift in the + Step 13 equivalence comparison (u7a). + """ + return { + "source_section_ids": list(unit.source_section_ids), + "merge_type": unit.merge_type, + "frame_template_id": unit.frame_template_id, + "frame_id": unit.frame_id, + "frame_number": unit.frame_number, + "confidence": float(unit.confidence), + "label": unit.label, + "phase_z_status": unit.phase_z_status, + "raw_content": unit.raw_content, + "title": unit.title, + "v4_rank": unit.v4_rank, + "selection_path": unit.selection_path, + "fallback_reason": unit.fallback_reason, + "score": float(unit.score), + "rationale": dict(unit.rationale or {}), + "auto_selectable": bool(unit.auto_selectable), + "filter_reasons": list(unit.filter_reasons or []), + "notes": list(unit.notes or []), + "v4_candidates": [ + { + "template_id": c.template_id, + "frame_id": c.frame_id, + "frame_number": c.frame_number, + "confidence": float(c.confidence), + "label": c.label, + "v4_rank": getattr(c, "v4_rank", None), + } + for c in (unit.v4_candidates or []) + ], + "provisional": bool(getattr(unit, "provisional", False)), + } + + +def build_snapshot( + *, + mdx_sha256: str, + slide_title: Optional[str], + slide_footer: Optional[str], + sections: list, + stage0_adapter_diagnostics: Optional[dict], + stage0_normalized_assets: Optional[dict], + v4_evidence: list, + layout_preset_pre_override: Optional[str], + units: list, + comp_debug: Optional[dict], + v4_fallback_traces: Optional[dict], + ai_preflight: Optional[dict], +) -> dict[str, Any]: + """Build a JSON-serializable Step 6 reuse snapshot with provenance. + + Each top-level entry — except the two bare contract / integrity + keys (``schema_version``, ``mdx_sha256``) — is wrapped with + ``{value, source_path, upstream_step}``. 
+ + The function calls ``json.dumps(snapshot)`` at the end to enforce + JSON-safety at build time: any latent non-JSON value (set, Path, + dataclass instance, etc.) raises ``TypeError`` at the call site, + not later at restore. + """ + snapshot: dict[str, Any] = { + "schema_version": SNAPSHOT_VERSION, + "mdx_sha256": mdx_sha256, + "slide_title": _wrap( + slide_title, + source_path="steps/step02_normalized.json#/slide_title", + upstream_step="step02", + ), + "slide_footer": _wrap( + slide_footer, + source_path="steps/step02_normalized.json#/slide_footer", + upstream_step="step02", + ), + "sections": _wrap( + [serialize_section(s) for s in sections], + source_path="steps/step02_normalized.json#/sections", + upstream_step="step02", + ), + "stage0_adapter_diagnostics": _wrap( + dict(stage0_adapter_diagnostics or {}), + source_path="steps/step02_normalized.json#/stage0_adapter_diagnostics", + upstream_step="step02", + ), + "stage0_normalized_assets": _wrap( + dict(stage0_normalized_assets or {}), + source_path="steps/step02_normalized.json#/stage0_normalized_assets", + upstream_step="step02", + ), + "v4_evidence": _wrap( + list(v4_evidence or []), + source_path="steps/step05_v4_evidence.json#/evidence_per_section", + upstream_step="step05", + ), + "layout_preset_pre_override": _wrap( + layout_preset_pre_override, + source_path="steps/step06_composition_plan.json#/layout_preset_decided", + upstream_step="step06", + ), + "units": _wrap( + [serialize_unit(u) for u in units], + source_path="steps/step06_composition_plan.json#/selected_units", + upstream_step="step06", + ), + "comp_debug": _wrap( + dict(comp_debug or {}), + source_path="steps/step06_composition_plan.json#/*", + upstream_step="step06", + ), + "v4_fallback_traces": _wrap( + dict(v4_fallback_traces or {}), + # v4_fallback_traces is assembled inside run_phase_z2_mvp1 + # (see phase_z2_pipeline.py around the Step 5/6 boundary) and + # surfaces only partially into step06_composition_plan.json + # via the 
v4_fallback_summary / imp48_resplit fields. The + # canonical untruncated source is the in-memory dict at end + # of Step 6 — that's what the reuse path needs. + source_path="phase_z2_pipeline.run_phase_z2_mvp1::v4_fallback_traces", + upstream_step="step06", + ), + "ai_preflight": _wrap( + dict(ai_preflight or {}), + source_path="steps/step00_preconditions.json#/ai_preflight", + upstream_step="step00", + ), + } + json.dumps(snapshot) + return snapshot + + +class SnapshotValidationError(ValueError): + """Raised by ``validate_snapshot`` when the snapshot is structurally + unusable or fails the ``mdx_sha256`` integrity check. + + Subclass of ``ValueError`` so existing ``except ValueError`` callers + (u4b will add a tighter ``except SnapshotValidationError``) still + catch it without escaping to the outer CLI. + """ + + +def validate_snapshot( + snapshot: Any, + *, + expected_mdx_sha256: str, +) -> None: + """Validate a loaded snapshot dict (fail-closed). + + Raises ``SnapshotValidationError`` when: + * ``snapshot`` is not a dict + * ``schema_version`` is missing or != ``SNAPSHOT_VERSION`` + * ``mdx_sha256`` is missing, non-string, or doesn't match + ``expected_mdx_sha256`` + * any required top-level key is missing + * a wrapped entry doesn't expose ``{value, source_path, upstream_step}`` + + Returns ``None`` on success. + + Callers (u4b) translate the raised error into an exit-code-2 abort + with the failing axis surfaced as `value + path + upstream` + (factual-verification guardrail). 
+ """ + if not isinstance(snapshot, dict): + raise SnapshotValidationError( + f"snapshot is not a dict (got {type(snapshot).__name__})" + ) + + version = snapshot.get("schema_version") + if version != SNAPSHOT_VERSION: + raise SnapshotValidationError( + f"schema_version mismatch: expected {SNAPSHOT_VERSION!r}, got {version!r}" + ) + + actual_sha = snapshot.get("mdx_sha256") + if not isinstance(actual_sha, str) or not actual_sha: + raise SnapshotValidationError( + f"mdx_sha256 missing or non-string: got {actual_sha!r}" + ) + if actual_sha != expected_mdx_sha256: + raise SnapshotValidationError( + f"mdx_sha256 mismatch: snapshot={actual_sha!r} " + f"expected={expected_mdx_sha256!r}" + ) + + missing = [k for k in REQUIRED_TOP_LEVEL_KEYS if k not in snapshot] + if missing: + raise SnapshotValidationError( + f"missing required keys: {missing!r}" + ) + + for key, entry in snapshot.items(): + if key in _BARE_KEYS: + continue + if not isinstance(entry, dict): + raise SnapshotValidationError( + f"key {key!r}: expected wrapper dict, got {type(entry).__name__}" + ) + for field_name in ("value", "source_path", "upstream_step"): + if field_name not in entry: + raise SnapshotValidationError( + f"key {key!r}: wrapper missing {field_name!r}" + ) diff --git a/tests/test_phase_z2_cli_reuse_from.py b/tests/test_phase_z2_cli_reuse_from.py new file mode 100644 index 0000000..2c856af --- /dev/null +++ b/tests/test_phase_z2_cli_reuse_from.py @@ -0,0 +1,383 @@ +"""IMP-43 (#72) u1 + u5 — focused tests for the ``--reuse-from`` CLI surface. + +u1 scope (per the Stage 2 Exit Report): + +- argparse flag ``--reuse-from PREV_RUN_ID`` parses without error. +- Fail-closed precondition guard runs AFTER the ``user_overrides.json`` + merge and BEFORE dispatch. With ``--reuse-from`` set, the guard + must: + * accept frame-only overrides (or no overrides at all); + * reject layout / zone-geometry / zone-section / image overrides + with ``sys.exit(2)`` whose stderr names every rejected axis. 
+ +u5 scope (added 2026-05-24): + +- ``reuse_from`` is keyword-only on ``run_phase_z2_mvp1`` and defaults + to ``None`` so the absent-flag path preserves pre-u5 behaviour. +- The CLI dispatch forwards ``args.reuse_from`` verbatim — both + ``None`` (flag absent) and ``"PREV_RUN_ID"`` (flag present) reach + the kwarg unchanged. +- The fake ``run_phase_z2_mvp1`` stub below mirrors the production + signature so the forwarding lock would fail loudly on any + forwarding regression. + +The harness mirrors ``tests/test_phase_z2_cli_overrides.py`` — the +``if __name__ == "__main__"`` block of ``src.phase_z2_pipeline`` is +exec'd inside the module's namespace after monkeypatching +``run_phase_z2_mvp1`` with a recording stub. The persistence fallback +is silenced by redirecting ``src.user_overrides_io.DEFAULT_OVERRIDES_ROOT`` +to a clean tmp directory so persisted state from prior runs cannot bleed +into the parser-only assertions here. +""" +from __future__ import annotations + +import ast +import sys +from pathlib import Path +from typing import Any + +import pytest + +import src.phase_z2_pipeline as _pz2 +import src.user_overrides_io as _io + + +# -- harness --------------------------------------------------------------- + + +def _exec_main_block( + captured: dict[str, Any], argv: list[str], monkeypatch +) -> None: + """Run the ``__main__`` body of phase_z2_pipeline.py with a fake + ``run_phase_z2_mvp1`` so its kwargs are observable. 
Captures the + presence of the call (``called=True``) so guard-driven early exits + can be distinguished from a successful parse + dispatch.""" + + def _fake_run( + mdx_path, + run_id, + *, + override_layout=None, + override_frames=None, + override_zone_geometries=None, + override_section_assignments=None, + override_image_overrides=None, + reuse_from=None, + ): + captured["called"] = True + captured["mdx_path"] = mdx_path + captured["run_id"] = run_id + captured["override_layout"] = override_layout + captured["override_frames"] = override_frames + captured["override_zone_geometries"] = override_zone_geometries + captured["override_section_assignments"] = override_section_assignments + captured["override_image_overrides"] = override_image_overrides + captured["reuse_from"] = reuse_from + + monkeypatch.setattr(_pz2, "run_phase_z2_mvp1", _fake_run) + monkeypatch.setattr(sys, "argv", argv) + + src_path = Path(_pz2.__file__) + source = src_path.read_text(encoding="utf-8") + tree = ast.parse(source) + for node in tree.body: + if ( + isinstance(node, ast.If) + and isinstance(node.test, ast.Compare) + and isinstance(node.test.left, ast.Name) + and node.test.left.id == "__name__" + ): + block = ast.Module(body=node.body, type_ignores=[]) + exec(compile(block, str(src_path), "exec"), _pz2.__dict__) + return + raise AssertionError("no `if __name__ == '__main__'` block found") + + +def _redirect_overrides_root(tmp_path: Path, monkeypatch) -> None: + """Isolate the persistence fallback so file state never leaks in.""" + monkeypatch.setattr(_io, "DEFAULT_OVERRIDES_ROOT", tmp_path) + + +# -- success paths -------------------------------------------------------- + + +def test_reuse_from_alone_parses_and_dispatches(tmp_path, monkeypatch): + """``--reuse-from`` with no other overrides must parse cleanly and + fall through to dispatch (frame-only / empty override is allowed). 
+ u5 (2026-05-24): also asserts the CLI threads ``args.reuse_from`` + verbatim into the ``reuse_from`` kwarg.""" + _redirect_overrides_root(tmp_path, monkeypatch) + captured: dict[str, Any] = {} + _exec_main_block( + captured, + [ + "src.phase_z2_pipeline", + "03.mdx", + "--reuse-from", + "03__DX_20260508025134", + ], + monkeypatch, + ) + + assert captured.get("called") is True + # u5 — verbatim threading. + assert captured["reuse_from"] == "03__DX_20260508025134" + + +def test_reuse_from_with_frame_override_dispatches(tmp_path, monkeypatch): + """Frame overrides ARE preserved across Step 0/1/2/5/6 reuse, so + ``--reuse-from`` + ``--override-frame`` must reach dispatch. + u5: forwards both ``reuse_from`` and ``override_frames`` in the + same call.""" + _redirect_overrides_root(tmp_path, monkeypatch) + captured: dict[str, Any] = {} + _exec_main_block( + captured, + [ + "src.phase_z2_pipeline", + "03.mdx", + "--reuse-from", + "03__DX_20260508025134", + "--override-frame", + "03-1=frame_foo", + ], + monkeypatch, + ) + + assert captured.get("called") is True + assert captured["override_frames"] == {"03-1": "frame_foo"} + # u5 — frame override + reuse_from reach the kwarg simultaneously. + assert captured["reuse_from"] == "03__DX_20260508025134" + + +# -- u5 — flag-absent default + signature surface ------------------------ + + +def test_no_reuse_from_threads_none_kwarg(tmp_path, monkeypatch): + """u5 — when ``--reuse-from`` is absent, the kwarg must reach + ``run_phase_z2_mvp1`` as ``None`` (not omitted, not ``""``). 
This + locks the "default None preserves current behavior" requirement + from the Stage 2 plan §u5.""" + _redirect_overrides_root(tmp_path, monkeypatch) + captured: dict[str, Any] = {} + _exec_main_block( + captured, + ["src.phase_z2_pipeline", "03.mdx"], + monkeypatch, + ) + + assert captured.get("called") is True + assert captured["reuse_from"] is None + + +def test_run_phase_z2_mvp1_signature_includes_reuse_from(): + """Production signature lock — ``reuse_from`` must be a keyword-only + parameter with default ``None``. Mirror of the entry-tests + invariant; kept here so the CLI-surface test file fails loudly if + the production signature drifts away from the dispatch contract.""" + import inspect + + sig = inspect.signature(_pz2.run_phase_z2_mvp1) + assert "reuse_from" in sig.parameters, list(sig.parameters) + param = sig.parameters["reuse_from"] + assert param.kind is inspect.Parameter.KEYWORD_ONLY, param.kind + assert param.default is None, param.default + + +# -- fail-closed (single-axis rejection) ---------------------------------- + + +def test_reuse_from_with_layout_override_exits(tmp_path, monkeypatch, capsys): + _redirect_overrides_root(tmp_path, monkeypatch) + captured: dict[str, Any] = {} + with pytest.raises(SystemExit) as excinfo: + _exec_main_block( + captured, + [ + "src.phase_z2_pipeline", + "03.mdx", + "--reuse-from", + "03__DX_20260508025134", + "--override-layout", + "horizontal-2", + ], + monkeypatch, + ) + + assert excinfo.value.code == 2 + err = capsys.readouterr().err + assert "--reuse-from incompatible with override axes" in err + assert "layout" in err + assert captured.get("called") is not True + + +def test_reuse_from_with_zone_geometry_override_exits( + tmp_path, monkeypatch, capsys +): + _redirect_overrides_root(tmp_path, monkeypatch) + captured: dict[str, Any] = {} + with pytest.raises(SystemExit) as excinfo: + _exec_main_block( + captured, + [ + "src.phase_z2_pipeline", + "03.mdx", + "--reuse-from", + "03__DX_20260508025134", + 
"--override-zone-geometry", + "top=0,0,1,0.3", + ], + monkeypatch, + ) + + assert excinfo.value.code == 2 + err = capsys.readouterr().err + assert "--reuse-from incompatible with override axes" in err + assert "zone_geometry" in err + assert captured.get("called") is not True + + +def test_reuse_from_with_zone_section_override_exits( + tmp_path, monkeypatch, capsys +): + _redirect_overrides_root(tmp_path, monkeypatch) + captured: dict[str, Any] = {} + with pytest.raises(SystemExit) as excinfo: + _exec_main_block( + captured, + [ + "src.phase_z2_pipeline", + "03.mdx", + "--reuse-from", + "03__DX_20260508025134", + "--override-section-assignment", + "top=03-1", + ], + monkeypatch, + ) + + assert excinfo.value.code == 2 + err = capsys.readouterr().err + assert "--reuse-from incompatible with override axes" in err + assert "zone_section" in err + assert captured.get("called") is not True + + +def test_reuse_from_with_image_override_exits(tmp_path, monkeypatch, capsys): + _redirect_overrides_root(tmp_path, monkeypatch) + captured: dict[str, Any] = {} + with pytest.raises(SystemExit) as excinfo: + _exec_main_block( + captured, + [ + "src.phase_z2_pipeline", + "03.mdx", + "--reuse-from", + "03__DX_20260508025134", + "--override-image", + "img-abc=10,15,30,25", + ], + monkeypatch, + ) + + assert excinfo.value.code == 2 + err = capsys.readouterr().err + assert "--reuse-from incompatible with override axes" in err + assert "image" in err + assert captured.get("called") is not True + + +# -- fail-closed (multi-axis aggregation) --------------------------------- + + +def test_reuse_from_with_multiple_rejected_axes_lists_all( + tmp_path, monkeypatch, capsys +): + """Stderr must enumerate every rejected axis (not stop at first).""" + _redirect_overrides_root(tmp_path, monkeypatch) + captured: dict[str, Any] = {} + with pytest.raises(SystemExit) as excinfo: + _exec_main_block( + captured, + [ + "src.phase_z2_pipeline", + "03.mdx", + "--reuse-from", + "03__DX_20260508025134", + 
"--override-layout", + "horizontal-2", + "--override-zone-geometry", + "top=0,0,1,0.3", + "--override-image", + "img-abc=10,15,30,25", + ], + monkeypatch, + ) + + assert excinfo.value.code == 2 + err = capsys.readouterr().err + assert "layout" in err + assert "zone_geometry" in err + assert "image" in err + assert captured.get("called") is not True + + +# -- guard inactive when --reuse-from absent ------------------------------ + + +def test_no_reuse_from_layout_override_still_dispatches( + tmp_path, monkeypatch +): + """Without ``--reuse-from``, the guard must be silent — existing + override behaviour is preserved end-to-end.""" + _redirect_overrides_root(tmp_path, monkeypatch) + captured: dict[str, Any] = {} + _exec_main_block( + captured, + [ + "src.phase_z2_pipeline", + "03.mdx", + "--override-layout", + "horizontal-2", + ], + monkeypatch, + ) + + assert captured.get("called") is True + assert captured["override_layout"] == "horizontal-2" + + +# -- fail-closed honours persisted overrides ------------------------------ + + +def test_reuse_from_with_persisted_layout_override_exits( + tmp_path, monkeypatch, capsys +): + """The guard runs AFTER the user_overrides.json merge, so a layout + persisted on disk (not on the CLI) must still reject when + ``--reuse-from`` is set. This locks the Stage 2 placement rule.""" + _redirect_overrides_root(tmp_path, monkeypatch) + # Persist a layout override keyed by the MDX stem ``03``. 
+ overrides_dir = tmp_path + overrides_dir.mkdir(parents=True, exist_ok=True) + (overrides_dir / "03.json").write_text( + '{"layout": "vertical-2"}', encoding="utf-8" + ) + captured: dict[str, Any] = {} + with pytest.raises(SystemExit) as excinfo: + _exec_main_block( + captured, + [ + "src.phase_z2_pipeline", + "03.mdx", + "--reuse-from", + "03__DX_20260508025134", + ], + monkeypatch, + ) + + assert excinfo.value.code == 2 + err = capsys.readouterr().err + assert "--reuse-from incompatible with override axes" in err + assert "layout" in err + assert captured.get("called") is not True diff --git a/tests/test_phase_z2_reuse_from_entry.py b/tests/test_phase_z2_reuse_from_entry.py new file mode 100644 index 0000000..3cf32d9 --- /dev/null +++ b/tests/test_phase_z2_reuse_from_entry.py @@ -0,0 +1,555 @@ +"""IMP-43 (#72) u4 — focused tests for the --reuse-from entry helpers. + +u4 scope (per the Stage 2 Exit Report): + +- Pure path resolution, file copy, snapshot load+validate, MdxSection + + CompositionUnit rehydration, and reuse-marker writing. +- Helpers RAISE on missing artifacts / corrupt snapshot / mdx_sha256 + mismatch — u4b adds the stderr + sys.exit(2) translation and the + prev_run_dir == new_run_dir accidental-write guard around them. +- The kwarg threading + the in-``run_phase_z2_mvp1`` branch that + invokes these helpers land in u5. 
+ +Tested helpers (``src/phase_z2_pipeline.py``): + * ``_resolve_reuse_from_prev_run_dir`` + * ``_copy_reuse_artifacts_from_prev_run`` + * ``_load_and_validate_reuse_snapshot`` + * ``_rehydrate_mdx_sections_from_snapshot`` + * ``_rehydrate_composition_units_from_snapshot`` + * ``_write_reuse_marker`` + * ``_RehydratedV4Candidate`` (V4Match-shape duck type) + * ``_REUSE_STEP_ARTIFACTS`` / ``REUSE_MARKER_FILENAME`` / + ``REUSE_MARKER_SCHEMA_VERSION`` +""" +from __future__ import annotations + +import hashlib +import json +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any, Optional + +import pytest + +import src.phase_z2_pipeline as _pz2 +from src.phase_z2_composition import CompositionUnit +from src.phase_z2_reuse_snapshot import ( + SNAPSHOT_FILENAME, + SNAPSHOT_VERSION, + SnapshotValidationError, + build_snapshot, +) + + +# -- synthetic duck-typed inputs (mirror u3 test fixture) ----------------- + + +@dataclass +class _Section: + section_id: str + section_num: int + title: str + raw_content: str + heading_number: Optional[str] = None + v4_alias_keys: list = field(default_factory=list) + sub_sections: list = field(default_factory=list) + + +@dataclass +class _V4Candidate: + template_id: str + frame_id: str + frame_number: int + confidence: float + label: str + + +@dataclass +class _Unit: + source_section_ids: list + merge_type: str + frame_template_id: str + frame_id: str + frame_number: int + confidence: float + label: str + phase_z_status: str + raw_content: str + title: str + score: float + v4_rank: Optional[int] = 1 + selection_path: str = "rank_1" + fallback_reason: Optional[str] = None + rationale: dict = field(default_factory=dict) + auto_selectable: bool = True + filter_reasons: list = field(default_factory=list) + notes: list = field(default_factory=list) + v4_candidates: list = field(default_factory=list) + provisional: bool = False + + +def _mdx_text() -> str: + return "# Slide\n\n## 03-1 DX status\n\n- bullet 
one\n- bullet two\n" + + +def _build_canonical_snapshot( + *, + mdx_source_text: Optional[str] = None, + layout_preset: str = "single", +) -> dict: + text = mdx_source_text if mdx_source_text is not None else _mdx_text() + cand = _V4Candidate( + template_id="tpl_a", + frame_id="fid_a", + frame_number=13, + confidence=0.91, + label="use_as_is", + ) + section = _Section( + section_id="03-1", + section_num=1, + title="DX status", + raw_content="- bullet one\n- bullet two", + heading_number="3.1", + v4_alias_keys=["03-1.1"], + sub_sections=[], + ) + unit = _Unit( + source_section_ids=["03-1"], + merge_type="single", + frame_template_id="tpl_a", + frame_id="fid_a", + frame_number=13, + confidence=0.91, + label="use_as_is", + phase_z_status="auto_renderable", + raw_content="- bullet one\n- bullet two", + title="DX status", + score=0.91, + v4_candidates=[cand], + provisional=False, + auto_selectable=True, + filter_reasons=[], + notes=["a note"], + rationale={"weight": 1.0}, + ) + return build_snapshot( + mdx_sha256=hashlib.sha256(text.encode("utf-8")).hexdigest(), + slide_title="Slide", + slide_footer=None, + sections=[section], + stage0_adapter_diagnostics={"used": True, "fallback_reason": None}, + stage0_normalized_assets={"popups": [], "images": [], "tables": []}, + v4_evidence=[ + { + "section_id": "03-1", + "v4_candidates": [ + { + "template_id": "tpl_a", + "frame_id": "fid_a", + "frame_number": 13, + "confidence": 0.91, + "label": "use_as_is", + } + ], + "candidate_status": "ok", + } + ], + layout_preset_pre_override=layout_preset, + units=[unit], + comp_debug={"v4_fallback_summary": {"fallback_used_count": 0}}, + v4_fallback_traces={"03-1": {"selection_path": "rank_1"}}, + ai_preflight={"enabled": False, "skipped": True}, + ) + + +def _seed_prev_run_dir(prev_run_dir: Path, *, snapshot: dict) -> None: + """Populate ``prev_run_dir`` with the Step 0/1/2/5/6 artifacts plus + the reuse snapshot — minimal but valid surface for u4 helpers.""" + (prev_run_dir / 
"steps").mkdir(parents=True, exist_ok=True) + for fname in _pz2._REUSE_STEP_ARTIFACTS: + # JSON-shaped surface — exact shape doesn't matter for u4 (the + # copy helper doesn't introspect contents); just must exist. + (prev_run_dir / "steps" / fname).write_text( + f'{{"name": "{fname}"}}' + if fname.endswith(".json") + else "raw mdx body bytes", + encoding="utf-8", + ) + (prev_run_dir / SNAPSHOT_FILENAME).write_text( + json.dumps(snapshot, ensure_ascii=False, indent=2), + encoding="utf-8", + ) + + +# -- _REUSE_STEP_ARTIFACTS constant --------------------------------------- + + +def test_reuse_step_artifacts_locks_stage2_boundary(): + """Stage 2 boundary lock — Step 0/1/2/5/6 artifacts only. + Step 3/4 deliberately absent: step03 / step04 ARE written after + Step 6 (around src/phase_z2_pipeline.py:5931 / 5964) before the + Step 7 artifact (~6294), but both are emitted with + step_status='trace-only' / pipeline_path_connected=False — they + are diagnostic projections of the Step 6 debug_zones, not + pipeline-path-connected inputs that Step 7+ rehydrate from.""" + assert _pz2._REUSE_STEP_ARTIFACTS == ( + "step00_preconditions.json", + "step01_mdx_upload.json", + "step01_mdx_source.md", + "step02_normalized.json", + "step05_v4_evidence.json", + "step06_composition_plan.json", + ) + + +def test_reuse_marker_filename_is_dotfile_at_run_dir_root(): + assert _pz2.REUSE_MARKER_FILENAME == "_reuse_marker.json" + + +# -- _resolve_reuse_from_prev_run_dir ------------------------------------- + + +def test_resolve_prev_run_dir_returns_runs_dir_phase_z2_path(): + rv = _pz2._resolve_reuse_from_prev_run_dir("20260524_120000_phase_z2") + expected = _pz2.RUNS_DIR / "20260524_120000_phase_z2" / "phase_z2" + assert rv == expected + + +def test_resolve_prev_run_dir_does_not_check_existence(tmp_path: Path): + """Pure path computation — must NOT touch the filesystem (u4b + handles the missing-prev-run case).""" + rv = _pz2._resolve_reuse_from_prev_run_dir("never_existed_run_id") + assert 
isinstance(rv, Path) + # The path does not actually exist; helper still returned cleanly. + assert not rv.exists() + + +# -- _copy_reuse_artifacts_from_prev_run ---------------------------------- + + +def test_copy_reuse_artifacts_copies_all_step_files(tmp_path: Path): + prev = tmp_path / "prev" / "phase_z2" + new = tmp_path / "new" / "phase_z2" + snap = _build_canonical_snapshot() + _seed_prev_run_dir(prev, snapshot=snap) + + copied = _pz2._copy_reuse_artifacts_from_prev_run(prev, new) + + for fname in _pz2._REUSE_STEP_ARTIFACTS: + assert (new / "steps" / fname).exists(), f"missing copy: {fname}" + assert copied[fname] == f"steps/{fname}" + + +def test_copy_reuse_artifacts_copies_snapshot_to_run_dir_root(tmp_path: Path): + prev = tmp_path / "prev" / "phase_z2" + new = tmp_path / "new" / "phase_z2" + snap = _build_canonical_snapshot() + _seed_prev_run_dir(prev, snapshot=snap) + + copied = _pz2._copy_reuse_artifacts_from_prev_run(prev, new) + + # Snapshot lives at run_dir root (NOT under steps/) per u3 contract. + assert (new / SNAPSHOT_FILENAME).exists() + assert copied[SNAPSHOT_FILENAME] == SNAPSHOT_FILENAME + + +def test_copy_reuse_artifacts_creates_steps_subdir_if_absent(tmp_path: Path): + prev = tmp_path / "prev" / "phase_z2" + new = tmp_path / "new" / "phase_z2" + snap = _build_canonical_snapshot() + _seed_prev_run_dir(prev, snapshot=snap) + + # new_run_dir / steps does not yet exist + assert not (new / "steps").exists() + _pz2._copy_reuse_artifacts_from_prev_run(prev, new) + assert (new / "steps").is_dir() + + +def test_copy_reuse_artifacts_missing_step_raises_filenotfound( + tmp_path: Path, +): + prev = tmp_path / "prev" / "phase_z2" + new = tmp_path / "new" / "phase_z2" + snap = _build_canonical_snapshot() + _seed_prev_run_dir(prev, snapshot=snap) + # Delete one of the required step artifacts. 
+ (prev / "steps" / "step05_v4_evidence.json").unlink() + + with pytest.raises(FileNotFoundError) as ei: + _pz2._copy_reuse_artifacts_from_prev_run(prev, new) + msg = str(ei.value) + assert "step05_v4_evidence.json" in msg + assert "prev_run_dir" in msg + + +def test_copy_reuse_artifacts_missing_snapshot_raises_filenotfound( + tmp_path: Path, +): + prev = tmp_path / "prev" / "phase_z2" + new = tmp_path / "new" / "phase_z2" + snap = _build_canonical_snapshot() + _seed_prev_run_dir(prev, snapshot=snap) + (prev / SNAPSHOT_FILENAME).unlink() + + with pytest.raises(FileNotFoundError) as ei: + _pz2._copy_reuse_artifacts_from_prev_run(prev, new) + assert SNAPSHOT_FILENAME in str(ei.value) + + +def test_copy_reuse_artifacts_byte_identical_copy(tmp_path: Path): + """Bytes must match exactly — copy, not transform.""" + prev = tmp_path / "prev" / "phase_z2" + new = tmp_path / "new" / "phase_z2" + snap = _build_canonical_snapshot() + _seed_prev_run_dir(prev, snapshot=snap) + + _pz2._copy_reuse_artifacts_from_prev_run(prev, new) + + for fname in _pz2._REUSE_STEP_ARTIFACTS: + assert ( + (prev / "steps" / fname).read_bytes() + == (new / "steps" / fname).read_bytes() + ) + assert ( + (prev / SNAPSHOT_FILENAME).read_bytes() + == (new / SNAPSHOT_FILENAME).read_bytes() + ) + + +# -- _load_and_validate_reuse_snapshot ------------------------------------ + + +def test_load_and_validate_returns_snapshot_dict(tmp_path: Path): + text = _mdx_text() + snap = _build_canonical_snapshot(mdx_source_text=text) + (tmp_path / SNAPSHOT_FILENAME).write_text( + json.dumps(snap, ensure_ascii=False, indent=2), encoding="utf-8" + ) + + loaded = _pz2._load_and_validate_reuse_snapshot( + tmp_path, mdx_source_text=text + ) + assert loaded["schema_version"] == SNAPSHOT_VERSION + assert loaded["slide_title"]["value"] == "Slide" + + +def test_load_and_validate_mdx_sha256_mismatch_raises(tmp_path: Path): + """Snapshot was built for ``text_a`` but caller passes ``text_b``; + u2 validator raises 
``SnapshotValidationError`` (subclass of + ``ValueError``). u4b translates to exit 2 — here we only assert the + raise.""" + text_a = "# Slide A\n" + text_b = "# Slide B (different bytes)\n" + snap = _build_canonical_snapshot(mdx_source_text=text_a) + (tmp_path / SNAPSHOT_FILENAME).write_text( + json.dumps(snap, ensure_ascii=False, indent=2), encoding="utf-8" + ) + + with pytest.raises(SnapshotValidationError) as ei: + _pz2._load_and_validate_reuse_snapshot( + tmp_path, mdx_source_text=text_b + ) + assert "mdx_sha256 mismatch" in str(ei.value) + + +def test_load_and_validate_corrupt_json_raises(tmp_path: Path): + (tmp_path / SNAPSHOT_FILENAME).write_text( + "{ not valid json", encoding="utf-8" + ) + with pytest.raises(json.JSONDecodeError): + _pz2._load_and_validate_reuse_snapshot( + tmp_path, mdx_source_text=_mdx_text() + ) + + +def test_load_and_validate_missing_snapshot_file_raises(tmp_path: Path): + """No snapshot at all — bare ``read_text`` raises FileNotFoundError. + u4b translates this to exit 2 with a provenance message.""" + with pytest.raises(FileNotFoundError): + _pz2._load_and_validate_reuse_snapshot( + tmp_path, mdx_source_text=_mdx_text() + ) + + +def test_load_and_validate_schema_version_mismatch_raises(tmp_path: Path): + text = _mdx_text() + snap = _build_canonical_snapshot(mdx_source_text=text) + snap["schema_version"] = SNAPSHOT_VERSION + 1 # force mismatch + (tmp_path / SNAPSHOT_FILENAME).write_text( + json.dumps(snap, ensure_ascii=False, indent=2), encoding="utf-8" + ) + with pytest.raises(SnapshotValidationError) as ei: + _pz2._load_and_validate_reuse_snapshot( + tmp_path, mdx_source_text=text + ) + assert "schema_version" in str(ei.value) + + +# -- _rehydrate_mdx_sections_from_snapshot -------------------------------- + + +def test_rehydrate_sections_returns_mdxsection_instances(): + snap = _build_canonical_snapshot() + sections = _pz2._rehydrate_mdx_sections_from_snapshot(snap) + assert len(sections) == 1 + assert isinstance(sections[0], 
_pz2.MdxSection) + assert sections[0].section_id == "03-1" + assert sections[0].title == "DX status" + assert sections[0].raw_content == "- bullet one\n- bullet two" + + +def test_rehydrate_sections_preserves_heading_number_and_aliases(): + snap = _build_canonical_snapshot() + sections = _pz2._rehydrate_mdx_sections_from_snapshot(snap) + assert sections[0].heading_number == "3.1" + assert sections[0].v4_alias_keys == ["03-1.1"] + assert sections[0].sub_sections == [] + + +# -- _rehydrate_composition_units_from_snapshot --------------------------- + + +def test_rehydrate_units_returns_composition_unit_instances(): + snap = _build_canonical_snapshot() + units = _pz2._rehydrate_composition_units_from_snapshot(snap) + assert len(units) == 1 + assert isinstance(units[0], CompositionUnit) + + +def test_rehydrate_units_preserves_core_fields(): + snap = _build_canonical_snapshot() + units = _pz2._rehydrate_composition_units_from_snapshot(snap) + u = units[0] + assert u.source_section_ids == ["03-1"] + assert u.merge_type == "single" + assert u.frame_template_id == "tpl_a" + assert u.frame_id == "fid_a" + assert u.frame_number == 13 + assert u.confidence == pytest.approx(0.91) + assert u.label == "use_as_is" + assert u.phase_z_status == "auto_renderable" + assert u.title == "DX status" + assert u.score == pytest.approx(0.91) + + +def test_rehydrate_units_preserves_provisional_and_auto_selectable(): + snap = _build_canonical_snapshot() + units = _pz2._rehydrate_composition_units_from_snapshot(snap) + assert units[0].provisional is False + assert units[0].auto_selectable is True + assert units[0].filter_reasons == [] + assert units[0].notes == ["a note"] + assert units[0].rationale == {"weight": 1.0} + + +def test_rehydrate_units_v4_candidates_expose_attribute_access(): + """``_apply_frame_override_to_unit`` reads + ``cand.template_id`` / ``cand.frame_id`` / etc. 
off + ``unit.v4_candidates`` — restored entries MUST expose attribute + access, not raw dict access.""" + snap = _build_canonical_snapshot() + units = _pz2._rehydrate_composition_units_from_snapshot(snap) + cands = units[0].v4_candidates + assert len(cands) == 1 + c = cands[0] + assert isinstance(c, _pz2._RehydratedV4Candidate) + assert c.template_id == "tpl_a" + assert c.frame_id == "fid_a" + assert c.frame_number == 13 + assert c.confidence == pytest.approx(0.91) + assert c.label == "use_as_is" + + +def test_rehydrate_units_empty_v4_candidates_yields_empty_list(): + snap = _build_canonical_snapshot() + snap["units"]["value"][0]["v4_candidates"] = [] + units = _pz2._rehydrate_composition_units_from_snapshot(snap) + assert units[0].v4_candidates == [] + + +# -- _write_reuse_marker -------------------------------------------------- + + +def test_write_reuse_marker_writes_json_with_prev_run_id(tmp_path: Path): + copied = { + "step00_preconditions.json": "steps/step00_preconditions.json", + SNAPSHOT_FILENAME: SNAPSHOT_FILENAME, + } + rv = _pz2._write_reuse_marker( + tmp_path, + prev_run_id="20260524_010101_phase_z2", + copied_artifacts=copied, + ) + assert rv == tmp_path / _pz2.REUSE_MARKER_FILENAME + marker = json.loads(rv.read_text(encoding="utf-8")) + assert marker["schema_version"] == _pz2.REUSE_MARKER_SCHEMA_VERSION + assert marker["reuse_from_prev_run_id"] == "20260524_010101_phase_z2" + assert marker["snapshot_filename"] == SNAPSHOT_FILENAME + + +def test_write_reuse_marker_records_copied_artifacts_and_boundary( + tmp_path: Path, +): + copied = { + fname: f"steps/{fname}" for fname in _pz2._REUSE_STEP_ARTIFACTS + } + copied[SNAPSHOT_FILENAME] = SNAPSHOT_FILENAME + _pz2._write_reuse_marker( + tmp_path, + prev_run_id="20260524_010101_phase_z2", + copied_artifacts=copied, + ) + marker = json.loads( + (tmp_path / _pz2.REUSE_MARKER_FILENAME).read_text(encoding="utf-8") + ) + assert marker["copied_artifacts"] == copied + assert marker["boundary_steps"] == 
list(_pz2._REUSE_STEP_ARTIFACTS) + assert marker["resume_at_step"] == 7 + + +# -- module surface anchors ----------------------------------------------- + + +def test_pipeline_exposes_all_u4_helpers(): + """u5 wires these into ``run_phase_z2_mvp1`` — they must remain + module-level callable surface on ``phase_z2_pipeline``.""" + for name in ( + "_resolve_reuse_from_prev_run_dir", + "_copy_reuse_artifacts_from_prev_run", + "_load_and_validate_reuse_snapshot", + "_rehydrate_mdx_sections_from_snapshot", + "_rehydrate_composition_units_from_snapshot", + "_write_reuse_marker", + "_RehydratedV4Candidate", + "_REUSE_STEP_ARTIFACTS", + "REUSE_MARKER_FILENAME", + "REUSE_MARKER_SCHEMA_VERSION", + ): + assert hasattr(_pz2, name), f"u4 surface missing: {name}" + + +def test_pipeline_run_signature_reuse_from_is_kw_only_optional_none(): + """u5 — ``reuse_from`` is now part of ``run_phase_z2_mvp1``'s public + signature. The kwarg MUST be keyword-only (after the ``*`` barrier), + default to ``None`` (so absent flag preserves the pre-u5 behaviour), + and sit alongside the existing override kwargs. The locked + ``until_u5`` regression has flipped — keep this assertion as the + forward-direction lock so future signature drift (e.g. a positional + promotion or a default change) trips loudly.""" + import inspect + + sig = inspect.signature(_pz2.run_phase_z2_mvp1) + assert "reuse_from" in sig.parameters, ( + "u5 must thread reuse_from into run_phase_z2_mvp1 — kwarg missing. 
" + f"current params: {list(sig.parameters)}" + ) + param = sig.parameters["reuse_from"] + assert param.kind is inspect.Parameter.KEYWORD_ONLY, ( + f"reuse_from must be keyword-only (after the ``*`` barrier); " + f"got kind={param.kind}" + ) + assert param.default is None, ( + f"reuse_from must default to None to preserve pre-u5 behaviour; " + f"got default={param.default!r}" + ) diff --git a/tests/test_phase_z2_reuse_from_equivalence_sweep.py b/tests/test_phase_z2_reuse_from_equivalence_sweep.py new file mode 100644 index 0000000..42b2f61 --- /dev/null +++ b/tests/test_phase_z2_reuse_from_equivalence_sweep.py @@ -0,0 +1,261 @@ +"""IMP-43 (#72) u7b — Opt-in sweep equivalence test for full rerun vs +``--reuse-from`` across 3 layouts × 3 mdx samples × per-baseline frame pins. + +u7b scope (per the Stage 2 Exit Report): + + * Three mdx samples — ``01.mdx``, ``02.mdx``, ``03.mdx`` (the baseline + full run for each must exit 0 to give step13 equivalence something + to compare; ``04.mdx`` / ``05.mdx`` are deliberately excluded per + the u7a docstring — adapter_needed / EMPTY_SHELL_NO_CONTENT). + * Three ``--override-layout`` axes — ``None`` (auto), ``horizontal-2``, + ``vertical-2``. ``None`` exercises the natural layout for that mdx; + the explicit pins exercise the layout-locked branch (Step 7-B + ``select_layout_preset`` honors ``--override-layout`` per + ``src/phase_z2_pipeline.py:5210``). The reuse path (C) inherits the + locked layout via the Step 6 snapshot ``layout_preset_pre_override`` + (u2) — it MUST NOT pass ``--override-layout`` itself (u1 fail-closed + guard at ``src/phase_z2_pipeline.py:8181-8199`` rejects layout + overrides combined with ``--reuse-from``). + * "All 32 frames" coverage axis — each test case discovers ALL pinnable + ``(unit_id, frame_template_id)`` pairs from its baseline ``step06_ + composition_plan.json`` and uses every pin in (B) and (C). 
Union of + pins across the 9 (mdx, layout) cases approximates the V4 catalog + coverage; pure Cartesian 3×3×32 = 288 parametrize combos × 3 + subprocess runs ≈ 864 pipeline runs is impractical even opt-in. + +Three subprocess pipeline runs per case (same shape as u7a): + (A) baseline full run — no frame overrides — reuse seed. + (B) full rerun with the discovered frame overrides — independent + control path that does NOT touch ``--reuse-from``. + (C) ``--reuse-from `` with the same frame overrides — the + reuse path. + +Assert: ``step13_render.json`` from (B) and (C) is byte-equal modulo the +Stage 2 whitelist (only ``run_id`` substring inside +``data.final_html_path`` is normalized — see u7a docstring for the full +whitelist rationale). + +Opt-in: + * ``@pytest.mark.sweep`` — marker registered in ``pyproject.toml``. + Default CI must run ``pytest -m 'not sweep'``; explicit opt-in is + ``pytest -m sweep tests/test_phase_z2_reuse_from_equivalence_sweep.py``. + * If an mdx / layout combo's baseline (A) returns non-zero (e.g., a + layout pin incompatible with the mdx's natural unit_count produces + a pipeline error), the case is skipped — u7b is a reuse-equivalence + test, not a baseline-correctness test (those live elsewhere). + +Persisted ``data/user_overrides/.json`` isolation: + IMP-52 (#80) u2 introduced an MDX-keyed persistence fallback at + ``src/phase_z2_pipeline.py:8075-8168`` that merges the on-disk file + into the subprocess overrides regardless of CLI flags. For mdx stems + whose persistence file carries non-frame axes (e.g., + ``data/user_overrides/03.json`` holds ``layout`` + ``zone_geometries``), + two orthogonality problems break u7b: + + 1. (A) and (B) absorb the persisted ``layout`` / ``zone_geometries`` + independent of the ``layout_pin`` parameter, collapsing the test + matrix — the parametrized layout axis stops being a real axis. + 2. 
(C) on the reuse path receives the persisted non-frame axes via + the same merge, which the u1 fail-closed guard at + ``src/phase_z2_pipeline.py:8181-8199`` rejects with exit code 2 + before step13 equivalence can be measured. + + The ``_isolated_persisted_overrides`` context manager renames the + persistence file out of the way for the duration of each parametrized + case (try/finally restore; crash-resistant via a startup recovery + branch). The hidden backup filename starts with ``.`` so + ``user_overrides_io.validate_key`` (``src/user_overrides_io.py:72``) + cannot accidentally re-load it mid-run. The pipeline subprocess does + not write the persistence file (writes are gated to the Vite + ``/api/user-overrides`` endpoint), so the rename is safe across the + three subprocess spawns. The real-world reuse-from × persistence + interaction (where ``--reuse-from`` should arguably suppress + non-frame persistence injection rather than fail closed) is a + follow-up issue candidate, surfaced in this unit's unit_executed + Gitea comment. +""" +from __future__ import annotations + +import contextlib +import json +import os +import subprocess +import sys +import uuid +from pathlib import Path + +import pytest + +from tests.test_phase_z2_reuse_from_equivalence_unit import ( + _assert_run_ok, + _frame_override_args, + _normalize_step13, + _read_step_artifact, + _spawn_pipeline, +) + +REPO_ROOT = Path(__file__).resolve().parents[1] +SAMPLES_DIR = REPO_ROOT / "samples" / "mdx_batch" +RUNS_DIR = REPO_ROOT / "data" / "runs" +OVERRIDES_DIR = REPO_ROOT / "data" / "user_overrides" + +MDX_FILES = ("01.mdx", "02.mdx", "03.mdx") +LAYOUT_PINS = (None, "horizontal-2", "vertical-2") + + +def _unique(prefix: str) -> str: + return f"{prefix}_imp43_u7b_{uuid.uuid4().hex[:8]}" + + +@contextlib.contextmanager +def _isolated_persisted_overrides(mdx_name: str): + """Temporarily rename ``data/user_overrides/.json`` so the + three subprocess runs see a clean persistence state. 
+ + Rationale: see module docstring "Persisted ... isolation" section. + The pipeline reads the file at + ``src/phase_z2_pipeline.py:8098`` via ``load(key)`` which resolves + to ``DEFAULT_OVERRIDES_ROOT`` (``src/user_overrides_io.py:54``); + moving the file out of the way reduces ``load(key) -> {}`` and + prevents the merge from injecting persisted axes. + + Crash recovery: a prior run that crashed between rename and + restore would leave ``..imp43_u7b_isolation.bak`` next to + the missing ``.json``. The recovery branch at startup + restores the backup before proceeding so we never lose the + original on a second invocation. + """ + stem = Path(mdx_name).stem + src = OVERRIDES_DIR / f"{stem}.json" + backup = OVERRIDES_DIR / f".{stem}.imp43_u7b_isolation.bak" + if backup.is_file() and not src.is_file(): + os.replace(backup, src) + moved = False + if src.is_file(): + OVERRIDES_DIR.mkdir(parents=True, exist_ok=True) + os.replace(src, backup) + moved = True + try: + yield + finally: + if moved and backup.is_file(): + os.replace(backup, src) + + +def _discover_all_frame_pins(seed_run_id: str) -> list[tuple[str, str]]: + """Discover ALL ``(unit_id, frame_template_id)`` pins from baseline plan. + + Unlike u7a (capped at 2 for fast CI), u7b uses every pin so the sweep + naturally exercises the union of frame templates produced across the + 9 (mdx, layout) cases — the practical realization of the Stage 2 + plan's "all 32 frames" axis (full Cartesian 3×3×32 would be 288×3 = + 864 pipeline runs; impractical even opt-in). + + Schema source: ``src/phase_z2_pipeline.py:5530-5560`` — step06 artifact + emits ``data.selected_units[*].{source_section_ids, frame_template_id}``; + ``unit_id = "+".join(source_section_ids)`` per the ``--override-frame`` + contract documented at ``src/phase_z2_pipeline.py:7827-7832``. 
+ """ + step06 = _read_step_artifact(seed_run_id, "step06_composition_plan.json") + selected_units = step06.get("data", {}).get("selected_units") or [] + pins: list[tuple[str, str]] = [] + for u in selected_units: + sids = u.get("source_section_ids") or [] + tpl_id = u.get("frame_template_id") + if not isinstance(sids, list) or not sids: + continue + if not isinstance(tpl_id, str) or not tpl_id: + continue + unit_id = "+".join(str(s) for s in sids) + if unit_id: + pins.append((unit_id, tpl_id)) + return pins + + +@pytest.mark.sweep +@pytest.mark.parametrize("layout_pin", LAYOUT_PINS) +@pytest.mark.parametrize("mdx_name", MDX_FILES) +def test_full_rerun_vs_reuse_from_step13_equivalence_sweep( + mdx_name: str, layout_pin: str | None +) -> None: + """Stage 2 §u7b binding contract: across the (mdx × layout) sweep, + full rerun (B) with discovered frame overrides and ``--reuse-from`` + (C) with the same overrides yield byte-equal ``step13_render.json`` + modulo the u7a whitelist. + + Skip semantics: if baseline (A) fails for a (mdx, layout) combo + (e.g., layout pin incompatible with mdx unit_count), the case is + skipped — baseline correctness is not the equivalence axis under + test here. + """ + mdx_path = SAMPLES_DIR / mdx_name + if not mdx_path.is_file(): + pytest.skip(f"sample missing: {mdx_path}") + + layout_args: list[str] = ( + [] if layout_pin is None else ["--override-layout", layout_pin] + ) + + # Isolate any persisted ``data/user_overrides/.json`` for this + # mdx before spawning the three subprocesses; see module docstring + # "Persisted ... isolation" section for the orthogonality and + # fail-closed-guard rationale. + with _isolated_persisted_overrides(mdx_name): + # (A) baseline full run — no frame overrides — reuse seed. 
+ seed_id = _unique("seed") + cp_a = _spawn_pipeline([str(mdx_path), seed_id, *layout_args]) + if cp_a.returncode != 0: + pytest.skip( + f"baseline (A) non-zero for mdx={mdx_name} layout={layout_pin} " + f"(returncode={cp_a.returncode}); not a reuse-equivalence axis. " + f"stderr tail: {cp_a.stderr[-400:]}" + ) + + pins = _discover_all_frame_pins(seed_id) + if not pins: + pytest.skip( + f"no pinnable (unit_id, frame_template_id) pairs in baseline " + f"step06 for mdx={mdx_name} layout={layout_pin}; nothing to " + f"exercise on the override-frame surface" + ) + override_args = _frame_override_args(pins) + + # (B) full rerun with the discovered frame overrides — independent control. + full_id = _unique("full") + cp_b = _spawn_pipeline([str(mdx_path), full_id, *layout_args, *override_args]) + _assert_run_ok( + f"full rerun (B) mdx={mdx_name} layout={layout_pin} pins={len(pins)}", + cp_b, + ) + + # (C) --reuse-from seed with the same frame overrides — reuse path. + # NOTE: must NOT pass --override-layout here — u1 fail-closed guard + # rejects layout+reuse combination. Layout is restored from the Step 6 + # snapshot (u2 layout_preset_pre_override) instead. + reuse_id = _unique("reuse") + cp_c = _spawn_pipeline([ + str(mdx_path), + reuse_id, + "--reuse-from", seed_id, + *override_args, + ]) + _assert_run_ok( + f"reuse rerun (C) mdx={mdx_name} layout={layout_pin} pins={len(pins)}", + cp_c, + ) + + # Step 13 equivalence — apply whitelist + compare byte-for-byte. 
+ full_step13 = _read_step_artifact(full_id, "step13_render.json") + reuse_step13 = _read_step_artifact(reuse_id, "step13_render.json") + full_norm = _normalize_step13(full_step13, full_id) + reuse_norm = _normalize_step13(reuse_step13, reuse_id) + + assert full_norm == reuse_norm, ( + f"step13_render.json equivalence violated for IMP-43 #72 u7b " + f"(mdx={mdx_name}, layout={layout_pin}, full={full_id}, " + f"reuse={reuse_id}, seed={seed_id}, pins={pins}):\n" + f"--- full (normalized) ---\n" + f"{json.dumps(full_norm, ensure_ascii=False, indent=2)}\n" + f"--- reuse (normalized) ---\n" + f"{json.dumps(reuse_norm, ensure_ascii=False, indent=2)}" + ) diff --git a/tests/test_phase_z2_reuse_from_equivalence_unit.py b/tests/test_phase_z2_reuse_from_equivalence_unit.py new file mode 100644 index 0000000..f3410f2 --- /dev/null +++ b/tests/test_phase_z2_reuse_from_equivalence_unit.py @@ -0,0 +1,204 @@ +"""IMP-43 (#72) u7a — Fast CI equivalence test for full rerun vs ``--reuse-from``. + +u7a scope (per the Stage 2 Exit Report): + + * One mdx (``samples/mdx_batch/02.mdx``), one layout (auto), two + ``--override-frame`` pins self-discovered from the baseline's + ``step06_composition_plan.json`` (each pin re-states the unit's + own ``frame_template_id`` — semantically a no-op, but it + exercises the full ``--override-frame`` CLI surface through both + paths, satisfying the "two frames" axis of the Stage 2 plan). + * Three subprocess pipeline runs: + (A) baseline full run — no overrides — reuse seed + (B) full rerun with the two ``--override-frame`` pins — the + independent control path that does NOT touch ``--reuse-from`` + (C) ``--reuse-from `` with the same two + ``--override-frame`` pins — the reuse path + * Assert: ``step13_render.json`` from (B) and (C) is byte-equal modulo + the Stage 2 whitelist — only ``run_id`` (as a substring of + ``data.final_html_path``), ``timestamps``, and ``prev_run_id`` may + legitimately differ. 
``step13_render.json`` has no timestamps and + no ``prev_run_id`` field (the latter surfaces via the separate + ``_reuse_marker.json`` sidecar instead — out of scope for this + step13 equivalence axis), so the only effective normalization + target is the ``run_id`` substring inside ``data.final_html_path``. + +Per Stage 2 plan: the sweep equivalence coverage (3 layouts × 3 mdx × +all 32 frames) lives in u7b under ``pytest.mark.sweep`` — u7a stays +fast (3 pipeline runs on a single small mdx) so it can run in default +CI without an opt-in marker. + +Why mdx02: + * ``test_pipeline_smoke_imp85.py::test_non_vp_smoke_runs_clean`` already + pins mdx02 as a non-VP exit-0 path (the baseline (A) run must + exit 0 for the equivalence axis to even have something to + compare against). + * mdx04 / mdx05 are deliberately excluded — mdx04 routes zones to + ``adapter_needed`` per IMP-#85 u1 and mdx05 exits 1 with + ``EMPTY_SHELL_NO_CONTENT`` per IMP-#87 u3, neither of which gives + a stable step13 equivalence surface for a fast CI lock. 
+""" +from __future__ import annotations + +import json +import subprocess +import sys +import uuid +from pathlib import Path + +import pytest + +REPO_ROOT = Path(__file__).resolve().parents[1] +SAMPLES_DIR = REPO_ROOT / "samples" / "mdx_batch" +RUNS_DIR = REPO_ROOT / "data" / "runs" +MDX_FILENAME = "02.mdx" + + +def _unique_run_id(prefix: str) -> str: + return f"{prefix}_imp43_u7a_{uuid.uuid4().hex[:8]}" + + +def _spawn_pipeline(extra_args: list[str], timeout: int = 600) -> subprocess.CompletedProcess: + """Spawn ``python -m src.phase_z2_pipeline `` and capture I/O.""" + return subprocess.run( + [sys.executable, "-m", "src.phase_z2_pipeline", *extra_args], + capture_output=True, + text=True, + timeout=timeout, + cwd=str(REPO_ROOT), + ) + + +def _assert_run_ok(label: str, cp: subprocess.CompletedProcess) -> None: + assert cp.returncode == 0, ( + f"{label} pipeline returncode={cp.returncode}\n" + f"--- stderr tail ---\n{cp.stderr[-2000:]}\n" + f"--- stdout tail ---\n{cp.stdout[-2000:]}" + ) + + +def _read_step_artifact(run_id: str, fname: str) -> dict: + p = RUNS_DIR / run_id / "phase_z2" / "steps" / fname + assert p.is_file(), f"missing artifact: {p}" + return json.loads(p.read_text(encoding="utf-8")) + + +def _discover_two_frame_pins(seed_run_id: str) -> list[tuple[str, str]]: + """Self-discover two ``(unit_id, frame_template_id)`` pins from the + baseline's ``step06_composition_plan.json``. + + Schema source: ``src/phase_z2_pipeline.py`` ~L5530-L5560 — the step06 + artifact emits ``data.selected_units[*].{source_section_ids, + frame_template_id}``. ``unit_id`` is derived as + ``"+".join(source_section_ids)`` per the + ``--override-frame UNIT_ID=TEMPLATE_ID`` contract documented at + ``src/phase_z2_pipeline.py:7827-7832`` and computed by ``_unit_id`` + at ``src/phase_z2_pipeline.py:2328``. Pinning the unit's own + template is a no-op semantically but exercises the + ``--override-frame`` CLI surface end-to-end in both (B) and (C). 
+ """ + step06 = _read_step_artifact(seed_run_id, "step06_composition_plan.json") + selected_units = step06.get("data", {}).get("selected_units") or [] + pinnable: list[tuple[str, str]] = [] + for u in selected_units: + sids = u.get("source_section_ids") or [] + tpl_id = u.get("frame_template_id") + if not isinstance(sids, list) or not sids: + continue + if not isinstance(tpl_id, str) or not tpl_id: + continue + unit_id = "+".join(str(s) for s in sids) + if not unit_id: + continue + pinnable.append((unit_id, tpl_id)) + if len(pinnable) >= 2: + break + assert len(pinnable) >= 2, ( + f"baseline {seed_run_id} step06_composition_plan.json must expose " + f">= 2 (unit_id, frame_template_id) pairs for the u7a two-frames " + f"axis; got {pinnable}" + ) + return pinnable + + +def _frame_override_args(pins: list[tuple[str, str]]) -> list[str]: + out: list[str] = [] + for unit_id, tpl_id in pins: + out.extend(["--override-frame", f"{unit_id}={tpl_id}"]) + return out + + +def _normalize_step13(payload: dict, run_id: str) -> dict: + """Apply the Stage 2 equivalence whitelist to step13_render.json. + + Whitelist axes (Stage 2 plan §u7a): + * ``run_id`` — appears only as a substring of + ``data.final_html_path`` in the step13 schema + (``src/phase_z2_pipeline.py:7174-7192``). + * ``timestamps`` — ``_write_step_artifact`` + (``src/phase_z2_pipeline.py:3826``) does not + stamp a timestamp on the payload, so no + normalization is needed for this axis. + * ``prev_run_id`` — surfaces via ``_reuse_marker.json`` (separate + sidecar), NOT via step13_render.json. No + normalization needed on the step13 surface. + + Returns a deep copy of ``payload`` with the ``run_id`` substring of + ``data.final_html_path`` replaced by the sentinel ```` so + the (B) and (C) step13 payloads can be compared byte-for-byte. 
+ """ + normalized = json.loads(json.dumps(payload, ensure_ascii=False)) + data = normalized.get("data") + if isinstance(data, dict): + fhp = data.get("final_html_path") + if isinstance(fhp, str) and run_id in fhp: + data["final_html_path"] = fhp.replace(run_id, "") + return normalized + + +def test_full_rerun_vs_reuse_from_step13_equivalence_one_mdx_two_frames() -> None: + """Stage 2 §u7a binding contract: full rerun (B) with two + ``--override-frame`` pins and ``--reuse-from`` (C) with the same + pins yield byte-equal ``step13_render.json`` modulo the whitelist. + """ + mdx_path = SAMPLES_DIR / MDX_FILENAME + assert mdx_path.is_file(), f"sample missing: {mdx_path}" + + # (A) baseline full run — no overrides — reuse seed. + seed_id = _unique_run_id("seed") + cp_a = _spawn_pipeline([str(mdx_path), seed_id]) + _assert_run_ok("baseline (A)", cp_a) + + # Self-discover two (unit_id, frame_template_id) pins. + pins = _discover_two_frame_pins(seed_id) + override_args = _frame_override_args(pins) + + # (B) full rerun with the two frame overrides — independent control. + full_id = _unique_run_id("full") + cp_b = _spawn_pipeline([str(mdx_path), full_id, *override_args]) + _assert_run_ok("full rerun (B)", cp_b) + + # (C) --reuse-from seed with the same frame overrides — reuse path. + reuse_id = _unique_run_id("reuse") + cp_c = _spawn_pipeline([ + str(mdx_path), + reuse_id, + "--reuse-from", seed_id, + *override_args, + ]) + _assert_run_ok("reuse rerun (C)", cp_c) + + # Step 13 equivalence — apply whitelist + compare byte-for-byte. 
+ full_step13 = _read_step_artifact(full_id, "step13_render.json") + reuse_step13 = _read_step_artifact(reuse_id, "step13_render.json") + full_norm = _normalize_step13(full_step13, full_id) + reuse_norm = _normalize_step13(reuse_step13, reuse_id) + + assert full_norm == reuse_norm, ( + "step13_render.json equivalence violated for IMP-43 #72 u7a " + f"(full={full_id}, reuse={reuse_id}, seed={seed_id}, pins={pins}):\n" + f"--- full (normalized) ---\n" + f"{json.dumps(full_norm, ensure_ascii=False, indent=2)}\n" + f"--- reuse (normalized) ---\n" + f"{json.dumps(reuse_norm, ensure_ascii=False, indent=2)}" + ) diff --git a/tests/test_phase_z2_reuse_from_fail_closed.py b/tests/test_phase_z2_reuse_from_fail_closed.py new file mode 100644 index 0000000..bb18b8c --- /dev/null +++ b/tests/test_phase_z2_reuse_from_fail_closed.py @@ -0,0 +1,748 @@ +"""IMP-43 (#72) u4b — fail-closed wrapper tests for ``--reuse-from``. + +u4b scope (per the Stage 2 Exit Report): + +- Translate the u4 raise surface (``FileNotFoundError`` / + ``SnapshotValidationError`` / ``json.JSONDecodeError`` / ``OSError``) + into the CLI fail-closed contract: stderr message + ``sys.exit(2)``. +- Add the ``prev_run_dir == new_run_dir`` accidental-write guard BEFORE + any copy attempt (prev_run_dir must stay read-only). +- Add the missing-prev-run-dir surface (clean axis, not raw stack). +- Surface ``mdx_sha256 mismatch`` as its OWN axis (distinct from + generic snapshot validation failures). + +The signature threading + the in-``run_phase_z2_mvp1`` branch that +invokes the wrapper land in u5. u4b adds the wrapper function only. 
+ +Tested surface (``src/phase_z2_pipeline.py``): + * ``execute_reuse_from_or_fail_closed`` + * ``_abort_reuse_from`` + * ``_paths_equivalent`` + * ``REUSE_FAIL_CLOSED_AXES`` (closed enum) +""" +from __future__ import annotations + +import hashlib +import json +from dataclasses import dataclass, field +from pathlib import Path +from typing import Optional + +import pytest + +import src.phase_z2_pipeline as _pz2 +from src.phase_z2_reuse_snapshot import ( + SNAPSHOT_FILENAME, + SNAPSHOT_VERSION, + build_snapshot, +) + + +# -- synthetic snapshot inputs (mirror u4 test fixture) ------------------ + + +@dataclass +class _Section: + section_id: str + section_num: int + title: str + raw_content: str + heading_number: Optional[str] = None + v4_alias_keys: list = field(default_factory=list) + sub_sections: list = field(default_factory=list) + + +@dataclass +class _V4Candidate: + template_id: str + frame_id: str + frame_number: int + confidence: float + label: str + + +@dataclass +class _Unit: + source_section_ids: list + merge_type: str + frame_template_id: str + frame_id: str + frame_number: int + confidence: float + label: str + phase_z_status: str + raw_content: str + title: str + score: float + v4_rank: Optional[int] = 1 + selection_path: str = "rank_1" + fallback_reason: Optional[str] = None + rationale: dict = field(default_factory=dict) + auto_selectable: bool = True + filter_reasons: list = field(default_factory=list) + notes: list = field(default_factory=list) + v4_candidates: list = field(default_factory=list) + provisional: bool = False + + +def _mdx_text() -> str: + return "# Slide\n\n## 03-1 DX status\n\n- bullet one\n- bullet two\n" + + +def _build_canonical_snapshot(*, mdx_source_text: Optional[str] = None) -> dict: + text = mdx_source_text if mdx_source_text is not None else _mdx_text() + cand = _V4Candidate( + template_id="tpl_a", + frame_id="fid_a", + frame_number=13, + confidence=0.91, + label="use_as_is", + ) + section = _Section( + section_id="03-1", + 
section_num=1, + title="DX status", + raw_content="- bullet one\n- bullet two", + heading_number="3.1", + v4_alias_keys=["03-1.1"], + ) + unit = _Unit( + source_section_ids=["03-1"], + merge_type="single", + frame_template_id="tpl_a", + frame_id="fid_a", + frame_number=13, + confidence=0.91, + label="use_as_is", + phase_z_status="auto_renderable", + raw_content="- bullet one\n- bullet two", + title="DX status", + score=0.91, + v4_candidates=[cand], + ) + return build_snapshot( + mdx_sha256=hashlib.sha256(text.encode("utf-8")).hexdigest(), + slide_title="Slide", + slide_footer=None, + sections=[section], + stage0_adapter_diagnostics={"used": True, "fallback_reason": None}, + stage0_normalized_assets={"popups": [], "images": [], "tables": []}, + v4_evidence=[], + layout_preset_pre_override="single", + units=[unit], + comp_debug={}, + v4_fallback_traces={}, + ai_preflight={"enabled": False, "skipped": True}, + ) + + +def _seed_prev_run_dir(prev_run_dir: Path, *, snapshot: dict) -> None: + (prev_run_dir / "steps").mkdir(parents=True, exist_ok=True) + for fname in _pz2._REUSE_STEP_ARTIFACTS: + (prev_run_dir / "steps" / fname).write_text( + f'{{"name": "{fname}"}}' + if fname.endswith(".json") + else "raw mdx body bytes", + encoding="utf-8", + ) + (prev_run_dir / SNAPSHOT_FILENAME).write_text( + json.dumps(snapshot, ensure_ascii=False, indent=2), + encoding="utf-8", + ) + + +# -- REUSE_FAIL_CLOSED_AXES vocab lock ------------------------------------ + + +def test_fail_closed_axes_is_closed_enum(): + """The nine axes are the entire fail-closed vocabulary; if a new + axis lands without test coverage update, this lock breaks. + + ``reuse_copy_os_error`` / ``snapshot_read_os_error`` were added in + the Codex #6 stage_3_edit rewind to cover OSError != FNF that the + earlier u4b implementation let escape as a raw traceback. 
+ """ + assert _pz2.REUSE_FAIL_CLOSED_AXES == frozenset({ + "prev_run_dir_missing", + "prev_run_dir_equals_new_run_dir", + "reuse_artifact_missing", + "reuse_copy_os_error", + "snapshot_missing_after_copy", + "snapshot_corrupt_json", + "snapshot_read_os_error", + "mdx_sha256_mismatch", + "snapshot_validation_failed", + }) + + +# -- _abort_reuse_from ----------------------------------------------------- + + +def test_abort_reuse_from_exits_with_code_two(capsys): + with pytest.raises(SystemExit) as ei: + _pz2._abort_reuse_from( + axis="prev_run_dir_missing", + value="never_existed", + path="D:/nope", + upstream="--reuse-from CLI argument", + ) + assert ei.value.code == 2 + + +def test_abort_reuse_from_stderr_contains_value_path_upstream(capsys): + with pytest.raises(SystemExit): + _pz2._abort_reuse_from( + axis="prev_run_dir_missing", + value="never_existed", + path="D:/nope", + upstream="--reuse-from CLI argument", + ) + err = capsys.readouterr().err + assert "prev_run_dir_missing" in err + assert "value:" in err + assert "path:" in err + assert "upstream:" in err + assert "never_existed" in err + assert "D:/nope" in err + assert "--reuse-from CLI argument" in err + + +def test_abort_reuse_from_includes_reason_when_exc_passed(capsys): + """The optional ``exc`` field surfaces the underlying type + + message so operators can distinguish e.g. 
JSONDecodeError line/col + info from a generic 'snapshot broken'.""" + try: + raise ValueError("schema_version mismatch: expected 1, got 99") + except ValueError as exc: + with pytest.raises(SystemExit): + _pz2._abort_reuse_from( + axis="snapshot_validation_failed", + value=str(exc), + path="D:/some/path", + upstream="validate_snapshot", + exc=exc, + ) + err = capsys.readouterr().err + assert "reason:" in err + assert "ValueError" in err + assert "schema_version mismatch" in err + + +def test_abort_reuse_from_rejects_unknown_axis(): + """Unknown axis = programmer error, not user error; must trip + AssertionError, not silently emit a malformed stderr line.""" + with pytest.raises(AssertionError): + _pz2._abort_reuse_from( + axis="totally_made_up_axis", + value="x", + path="y", + upstream="z", + ) + + +# -- _paths_equivalent ----------------------------------------------------- + + +def test_paths_equivalent_same_path_returns_true(tmp_path: Path): + a = tmp_path / "x" / "y" + a.mkdir(parents=True) + assert _pz2._paths_equivalent(a, a) is True + + +def test_paths_equivalent_different_paths_returns_false(tmp_path: Path): + a = tmp_path / "alpha" + b = tmp_path / "beta" + a.mkdir() + b.mkdir() + assert _pz2._paths_equivalent(a, b) is False + + +def test_paths_equivalent_handles_nonexistent_paths(tmp_path: Path): + """``Path.resolve(strict=False)`` should still normalize ``..`` + even when the leaf does not yet exist (new_run_dir before mkdir).""" + a = tmp_path / "new_run" / "phase_z2" + b = tmp_path / "new_run" / "phase_z2" + assert _pz2._paths_equivalent(a, b) is True + + +# -- execute_reuse_from_or_fail_closed: happy path ----------------------- + + +def test_happy_path_returns_prev_run_dir_copied_snapshot( + tmp_path: Path, monkeypatch +): + text = _mdx_text() + runs_root = tmp_path / "runs" + prev_run_id = "prev_run_id_001" + prev_run_dir = runs_root / prev_run_id / "phase_z2" + new_run_dir = tmp_path / "new" / "phase_z2" + + snap = 
_build_canonical_snapshot(mdx_source_text=text) + _seed_prev_run_dir(prev_run_dir, snapshot=snap) + + monkeypatch.setattr(_pz2, "RUNS_DIR", runs_root) + + rv = _pz2.execute_reuse_from_or_fail_closed( + reuse_from=prev_run_id, + new_run_dir=new_run_dir, + mdx_source_text=text, + ) + prev_dir_ret, copied_ret, snap_ret = rv + + assert prev_dir_ret == prev_run_dir + assert SNAPSHOT_FILENAME in copied_ret + assert snap_ret["schema_version"] == SNAPSHOT_VERSION + # snapshot wrapper survives (value/source_path/upstream_step) + assert snap_ret["slide_title"]["value"] == "Slide" + + +# -- prev_run_dir_missing axis -------------------------------------------- + + +def test_prev_run_dir_missing_aborts(tmp_path: Path, monkeypatch, capsys): + runs_root = tmp_path / "runs" + runs_root.mkdir() + monkeypatch.setattr(_pz2, "RUNS_DIR", runs_root) + + with pytest.raises(SystemExit) as ei: + _pz2.execute_reuse_from_or_fail_closed( + reuse_from="does_not_exist_anywhere", + new_run_dir=tmp_path / "new" / "phase_z2", + mdx_source_text=_mdx_text(), + ) + assert ei.value.code == 2 + err = capsys.readouterr().err + assert "prev_run_dir_missing" in err + assert "does_not_exist_anywhere" in err + + +# -- prev_run_dir_equals_new_run_dir axis --------------------------------- + + +def test_prev_run_dir_equals_new_run_dir_aborts( + tmp_path: Path, monkeypatch, capsys +): + """Accidental collision: if the new run_id resolves to the same + phase_z2 dir as prev_run_id, the copy step would overwrite + prev_run_dir in place. u4b must reject BEFORE the copy attempt.""" + runs_root = tmp_path / "runs" + prev_run_id = "shared_run_id" + prev_run_dir = runs_root / prev_run_id / "phase_z2" + snap = _build_canonical_snapshot() + _seed_prev_run_dir(prev_run_dir, snapshot=snap) + monkeypatch.setattr(_pz2, "RUNS_DIR", runs_root) + + # new_run_dir resolves to the SAME phase_z2 dir as prev_run_dir. 
+ new_run_dir = prev_run_dir + + with pytest.raises(SystemExit) as ei: + _pz2.execute_reuse_from_or_fail_closed( + reuse_from=prev_run_id, + new_run_dir=new_run_dir, + mdx_source_text=_mdx_text(), + ) + assert ei.value.code == 2 + err = capsys.readouterr().err + assert "prev_run_dir_equals_new_run_dir" in err + + +def test_prev_run_dir_equals_new_run_dir_does_not_mutate_prev( + tmp_path: Path, monkeypatch +): + """Critical RO guarantee — the abort must fire BEFORE + ``_copy_reuse_artifacts_from_prev_run`` runs, so the seeded prev + artifact bytes survive untouched.""" + runs_root = tmp_path / "runs" + prev_run_id = "shared_run_id" + prev_run_dir = runs_root / prev_run_id / "phase_z2" + snap = _build_canonical_snapshot() + _seed_prev_run_dir(prev_run_dir, snapshot=snap) + monkeypatch.setattr(_pz2, "RUNS_DIR", runs_root) + + sentinel_text = '{"name": "step02_normalized.json"}' + target = prev_run_dir / "steps" / "step02_normalized.json" + assert target.read_text(encoding="utf-8") == sentinel_text + + with pytest.raises(SystemExit): + _pz2.execute_reuse_from_or_fail_closed( + reuse_from=prev_run_id, + new_run_dir=prev_run_dir, + mdx_source_text=_mdx_text(), + ) + # prev_run_dir bytes still intact. + assert target.read_text(encoding="utf-8") == sentinel_text + + +# -- reuse_artifact_missing axis ------------------------------------------ + + +def test_reuse_artifact_missing_aborts(tmp_path: Path, monkeypatch, capsys): + runs_root = tmp_path / "runs" + prev_run_id = "prev_run_001" + prev_run_dir = runs_root / prev_run_id / "phase_z2" + new_run_dir = tmp_path / "new" / "phase_z2" + snap = _build_canonical_snapshot() + _seed_prev_run_dir(prev_run_dir, snapshot=snap) + # Remove one required step file → triggers FileNotFoundError in + # _copy_reuse_artifacts_from_prev_run. 
+ (prev_run_dir / "steps" / "step05_v4_evidence.json").unlink() + monkeypatch.setattr(_pz2, "RUNS_DIR", runs_root) + + with pytest.raises(SystemExit) as ei: + _pz2.execute_reuse_from_or_fail_closed( + reuse_from=prev_run_id, + new_run_dir=new_run_dir, + mdx_source_text=_mdx_text(), + ) + assert ei.value.code == 2 + err = capsys.readouterr().err + assert "reuse_artifact_missing" in err + assert "step05_v4_evidence.json" in err + assert "reason:" in err + assert "FileNotFoundError" in err + + +def test_reuse_artifact_missing_snapshot_sidecar( + tmp_path: Path, monkeypatch, capsys +): + runs_root = tmp_path / "runs" + prev_run_id = "prev_run_002" + prev_run_dir = runs_root / prev_run_id / "phase_z2" + new_run_dir = tmp_path / "new" / "phase_z2" + snap = _build_canonical_snapshot() + _seed_prev_run_dir(prev_run_dir, snapshot=snap) + (prev_run_dir / SNAPSHOT_FILENAME).unlink() + monkeypatch.setattr(_pz2, "RUNS_DIR", runs_root) + + with pytest.raises(SystemExit) as ei: + _pz2.execute_reuse_from_or_fail_closed( + reuse_from=prev_run_id, + new_run_dir=new_run_dir, + mdx_source_text=_mdx_text(), + ) + assert ei.value.code == 2 + err = capsys.readouterr().err + assert "reuse_artifact_missing" in err + assert SNAPSHOT_FILENAME in err + + +# -- snapshot_corrupt_json axis ------------------------------------------- + + +def test_snapshot_corrupt_json_aborts(tmp_path: Path, monkeypatch, capsys): + runs_root = tmp_path / "runs" + prev_run_id = "prev_run_corrupt" + prev_run_dir = runs_root / prev_run_id / "phase_z2" + new_run_dir = tmp_path / "new" / "phase_z2" + snap = _build_canonical_snapshot() + _seed_prev_run_dir(prev_run_dir, snapshot=snap) + # Overwrite the snapshot with invalid JSON; copy will succeed, + # validate_snapshot will fail with JSONDecodeError (raised inside + # _load_and_validate_reuse_snapshot before validate_snapshot). 
+ (prev_run_dir / SNAPSHOT_FILENAME).write_text( + "{ not valid json", encoding="utf-8" + ) + monkeypatch.setattr(_pz2, "RUNS_DIR", runs_root) + + with pytest.raises(SystemExit) as ei: + _pz2.execute_reuse_from_or_fail_closed( + reuse_from=prev_run_id, + new_run_dir=new_run_dir, + mdx_source_text=_mdx_text(), + ) + assert ei.value.code == 2 + err = capsys.readouterr().err + assert "snapshot_corrupt_json" in err + assert SNAPSHOT_FILENAME in err + assert "JSONDecodeError" in err + + +# -- mdx_sha256_mismatch axis (own surface) ------------------------------- + + +def test_mdx_sha256_mismatch_aborts_with_own_axis( + tmp_path: Path, monkeypatch, capsys +): + """Distinct from generic snapshot_validation_failed — operator + must be able to tell 'wrong --mdx-path for this prev_run_id' apart + from 'snapshot file is broken'.""" + runs_root = tmp_path / "runs" + prev_run_id = "prev_run_diff_mdx" + prev_run_dir = runs_root / prev_run_id / "phase_z2" + new_run_dir = tmp_path / "new" / "phase_z2" + + text_a = "# Slide A\n" + text_b = "# Slide B (different bytes)\n" + snap = _build_canonical_snapshot(mdx_source_text=text_a) + _seed_prev_run_dir(prev_run_dir, snapshot=snap) + monkeypatch.setattr(_pz2, "RUNS_DIR", runs_root) + + with pytest.raises(SystemExit) as ei: + _pz2.execute_reuse_from_or_fail_closed( + reuse_from=prev_run_id, + new_run_dir=new_run_dir, + mdx_source_text=text_b, + ) + assert ei.value.code == 2 + err = capsys.readouterr().err + assert "mdx_sha256_mismatch" in err + # Must NOT be reported as generic snapshot_validation_failed — + # the mdx-sha case has its own axis. 
+ assert "snapshot_validation_failed" not in err + assert "mdx_source_text" in err or "mdx_sha256" in err + + +# -- snapshot_validation_failed axis -------------------------------------- + + +def test_snapshot_validation_failed_schema_version_aborts( + tmp_path: Path, monkeypatch, capsys +): + runs_root = tmp_path / "runs" + prev_run_id = "prev_run_schema_mismatch" + prev_run_dir = runs_root / prev_run_id / "phase_z2" + new_run_dir = tmp_path / "new" / "phase_z2" + + text = _mdx_text() + snap = _build_canonical_snapshot(mdx_source_text=text) + snap["schema_version"] = SNAPSHOT_VERSION + 1 + _seed_prev_run_dir(prev_run_dir, snapshot=snap) + monkeypatch.setattr(_pz2, "RUNS_DIR", runs_root) + + with pytest.raises(SystemExit) as ei: + _pz2.execute_reuse_from_or_fail_closed( + reuse_from=prev_run_id, + new_run_dir=new_run_dir, + mdx_source_text=text, + ) + assert ei.value.code == 2 + err = capsys.readouterr().err + assert "snapshot_validation_failed" in err + assert "schema_version" in err + # NOT the mdx-sha axis — separate fingerprint. 
+ assert "mdx_sha256_mismatch" not in err + + +def test_snapshot_validation_failed_missing_required_key_aborts( + tmp_path: Path, monkeypatch, capsys +): + runs_root = tmp_path / "runs" + prev_run_id = "prev_run_missing_key" + prev_run_dir = runs_root / prev_run_id / "phase_z2" + new_run_dir = tmp_path / "new" / "phase_z2" + + text = _mdx_text() + snap = _build_canonical_snapshot(mdx_source_text=text) + del snap["units"] + _seed_prev_run_dir(prev_run_dir, snapshot=snap) + monkeypatch.setattr(_pz2, "RUNS_DIR", runs_root) + + with pytest.raises(SystemExit) as ei: + _pz2.execute_reuse_from_or_fail_closed( + reuse_from=prev_run_id, + new_run_dir=new_run_dir, + mdx_source_text=text, + ) + assert ei.value.code == 2 + err = capsys.readouterr().err + assert "snapshot_validation_failed" in err + assert "units" in err + + +# -- reuse_copy_os_error axis (OSError != FileNotFoundError) ------------- + + +def test_copy_os_error_aborts_with_own_axis( + tmp_path: Path, monkeypatch, capsys +): + """Codex #6 stage_3_edit fixup — OSError raised inside + ``_copy_reuse_artifacts_from_prev_run`` (e.g. PermissionError on + the destination, OSError(errno.EXDEV) on cross-device copy) must + translate to fail-closed (stderr + SystemExit(2)) instead of + escaping as a raw traceback. + + Implementation must catch ``FileNotFoundError`` BEFORE the bare + ``OSError`` handler (FNF is a subclass of OSError), otherwise the + missing-artifact case would be mis-bucketed under + ``reuse_copy_os_error``. 
+ """ + runs_root = tmp_path / "runs" + prev_run_id = "prev_run_perm_denied" + prev_run_dir = runs_root / prev_run_id / "phase_z2" + new_run_dir = tmp_path / "new" / "phase_z2" + snap = _build_canonical_snapshot() + _seed_prev_run_dir(prev_run_dir, snapshot=snap) + monkeypatch.setattr(_pz2, "RUNS_DIR", runs_root) + + def _raise_perm(src, dst, *args, **kwargs): + raise PermissionError(f"simulated permission denied: {dst}") + + monkeypatch.setattr(_pz2.shutil, "copyfile", _raise_perm) + + with pytest.raises(SystemExit) as ei: + _pz2.execute_reuse_from_or_fail_closed( + reuse_from=prev_run_id, + new_run_dir=new_run_dir, + mdx_source_text=_mdx_text(), + ) + assert ei.value.code == 2 + err = capsys.readouterr().err + assert "reuse_copy_os_error" in err + assert "value:" in err + assert "path:" in err + assert "upstream:" in err + assert "reason:" in err + assert "PermissionError" in err + assert "simulated permission denied" in err + # Must NOT be mis-bucketed as the missing-artifact case. + assert "reuse_artifact_missing" not in err + + +def test_copy_filenotfounderror_still_uses_artifact_missing_axis( + tmp_path: Path, monkeypatch, capsys +): + """Subclass ordering regression guard — ``FileNotFoundError`` IS an + ``OSError`` subclass. If the bare-OSError handler ever moves above + the FNF handler, the missing-artifact case would be mis-bucketed + under ``reuse_copy_os_error``; this test pins the dispatch. 
+ """ + runs_root = tmp_path / "runs" + prev_run_id = "prev_run_fnf_ordering" + prev_run_dir = runs_root / prev_run_id / "phase_z2" + new_run_dir = tmp_path / "new" / "phase_z2" + snap = _build_canonical_snapshot() + _seed_prev_run_dir(prev_run_dir, snapshot=snap) + (prev_run_dir / "steps" / "step05_v4_evidence.json").unlink() + monkeypatch.setattr(_pz2, "RUNS_DIR", runs_root) + + with pytest.raises(SystemExit): + _pz2.execute_reuse_from_or_fail_closed( + reuse_from=prev_run_id, + new_run_dir=new_run_dir, + mdx_source_text=_mdx_text(), + ) + err = capsys.readouterr().err + assert "reuse_artifact_missing" in err + assert "reuse_copy_os_error" not in err + + +# -- snapshot_read_os_error axis (OSError != FileNotFoundError) ---------- + + +def test_snapshot_read_os_error_aborts_with_own_axis( + tmp_path: Path, monkeypatch, capsys +): + """OSError raised inside ``_load_and_validate_reuse_snapshot`` + (e.g. PermissionError on ``Path.read_text``, IsADirectoryError if + the snapshot path resolves to a directory after copy) must + translate to fail-closed instead of escaping as a raw traceback. 
+ """ + runs_root = tmp_path / "runs" + prev_run_id = "prev_run_snapshot_perm" + prev_run_dir = runs_root / prev_run_id / "phase_z2" + new_run_dir = tmp_path / "new" / "phase_z2" + snap = _build_canonical_snapshot() + _seed_prev_run_dir(prev_run_dir, snapshot=snap) + monkeypatch.setattr(_pz2, "RUNS_DIR", runs_root) + + def _raise_perm(*args, **kwargs): + raise PermissionError("simulated read denied on snapshot") + + monkeypatch.setattr( + _pz2, "_load_and_validate_reuse_snapshot", _raise_perm + ) + + with pytest.raises(SystemExit) as ei: + _pz2.execute_reuse_from_or_fail_closed( + reuse_from=prev_run_id, + new_run_dir=new_run_dir, + mdx_source_text=_mdx_text(), + ) + assert ei.value.code == 2 + err = capsys.readouterr().err + assert "snapshot_read_os_error" in err + assert "value:" in err + assert "path:" in err + assert "upstream:" in err + assert "reason:" in err + assert "PermissionError" in err + assert "simulated read denied on snapshot" in err + # Must NOT be mis-bucketed as missing-after-copy or corrupt-json. + assert "snapshot_missing_after_copy" not in err + assert "snapshot_corrupt_json" not in err + + +def test_snapshot_filenotfounderror_still_uses_missing_after_copy_axis( + tmp_path: Path, monkeypatch, capsys +): + """Subclass ordering regression guard for the load surface — FNF + must keep its own ``snapshot_missing_after_copy`` axis even though + the new bare-OSError branch sits below it. 
+ """ + runs_root = tmp_path / "runs" + prev_run_id = "prev_run_load_fnf_ordering" + prev_run_dir = runs_root / prev_run_id / "phase_z2" + new_run_dir = tmp_path / "new" / "phase_z2" + snap = _build_canonical_snapshot() + _seed_prev_run_dir(prev_run_dir, snapshot=snap) + monkeypatch.setattr(_pz2, "RUNS_DIR", runs_root) + + def _raise_fnf(*args, **kwargs): + raise FileNotFoundError("simulated FNF on snapshot read") + + monkeypatch.setattr( + _pz2, "_load_and_validate_reuse_snapshot", _raise_fnf + ) + + with pytest.raises(SystemExit): + _pz2.execute_reuse_from_or_fail_closed( + reuse_from=prev_run_id, + new_run_dir=new_run_dir, + mdx_source_text=_mdx_text(), + ) + err = capsys.readouterr().err + assert "snapshot_missing_after_copy" in err + assert "snapshot_read_os_error" not in err + + +# -- module surface anchor ------------------------------------------------ + + +def test_pipeline_exposes_u4b_surface(): + """u5 wires ``execute_reuse_from_or_fail_closed`` into the entry + point — the public callable + the closed-axis vocabulary must + remain module-level attributes.""" + for name in ( + "execute_reuse_from_or_fail_closed", + "_abort_reuse_from", + "_paths_equivalent", + "REUSE_FAIL_CLOSED_AXES", + ): + assert hasattr(_pz2, name), f"u4b surface missing: {name}" + + +def test_pipeline_run_signature_reuse_from_threaded_after_u5(): + """u5 has now threaded ``reuse_from`` into ``run_phase_z2_mvp1`` as + a keyword-only parameter with default ``None``. The previous + ``until_u5`` lock has flipped — this forward-direction lock + ensures the kwarg never silently drifts (positional promotion, + default change to a string, kind change). 
Mirror of the + equivalent lock in test_phase_z2_reuse_from_entry.py and + test_phase_z2_cli_reuse_from.py — kept in this file too so the + fail-closed regression suite is self-contained.""" + import inspect + + sig = inspect.signature(_pz2.run_phase_z2_mvp1) + assert "reuse_from" in sig.parameters, ( + "u5 must thread reuse_from into run_phase_z2_mvp1 — kwarg missing. " + f"current params: {list(sig.parameters)}" + ) + param = sig.parameters["reuse_from"] + assert param.kind is inspect.Parameter.KEYWORD_ONLY, ( + f"reuse_from must be keyword-only (after the ``*`` barrier); " + f"got kind={param.kind}" + ) + assert param.default is None, ( + f"reuse_from must default to None to preserve pre-u5 behaviour; " + f"got default={param.default!r}" + ) diff --git a/tests/test_phase_z2_reuse_snapshot.py b/tests/test_phase_z2_reuse_snapshot.py new file mode 100644 index 0000000..6c15573 --- /dev/null +++ b/tests/test_phase_z2_reuse_snapshot.py @@ -0,0 +1,493 @@ +"""IMP-43 (#72) u2 — unit tests for ``src.phase_z2_reuse_snapshot``. + +Scope mirror of the production module (Stage 2 u2): + +* ``build_snapshot`` shape, provenance, JSON round-trip, required keys. +* ``serialize_section`` / ``serialize_unit`` field preservation, including + the duck-typed ``v4_candidates`` shape (template_id / frame_id / + frame_number / confidence / label). +* ``validate_snapshot`` fail-closed paths: non-dict input, schema + version mismatch, missing/empty/non-string ``mdx_sha256``, sha + mismatch, missing required keys, unwrapped wrapper, wrapper missing + a provenance field. +* Module-level constants exposed for u3 / u4 / u4b consumers. + +The tests use synthetic duck-typed dataclasses so the snapshot module's +external surface is exercised without coupling to the production +``MdxSection`` / ``CompositionUnit`` / ``V4Match`` dataclass layouts. +That mirrors the production module's intentional duck-typing (no +imports from ``phase_z2_pipeline`` / ``phase_z2_composition``). 
+""" +from __future__ import annotations + +import json +from dataclasses import dataclass, field +from typing import Any, Optional + +import pytest + +from src.phase_z2_reuse_snapshot import ( + REQUIRED_TOP_LEVEL_KEYS, + SNAPSHOT_FILENAME, + SNAPSHOT_VERSION, + SnapshotValidationError, + build_snapshot, + serialize_section, + serialize_unit, + validate_snapshot, +) + + +# -- synthetic duck-typed inputs ------------------------------------------ + + +@dataclass +class _Section: + section_id: str + section_num: int + title: str + raw_content: str + heading_number: Optional[str] = None + v4_alias_keys: list = field(default_factory=list) + sub_sections: list = field(default_factory=list) + + +@dataclass +class _V4Candidate: + template_id: str + frame_id: str + frame_number: int + confidence: float + label: str + v4_rank: Optional[int] = None + + +@dataclass +class _Unit: + source_section_ids: list + merge_type: str + frame_template_id: str + frame_id: str + frame_number: int + confidence: float + label: str + phase_z_status: str + raw_content: str + title: str + score: float + v4_rank: Optional[int] = 1 + selection_path: str = "rank_1" + fallback_reason: Optional[str] = None + rationale: dict = field(default_factory=dict) + auto_selectable: bool = True + filter_reasons: list = field(default_factory=list) + notes: list = field(default_factory=list) + v4_candidates: list = field(default_factory=list) + provisional: bool = False + + +def _make_section(**overrides: Any) -> _Section: + base = dict( + section_id="03-1", + section_num=1, + title="DX status", + raw_content="- bullet one\n- bullet two", + ) + base.update(overrides) + return _Section(**base) + + +def _make_unit(**overrides: Any) -> _Unit: + cand = _V4Candidate( + template_id="tpl_a", + frame_id="fid_a", + frame_number=13, + confidence=0.91, + label="use_as_is", + ) + base: dict[str, Any] = dict( + source_section_ids=["03-1"], + merge_type="single", + frame_template_id="tpl_a", + frame_id="fid_a", + 
frame_number=13, + confidence=0.91, + label="use_as_is", + phase_z_status="auto_renderable", + raw_content="- bullet one\n- bullet two", + title="DX status", + score=0.91, + v4_candidates=[cand], + ) + base.update(overrides) + return _Unit(**base) + + +def _make_build_kwargs(**overrides: Any) -> dict[str, Any]: + kwargs: dict[str, Any] = dict( + mdx_sha256="a" * 64, + slide_title="Title", + slide_footer="Footer", + sections=[_make_section()], + stage0_adapter_diagnostics={"used": True, "fallback_reason": None}, + stage0_normalized_assets={"popups": [], "images": [], "tables": []}, + v4_evidence=[{"section_id": "03-1", "v4_candidates": []}], + layout_preset_pre_override="horizontal-2", + units=[_make_unit()], + comp_debug={"v4_fallback_summary": {"fallback_used_count": 0}}, + v4_fallback_traces={"03-1": {"selection_path": "rank_1"}}, + ai_preflight={"enabled": False, "skipped": True}, + ) + kwargs.update(overrides) + return kwargs + + +# -- module constants ----------------------------------------------------- + + +def test_snapshot_filename_constant(): + assert SNAPSHOT_FILENAME == "_reuse_snapshot.json" + + +def test_snapshot_version_is_positive_int(): + assert isinstance(SNAPSHOT_VERSION, int) + assert SNAPSHOT_VERSION >= 1 + + +def test_required_keys_include_contract_and_payload(): + # Bare contract / integrity keys. + assert "schema_version" in REQUIRED_TOP_LEVEL_KEYS + assert "mdx_sha256" in REQUIRED_TOP_LEVEL_KEYS + # Payload axes per Stage 2 plan. 
+ for k in ( + "slide_title", + "slide_footer", + "sections", + "stage0_adapter_diagnostics", + "stage0_normalized_assets", + "v4_evidence", + "layout_preset_pre_override", + "units", + "comp_debug", + "v4_fallback_traces", + "ai_preflight", + ): + assert k in REQUIRED_TOP_LEVEL_KEYS, f"missing from REQUIRED_TOP_LEVEL_KEYS: {k}" + + +# -- build_snapshot ------------------------------------------------------- + + +def test_build_snapshot_round_trips_through_json(): + snap = build_snapshot(**_make_build_kwargs()) + payload = json.dumps(snap) + loaded = json.loads(payload) + assert loaded["schema_version"] == SNAPSHOT_VERSION + assert loaded["mdx_sha256"] == "a" * 64 + + +def test_build_snapshot_has_all_required_keys(): + snap = build_snapshot(**_make_build_kwargs()) + for key in REQUIRED_TOP_LEVEL_KEYS: + assert key in snap, f"build_snapshot missing required key: {key}" + + +def test_build_snapshot_bare_keys_are_unwrapped_scalars(): + snap = build_snapshot(**_make_build_kwargs()) + assert snap["schema_version"] == SNAPSHOT_VERSION + assert snap["mdx_sha256"] == "a" * 64 + # bare keys MUST NOT be wrapped — u4b mdx_sha256 check reads directly. + assert not isinstance(snap["schema_version"], dict) + assert not isinstance(snap["mdx_sha256"], dict) + + +def test_build_snapshot_provenance_wrapper_shape(): + snap = build_snapshot(**_make_build_kwargs()) + bare = {"schema_version", "mdx_sha256"} + for key, entry in snap.items(): + if key in bare: + continue + assert isinstance(entry, dict), f"{key} is not wrapped" + assert set(entry.keys()) == {"value", "source_path", "upstream_step"}, key + assert isinstance(entry["source_path"], str) and entry["source_path"] + assert isinstance(entry["upstream_step"], str) + assert entry["upstream_step"].startswith("step"), entry["upstream_step"] + + +def test_build_snapshot_upstream_steps_stay_inside_reuse_boundary(): + """No ``upstream_step`` may point outside the Step 0/2/5/6 reuse + boundary (Stage 1 root_cause). A drift to e.g. 
``step09`` would + silently invite work outside the reuse window — fail loudly. + + Step 01's contribution is the ``mdx_sha256`` integrity key (a bare + contract scalar with no wrapper) so step01 does not need to appear + in payload provenance. + """ + snap = build_snapshot(**_make_build_kwargs()) + allowed = {"step00", "step02", "step05", "step06"} + for key, entry in snap.items(): + if key in {"schema_version", "mdx_sha256"}: + continue + assert entry["upstream_step"] in allowed, ( + f"key {key!r}: upstream_step {entry['upstream_step']!r} outside reuse boundary" + ) + + +def test_build_snapshot_units_carry_v4_candidates(): + snap = build_snapshot(**_make_build_kwargs()) + units = snap["units"]["value"] + assert len(units) == 1 + assert units[0]["v4_candidates"][0]["template_id"] == "tpl_a" + assert units[0]["v4_candidates"][0]["frame_number"] == 13 + assert units[0]["v4_candidates"][0]["confidence"] == pytest.approx(0.91) + + +def test_build_snapshot_sections_preserve_alias_keys_and_subsections(): + sec = _make_section( + section_id="04-2", + v4_alias_keys=["04-2.1"], + sub_sections=[{"id": "04-2-sub-1"}], + heading_number="2.1", + ) + snap = build_snapshot(**_make_build_kwargs(sections=[sec])) + payload = snap["sections"]["value"] + assert payload[0]["section_id"] == "04-2" + assert payload[0]["v4_alias_keys"] == ["04-2.1"] + assert payload[0]["sub_sections"] == [{"id": "04-2-sub-1"}] + assert payload[0]["heading_number"] == "2.1" + + +def test_build_snapshot_units_provenance_points_at_step06(): + snap = build_snapshot(**_make_build_kwargs()) + assert "step06_composition_plan.json" in snap["units"]["source_path"] + assert snap["units"]["upstream_step"] == "step06" + + +def test_build_snapshot_v4_evidence_provenance_points_at_step05(): + snap = build_snapshot(**_make_build_kwargs()) + assert "step05_v4_evidence.json" in snap["v4_evidence"]["source_path"] + assert snap["v4_evidence"]["upstream_step"] == "step05" + + +def 
test_build_snapshot_ai_preflight_provenance_points_at_step00(): + snap = build_snapshot(**_make_build_kwargs()) + assert "step00_preconditions.json" in snap["ai_preflight"]["source_path"] + assert snap["ai_preflight"]["upstream_step"] == "step00" + + +def test_build_snapshot_rejects_unjsonable_input(): + bad_unit = _make_unit() + bad_unit.notes.append(object()) # not JSON-safe + with pytest.raises(TypeError): + build_snapshot(**_make_build_kwargs(units=[bad_unit])) + + +def test_build_snapshot_handles_none_optional_fields(): + snap = build_snapshot( + **_make_build_kwargs( + slide_title=None, + slide_footer=None, + stage0_adapter_diagnostics=None, + stage0_normalized_assets=None, + comp_debug=None, + v4_fallback_traces=None, + ai_preflight=None, + ) + ) + # None inputs land as None / {} consistently — never raise. + assert snap["slide_title"]["value"] is None + assert snap["slide_footer"]["value"] is None + assert snap["stage0_adapter_diagnostics"]["value"] == {} + assert snap["stage0_normalized_assets"]["value"] == {} + assert snap["comp_debug"]["value"] == {} + assert snap["v4_fallback_traces"]["value"] == {} + assert snap["ai_preflight"]["value"] == {} + + +# -- serializer helpers --------------------------------------------------- + + +def test_serialize_section_preserves_all_documented_fields(): + sec = _make_section( + heading_number="1.1", + v4_alias_keys=["03-1.x"], + sub_sections=[{"id": "s"}], + ) + out = serialize_section(sec) + assert out["section_id"] == "03-1" + assert out["section_num"] == 1 + assert out["title"] == "DX status" + assert out["raw_content"].startswith("- bullet") + assert out["heading_number"] == "1.1" + assert out["v4_alias_keys"] == ["03-1.x"] + assert out["sub_sections"] == [{"id": "s"}] + + +def test_serialize_section_works_with_missing_optional_attrs(): + class _Minimal: + section_id = "x" + section_num = 0 + title = "t" + raw_content = "r" + out = serialize_section(_Minimal()) + assert out["heading_number"] is None + assert 
out["v4_alias_keys"] == [] + assert out["sub_sections"] == [] + + +def test_serialize_unit_v4_candidates_unwrap_to_named_attrs(): + unit = _make_unit() + out = serialize_unit(unit) + cand = out["v4_candidates"][0] + assert cand == { + "template_id": "tpl_a", + "frame_id": "fid_a", + "frame_number": 13, + "confidence": pytest.approx(0.91), + "label": "use_as_is", + # u4 follow-up — Step 9 application-plan payload reads + # ``c.v4_rank`` off each rehydrated candidate. Snapshot + # serializer persists it via ``getattr(c, 'v4_rank', None)`` so + # legacy duck types (no v4_rank attr) get None and modern V4Match + # instances carry their rank (1/2/3/...). + "v4_rank": None, + } + + +def test_serialize_unit_v4_candidates_persist_v4_rank_when_present(): + """A v4_candidate with v4_rank=2 (V4Match-shape duck type) round-trips.""" + ranked_cand = _V4Candidate( + template_id="tpl_b", + frame_id="fid_b", + frame_number=14, + confidence=0.82, + label="light_edit", + v4_rank=2, + ) + unit = _make_unit(v4_candidates=[ranked_cand]) + out = serialize_unit(unit) + assert out["v4_candidates"][0]["v4_rank"] == 2 + + +def test_serialize_unit_handles_empty_v4_candidates(): + unit = _make_unit(v4_candidates=[]) + out = serialize_unit(unit) + assert out["v4_candidates"] == [] + + +def test_serialize_unit_provisional_default_false(): + unit = _make_unit() + assert serialize_unit(unit)["provisional"] is False + + +def test_serialize_unit_provisional_true_preserved(): + unit = _make_unit(provisional=True) + assert serialize_unit(unit)["provisional"] is True + + +def test_serialize_unit_round_trips_through_json(): + out = serialize_unit(_make_unit()) + reloaded = json.loads(json.dumps(out)) + assert reloaded["source_section_ids"] == ["03-1"] + assert reloaded["frame_template_id"] == "tpl_a" + + +# -- validate_snapshot ---------------------------------------------------- + + +def test_validate_snapshot_accepts_well_formed(): + snap = build_snapshot(**_make_build_kwargs()) + 
validate_snapshot(snap, expected_mdx_sha256="a" * 64) + + +def test_validate_snapshot_rejects_non_dict_input(): + with pytest.raises(SnapshotValidationError): + validate_snapshot("not a dict", expected_mdx_sha256="a" * 64) + + +def test_validate_snapshot_rejects_version_mismatch(): + snap = build_snapshot(**_make_build_kwargs()) + snap["schema_version"] = SNAPSHOT_VERSION + 999 + with pytest.raises(SnapshotValidationError) as exc: + validate_snapshot(snap, expected_mdx_sha256="a" * 64) + assert "schema_version" in str(exc.value) + + +def test_validate_snapshot_rejects_missing_sha(): + snap = build_snapshot(**_make_build_kwargs()) + del snap["mdx_sha256"] + with pytest.raises(SnapshotValidationError) as exc: + validate_snapshot(snap, expected_mdx_sha256="a" * 64) + assert "mdx_sha256" in str(exc.value) + + +def test_validate_snapshot_rejects_empty_sha(): + snap = build_snapshot(**_make_build_kwargs()) + snap["mdx_sha256"] = "" + with pytest.raises(SnapshotValidationError) as exc: + validate_snapshot(snap, expected_mdx_sha256="a" * 64) + assert "mdx_sha256" in str(exc.value) + + +def test_validate_snapshot_rejects_non_string_sha(): + snap = build_snapshot(**_make_build_kwargs()) + snap["mdx_sha256"] = 12345 + with pytest.raises(SnapshotValidationError) as exc: + validate_snapshot(snap, expected_mdx_sha256="a" * 64) + assert "mdx_sha256" in str(exc.value) + + +def test_validate_snapshot_rejects_sha_mismatch(): + snap = build_snapshot(**_make_build_kwargs()) + with pytest.raises(SnapshotValidationError) as exc: + validate_snapshot(snap, expected_mdx_sha256="b" * 64) + assert "mdx_sha256 mismatch" in str(exc.value) + + +def test_validate_snapshot_rejects_missing_required_key(): + snap = build_snapshot(**_make_build_kwargs()) + del snap["units"] + with pytest.raises(SnapshotValidationError) as exc: + validate_snapshot(snap, expected_mdx_sha256="a" * 64) + assert "units" in str(exc.value) + + +def test_validate_snapshot_rejects_unwrapped_payload_key(): + snap = 
build_snapshot(**_make_build_kwargs()) + snap["units"] = "not a dict" + with pytest.raises(SnapshotValidationError) as exc: + validate_snapshot(snap, expected_mdx_sha256="a" * 64) + assert "units" in str(exc.value) + + +def test_validate_snapshot_rejects_wrapper_missing_value(): + snap = build_snapshot(**_make_build_kwargs()) + snap["units"] = {"source_path": "x", "upstream_step": "step06"} + with pytest.raises(SnapshotValidationError) as exc: + validate_snapshot(snap, expected_mdx_sha256="a" * 64) + assert "value" in str(exc.value) + + +def test_validate_snapshot_rejects_wrapper_missing_source_path(): + snap = build_snapshot(**_make_build_kwargs()) + snap["units"] = {"value": [], "upstream_step": "step06"} + with pytest.raises(SnapshotValidationError) as exc: + validate_snapshot(snap, expected_mdx_sha256="a" * 64) + assert "source_path" in str(exc.value) + + +def test_validate_snapshot_rejects_wrapper_missing_upstream_step(): + snap = build_snapshot(**_make_build_kwargs()) + snap["units"] = {"value": [], "source_path": "x"} + with pytest.raises(SnapshotValidationError) as exc: + validate_snapshot(snap, expected_mdx_sha256="a" * 64) + assert "upstream_step" in str(exc.value) + + +def test_validate_snapshot_error_subclasses_value_error(): + snap = build_snapshot(**_make_build_kwargs()) + snap["schema_version"] = 999 + # u4b will pre-catch SnapshotValidationError, but the broader + # `except ValueError` net must still pick this up. + with pytest.raises(ValueError): + validate_snapshot(snap, expected_mdx_sha256="a" * 64) diff --git a/tests/test_phase_z2_reuse_snapshot_write.py b/tests/test_phase_z2_reuse_snapshot_write.py new file mode 100644 index 0000000..3f5588a --- /dev/null +++ b/tests/test_phase_z2_reuse_snapshot_write.py @@ -0,0 +1,282 @@ +"""IMP-43 (#72) u3 — focused tests for the Step 6 reuse snapshot writer. 
+ +u3 scope (per the Stage 2 Exit Report): + +- ``_write_reuse_snapshot`` writes ``run_dir/_reuse_snapshot.json`` *after* + the Step 6 artifact lands; failure WARNS and CONTINUES (the helper does + NOT raise out of the main pipeline run). +- The Step 6 artifact data dict records the run_dir-relative sidecar path + as ``data.reuse_snapshot_path`` (additive informational field, always + set to ``SNAPSHOT_FILENAME`` regardless of write success — u4 will + fail-closed on missing / invalid sidecar via u2's ``validate_snapshot``). + +The helper is tested in isolation (no full pipeline run) — pipeline call +site presence is asserted structurally so we exercise behaviour without +re-running Step 0~6 inside the test process. End-to-end equivalence under +``--reuse-from`` is u7a / u7b scope. +""" +from __future__ import annotations + +import json +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any, Optional + +import pytest + +import src.phase_z2_pipeline as _pz2 +from src.phase_z2_reuse_snapshot import ( + SNAPSHOT_FILENAME, + SNAPSHOT_VERSION, + SnapshotValidationError, + validate_snapshot, +) + + +# -- synthetic duck-typed inputs ------------------------------------------ + + +@dataclass +class _Section: + section_id: str + section_num: int + title: str + raw_content: str + heading_number: Optional[str] = None + v4_alias_keys: list = field(default_factory=list) + sub_sections: list = field(default_factory=list) + + +@dataclass +class _V4Candidate: + template_id: str + frame_id: str + frame_number: int + confidence: float + label: str + + +@dataclass +class _Unit: + source_section_ids: list + merge_type: str + frame_template_id: str + frame_id: str + frame_number: int + confidence: float + label: str + phase_z_status: str + raw_content: str + title: str + score: float + v4_rank: Optional[int] = 1 + selection_path: str = "rank_1" + fallback_reason: Optional[str] = None + rationale: dict = field(default_factory=dict) + auto_selectable: 
bool = True + filter_reasons: list = field(default_factory=list) + notes: list = field(default_factory=list) + v4_candidates: list = field(default_factory=list) + provisional: bool = False + + +def _make_kwargs(**overrides: Any) -> dict[str, Any]: + cand = _V4Candidate( + template_id="tpl_a", + frame_id="fid_a", + frame_number=13, + confidence=0.91, + label="use_as_is", + ) + section = _Section( + section_id="03-1", + section_num=1, + title="DX status", + raw_content="- bullet one\n- bullet two", + ) + unit = _Unit( + source_section_ids=["03-1"], + merge_type="single", + frame_template_id="tpl_a", + frame_id="fid_a", + frame_number=13, + confidence=0.91, + label="use_as_is", + phase_z_status="auto_renderable", + raw_content="- bullet one\n- bullet two", + title="DX status", + score=0.91, + v4_candidates=[cand], + ) + kwargs: dict[str, Any] = dict( + mdx_source_text="# Slide\n\n## 03-1 DX status\n\n- bullet one\n- bullet two\n", + slide_title="Slide", + slide_footer=None, + sections=[section], + stage0_adapter_diagnostics={"used": True, "fallback_reason": None}, + stage0_normalized_assets={"popups": [], "images": [], "tables": []}, + v4_evidence=[ + { + "section_id": "03-1", + "v4_candidates": [ + { + "template_id": "tpl_a", + "frame_id": "fid_a", + "frame_number": 13, + "confidence": 0.91, + "label": "use_as_is", + } + ], + "candidate_status": "ok", + } + ], + layout_preset_pre_override="single", + units=[unit], + comp_debug={"v4_fallback_summary": {"fallback_used_count": 0}}, + v4_fallback_traces={"03-1": {"selection_path": "rank_1"}}, + ai_preflight={"enabled": False, "skipped": True}, + ) + kwargs.update(overrides) + return kwargs + + +# -- success path --------------------------------------------------------- + + +def test_writes_snapshot_file_at_run_dir_root(tmp_path: Path): + rv = _pz2._write_reuse_snapshot(tmp_path, **_make_kwargs()) + assert rv == SNAPSHOT_FILENAME + fpath = tmp_path / SNAPSHOT_FILENAME + assert fpath.exists(), f"snapshot not written at 
{fpath}" + + +def test_written_snapshot_validates(tmp_path: Path): + kwargs = _make_kwargs() + rv = _pz2._write_reuse_snapshot(tmp_path, **kwargs) + assert rv == SNAPSHOT_FILENAME + snap = json.loads((tmp_path / SNAPSHOT_FILENAME).read_text(encoding="utf-8")) + + # mdx_sha256 is derived from mdx_source_text — recompute to verify + # the helper is hashing the UTF-8 bytes of the same source we passed. + import hashlib as _hl + + expected_sha = _hl.sha256( + kwargs["mdx_source_text"].encode("utf-8") + ).hexdigest() + validate_snapshot(snap, expected_mdx_sha256=expected_sha) + + +def test_snapshot_has_correct_schema_version(tmp_path: Path): + _pz2._write_reuse_snapshot(tmp_path, **_make_kwargs()) + snap = json.loads((tmp_path / SNAPSHOT_FILENAME).read_text(encoding="utf-8")) + assert snap["schema_version"] == SNAPSHOT_VERSION + + +def test_snapshot_records_layout_preset_pre_override(tmp_path: Path): + _pz2._write_reuse_snapshot( + tmp_path, **_make_kwargs(layout_preset_pre_override="horizontal-2") + ) + snap = json.loads((tmp_path / SNAPSHOT_FILENAME).read_text(encoding="utf-8")) + assert snap["layout_preset_pre_override"]["value"] == "horizontal-2" + + +def test_snapshot_is_utf8_encoded_with_non_ascii_content(tmp_path: Path): + _pz2._write_reuse_snapshot( + tmp_path, + **_make_kwargs( + slide_title="설계 방식의 왜곡", + mdx_source_text="# 설계 방식\n\n- 한글 bullet\n", + ), + ) + # ensure_ascii=False is intentional so Korean text round-trips + # readable; if a future refactor drops it the bytes change but the + # JSON still parses — we assert the file is decodable AS utf-8 and + # the value survives the round trip. 
+ raw = (tmp_path / SNAPSHOT_FILENAME).read_text(encoding="utf-8") + snap = json.loads(raw) + assert snap["slide_title"]["value"] == "설계 방식의 왜곡" + + +# -- failure path --------------------------------------------------------- + + +def test_failure_warns_and_returns_none(tmp_path: Path, monkeypatch, capsys): + """When ``build_snapshot`` raises, the helper must NOT propagate the + exception — it WARNS on stderr and returns ``None`` so the main + pipeline run continues.""" + + def _boom(**_kwargs): + raise RuntimeError("synthetic build failure") + + monkeypatch.setattr(_pz2, "build_snapshot", _boom) + + rv = _pz2._write_reuse_snapshot(tmp_path, **_make_kwargs()) + + assert rv is None + captured = capsys.readouterr() + assert "reuse-snapshot" in captured.err + assert "WARN" in captured.err + assert "RuntimeError" in captured.err + # File MUST NOT exist on failure (no partial JSON on disk). + assert not (tmp_path / SNAPSHOT_FILENAME).exists() + + +def test_failure_on_unwritable_run_dir_warns_and_returns_none( + tmp_path: Path, monkeypatch, capsys +): + """Simulate disk write failure: helper warns + returns None, never + raises out to the caller (Stage 2 guardrail: optional sidecar).""" + nonexistent = tmp_path / "does" / "not" / "exist" + # nonexistent.exists() is False — Path.write_text raises FileNotFoundError. + + rv = _pz2._write_reuse_snapshot(nonexistent, **_make_kwargs()) + + assert rv is None + captured = capsys.readouterr() + assert "reuse-snapshot" in captured.err + assert "WARN" in captured.err + # FileNotFoundError specifically — sanity-check the type surfaces in + # the warning so debugging is not blind. 
+ assert "FileNotFoundError" in captured.err + + +# -- pipeline integration anchors ----------------------------------------- + + +def test_pipeline_imports_helper_and_constant(): + """The pipeline module must expose the helper for the post-Step-6 + call site, and the constant must round-trip from the snapshot + module (single source of truth).""" + assert hasattr(_pz2, "_write_reuse_snapshot") + assert callable(_pz2._write_reuse_snapshot) + assert _pz2.SNAPSHOT_FILENAME == "_reuse_snapshot.json" + + +def test_pipeline_call_site_follows_step06_artifact_write(): + """Structural guard: the helper must be invoked AFTER the Step 6 + artifact write in ``run_phase_z2_mvp1`` so the sidecar lands next + to ``steps/step06_composition_plan.json`` (Stage 2 spec).""" + source = Path(_pz2.__file__).read_text(encoding="utf-8") + # Locate the step06 artifact write call site by its locked name arg. + step06_marker = '6, "composition_plan"' + idx_step06 = source.find(step06_marker) + assert idx_step06 != -1, "step06 artifact write call site missing" + # The helper call must appear AFTER the step06 marker. + idx_helper = source.find("_write_reuse_snapshot(", idx_step06) + assert idx_helper != -1, "u3 helper call missing after step06 write" + + +def test_pipeline_step06_artifact_data_records_snapshot_path(): + """Structural guard: the Step 6 artifact data dict must include the + ``reuse_snapshot_path`` field so a future ``--reuse-from`` consumer + can locate the expected sidecar via the canonical step artifact + (Stage 2 spec — informational; absence of the file is u4's + fail-closed concern).""" + source = Path(_pz2.__file__).read_text(encoding="utf-8") + step06_marker = '6, "composition_plan"' + idx_step06 = source.find(step06_marker) + assert idx_step06 != -1 + # Search a generous window after the marker for the field key. + window = source[idx_step06 : idx_step06 + 8000] + assert '"reuse_snapshot_path"' in window + assert "SNAPSHOT_FILENAME" in window