feat(#72): IMP-43 u1~u8 --reuse-from incremental rerun (Step 0/1/2/5/6 reuse + Step 7+ re-execute)
Some checks failed
Multi-MDX Regression (IMP-91) / multi-mdx-regression (push) Failing after 25s

u1 argparse --reuse-from PREV_RUN_ID + post-merge fail-closed guard (rejects
layout/zone_geometry/zone_section/image override axes by name; only
--override-frame is preserved).
u2 src/phase_z2_reuse_snapshot.py — JSON-only Step 6 snapshot with mdx_sha256
integrity key and {value, source_path, upstream_step} provenance per axis
(pickle forbidden per Stage 2 guardrail).
u3 _write_reuse_snapshot at the Step 6 boundary; soft-fails to stderr without
aborting the seed run.
u4 prev_run_dir RO copy of step00/01/02/05/06 + _reuse_snapshot.json into
new run_dir, state rehydration, reuse marker, frame-override application on
restored units, Step 7+ resume.
u4b fail-closed for missing prev_run_dir / missing/corrupt/invalid snapshot /
mdx_sha256 mismatch / accidental new==prev write, with value+path+upstream
diagnostics per axis.
u5 reuse_from Optional[str] threaded through run_phase_z2_mvp1 signature and
CLI dispatch; default None preserves byte-identical pre-IMP-43 behavior.
u6 Front /api/run optional reuseFromRunId forwarding (vite.config.ts +
designAgentApi.ts + run_pipeline_reuse_from.test.ts).
u7a fast CI equivalence (1 mdx × 1 layout × 2 frames); step13 whitelist =
run_id/timestamps/prev_run_id only. u7b 3 layouts × 3 mdx × 32 frames
sweep gated by pytest.mark.sweep (registered in pyproject.toml; default CI
must use -m 'not sweep').
u8 scripts/measure_reuse_savings.py argv-driven A/B/C harness with frame
pin self-discovery + seed-time exclusion; status board §8 TBD anchor
(issue-body 50-70% / 10-20s→3-8s claim explicitly unverified, not mirrored).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-24 22:44:27 +09:00
parent 8648a468d9
commit b4be6c1cd0
15 changed files with 5128 additions and 656 deletions

View File

@@ -345,13 +345,25 @@ export interface PipelineOverrides {
export async function runPipeline(
file: File,
overrides?: PipelineOverrides
overrides?: PipelineOverrides,
// IMP-43 (#72) u6 — optional prev RUN_ID for incremental rerun. When set,
// the vite plugin forwards `--reuse-from <PREV_RUN_ID>` to the backend
// and the pipeline resumes at Step 7 (Step 0/1/2/5/6 artifacts copied
// from the prior run). When omitted / empty, the POST body is
// byte-identical to pre-u6 (no reuseFromRunId key → no flag forwarded).
reuseFromRunId?: string,
): Promise<RunPipelineResult> {
const content = await file.text();
const body: Record<string, unknown> = {
filename: file.name,
content,
overrides,
};
if (reuseFromRunId) body.reuseFromRunId = reuseFromRunId;
const res = await fetch("/api/run", {
method: "POST",
headers: { "Content-Type": "application/json" },
body: JSON.stringify({ filename: file.name, content, overrides }),
body: JSON.stringify(body),
});
const data = (await res.json()) as RunPipelineResult;
if (!res.ok && !data.run_id) {

View File

@@ -0,0 +1,250 @@
// IMP-43 (#72) u6 — /api/run reuseFromRunId forwarding coverage.
//
// Stage 2 unit scope:
// 1) Front/client/src/services/designAgentApi.ts `runPipeline`:
// • accepts an optional 3rd arg `reuseFromRunId: string`.
// • includes `reuseFromRunId` in the POST body when truthy.
// • OMITS `reuseFromRunId` from the body when absent / empty / undefined
// → byte-identical to the pre-u6 POST contract (absent flag = full
// pipeline; backend u1 guard never sees an empty PREV_RUN_ID).
// • leaves `filename`, `content`, and `overrides` untouched alongside
// the new field (no payload-shape regression).
// 2) Front/vite.config.ts `/api/run` handler:
// • declares `reuseFromRunId?: string` in the payload type so a typed
// client cannot send a payload the server silently drops.
// • destructures `reuseFromRunId` from `payload` (sibling of
// `overrides`, NOT nested under it — the backend u1 post-merge
// guard treats reuse as a pipeline mode, not an override).
// • forwards `--reuse-from <PREV_RUN_ID>` to spawn cliArgs guarded by
// a truthy check (empty string / undefined ⇒ no flag, per Stage 2
// contract: invalid CLI args must never reach argparse).
// • places the forward block AFTER the `--override-section-assignment`
// loop so the spawn argv preserves backend argparse's no-positional-
// before-flag expectation and so `--override-frame` (still allowed
// by the u1 guard) is positioned ahead of `--reuse-from`.
//
// runPipeline is exercised with a duck-typed `File` plus a `vi.stubGlobal`
// fetch mock — mirrors the user_overrides_service.test.ts pattern. The
// vite handler is source-sliced (mirrors handle_generate_diag.test.ts)
// because the handler spawns python and a real /api/run round-trip is
// out of unit-test scope.
import { afterEach, beforeEach, describe, expect, it, vi, type Mock } from "vitest";
import { readFileSync } from "node:fs";
import { resolve } from "node:path";
import { runPipeline } from "../src/services/designAgentApi";
// ---------------------------------------------------------------------------
// vite.config.ts source — read once for the handler source-slice assertions.
// Path: Front/client/tests/ → Front/vite.config.ts (two levels up).
// ---------------------------------------------------------------------------
const VITE_CONFIG_PATH = resolve(__dirname, "..", "..", "vite.config.ts");
const VITE_CONFIG_SOURCE = readFileSync(VITE_CONFIG_PATH, "utf-8");
// ---------------------------------------------------------------------------
// fetch mock — minimal Response stub mirroring runPipeline's `.ok` + `.json()`
// + `.status` surface. Same shape as the user_overrides_service.test.ts
// helper so the two test files stay drift-free.
// ---------------------------------------------------------------------------
/** Minimal structural stand-in for the parts of `Response` that `runPipeline` reads. */
type MockResponse = {
  ok: boolean;
  status: number;
  json: () => Promise<unknown>;
};

/**
 * Build a fake fetch response.
 *
 * @param body   Value the stub's `json()` resolves with.
 * @param ok     Value exposed as `.ok` (defaults to `true`).
 * @param status Value exposed as `.status` (defaults to `200`).
 * @returns An object carrying `ok`, `status` and an async `json()` accessor.
 */
function mockResponse(body: unknown, ok = true, status = 200): MockResponse {
  const json = (): Promise<unknown> => Promise.resolve(body);
  return { ok, status, json };
}
const SUCCESS_BODY = {
success: true,
run_id: "test_run_id_20260524",
exit_code: 0,
final_html_exists: true,
preview_exists: true,
stdout: "",
stderr: "",
};
// Duck-typed File — runPipeline reads only `.name` and `.text()`. Avoids a
// hard dependency on the global File constructor (varies across node /
// jsdom / happy-dom test environments).
/**
 * Create a duck-typed `File` exposing only `name` and an async `text()`.
 *
 * @param name    Value exposed as the file's `name`.
 * @param content String that `text()` resolves with.
 * @returns The stub object, cast to `File` for the caller's signature.
 */
function makeFakeFile(name: string, content: string): File {
  const stub = {
    name,
    text: (): Promise<string> => Promise.resolve(content),
  };
  return stub as unknown as File;
}
// Shared fetch stub. Re-created before every test so call history from one
// case is never visible to the next.
let fetchMock: Mock;
beforeEach(() => {
// Install a fresh vi.fn() as the global `fetch` for the duration of the test.
fetchMock = vi.fn();
vi.stubGlobal("fetch", fetchMock);
});
afterEach(() => {
// Undo every vi.stubGlobal() applied during the test.
vi.unstubAllGlobals();
});
/**
 * Parse the JSON body of the most recent `fetch` call recorded on `fetchMock`.
 *
 * @returns The parsed request body.
 * @throws Error when `fetch` was never called, or when the last call carried
 *         no (or an empty) `body` in its init argument.
 */
function lastPostBody(): Record<string, unknown> {
  const recordedCalls = fetchMock.mock.calls;
  if (recordedCalls.length === 0) throw new Error("fetch was not called");
  // Second positional argument of fetch(url, init) is the RequestInit.
  const requestInit = recordedCalls[recordedCalls.length - 1][1] as
    | RequestInit
    | undefined;
  const rawBody = requestInit?.body;
  if (!rawBody) throw new Error("fetch was called without a body");
  return JSON.parse(String(rawBody));
}
// ============================================================================
// runPipeline (designAgentApi.ts) — forwarding/omission coverage
// ============================================================================
// Suite: exercises runPipeline against the stubbed global fetch and inspects
// the request it issues (URL, method, headers, JSON body) plus its return value.
describe("runPipeline reuseFromRunId forwarding (IMP-43 #72 u6)", () => {
// Transport contract: exactly one POST to /api/run with a JSON content-type.
it("posts to /api/run via POST with JSON content-type", async () => {
fetchMock.mockResolvedValueOnce(mockResponse(SUCCESS_BODY));
await runPipeline(makeFakeFile("03.mdx", "# title"));
expect(fetchMock).toHaveBeenCalledTimes(1);
const [url, init] = fetchMock.mock.calls[0];
expect(url).toBe("/api/run");
expect((init as RequestInit).method).toBe("POST");
expect((init as RequestInit).headers).toMatchObject({
"Content-Type": "application/json",
});
});
// A non-empty 3rd argument must appear in the body next to filename/content.
it("includes reuseFromRunId in the POST body when provided", async () => {
fetchMock.mockResolvedValueOnce(mockResponse(SUCCESS_BODY));
await runPipeline(
makeFakeFile("03.mdx", "# title"),
undefined,
"mdx03_20260524080000",
);
const body = lastPostBody();
expect(body.reuseFromRunId).toBe("mdx03_20260524080000");
expect(body.filename).toBe("03.mdx");
expect(body.content).toBe("# title");
});
// No 3rd argument: the key must be absent (not merely undefined) from the body.
it("omits reuseFromRunId when 3rd arg is undefined (pre-u6 byte-identical)", async () => {
fetchMock.mockResolvedValueOnce(mockResponse(SUCCESS_BODY));
await runPipeline(makeFakeFile("03.mdx", "# title"));
const body = lastPostBody();
expect("reuseFromRunId" in body).toBe(false);
// Pre-u6 contract: filename/content are the only keys when overrides
// is undefined (JSON.stringify drops undefined values; pre-u6 emitted
// `JSON.stringify({filename, content, overrides})` with the same
// drop-undefined behaviour, so the wire body is byte-identical).
expect(Object.keys(body).sort()).toEqual(["content", "filename"]);
});
// Overrides alone: body carries exactly content/filename/overrides.
it("omits reuseFromRunId but keeps overrides when only overrides provided", async () => {
fetchMock.mockResolvedValueOnce(mockResponse(SUCCESS_BODY));
await runPipeline(makeFakeFile("03.mdx", "# title"), {
frames: { "03-1": "frame_07" },
});
const body = lastPostBody();
expect("reuseFromRunId" in body).toBe(false);
expect(Object.keys(body).sort()).toEqual([
"content",
"filename",
"overrides",
]);
expect(body.overrides).toEqual({ frames: { "03-1": "frame_07" } });
});
// Empty string is falsy, so it is treated the same as "not provided".
it("omits reuseFromRunId when passed an empty string (truthy guard)", async () => {
fetchMock.mockResolvedValueOnce(mockResponse(SUCCESS_BODY));
await runPipeline(makeFakeFile("03.mdx", "# title"), undefined, "");
const body = lastPostBody();
expect("reuseFromRunId" in body).toBe(false);
});
// Frame overrides and reuseFromRunId travel together as sibling body keys.
it("forwards reuseFromRunId alongside frame overrides (the only u1-permitted combo)", async () => {
fetchMock.mockResolvedValueOnce(mockResponse(SUCCESS_BODY));
await runPipeline(
makeFakeFile("03.mdx", "# title"),
{ frames: { "03-1+03-2": "frame_07" } },
"mdx03_20260524080000",
);
const body = lastPostBody();
expect(body.overrides).toEqual({ frames: { "03-1+03-2": "frame_07" } });
expect(body.reuseFromRunId).toBe("mdx03_20260524080000");
});
// The resolved value is the parsed response JSON (SUCCESS_BODY here).
it("returns the parsed RunPipelineResult on success", async () => {
fetchMock.mockResolvedValueOnce(mockResponse(SUCCESS_BODY));
const res = await runPipeline(
makeFakeFile("03.mdx", "# title"),
undefined,
"mdx03_20260524080000",
);
expect(res.success).toBe(true);
expect(res.run_id).toBe("test_run_id_20260524");
});
});
// ============================================================================
// /api/run handler (vite.config.ts) — source-slice forwarding contract
// ============================================================================
// Suite: static assertions over the text of vite.config.ts (VITE_CONFIG_SOURCE).
// Nothing is executed; each case matches a regex or compares string offsets, so
// these tests are sensitive to the exact spelling/formatting of the handler.
describe("/api/run handler reuseFromRunId source-slice (IMP-43 #72 u6)", () => {
it("declares reuseFromRunId?: string on the /api/run payload type", () => {
// Payload type at the top of the /api/run handler body. The
// optional-string declaration is the single source-of-truth for what
// shape the handler accepts; a typed frontend client (u5 saveUserOverrides
// sibling pattern) cannot silently send a payload the server drops.
expect(VITE_CONFIG_SOURCE).toMatch(/reuseFromRunId\?:\s*string\s*;/);
});
// Requires the exact destructuring order filename, content, overrides, reuseFromRunId.
it("destructures reuseFromRunId from payload alongside filename/content/overrides", () => {
expect(VITE_CONFIG_SOURCE).toMatch(
/const\s*\{\s*filename\s*,\s*content\s*,\s*overrides\s*,\s*reuseFromRunId\s*\}\s*=\s*payload\s*;/,
);
});
it("forwards --reuse-from <PREV_RUN_ID> after the override-section-assignment loop", () => {
// Stage 2 contract: reuse_from is a pipeline mode, not an override.
// The forward block must sit AFTER the last override loop so the spawn
// argv preserves the order documented in the u1 backend post-merge
// guard (overrides parsed first; reuse_from precondition runs against
// the merged overrides view).
// NOTE: indexOf returns the FIRST occurrence of each double-quoted literal,
// so the comparison is between the first appearance of each flag string.
const reuseFromIdx = VITE_CONFIG_SOURCE.indexOf('"--reuse-from"');
const zoneSectionsIdx = VITE_CONFIG_SOURCE.indexOf(
'"--override-section-assignment"',
);
expect(reuseFromIdx).toBeGreaterThan(-1);
expect(zoneSectionsIdx).toBeGreaterThan(-1);
expect(reuseFromIdx).toBeGreaterThan(zoneSectionsIdx);
});
it("guards the forward with a truthy check on reuseFromRunId", () => {
// Empty string / undefined ⇒ no flag pushed (Stage 2 contract: invalid
// CLI args must never reach argparse — the backend u1 guard would
// fail-closed with `reuse_artifact_missing` on the empty PREV_RUN_ID).
const reuseFromIdx = VITE_CONFIG_SOURCE.indexOf('"--reuse-from"');
expect(reuseFromIdx).toBeGreaterThan(-1);
// Inspect only the (up to) 200 characters immediately preceding the literal.
const preface = VITE_CONFIG_SOURCE.slice(
Math.max(0, reuseFromIdx - 200),
reuseFromIdx,
);
expect(preface).toMatch(/if\s*\(\s*reuseFromRunId/);
expect(preface).toMatch(/typeof\s+reuseFromRunId\s*===\s*"string"/);
});
it("pushes reuseFromRunId as the --reuse-from argument value (no string interpolation)", () => {
// The CLI value must be the raw PREV_RUN_ID — no `=` join, no quoting
// (spawn is shell:false). Mirrors the `--override-layout` shape.
const reuseFromIdx = VITE_CONFIG_SOURCE.indexOf('"--reuse-from"');
expect(reuseFromIdx).toBeGreaterThan(-1);
// Window spans both before (`cliArgs.push(`) and after
// (`reuseFromRunId)`) the literal so the full push expression is
// captured.
// (`window` here is a local string; it shadows any global of that name.)
const window = VITE_CONFIG_SOURCE.slice(
Math.max(0, reuseFromIdx - 100),
reuseFromIdx + 200,
);
expect(window).toMatch(
/cliArgs\.push\(\s*"--reuse-from"\s*,\s*reuseFromRunId\s*\)/,
);
});
});

View File

@@ -543,6 +543,13 @@ function vitePluginPhaseZApi(): Plugin {
// (e.g., "top": ["03-1-sub-1"]). Forwarded as --override-section-assignment.
zoneSections?: Record<string, string[]>;
};
// IMP-43 (#72) u6 — optional PREV_RUN_ID to reuse Step 0/1/2/5/6
// artifacts from a prior run and resume execution at Step 7.
// Lives at the payload root (NOT under `overrides`) because the
// backend u1 post-merge guard rejects most override axes when
// --reuse-from is supplied. Absent / empty = full pipeline
// (byte-identical to pre-u6 spawn).
reuseFromRunId?: string;
};
try {
payload = JSON.parse(body);
@@ -554,7 +561,7 @@ function vitePluginPhaseZApi(): Plugin {
return;
}
const { filename, content, overrides } = payload;
const { filename, content, overrides, reuseFromRunId } = payload;
if (!filename || typeof content !== "string") {
res.writeHead(400, { "Content-Type": "application/json" });
res.end(
@@ -638,6 +645,19 @@ function vitePluginPhaseZApi(): Plugin {
);
}
}
// IMP-43 (#72) u6 — --reuse-from <PREV_RUN_ID> forward. Backend
// (u1) parses this flag, validates the snapshot, copies Step
// 0/1/2/5/6 artifacts from data/runs/<PREV_RUN_ID>/phase_z2 into
// the new run_dir, and resumes execution at Step 7. The post-merge
// guard at the same site rejects --override-layout /
// --override-zone-geometry / --override-section-assignment /
// --override-image with axis-named fail-closed exit; only
// --override-frame (above) is preserved. Truthy check excludes
// empty string + undefined so an invalid argument never reaches
// argparse.
if (reuseFromRunId && typeof reuseFromRunId === "string") {
cliArgs.push("--reuse-from", reuseFromRunId);
}
console.log(
`[phase-z-api] spawn pipeline: run_id=${runId}, mdx=${mdxPath}, args=${JSON.stringify(cliArgs.slice(2))}`
);

View File

@@ -182,6 +182,27 @@ Step 0 (사전 준비) 의 Figma → HTML 변환은 *precondition phase 의 작
---
## 8. IMP-43 (#72) `--reuse-from` measured savings
> Stage 2 §u8 binding contract: the issue-body 50-70% / 10-20s → 3-8s claim is **unverified** and is **not** mirrored here. Numbers below come from `scripts/measure_reuse_savings.py` on the project reference host; until that script is run and the values committed, every cell stays `TBD`.
| axis | value |
|---|---|
| measurement script | `scripts/measure_reuse_savings.py` |
| reuse boundary (Stage 1 lock) | Step 0 / 1 / 2 / 5 / 6 only; Step 7+ re-executes |
| full rerun seconds (p50) | TBD |
| full rerun seconds (p95) | TBD |
| reuse seconds (p50) | TBD |
| reuse seconds (p95) | TBD |
| reuse / full ratio (p50) | TBD |
| last measured | TBD (date / host / mdx / iterations) |
Run protocol (per iteration): `(A)` seed → `(B)` full rerun with one self-discovered `--override-frame` pin → `(C)` `--reuse-from <seed>` with the same pin. The `(A)` seed time is reported separately and **not** included in the B-vs-C comparison — the reuse path's whole point is that the seed already exists from a prior interactive run.
Invocation: `python -m scripts.measure_reuse_savings samples/mdx_batch/02.mdx --iterations 5` (mdx is argv-driven; the script does not pin a sample internally).
---
## 사용 방법
- 새 작업 들어오면 → 본 board 의 *어느 step* 의 status 를 바꾸는 작업인지 식별

View File

@@ -34,4 +34,5 @@ target-version = "py310"
asyncio_mode = "auto"
markers = [
"integration: end-to-end pipeline integration tests (heavy; invoke Selenium)",
"sweep: opt-in heavyweight sweep tests (IMP-43 u7b: 3 layouts × 3 mdx × frame-pin coverage). Invoke explicitly via `pytest -m sweep`; default CI must use `-m 'not sweep'`.",
]

View File

@@ -0,0 +1,178 @@
"""IMP-43 (#72) u8 — measure ``--reuse-from`` wall-clock savings.
Argv-driven measurement helper for the Stage 2 §u8 binding contract:
re-derive a realistic savings target instead of mirroring the
unverified issue-body 50-70% / 10-20s → 3-8s claim.
Per-iteration measurement protocol (mirrors the u7a equivalence
harness, ``tests/test_phase_z2_reuse_from_equivalence_unit.py``):
    (A) baseline   : full run, no overrides               — reuse seed
    (B) full rerun : full run + one --override-frame pin  — control path
    (C) reuse      : --reuse-from <seed> + same pin       — reuse path
Wall-clock = ``time.perf_counter()`` around the subprocess.run call.
The (A) seed run time is reported separately and NOT included in the
B-vs-C comparison (the reuse path's whole point is that the seed
already exists from a prior interactive run).
For each iteration the frame pin is self-discovered from the seed
run's ``step06_composition_plan.json``: the first unit's
``frame_template_id`` is re-pinned to itself, exercising the
``--override-frame`` CLI surface end-to-end without changing the
semantic frame assignment (same approach the u7a/u7b equivalence
tests already lock).
Output: a JSON document to stdout with per-iteration timings,
B/C p50 + p95, and the ratio C/B. Stderr carries the subprocess
stdout/stderr tails on non-zero exits.
Guardrails (Stage 2):
* argv-driven, no hardcoded mdx — caller picks the sample
* no hardcoded savings target — TBD until measured
* value + path + upstream provenance lives in the printed JSON
* does NOT mutate prev_run_dir; new runs land under fresh run_ids
"""
from __future__ import annotations
import argparse
import json
import statistics
import subprocess
import sys
import time
import uuid
from pathlib import Path
REPO_ROOT = Path(__file__).resolve().parents[1]
RUNS_DIR = REPO_ROOT / "data" / "runs"
def _unique_run_id(prefix: str) -> str:
return f"{prefix}_imp43_u8_{uuid.uuid4().hex[:8]}"
def _spawn(extra_args: list[str], timeout: int) -> tuple[subprocess.CompletedProcess, float]:
    """Run ``python -m src.phase_z2_pipeline <extra_args>`` and time it.

    Args:
        extra_args: Arguments appended after the module name.
        timeout: Seconds passed to ``subprocess.run(timeout=...)``.

    Returns:
        ``(completed_process, elapsed_seconds)``; elapsed is measured with
        ``time.perf_counter()`` around the ``subprocess.run`` call. stdout and
        stderr are captured as text.

    Raises:
        subprocess.TimeoutExpired: propagated from ``subprocess.run`` when the
            child exceeds ``timeout``.
    """
    t0 = time.perf_counter()
    command = [sys.executable, "-m", "src.phase_z2_pipeline"]
    command.extend(extra_args)
    completed = subprocess.run(
        command,
        capture_output=True,
        text=True,
        timeout=timeout,
        cwd=str(REPO_ROOT),  # run from the repository root
    )
    elapsed = time.perf_counter() - t0
    return completed, elapsed
def _assert_ok(label: str, cp: subprocess.CompletedProcess) -> None:
if cp.returncode != 0:
sys.stderr.write(
f"[measure_reuse_savings] {label} failed rc={cp.returncode}\n"
f"--- stderr tail ---\n{cp.stderr[-2000:]}\n"
f"--- stdout tail ---\n{cp.stdout[-2000:]}\n"
)
raise SystemExit(2)
def _discover_first_frame_pin(seed_run_id: str) -> tuple[str, str]:
    """Pick the first pinnable ``(unit_id, frame_template_id)`` from a seed run.

    Reads ``<RUNS_DIR>/<seed_run_id>/phase_z2/steps/step06_composition_plan.json``
    and scans ``data.selected_units`` in order. A unit is pinnable when it has
    a non-empty ``source_section_ids`` list and a non-empty string
    ``frame_template_id``.

    Args:
        seed_run_id: Run id of the seed run whose Step 6 artifact is read.

    Returns:
        ``(unit_id, frame_template_id)`` where ``unit_id`` is the unit's
        section ids joined with ``"+"``.

    Raises:
        SystemExit: when the artifact cannot be read or parsed, or when no
            unit in it is pinnable. The message names the artifact path.
    """
    p = RUNS_DIR / seed_run_id / "phase_z2" / "steps" / "step06_composition_plan.json"
    try:
        payload = json.loads(p.read_text(encoding="utf-8"))
    except (OSError, ValueError) as exc:
        # json.JSONDecodeError subclasses ValueError; report the path instead
        # of letting a bare traceback escape.
        raise SystemExit(
            f"[measure_reuse_savings] seed {seed_run_id} step06 unreadable "
            f"({type(exc).__name__}: {exc}); path={p}"
        )

    # Tolerate `"data": null`, a non-dict payload, and a non-list unit
    # collection; all of these fall through to the "no pinnable" diagnostic.
    data = payload.get("data") if isinstance(payload, dict) else None
    selected = data.get("selected_units") if isinstance(data, dict) else None
    if not isinstance(selected, list):
        selected = []

    for u in selected:
        if not isinstance(u, dict):
            continue
        sids = u.get("source_section_ids") or []
        tpl = u.get("frame_template_id")
        if isinstance(sids, list) and sids and isinstance(tpl, str) and tpl:
            return ("+".join(str(s) for s in sids), tpl)
    raise SystemExit(
        f"[measure_reuse_savings] seed {seed_run_id} step06 has no pinnable "
        f"(unit_id, frame_template_id); path={p}"
    )
def _percentile(values: list[float], pct: float) -> float:
if not values:
return float("nan")
if len(values) == 1:
return values[0]
s = sorted(values)
k = (len(s) - 1) * pct
lo = int(k)
hi = min(lo + 1, len(s) - 1)
return s[lo] + (s[hi] - s[lo]) * (k - lo)
def main() -> int:
    """CLI entry point: run the seed / full-rerun / reuse protocol and print a JSON summary.

    Each iteration spawns three pipeline runs against the same MDX file:
    (A) a seed run, (B) a full rerun with one ``--override-frame`` pin
    discovered from the seed, and (C) a ``--reuse-from <seed>`` run with the
    same pin. Only B and C feed the p50/p95 and ratio figures; the seed time
    is recorded per iteration.

    Returns:
        ``0`` after the summary is written to stdout; ``2`` when the MDX path
        is not a file, ``--iterations`` is below 1, or a subprocess timed out.

    Raises:
        SystemExit: with code 2 (via ``_assert_ok`` / ``_discover_first_frame_pin``)
            when a run exits non-zero or the seed has no pinnable unit.
    """
    ap = argparse.ArgumentParser(
        prog="python -m scripts.measure_reuse_savings",
        description="Measure IMP-43 --reuse-from wall-clock savings.",
    )
    ap.add_argument("mdx_path", type=Path, help="MDX sample to measure against")
    ap.add_argument("--iterations", type=int, default=3, help="trials (default 3)")
    ap.add_argument("--timeout", type=int, default=900, help="per-run timeout seconds")
    args = ap.parse_args()

    if not args.mdx_path.is_file():
        sys.stderr.write(f"[measure_reuse_savings] mdx not found: {args.mdx_path}\n")
        return 2
    if args.iterations < 1:
        # Zero/negative trials would leave the timing lists empty and emit NaN
        # percentiles, which is not valid strict JSON.
        sys.stderr.write(
            f"[measure_reuse_savings] --iterations must be >= 1 "
            f"(got {args.iterations})\n"
        )
        return 2

    iterations: list[dict] = []
    try:
        for i in range(args.iterations):
            # (A) seed: plain full run that later serves as the reuse source.
            seed_id = _unique_run_id(f"seed{i}")
            cp_a, t_a = _spawn([str(args.mdx_path), seed_id], args.timeout)
            _assert_ok(f"(A) seed iter={i}", cp_a)

            # Re-pin the first unit to the template the seed already chose.
            unit_id, tpl_id = _discover_first_frame_pin(seed_id)
            override = ["--override-frame", f"{unit_id}={tpl_id}"]

            # (B) control: full rerun carrying the pin.
            full_id = _unique_run_id(f"full{i}")
            cp_b, t_b = _spawn([str(args.mdx_path), full_id, *override], args.timeout)
            _assert_ok(f"(B) full rerun iter={i}", cp_b)

            # (C) reuse: same pin, resuming from the seed's artifacts.
            reuse_id = _unique_run_id(f"reuse{i}")
            cp_c, t_c = _spawn(
                [str(args.mdx_path), reuse_id, "--reuse-from", seed_id, *override],
                args.timeout,
            )
            _assert_ok(f"(C) reuse iter={i}", cp_c)

            iterations.append({
                "iter": i,
                "seed_run_id": seed_id,
                "full_run_id": full_id,
                "reuse_run_id": reuse_id,
                "override_frame": f"{unit_id}={tpl_id}",
                "seed_seconds": t_a,
                "full_rerun_seconds": t_b,
                "reuse_seconds": t_c,
            })
    except subprocess.TimeoutExpired as exc:
        # _spawn lets subprocess.run's timeout propagate; report it like the
        # other failure paths instead of dumping a traceback.
        sys.stderr.write(
            f"[measure_reuse_savings] subprocess timed out after "
            f"{exc.timeout}s: {exc.cmd}\n"
        )
        return 2

    full_times = [it["full_rerun_seconds"] for it in iterations]
    reuse_times = [it["reuse_seconds"] for it in iterations]
    full_p50 = _percentile(full_times, 0.50)
    reuse_p50 = _percentile(reuse_times, 0.50)
    summary = {
        "mdx_path": str(args.mdx_path),
        "iterations_count": len(iterations),
        "full_rerun_seconds_p50": full_p50,
        "full_rerun_seconds_p95": _percentile(full_times, 0.95),
        "reuse_seconds_p50": reuse_p50,
        "reuse_seconds_p95": _percentile(reuse_times, 0.95),
        "reuse_over_full_ratio_p50": (
            reuse_p50 / full_p50
            if full_times and statistics.median(full_times) > 0
            else float("nan")
        ),
        "iterations": iterations,
        "note": (
            "IMP-43 (#72) u8 measurement. Issue-body 50-70% / 10-20s → 3-8s "
            "claim is NOT honored here — actual numbers depend on host, "
            "Selenium cold-start, and AI cache state. Update "
            "docs/architecture/PHASE-Z-PIPELINE-STATUS-BOARD.md §8 with the "
            "p50/p95 reported here when run on the project's reference host."
        ),
    }
    sys.stdout.write(json.dumps(summary, ensure_ascii=False, indent=2))
    sys.stdout.write("\n")
    return 0
if __name__ == "__main__":
raise SystemExit(main())

View File

@@ -25,6 +25,7 @@ MVP-1.5b spec :
- mvp1.5b_test* : 본 모듈, 원래 설계 라인 합류
"""
import hashlib
import json
import os
import re
@@ -33,7 +34,7 @@ import sys
import time
from dataclasses import asdict, dataclass, field
from pathlib import Path
from typing import Optional
from typing import Any, Optional
import yaml
from jinja2 import Environment, FileSystemLoader, select_autoescape
@@ -99,6 +100,15 @@ from src.phase_z2_ai_fallback.step12 import gather_step12_ai_repair_proposals
# idempotent ``has_popup`` marker onto retry_trace per unit. No AI call.
from src.phase_z2_ai_fallback.step17 import run_step17_popup_gate
# IMP-43 (#72) u3 — Step 6 reuse snapshot sidecar (JSON-only). Schema +
# serializers + validator live in u2 (``src.phase_z2_reuse_snapshot``);
# this module's call site at the Step 6 boundary writes the sidecar
# alongside ``steps/step06_composition_plan.json`` so that future
# ``--reuse-from`` runs (u4) can resume at Step 7 without re-deriving
# Step 0/1/2/5/6 state. ``--reuse-from`` is u4 scope; here we only
# WRITE the snapshot — restore wiring lands in u4.
from src.phase_z2_reuse_snapshot import build_snapshot, SNAPSHOT_FILENAME
# ─── Constants ──────────────────────────────────────────────────
@@ -3853,6 +3863,564 @@ def _write_step_artifact(
return fpath
# IMP-43 (#72) u3 — Step 6 reuse snapshot sidecar writer.
#
# Scope (u3 only — Stage 2 unit split):
# * Writes ``run_dir/_reuse_snapshot.json`` *after* the Step 6 artifact.
# * JSON-only (per Stage 2 guardrail — pickle forbidden); schema +
# ``build_snapshot`` live in u2 (``src.phase_z2_reuse_snapshot``).
# * Write failure WARNS and CONTINUES — the snapshot is an OPTIONAL
# sidecar; absence means future ``--reuse-from`` (u4) will fail
# closed when it cannot find / load the file. The main pipeline
# run must not abort on snapshot write failure.
# * Returns the run_dir-relative path (``"_reuse_snapshot.json"``) on
# success, ``None`` on failure. The caller stamps the returned value
# (or the constant when known ahead of time) into the Step 6 artifact.
def _write_reuse_snapshot(
    run_dir: Path,
    *,
    mdx_source_text: str,
    slide_title: Optional[str],
    slide_footer: Optional[str],
    sections: list,
    stage0_adapter_diagnostics: Optional[dict],
    stage0_normalized_assets: Optional[dict],
    v4_evidence: list,
    layout_preset_pre_override: Optional[str],
    units: list,
    comp_debug: Optional[dict],
    v4_fallback_traces: Optional[dict],
    ai_preflight: Optional[dict],
) -> Optional[str]:
    """Best-effort write of the reuse snapshot JSON into ``run_dir``.

    The SHA-256 of ``mdx_source_text`` (UTF-8 encoded) is computed and handed,
    together with every other keyword argument unchanged, to
    ``build_snapshot``; the result is serialized as indented JSON
    (``ensure_ascii=False``) to ``run_dir / SNAPSHOT_FILENAME``.

    Returns:
        ``SNAPSHOT_FILENAME`` on success. On any exception a warning is
        printed to stderr and ``None`` is returned; the exception is not
        re-raised, so the caller's run continues.
    """
    try:
        digest = hashlib.sha256(mdx_source_text.encode("utf-8")).hexdigest()
        payload = build_snapshot(
            mdx_sha256=digest,
            slide_title=slide_title,
            slide_footer=slide_footer,
            sections=sections,
            stage0_adapter_diagnostics=stage0_adapter_diagnostics,
            stage0_normalized_assets=stage0_normalized_assets,
            v4_evidence=v4_evidence,
            layout_preset_pre_override=layout_preset_pre_override,
            units=units,
            comp_debug=comp_debug,
            v4_fallback_traces=v4_fallback_traces,
            ai_preflight=ai_preflight,
        )
        target = run_dir / SNAPSHOT_FILENAME
        serialized = json.dumps(payload, ensure_ascii=False, indent=2)
        target.write_text(serialized, encoding="utf-8")
    except Exception as exc:
        # Deliberately broad: a failed sidecar write must not abort the run.
        print(
            f" [reuse-snapshot] WARN — failed to write {SNAPSHOT_FILENAME} "
            f"(reason={type(exc).__name__}: {exc}); --reuse-from will not be "
            f"available from this run.",
            file=sys.stderr,
        )
        return None
    return SNAPSHOT_FILENAME
# IMP-43 (#72) u4 — --reuse-from copy + restore + entry helpers.
#
# Scope (u4 only — Stage 2 unit split):
# * Pure path resolution / file copy / snapshot load+validate /
# section + unit rehydration / marker writing.
# * NO edits to ``run_phase_z2_mvp1`` body — the kwarg threading and
# the entry-point branch that invokes these helpers land in u5.
# * NO sys.exit(2) translation — helpers RAISE
# (FileNotFoundError / SnapshotValidationError / OSError); u4b adds
# the stderr + exit-code-2 wrapper, the prev_run_dir == new_run_dir
# accidental-write guard, and the mdx_sha256 mismatch surface
# fingerprint.
#
# Restore contract (Stage 2 boundary): Step 0/1/2/5/6 artifacts +
# ``_reuse_snapshot.json``. Step numbers 3 / 4 are deliberately absent
# — the pipeline DOES write ``step03_content_objects.json`` and
# ``step04_internal_composition.json`` AFTER the Step 6 artifact and
# BEFORE the Step 7 artifact (see ``_write_step_artifact`` call sites
# for ``run_dir, 3`` and ``run_dir, 4`` above the ``run_dir, 7`` call
# in this file), but both are emitted with
# ``step_status="trace-only"`` and ``pipeline_path_connected=False``:
# they are diagnostic projections derived from the Step 6
# ``debug_zones`` snapshot, not deterministic inputs that Step 7+
# consume. Restoring them is unnecessary because downstream code
# reads ``debug_zones`` directly (rehydrated from the snapshot), and
# copying trace-only files would muddle the boundary audit. Stage 2
# boundary lock = pipeline-path-connected pre-Step 7 artifacts only.
_REUSE_STEP_ARTIFACTS: tuple[str, ...] = (
"step00_preconditions.json",
"step01_mdx_upload.json",
"step01_mdx_source.md",
"step02_normalized.json",
"step05_v4_evidence.json",
"step06_composition_plan.json",
)
REUSE_MARKER_FILENAME = "_reuse_marker.json"
def _resolve_reuse_from_prev_run_dir(reuse_from: str) -> Path:
    """Map a ``--reuse-from`` run id to ``RUNS_DIR/<reuse_from>/phase_z2``.

    This only joins path components; it performs no filesystem access and
    does not verify that the directory exists.

    NOTE(review): ``reuse_from`` is joined as-is, so a value containing
    ``..`` or an absolute path would resolve outside ``RUNS_DIR``. Confirm
    that callers (CLI / HTTP forwarder) constrain the run id format.
    """
    prev_run_root = RUNS_DIR / reuse_from
    return prev_run_root / "phase_z2"
def _copy_reuse_artifacts_from_prev_run(
    prev_run_dir: Path, new_run_dir: Path
) -> dict[str, str]:
    """Copy the reusable step artifacts and the reuse snapshot into ``new_run_dir``.

    Every source file is checked for existence *before* anything is created
    or copied, so a missing artifact never leaves ``new_run_dir`` partially
    populated.

    Args:
        prev_run_dir: Run directory to read from. Step artifacts are expected
            under ``prev_run_dir/steps`` and the snapshot directly in
            ``prev_run_dir``.
        new_run_dir: Run directory to write into; ``new_run_dir/steps`` is
            created if needed.

    Returns:
        ``{artifact_name: path relative to new_run_dir}`` for every copied
        file, step artifacts first (in ``_REUSE_STEP_ARTIFACTS`` order) and
        the snapshot last.

    Raises:
        FileNotFoundError: when a required step artifact or the snapshot is
            absent from ``prev_run_dir``.
        OSError: propagated from ``mkdir`` / ``shutil.copyfile``.
    """
    # Phase 1 — validate: build the full copy plan without touching disk.
    plan: list[tuple[str, Path, str]] = []
    for fname in _REUSE_STEP_ARTIFACTS:
        src = prev_run_dir / "steps" / fname
        if not src.exists():
            raise FileNotFoundError(
                f"reuse artifact missing in prev_run_dir: steps/{fname} "
                f"(expected at {src})"
            )
        plan.append((fname, src, f"steps/{fname}"))
    snap_src = prev_run_dir / SNAPSHOT_FILENAME
    if not snap_src.exists():
        raise FileNotFoundError(
            f"reuse snapshot missing in prev_run_dir: {SNAPSHOT_FILENAME} "
            f"(expected at {snap_src})"
        )
    plan.append((SNAPSHOT_FILENAME, snap_src, SNAPSHOT_FILENAME))

    # Phase 2 — copy: all sources are known to exist at this point.
    (new_run_dir / "steps").mkdir(parents=True, exist_ok=True)
    copied: dict[str, str] = {}
    for name, src, rel_path in plan:
        shutil.copyfile(src, new_run_dir / rel_path)
        copied[name] = rel_path
    return copied
def _load_and_validate_reuse_snapshot(
    new_run_dir: Path, *, mdx_source_text: str
) -> dict:
    """Read ``new_run_dir / SNAPSHOT_FILENAME`` and validate it against the MDX text.

    The expected integrity key is the SHA-256 hex digest of
    ``mdx_source_text`` encoded as UTF-8. The parsed snapshot and that digest
    are passed to ``validate_snapshot`` (imported locally from
    ``src.phase_z2_reuse_snapshot``); whatever it raises propagates to the
    caller, as do file-read and JSON-decoding errors.

    Returns:
        The parsed snapshot dict, unchanged, once validation returned.
    """
    from src.phase_z2_reuse_snapshot import validate_snapshot

    raw_text = (new_run_dir / SNAPSHOT_FILENAME).read_text(encoding="utf-8")
    loaded = json.loads(raw_text)
    digest = hashlib.sha256(mdx_source_text.encode("utf-8")).hexdigest()
    validate_snapshot(loaded, expected_mdx_sha256=digest)
    return loaded
@dataclass
class _RehydratedV4Candidate:
"""V4Match-shape duck type restored from snapshot ``v4_candidates``.
Exposes the 6-attribute contract that the reuse path's downstream
consumers read off ``unit.v4_candidates`` entries:
* template_id / frame_id / frame_number / confidence / label —
read by ``_apply_frame_override_to_unit`` (frame swap).
* v4_rank — read by ``_build_application_plan_unit`` (Step 9
payload, ``data.application_plan.zones[i].v4_candidates[j]``).
Default ``None`` keeps the dataclass safe to construct from
legacy snapshots that pre-date the u4 fix where the snapshot
serializer did not persist per-candidate rank.
Kept local — circular-dep-free; the production ``V4Match`` dataclass
additionally carries section_id / selection_path / fallback_reason /
provisional that the reuse boundary deliberately does not require.
"""
template_id: str
frame_id: str
frame_number: int
confidence: float
label: str
v4_rank: Optional[int] = None
def _rehydrate_mdx_sections_from_snapshot(snapshot: dict) -> list:
    """Turn ``snapshot["sections"]["value"]`` back into ``MdxSection`` objects.

    ``section_id``, ``section_num``, ``title`` and ``raw_content`` are
    required keys of each entry (a missing one raises ``KeyError``).
    ``heading_number`` is optional and defaults to ``None``;
    ``v4_alias_keys`` and ``sub_sections`` are optional and are copied into
    fresh lists (empty when absent or falsy).

    Returns:
        A list of ``MdxSection`` instances in snapshot order.
    """
    restored: list = []
    for entry in snapshot["sections"]["value"]:
        section = MdxSection(
            section_id=entry["section_id"],
            section_num=entry["section_num"],
            title=entry["title"],
            raw_content=entry["raw_content"],
            heading_number=entry.get("heading_number"),
            v4_alias_keys=list(entry.get("v4_alias_keys") or []),
            sub_sections=list(entry.get("sub_sections") or []),
        )
        restored.append(section)
    return restored
def _rehydrate_composition_units_from_snapshot(snapshot: dict) -> list:
    """Turn the snapshot ``units`` wrapper back into ``CompositionUnit`` objects.

    Each entry's ``v4_candidates`` list is rebuilt as
    ``_RehydratedV4Candidate`` instances, so attribute-style access
    (``cand.template_id`` and friends) keeps working in
    ``_apply_frame_override_to_unit`` without persisting the production
    ``V4Match`` dataclass layout.

    ``CompositionUnit`` is imported locally through the
    ``src.phase_z2_composition`` path on purpose. Per the original
    author's note, the module is loaded under both
    ``phase_z2_composition`` and ``src.phase_z2_composition`` (historical
    sys.path duality), and a top-level reference would create a
    class-identity mismatch against tests and downstream code that import
    via the ``src.`` path.
    """
    from src.phase_z2_composition import CompositionUnit as _CompositionUnit

    def _restore_candidate(raw):
        # Rank is optional: older snapshots did not persist it.
        return _RehydratedV4Candidate(
            template_id=raw["template_id"],
            frame_id=raw["frame_id"],
            frame_number=int(raw["frame_number"]),
            confidence=float(raw["confidence"]),
            label=raw["label"],
            v4_rank=None if raw.get("v4_rank") is None else int(raw["v4_rank"]),
        )

    def _restore_unit(raw):
        # Candidates are rebuilt first, before any unit-level key is read.
        restored_candidates = [
            _restore_candidate(item)
            for item in (raw.get("v4_candidates") or [])
        ]
        return _CompositionUnit(
            source_section_ids=list(raw["source_section_ids"]),
            merge_type=raw["merge_type"],
            frame_template_id=raw["frame_template_id"],
            frame_id=raw["frame_id"],
            frame_number=int(raw["frame_number"]),
            confidence=float(raw["confidence"]),
            label=raw["label"],
            phase_z_status=raw["phase_z_status"],
            raw_content=raw["raw_content"],
            title=raw["title"],
            v4_rank=raw.get("v4_rank"),
            selection_path=raw.get("selection_path") or "rank_1",
            fallback_reason=raw.get("fallback_reason"),
            score=float(raw.get("score") or 0.0),
            rationale=dict(raw.get("rationale") or {}),
            auto_selectable=bool(raw.get("auto_selectable", True)),
            filter_reasons=list(raw.get("filter_reasons") or []),
            notes=list(raw.get("notes") or []),
            v4_candidates=restored_candidates,
            provisional=bool(raw.get("provisional", False)),
        )

    return [_restore_unit(entry) for entry in snapshot["units"]["value"]]
REUSE_MARKER_SCHEMA_VERSION = 1


def _write_reuse_marker(
    new_run_dir: Path,
    *,
    prev_run_id: str,
    copied_artifacts: dict[str, str],
) -> Path:
    """Drop an audit-trail ``_reuse_marker.json`` into ``new_run_dir``.

    The marker records which ``prev_run_id`` the run was sourced from, a
    copy of the ``copied_artifacts`` map, the reused step boundary
    (``_REUSE_STEP_ARTIFACTS``) and ``resume_at_step=7``. It is an
    informational sidecar: per the original contract its absence does
    not break a reused run, it only lets operators trace the reuse
    source.

    Returns the path of the written marker file. Any ``OSError`` from
    the write propagates to the caller.
    """
    payload = {
        "schema_version": REUSE_MARKER_SCHEMA_VERSION,
        "reuse_from_prev_run_id": prev_run_id,
        "snapshot_filename": SNAPSHOT_FILENAME,
        # Copies, so later mutation by the caller cannot alter the payload.
        "copied_artifacts": dict(copied_artifacts),
        "boundary_steps": list(_REUSE_STEP_ARTIFACTS),
        "resume_at_step": 7,
        "note": (
            "IMP-43 (#72) u4 — this run was sourced from prev_run_id via "
            "--reuse-from. Steps 0/1/2/5/6 artifacts copied; Step 7+ "
            "re-executed in this run_dir."
        ),
    }
    marker_path = new_run_dir / REUSE_MARKER_FILENAME
    serialized = json.dumps(payload, ensure_ascii=False, indent=2)
    marker_path.write_text(serialized, encoding="utf-8")
    return marker_path
# IMP-43 (#72) u4b — fail-closed wrapper around the u4 helpers.
#
# Scope (u4b only — Stage 2 unit split):
# * Translate the u4 raises (FileNotFoundError, SnapshotValidationError,
# json.JSONDecodeError, OSError) into the CLI fail-closed contract:
# stderr message + ``sys.exit(2)``.
# * Add the prev_run_dir == new_run_dir accidental-write guard BEFORE
# any copy attempt — otherwise ``_copy_reuse_artifacts_from_prev_run``
# would overwrite prev_run_dir's own step files with itself and
# mutate the "read-only" reuse source.
# * Add the missing-prev-run-dir surface so the user gets a clean
# "run id not found" message instead of the raw FileNotFoundError
# stack from inside _copy_reuse_artifacts_from_prev_run.
# * Surface the mdx_sha256 mismatch as its OWN axis (distinct from
# generic snapshot validation failures) so the operator can tell
# "wrong --mdx-path for this prev_run_id" apart from "snapshot file
# is broken".
#
# Out of scope: signature threading into ``run_phase_z2_mvp1`` (u5),
# the actual call site dispatch into Step 7+ (u5).
#
# Diagnostic format (factual-verification guardrail):
# [error] --reuse-from fail-closed: <axis>
# value: <repr>
# path: <fs path / resource locator>
# upstream: <where the value originated>
# reason: <type>: <message> (only when exc != None)
#
# axis vocabulary (closed enum — tests pin this set):
# * prev_run_dir_missing
# * prev_run_dir_equals_new_run_dir
# * reuse_artifact_missing
# * reuse_copy_os_error # OSError != FileNotFoundError during copy
# # (PermissionError, IsADirectoryError,
# # OSError(errno.EXDEV), full-disk, etc.)
# * snapshot_missing_after_copy
# * snapshot_corrupt_json
# * snapshot_read_os_error # OSError != FileNotFoundError during
# # snapshot read (permission denied,
# # path-became-dir, lower-level IO)
# * mdx_sha256_mismatch
# * snapshot_validation_failed
# Closed vocabulary of axis names that ``_abort_reuse_from`` accepts; any
# other name makes it raise ``AssertionError`` instead of exiting.
REUSE_FAIL_CLOSED_AXES: frozenset[str] = frozenset(
    (
        # prev_run_dir preconditions
        "prev_run_dir_missing",
        "prev_run_dir_equals_new_run_dir",
        # artifact copy
        "reuse_artifact_missing",
        "reuse_copy_os_error",
        # snapshot load
        "snapshot_missing_after_copy",
        "snapshot_corrupt_json",
        "snapshot_read_os_error",
        # snapshot validation
        "mdx_sha256_mismatch",
        "snapshot_validation_failed",
    )
)
def _abort_reuse_from(
    *,
    axis: str,
    value: Any,
    path: str,
    upstream: str,
    exc: Optional[BaseException] = None,
) -> "NoReturn":
    """Emit a provenance-tagged diagnostic on stderr, then ``sys.exit(2)``.

    ``axis``, ``value``, ``path`` and ``upstream`` are all required so the
    operator can identify the failed precondition from the message alone.
    When ``exc`` is given, a ``reason`` line with the exception type and
    message is appended.

    Raises:
        AssertionError: ``axis`` is not a member of
            ``REUSE_FAIL_CLOSED_AXES`` (programming error, not a user
            error — nothing is printed in that case).
        SystemExit: always, with exit code 2, for a known axis.
    """
    if axis not in REUSE_FAIL_CLOSED_AXES:
        raise AssertionError(
            f"_abort_reuse_from: unknown axis {axis!r} "
            f"(expected one of {sorted(REUSE_FAIL_CLOSED_AXES)})"
        )

    report = (
        f"[error] --reuse-from fail-closed: {axis}",
        f" value: {value!r}",
        f" path: {path}",
        f" upstream: {upstream}",
    )
    if exc is not None:
        report += (f" reason: {type(exc).__name__}: {exc}",)

    print("\n".join(report), file=sys.stderr)
    sys.exit(2)
def _paths_equivalent(a: Path, b: Path) -> bool:
"""Return True when ``a`` and ``b`` resolve to the same filesystem
location, falling back to lexical equality when ``a`` doesn't
exist yet (Path.resolve(strict=False) still normalizes case + sep
on Windows + collapses ``..`` segments).
"""
try:
return a.resolve(strict=False) == b.resolve(strict=False)
except (OSError, RuntimeError):
return a == b
def execute_reuse_from_or_fail_closed(
    *,
    reuse_from: str,
    new_run_dir: Path,
    mdx_source_text: str,
) -> tuple[Path, dict[str, str], dict]:
    """Orchestrate the u4 helpers under the u4b fail-closed contract.

    Steps, in order:
      1. Resolve ``reuse_from`` to ``prev_run_dir`` and require it to exist.
      2. Refuse to run when ``prev_run_dir`` and ``new_run_dir`` are the
         same location (the copy would overwrite the reuse source).
      3. Copy the reusable artifacts into ``new_run_dir``.
      4. Load and validate the copied snapshot against ``mdx_source_text``.

    Returns:
        ``(prev_run_dir, copied_artifacts, snapshot)`` on success.

    Exits:
        Calls ``sys.exit(2)`` (via ``_abort_reuse_from``) on any axis in
        ``REUSE_FAIL_CLOSED_AXES`` and does NOT return in that case. The
        caller does not need its own try/except for the failures handled
        here.

    Note:
        A snapshot whose bytes are not valid UTF-8 makes
        ``Path.read_text(encoding="utf-8")`` raise ``UnicodeDecodeError``,
        which is neither an ``OSError`` nor a ``json.JSONDecodeError``.
        It is reported on the ``snapshot_corrupt_json`` axis so it cannot
        escape as a raw traceback.
    """
    from src.phase_z2_reuse_snapshot import SnapshotValidationError

    prev_run_dir = _resolve_reuse_from_prev_run_dir(reuse_from)

    # Guard 1: prev_run_dir must exist.
    if not prev_run_dir.exists():
        _abort_reuse_from(
            axis="prev_run_dir_missing",
            value=reuse_from,
            path=str(prev_run_dir),
            upstream="--reuse-from CLI argument",
        )

    # Guard 2: prev_run_dir must NOT be new_run_dir, otherwise the copy
    # step would overwrite the reuse source with itself. Both sides are
    # resolved so relative-vs-absolute or symlinked collisions still trip.
    if _paths_equivalent(prev_run_dir, new_run_dir):
        _abort_reuse_from(
            axis="prev_run_dir_equals_new_run_dir",
            value=reuse_from,
            path=str(prev_run_dir),
            upstream=(
                "_resolve_reuse_from_prev_run_dir(reuse_from) == new_run_dir "
                "(would overwrite prev_run_dir during copy)"
            ),
        )

    # Copy step 0/1/2/5/6 + snapshot from prev_run_dir -> new_run_dir.
    # FileNotFoundError is a subclass of OSError, so it MUST be handled
    # first to keep its dedicated axis.
    try:
        copied = _copy_reuse_artifacts_from_prev_run(prev_run_dir, new_run_dir)
    except FileNotFoundError as exc:
        _abort_reuse_from(
            axis="reuse_artifact_missing",
            value=str(exc),
            path=str(prev_run_dir),
            upstream=(
                "Step 0/1/2/5/6 deterministic artifacts + "
                f"{SNAPSHOT_FILENAME} under prev_run_dir/steps/"
            ),
            exc=exc,
        )
    except OSError as exc:
        # PermissionError, IsADirectoryError, cross-filesystem errors,
        # disk-full, etc. — anything OSError that is not "file missing".
        _abort_reuse_from(
            axis="reuse_copy_os_error",
            value=str(exc),
            path=str(prev_run_dir),
            upstream=(
                "_copy_reuse_artifacts_from_prev_run "
                "(OSError != FileNotFoundError; shutil.copyfile or "
                "Path.mkdir surface)"
            ),
            exc=exc,
        )

    snapshot_path = str(new_run_dir / SNAPSHOT_FILENAME)

    # Load + validate snapshot. Each failure mode gets its own axis so
    # operators can tell the cases apart. FileNotFoundError again precedes
    # the generic OSError handler.
    try:
        snapshot = _load_and_validate_reuse_snapshot(
            new_run_dir, mdx_source_text=mdx_source_text,
        )
    except FileNotFoundError as exc:
        # Not expected (the copy step would have failed first); kept so
        # the contract stays symmetric.
        _abort_reuse_from(
            axis="snapshot_missing_after_copy",
            value=str(exc),
            path=snapshot_path,
            upstream="_copy_reuse_artifacts_from_prev_run side effect",
            exc=exc,
        )
    except json.JSONDecodeError as exc:
        _abort_reuse_from(
            axis="snapshot_corrupt_json",
            value=str(exc),
            path=snapshot_path,
            upstream=f"json.loads({SNAPSHOT_FILENAME})",
            exc=exc,
        )
    except UnicodeDecodeError as exc:
        # Invalid UTF-8 bytes in the snapshot file: the content is corrupt
        # before JSON parsing even starts. Reuses the corrupt-snapshot
        # axis because the axis vocabulary is a closed set.
        _abort_reuse_from(
            axis="snapshot_corrupt_json",
            value=str(exc),
            path=snapshot_path,
            upstream=f"Path.read_text(encoding='utf-8') on {SNAPSHOT_FILENAME}",
            exc=exc,
        )
    except OSError as exc:
        # Permission denied on the copied snapshot, path is a directory,
        # lower-level IO error.
        _abort_reuse_from(
            axis="snapshot_read_os_error",
            value=str(exc),
            path=snapshot_path,
            upstream=(
                "_load_and_validate_reuse_snapshot "
                "(OSError != FileNotFoundError; Path.read_text surface)"
            ),
            exc=exc,
        )
    except SnapshotValidationError as exc:
        msg = str(exc)
        # The integrity mismatch is surfaced on its own axis so "wrong MDX
        # for this prev_run_id" is distinguishable from "snapshot broken".
        if "mdx_sha256 mismatch" in msg:
            _abort_reuse_from(
                axis="mdx_sha256_mismatch",
                value=msg,
                path=snapshot_path,
                upstream=(
                    "sha256(mdx_source_text) vs "
                    f"{SNAPSHOT_FILENAME}#/mdx_sha256"
                ),
                exc=exc,
            )
        else:
            _abort_reuse_from(
                axis="snapshot_validation_failed",
                value=msg,
                path=snapshot_path,
                upstream="src.phase_z2_reuse_snapshot.validate_snapshot",
                exc=exc,
            )

    return prev_run_dir, copied, snapshot
def _write_step_html(
run_dir: Path,
step_num: int,
@@ -4284,6 +4852,7 @@ def run_phase_z2_mvp1(
override_zone_geometries: Optional[dict[str, dict]] = None,
override_section_assignments: Optional[dict[str, list[str]]] = None,
override_image_overrides: Optional[dict[str, dict]] = None,
reuse_from: Optional[str] = None,
) -> Path:
"""MVP-1.5b entry — single slide + composition planner v0 + 8 preset vocabulary.
@@ -4306,6 +4875,22 @@ def run_phase_z2_mvp1(
backend contract (KNOWN_AXES u1 + Vite allowlist u2 + typed
client u3 + stamper u4) end-to-end addressable from CLI without
diverging the function signature.
Incremental rerun (IMP-43 #72, u5) :
reuse_from : Optional PREV_RUN_ID. When set, Steps 0/1/2/5/6 artifacts
are copied from ``RUNS_DIR / PREV_RUN_ID / phase_z2``
and the in-memory state (sections, units, layout_preset,
comp_debug, v4_fallback_traces, slide_title/footer,
stage0_*, v4_evidence, ai_preflight) is rehydrated
from ``_reuse_snapshot.json`` via the u4 helpers,
wrapped by u4b's fail-closed contract. Step 7+ then
re-executes against ``override_frames`` in this new
run_dir. ``None`` preserves the legacy single-pass
behaviour (Steps 0-6 derive state from scratch).
The post-merge u1 guard at the CLI surface rejects
any layout / zone_geometry / zone_section / image
override under ``--reuse-from`` so only frame
overrides reach this kwarg's reuse branch.
"""
mdx_path = Path(mdx_path)
if run_id is None:
@@ -4315,6 +4900,16 @@ def run_phase_z2_mvp1(
print(f"[Phase Z-2 MVP-1.5b] start — mdx={mdx_path.name}, run_id={run_id}")
# IMP-43 (#72) u5 — Steps 0/1/2/5/6 entry-point branch.
# ``reuse_from is None`` = normal pipeline (Steps 0-6 derive state).
# ``reuse_from is not None`` = restore Steps 0/1/2/5/6 state from
# prev_run snapshot via the u4 helpers wrapped by u4b's fail-closed
# contract, then fall through to the shared Step 7+ block below.
# The post-merge u1 guard has already rejected any layout /
# zone_geometry / zone_section / image override on the reuse path,
# so only --override-frame (handled at the Step 7-A axis below the
# branch) survives into this code path.
if reuse_from is None:
# ─── Step 0: 사전 준비 (precondition snapshot) ───
# IMP-92 u4 — boot-time AI fallback preflight (gated on
# settings.ai_fallback_enabled; default OFF = skipped, no API call).
@@ -4985,11 +5580,19 @@ def run_phase_z2_mvp1(
# for IMP-47B (#76) AI handoff. section_assignment_override skip
# honors IMP-06 (#6) zoneSections ground truth.
"imp48_resplit": _imp48_audit,
# IMP-43 (#72) u3 — additive informational field recording the
# run_dir-relative location of the ``--reuse-from`` sidecar
# (written immediately after this artifact). Path is stamped
# unconditionally so that a future ``--reuse-from`` consumer
# (u4) can locate the expected sidecar even when its write
# failed (u4 then fail-closes on missing/invalid sidecar via
# u2's ``validate_snapshot``).
"reuse_snapshot_path": SNAPSHOT_FILENAME,
},
step_status="done",
pipeline_path_connected=True,
inputs=["step02_normalized.json", "step05_v4_evidence.json"],
outputs=["step06_composition_plan.json"],
outputs=["step06_composition_plan.json", SNAPSHOT_FILENAME],
note=(
"composition v0 count-based — sections → candidates → score → greedy select. "
"Step 6-A (사용자 lock 2026-05-08): selected_units[i].v4_candidates 추가 "
@@ -5000,9 +5603,115 @@ def run_phase_z2_mvp1(
"guardrails: coverage equality / beneficial split (≥1 non-reject) / "
"layout cap (≤4 units). imp48_resplit audit additive. "
"logic 무변 — runtime 결과 동일. Step 9 application_plan input. "
"IMP-43 (#72) u3: _reuse_snapshot.json sidecar written next to "
"this artifact (run_dir level) for future --reuse-from (u4) "
"consumption. Optional sidecar — write failure warns + continues."
),
)
# IMP-43 (#72) u3 — write Step 6 reuse snapshot sidecar AFTER the
# step06 artifact. The sidecar captures the in-memory state that
# downstream steps need but that the canonical step02 / step05 /
# step06 artifacts do not preserve in a deserialize-ready form (e.g.
# ``CompositionUnit`` instances, raw ``comp_debug``, untruncated
# ``v4_fallback_traces``, pre-override ``layout_preset``). Helper
# warns + returns ``None`` on failure — does NOT abort the run.
# Restore wiring (``--reuse-from``) lands in u4.
_write_reuse_snapshot(
run_dir,
mdx_source_text=mdx_source_text,
slide_title=slide_title,
slide_footer=slide_footer,
sections=sections,
stage0_adapter_diagnostics=stage0_adapter_diagnostics,
stage0_normalized_assets=stage0_normalized_assets,
v4_evidence=v4_evidence_list,
layout_preset_pre_override=layout_preset,
units=units,
comp_debug=comp_debug,
v4_fallback_traces=v4_fallback_traces,
ai_preflight=ai_preflight,
)
else:
# IMP-43 (#72) u5 — reuse path: restore Steps 0/1/2/5/6 state
# from prev_run snapshot. u4b's execute_reuse_from_or_fail_closed
# handles all nine fail-closed axes (prev_run_dir_missing,
# snapshot_corrupt_json, mdx_sha256_mismatch, etc.) — on success
# it returns ``(prev_run_dir, copied_artifacts, snapshot)``;
# any reachable failure terminates the process before this branch
# binds a local.
#
# State variable shape matches the locals produced by Steps 0-6
# above so the Step 7+ block reads them transparently:
# ai_preflight : Step 0 preflight dict
# slide_title / slide_footer : parse_mdx output
# sections : list[MdxSection], post-align
# stage0_adapter_diagnostics : Stage 0 adapter trace dict
# stage0_normalized_assets : Step 3 handoff dict (popups/...)
# v4_evidence_list : list[dict] (Step 5 artifact)
# layout_preset : Step 6 post-IMP-48 preset
# units : list[CompositionUnit]
# comp_debug : Step 6 debug dict
# v4_fallback_traces : dict[sid -> trace dict]
#
# NOT serialized (deterministic from external sources or restored
# sections — recomputed here):
# v4 : load_v4_result() — V4_RESULT_PATH on disk
# section_alias_by_id : derived from restored sections
#
# u1 guard ensures override_layout is None on the reuse path, so
# layout_override_applied / auto_layout_preset reflect the
# restored Step 6 preset for the Step 7 artifact.
mdx_source_text = mdx_path.read_text(encoding="utf-8")
(run_dir / "steps").mkdir(exist_ok=True)
_prev_run_dir, _copied_artifacts, _snapshot = execute_reuse_from_or_fail_closed(
reuse_from=reuse_from,
new_run_dir=run_dir,
mdx_source_text=mdx_source_text,
)
ai_preflight = _snapshot["ai_preflight"]["value"]
slide_title = _snapshot["slide_title"]["value"]
slide_footer = _snapshot["slide_footer"]["value"]
sections = _rehydrate_mdx_sections_from_snapshot(_snapshot)
stage0_adapter_diagnostics = _snapshot["stage0_adapter_diagnostics"]["value"]
stage0_normalized_assets = _snapshot["stage0_normalized_assets"]["value"]
v4_evidence_list = _snapshot["v4_evidence"]["value"]
layout_preset = _snapshot["layout_preset_pre_override"]["value"]
units = _rehydrate_composition_units_from_snapshot(_snapshot)
comp_debug = _snapshot["comp_debug"]["value"]
v4_fallback_traces = _snapshot["v4_fallback_traces"]["value"]
v4 = load_v4_result()
section_alias_by_id = {
s.section_id: list(getattr(s, "v4_alias_keys", []) or [])
for s in sections
}
auto_layout_preset = layout_preset
layout_override_applied = False
# IMP-43 (#72) u4 fix — shared Step 7+ block reads
# ``section_assignment_plan`` unconditionally at the render_records
# gate below, and ``section_assignment_summary`` is mirrored into
# comp_debug via the normal-path override branch. Both stay at
# their "no override applied" defaults on the reuse path because
# u1's fail-closed guard already rejected --override-section-
# assignment when --reuse-from is set. Without these explicit
# defaults the reuse branch falls through to ``if
# section_assignment_plan is not None:`` (line ~5754) with an
# unbound local and the run aborts with UnboundLocalError before
# Step 7 can begin (see Codex #14 rewind report).
section_assignment_plan: Optional[list[dict]] = None
section_assignment_summary: Optional[dict] = None
_write_reuse_marker(
run_dir,
prev_run_id=reuse_from,
copied_artifacts=_copied_artifacts,
)
print(
f" reuse : sections={len(sections)} "
f"({[s.section_id for s in sections]}), "
f"units={len(units)}, layout={layout_preset}, "
f"prev_run_id={reuse_from}"
)
# 5. Per-unit: synthesize MdxSection → mapper → assets → zone data
# mapper FitError 는 catch — 자동 파이프라인은 다른 zone 계속 진행. abort X.
positions = LAYOUT_PRESETS[layout_preset]["positions"]
@@ -7211,6 +7920,28 @@ if __name__ == "__main__":
"settings.ai_fallback_auto_cache=True for this run."
),
)
# IMP-43 (#72) u1 — incremental rerun reuse pointer. Reuse target
# = Step 0/1/2/5/6 deterministic artifacts from a prior run; Step 7
# onward re-executes against the new frame overrides. Only frame
# overrides preserve the reusable subset (Stage 2 boundary lock);
# layout/geometry/section/image overrides invalidate it and are
# rejected by the post-merge guard below. Signature threading +
# snapshot copy/restore land in u5 and u4 respectively; this unit
# only adds the CLI surface + fail-closed precondition guard.
parser.add_argument(
"--reuse-from",
dest="reuse_from",
default=None,
metavar="PREV_RUN_ID",
help=(
"Reuse Step 0/1/2/5/6 artifacts from a previous run id "
"(directory under data/runs/<PREV_RUN_ID>/phase_z2) and resume "
"execution at Step 7. Only --override-frame is preserved; "
"--override-layout / --override-zone-geometry / "
"--override-section-assignment / --override-image invalidate "
"the reusable boundary and will be rejected."
),
)
args = parser.parse_args()
if args.auto_cache:
@@ -7436,6 +8167,37 @@ if __name__ == "__main__":
continue
overrides_images = _accepted_img
# IMP-43 (#72) u1 — fail-closed reuse_from precondition guard.
# Placed AFTER the user_overrides.json merge so persisted overrides
# are evaluated against the same reuse boundary as CLI overrides
# (Stage 2 lock: "fail-closed guard after user_overrides.json merge
# and before dispatch"). Reuse target = Step 0/1/2/5/6 deterministic
# artifacts; only frame overrides preserve that subset. layout /
# zone_geometry / zone_section / image overrides each invalidate at
# least one of Step 0/1/2/5/6 and must reject. Frame-only is allowed
# (no rejected axes → falls through to dispatch). Error stderr names
# every rejected axis so the user can either drop the rejected axes
# or rerun without --reuse-from.
if args.reuse_from is not None:
_rejected_axes: list[str] = []
if _final_override_layout is not None:
_rejected_axes.append("layout")
if overrides_geoms:
_rejected_axes.append("zone_geometry")
if overrides_section_assignments:
_rejected_axes.append("zone_section")
if overrides_images:
_rejected_axes.append("image")
if _rejected_axes:
print(
f"[error] --reuse-from incompatible with override axes: "
f"{', '.join(_rejected_axes)}. Only --override-frame is "
f"preserved across Step 0/1/2/5/6 reuse; drop the rejected "
f"overrides or rerun without --reuse-from.",
file=sys.stderr,
)
sys.exit(2)
run_phase_z2_mvp1(
args.mdx_path,
args.run_id,
@@ -7444,4 +8206,5 @@ if __name__ == "__main__":
override_zone_geometries=overrides_geoms or None,
override_section_assignments=overrides_section_assignments or None,
override_image_overrides=overrides_images or None,
reuse_from=args.reuse_from,
)

View File

@@ -0,0 +1,301 @@
"""IMP-43 (#72) u2 — Step 6 reuse snapshot schema (JSON-only).
Stage 2 plan (locked) — ``--reuse-from PREV_RUN_ID`` reuses the
Step 0 / 1 / 2 / 5 / 6 deterministic artifact subset plus the
in-memory state that downstream steps need but that the existing
``step02_normalized.json`` / ``step05_v4_evidence.json`` /
``step06_composition_plan.json`` artifacts do not capture in a
deserialize-ready form (e.g. ``CompositionUnit`` instances,
``comp_debug``, ``v4_fallback_traces`` raw map, pre-override
``layout_preset``). This module owns the schema for the additional
``_reuse_snapshot.json`` sidecar written next to ``step06_composition_plan.json``.
Scope (u2 only, Stage 2 unit split):
* Pure schema + serializers + validator. No file I/O.
* JSON-only — pickle is forbidden per Stage 2 guardrails.
* Provenance per top-level field: ``{value, source_path, upstream_step}``.
* ``mdx_sha256`` integrity key — ``--reuse-from`` must fail closed when
the prev run's MDX bytes don't match the current MDX bytes.
* ``schema_version`` — bumped on any non-additive shape change.
Out of scope (deferred to later units):
* Writing the snapshot into the run_dir (u3).
* Copy / restore on ``--reuse-from`` (u4).
* Fail-closed snapshot/path errors at restore time (u4b).
* Threading ``reuse_from`` through ``run_phase_z2_mvp1`` (u5).
"""
from __future__ import annotations
import json
from typing import Any, Optional
SNAPSHOT_VERSION = 1
SNAPSHOT_FILENAME = "_reuse_snapshot.json"
# Required top-level keys. Bare scalars (no provenance wrapper):
# - schema_version (contract key)
# - mdx_sha256 (integrity key)
# All other keys are wrapped {value, source_path, upstream_step}.
REQUIRED_TOP_LEVEL_KEYS: tuple[str, ...] = (
"schema_version",
"mdx_sha256",
"slide_title",
"slide_footer",
"sections",
"stage0_adapter_diagnostics",
"stage0_normalized_assets",
"v4_evidence",
"layout_preset_pre_override",
"units",
"comp_debug",
"v4_fallback_traces",
"ai_preflight",
)
_BARE_KEYS: frozenset[str] = frozenset({"schema_version", "mdx_sha256"})
def _wrap(value: Any, *, source_path: str, upstream_step: str) -> dict[str, Any]:
return {
"value": value,
"source_path": source_path,
"upstream_step": upstream_step,
}
def serialize_section(section: Any) -> dict[str, Any]:
    """Convert an ``MdxSection``-like object to a plain dict.

    The function is duck-typed: any object with the same attribute names
    works. ``section_id``, ``section_num``, ``title`` and ``raw_content``
    are mandatory (``AttributeError`` if absent); ``heading_number``
    defaults to ``None`` and the two list fields default to ``[]``. List
    fields are always emitted as fresh ``list`` objects, with a falsy
    value (``None``, empty) normalized to ``[]``.
    """
    serialized: dict[str, Any] = {
        name: getattr(section, name)
        for name in ("section_id", "section_num", "title", "raw_content")
    }
    serialized["heading_number"] = getattr(section, "heading_number", None)
    for list_field in ("v4_alias_keys", "sub_sections"):
        serialized[list_field] = list(getattr(section, list_field, []) or [])
    return serialized
def serialize_unit(unit: Any) -> dict[str, Any]:
    """Convert a ``CompositionUnit``-like object to a JSON-safe dict.

    Every ``v4_candidates`` entry is flattened to six named attributes
    (``template_id``, ``frame_id``, ``frame_number``, ``confidence``,
    ``label``, ``v4_rank``), so the snapshot does not depend on the
    ``V4Match`` dataclass layout. The per-candidate ``v4_rank`` is read
    with a ``None`` default; persisting it lets the rehydrated candidates
    expose the same rank as a full rerun, which the original author notes
    is needed for the Step 9 payload / Step 13 equivalence comparison.

    Numeric fields ``confidence`` and ``score`` are coerced to ``float``,
    flag fields to ``bool``, and container fields are copied into fresh
    ``list`` / ``dict`` objects (``None`` becomes empty).
    """

    def _flatten_candidate(candidate: Any) -> dict[str, Any]:
        return {
            "template_id": candidate.template_id,
            "frame_id": candidate.frame_id,
            "frame_number": candidate.frame_number,
            "confidence": float(candidate.confidence),
            "label": candidate.label,
            "v4_rank": getattr(candidate, "v4_rank", None),
        }

    return {
        # identity / frame selection
        "source_section_ids": list(unit.source_section_ids),
        "merge_type": unit.merge_type,
        "frame_template_id": unit.frame_template_id,
        "frame_id": unit.frame_id,
        "frame_number": unit.frame_number,
        "confidence": float(unit.confidence),
        "label": unit.label,
        "phase_z_status": unit.phase_z_status,
        # content
        "raw_content": unit.raw_content,
        "title": unit.title,
        # selection bookkeeping
        "v4_rank": unit.v4_rank,
        "selection_path": unit.selection_path,
        "fallback_reason": unit.fallback_reason,
        "score": float(unit.score),
        "rationale": dict(unit.rationale or {}),
        "auto_selectable": bool(unit.auto_selectable),
        "filter_reasons": list(unit.filter_reasons or []),
        "notes": list(unit.notes or []),
        "v4_candidates": list(map(_flatten_candidate, unit.v4_candidates or [])),
        "provisional": bool(getattr(unit, "provisional", False)),
    }
def build_snapshot(
    *,
    mdx_sha256: str,
    slide_title: Optional[str],
    slide_footer: Optional[str],
    sections: list,
    stage0_adapter_diagnostics: Optional[dict],
    stage0_normalized_assets: Optional[dict],
    v4_evidence: list,
    layout_preset_pre_override: Optional[str],
    units: list,
    comp_debug: Optional[dict],
    v4_fallback_traces: Optional[dict],
    ai_preflight: Optional[dict],
) -> dict[str, Any]:
    """Assemble the Step 6 reuse snapshot as a JSON-serializable dict.

    ``schema_version`` and ``mdx_sha256`` are stored as bare values.
    Every other top-level key is stored as a provenance wrapper
    ``{value, source_path, upstream_step}`` produced by ``_wrap``.
    ``None`` dict/list inputs are normalized to empty containers, and
    dict/list inputs are shallow-copied.

    Before returning, the snapshot is passed through ``json.dumps`` purely
    as a build-time check: a value the ``json`` module cannot encode
    (set, ``Path``, dataclass instance, ...) raises ``TypeError`` here
    rather than later at restore time. The encoded string is discarded.
    """
    snapshot: dict[str, Any] = {
        "schema_version": SNAPSHOT_VERSION,
        "mdx_sha256": mdx_sha256,
    }

    # (key, value, source_path, upstream_step) — row order is the
    # insertion order of the resulting snapshot keys.
    wrapped_rows = (
        (
            "slide_title",
            slide_title,
            "steps/step02_normalized.json#/slide_title",
            "step02",
        ),
        (
            "slide_footer",
            slide_footer,
            "steps/step02_normalized.json#/slide_footer",
            "step02",
        ),
        (
            "sections",
            [serialize_section(s) for s in sections],
            "steps/step02_normalized.json#/sections",
            "step02",
        ),
        (
            "stage0_adapter_diagnostics",
            dict(stage0_adapter_diagnostics or {}),
            "steps/step02_normalized.json#/stage0_adapter_diagnostics",
            "step02",
        ),
        (
            "stage0_normalized_assets",
            dict(stage0_normalized_assets or {}),
            "steps/step02_normalized.json#/stage0_normalized_assets",
            "step02",
        ),
        (
            "v4_evidence",
            list(v4_evidence or []),
            "steps/step05_v4_evidence.json#/evidence_per_section",
            "step05",
        ),
        (
            "layout_preset_pre_override",
            layout_preset_pre_override,
            "steps/step06_composition_plan.json#/layout_preset_decided",
            "step06",
        ),
        (
            "units",
            [serialize_unit(u) for u in units],
            "steps/step06_composition_plan.json#/selected_units",
            "step06",
        ),
        (
            "comp_debug",
            dict(comp_debug or {}),
            "steps/step06_composition_plan.json#/*",
            "step06",
        ),
        (
            "v4_fallback_traces",
            dict(v4_fallback_traces or {}),
            # Per the original author's note, this map is assembled inside
            # run_phase_z2_mvp1 and only partially surfaces in
            # step06_composition_plan.json, so the provenance points at
            # the in-memory source rather than a step artifact.
            "phase_z2_pipeline.run_phase_z2_mvp1::v4_fallback_traces",
            "step06",
        ),
        (
            "ai_preflight",
            dict(ai_preflight or {}),
            "steps/step00_preconditions.json#/ai_preflight",
            "step00",
        ),
    )
    for key, value, source_path, upstream_step in wrapped_rows:
        snapshot[key] = _wrap(
            value, source_path=source_path, upstream_step=upstream_step,
        )

    # JSON-safety gate; result intentionally unused.
    json.dumps(snapshot)
    return snapshot
class SnapshotValidationError(ValueError):
    """Signals that a reuse snapshot cannot be used.

    ``validate_snapshot`` raises it both for structural problems and for
    an ``mdx_sha256`` integrity mismatch. It derives from ``ValueError``
    so callers that already handle ``ValueError`` keep catching it, while
    stricter callers can catch this type specifically.
    """
def validate_snapshot(
    snapshot: Any,
    *,
    expected_mdx_sha256: str,
) -> None:
    """Validate a loaded snapshot dict (fail-closed).

    Checks run in this order and the first failure raises:

    1. ``snapshot`` is a dict.
    2. ``schema_version`` is a real ``int`` equal to ``SNAPSHOT_VERSION``.
       ``bool`` and ``float`` values are rejected explicitly: in Python
       ``True == 1`` and ``1.0 == 1``, so a plain ``!=`` comparison would
       let a malformed ``true`` / ``1.0`` version through.
    3. ``mdx_sha256`` is a non-empty string equal to
       ``expected_mdx_sha256``.
    4. Every key in ``REQUIRED_TOP_LEVEL_KEYS`` is present.
    5. Every key outside ``_BARE_KEYS`` (including keys not in the
       required list) is a dict exposing ``value``, ``source_path`` and
       ``upstream_step``.

    Args:
        snapshot: Object returned by ``json.loads`` on the snapshot file.
        expected_mdx_sha256: Hex digest of the current MDX source.

    Returns:
        ``None`` on success.

    Raises:
        SnapshotValidationError: on any failed check. The integrity
            failure message starts with ``"mdx_sha256 mismatch"``.
    """
    if not isinstance(snapshot, dict):
        raise SnapshotValidationError(
            f"snapshot is not a dict (got {type(snapshot).__name__})"
        )

    version = snapshot.get("schema_version")
    version_is_int = isinstance(version, int) and not isinstance(version, bool)
    if not version_is_int or version != SNAPSHOT_VERSION:
        raise SnapshotValidationError(
            f"schema_version mismatch: expected {SNAPSHOT_VERSION!r}, got {version!r}"
        )

    actual_sha = snapshot.get("mdx_sha256")
    if not isinstance(actual_sha, str) or not actual_sha:
        raise SnapshotValidationError(
            f"mdx_sha256 missing or non-string: got {actual_sha!r}"
        )
    if actual_sha != expected_mdx_sha256:
        raise SnapshotValidationError(
            f"mdx_sha256 mismatch: snapshot={actual_sha!r} "
            f"expected={expected_mdx_sha256!r}"
        )

    missing = [k for k in REQUIRED_TOP_LEVEL_KEYS if k not in snapshot]
    if missing:
        raise SnapshotValidationError(
            f"missing required keys: {missing!r}"
        )

    for key, entry in snapshot.items():
        if key in _BARE_KEYS:
            # Bare scalars were fully checked above.
            continue
        if not isinstance(entry, dict):
            raise SnapshotValidationError(
                f"key {key!r}: expected wrapper dict, got {type(entry).__name__}"
            )
        for field_name in ("value", "source_path", "upstream_step"):
            if field_name not in entry:
                raise SnapshotValidationError(
                    f"key {key!r}: wrapper missing {field_name!r}"
                )

View File

@@ -0,0 +1,383 @@
"""IMP-43 (#72) u1 + u5 — focused tests for the ``--reuse-from`` CLI surface.
u1 scope (per the Stage 2 Exit Report):
- argparse flag ``--reuse-from PREV_RUN_ID`` parses without error.
- Fail-closed precondition guard runs AFTER the ``user_overrides.json``
merge and BEFORE dispatch. With ``--reuse-from`` set, the guard
must:
* accept frame-only overrides (or no overrides at all);
* reject layout / zone-geometry / zone-section / image overrides
with ``sys.exit(2)`` whose stderr names every rejected axis.
u5 scope (added 2026-05-24):
- ``reuse_from`` is keyword-only on ``run_phase_z2_mvp1`` and defaults
to ``None`` so the absent-flag path preserves pre-u5 behaviour.
- The CLI dispatch forwards ``args.reuse_from`` verbatim — both
``None`` (flag absent) and ``"PREV_RUN_ID"`` (flag present) reach
the kwarg unchanged.
- The fake ``run_phase_z2_mvp1`` stub below mirrors the production
signature so the forwarding lock would fail loudly on any
forwarding regression.
The harness mirrors ``tests/test_phase_z2_cli_overrides.py`` — the
``if __name__ == "__main__"`` block of ``src.phase_z2_pipeline`` is
exec'd inside the module's namespace after monkeypatching
``run_phase_z2_mvp1`` with a recording stub. The persistence fallback
is silenced by redirecting ``src.user_overrides_io.DEFAULT_OVERRIDES_ROOT``
to a clean tmp directory so persisted state from prior runs cannot bleed
into the parser-only assertions here.
"""
from __future__ import annotations
import ast
import sys
from pathlib import Path
from typing import Any
import pytest
import src.phase_z2_pipeline as _pz2
import src.user_overrides_io as _io
# -- harness ---------------------------------------------------------------
def _exec_main_block(
    captured: dict[str, Any], argv: list[str], monkeypatch
) -> None:
    """Execute the ``__main__`` body of phase_z2_pipeline.py under a stub.

    ``run_phase_z2_mvp1`` is replaced by a recording fake whose keyword
    arguments are written into ``captured``. The fake also stores
    ``called=True`` so a guard-driven early exit (no call recorded) can be
    told apart from a successful parse + dispatch.

    Args:
        captured: Dict that receives the positional and keyword arguments
            of the stubbed call.
        argv: Value installed as ``sys.argv`` for the duration of the test.
        monkeypatch: pytest ``monkeypatch`` fixture.

    Raises:
        AssertionError: If the pipeline source has no top-level ``if``
            statement whose test compares the name ``__name__``.
    """

    def _fake_run(
        mdx_path,
        run_id,
        *,
        override_layout=None,
        override_frames=None,
        override_zone_geometries=None,
        override_section_assignments=None,
        override_image_overrides=None,
        reuse_from=None,
    ):
        captured.update(
            called=True,
            mdx_path=mdx_path,
            run_id=run_id,
            override_layout=override_layout,
            override_frames=override_frames,
            override_zone_geometries=override_zone_geometries,
            override_section_assignments=override_section_assignments,
            override_image_overrides=override_image_overrides,
            reuse_from=reuse_from,
        )

    def _is_main_guard(node) -> bool:
        # Only the left operand is inspected, matching `if __name__ == ...`.
        if not isinstance(node, ast.If):
            return False
        condition = node.test
        return (
            isinstance(condition, ast.Compare)
            and isinstance(condition.left, ast.Name)
            and condition.left.id == "__name__"
        )

    monkeypatch.setattr(_pz2, "run_phase_z2_mvp1", _fake_run)
    monkeypatch.setattr(sys, "argv", argv)

    pipeline_file = Path(_pz2.__file__)
    module_ast = ast.parse(pipeline_file.read_text(encoding="utf-8"))
    guard = next(
        (node for node in module_ast.body if _is_main_guard(node)), None
    )
    if guard is None:
        raise AssertionError("no `if __name__ == '__main__'` block found")

    # Run the guard's body inside the pipeline module's own namespace so it
    # resolves the monkeypatched ``run_phase_z2_mvp1``.
    main_body = ast.Module(body=guard.body, type_ignores=[])
    exec(compile(main_body, str(pipeline_file), "exec"), _pz2.__dict__)
def _redirect_overrides_root(tmp_path: Path, monkeypatch) -> None:
    """Isolate the persistence fallback so file state never leaks in.

    Points ``src.user_overrides_io.DEFAULT_OVERRIDES_ROOT`` at ``tmp_path``
    through ``monkeypatch``, which undoes the change when the test ends.
    """
    monkeypatch.setattr(_io, "DEFAULT_OVERRIDES_ROOT", tmp_path)
# -- success paths --------------------------------------------------------
def test_reuse_from_alone_parses_and_dispatches(tmp_path, monkeypatch):
    """A bare ``--reuse-from`` (no override flags) parses and dispatches.

    u5 (2026-05-24): the value given on the command line must arrive
    unchanged in the ``reuse_from`` keyword argument.
    """
    prev_run_id = "03__DX_20260508025134"
    cli_argv = [
        "src.phase_z2_pipeline",
        "03.mdx",
        "--reuse-from",
        prev_run_id,
    ]
    recorded: dict[str, Any] = {}

    _redirect_overrides_root(tmp_path, monkeypatch)
    _exec_main_block(recorded, cli_argv, monkeypatch)

    assert recorded.get("called") is True
    # u5 — verbatim threading.
    assert recorded["reuse_from"] == prev_run_id
def test_reuse_from_with_frame_override_dispatches(tmp_path, monkeypatch):
    """``--reuse-from`` combined with ``--override-frame`` reaches dispatch.

    Frame overrides are the one axis preserved across Step 0/1/2/5/6 reuse.
    u5: both ``reuse_from`` and ``override_frames`` must be forwarded in the
    same call.
    """
    prev_run_id = "03__DX_20260508025134"
    cli_argv = [
        "src.phase_z2_pipeline",
        "03.mdx",
        "--reuse-from",
        prev_run_id,
        "--override-frame",
        "03-1=frame_foo",
    ]
    recorded: dict[str, Any] = {}

    _redirect_overrides_root(tmp_path, monkeypatch)
    _exec_main_block(recorded, cli_argv, monkeypatch)

    assert recorded.get("called") is True
    assert recorded["override_frames"] == {"03-1": "frame_foo"}
    # u5 — frame override + reuse_from reach the kwarg simultaneously.
    assert recorded["reuse_from"] == prev_run_id
# -- u5 — flag-absent default + signature surface ------------------------
def test_no_reuse_from_threads_none_kwarg(tmp_path, monkeypatch):
    """u5 — without ``--reuse-from`` the stub must receive ``None``.

    The kwarg has to be present and exactly ``None`` (not omitted, not
    ``""``); this locks the "default None preserves current behavior"
    requirement from the Stage 2 plan §u5.
    """
    recorded: dict[str, Any] = {}
    cli_argv = ["src.phase_z2_pipeline", "03.mdx"]

    _redirect_overrides_root(tmp_path, monkeypatch)
    _exec_main_block(recorded, cli_argv, monkeypatch)

    assert recorded.get("called") is True
    assert recorded["reuse_from"] is None
def test_run_phase_z2_mvp1_signature_includes_reuse_from():
    """Lock the production signature against the CLI dispatch contract.

    ``reuse_from`` has to be keyword-only with default ``None``. The same
    invariant is asserted in the entry tests; it is repeated here so this
    CLI-surface file fails loudly if the production signature drifts.
    """
    import inspect

    parameters = inspect.signature(_pz2.run_phase_z2_mvp1).parameters
    assert "reuse_from" in parameters, list(parameters)

    reuse_param = parameters["reuse_from"]
    assert reuse_param.kind is inspect.Parameter.KEYWORD_ONLY, reuse_param.kind
    assert reuse_param.default is None, reuse_param.default
# -- fail-closed (single-axis rejection) ----------------------------------
def test_reuse_from_with_layout_override_exits(tmp_path, monkeypatch, capsys):
    """``--reuse-from`` + ``--override-layout`` exits 2 and names ``layout``."""
    cli_argv = [
        "src.phase_z2_pipeline",
        "03.mdx",
        "--reuse-from",
        "03__DX_20260508025134",
        "--override-layout",
        "horizontal-2",
    ]
    recorded: dict[str, Any] = {}
    _redirect_overrides_root(tmp_path, monkeypatch)

    with pytest.raises(SystemExit) as exit_info:
        _exec_main_block(recorded, cli_argv, monkeypatch)

    assert exit_info.value.code == 2
    stderr_text = capsys.readouterr().err
    assert "--reuse-from incompatible with override axes" in stderr_text
    assert "layout" in stderr_text
    # The guard must fire before dispatch, so the stub is never invoked.
    assert recorded.get("called") is not True
def test_reuse_from_with_zone_geometry_override_exits(
    tmp_path, monkeypatch, capsys
):
    """``--reuse-from`` + ``--override-zone-geometry`` exits 2 and names
    ``zone_geometry``."""
    cli_argv = [
        "src.phase_z2_pipeline",
        "03.mdx",
        "--reuse-from",
        "03__DX_20260508025134",
        "--override-zone-geometry",
        "top=0,0,1,0.3",
    ]
    recorded: dict[str, Any] = {}
    _redirect_overrides_root(tmp_path, monkeypatch)

    with pytest.raises(SystemExit) as exit_info:
        _exec_main_block(recorded, cli_argv, monkeypatch)

    assert exit_info.value.code == 2
    stderr_text = capsys.readouterr().err
    assert "--reuse-from incompatible with override axes" in stderr_text
    assert "zone_geometry" in stderr_text
    # The guard must fire before dispatch, so the stub is never invoked.
    assert recorded.get("called") is not True
def test_reuse_from_with_zone_section_override_exits(
    tmp_path, monkeypatch, capsys
):
    """``--reuse-from`` + ``--override-section-assignment`` exits 2 and names
    ``zone_section``."""
    cli_argv = [
        "src.phase_z2_pipeline",
        "03.mdx",
        "--reuse-from",
        "03__DX_20260508025134",
        "--override-section-assignment",
        "top=03-1",
    ]
    recorded: dict[str, Any] = {}
    _redirect_overrides_root(tmp_path, monkeypatch)

    with pytest.raises(SystemExit) as exit_info:
        _exec_main_block(recorded, cli_argv, monkeypatch)

    assert exit_info.value.code == 2
    stderr_text = capsys.readouterr().err
    assert "--reuse-from incompatible with override axes" in stderr_text
    assert "zone_section" in stderr_text
    # The guard must fire before dispatch, so the stub is never invoked.
    assert recorded.get("called") is not True
def test_reuse_from_with_image_override_exits(tmp_path, monkeypatch, capsys):
    """``--reuse-from`` + ``--override-image`` exits 2 and names ``image``."""
    cli_argv = [
        "src.phase_z2_pipeline",
        "03.mdx",
        "--reuse-from",
        "03__DX_20260508025134",
        "--override-image",
        "img-abc=10,15,30,25",
    ]
    recorded: dict[str, Any] = {}
    _redirect_overrides_root(tmp_path, monkeypatch)

    with pytest.raises(SystemExit) as exit_info:
        _exec_main_block(recorded, cli_argv, monkeypatch)

    assert exit_info.value.code == 2
    stderr_text = capsys.readouterr().err
    assert "--reuse-from incompatible with override axes" in stderr_text
    assert "image" in stderr_text
    # The guard must fire before dispatch, so the stub is never invoked.
    assert recorded.get("called") is not True
# -- fail-closed (multi-axis aggregation) ---------------------------------
def test_reuse_from_with_multiple_rejected_axes_lists_all(
    tmp_path, monkeypatch, capsys
):
    """Stderr must enumerate every rejected axis (not stop at first)."""
    cli_argv = [
        "src.phase_z2_pipeline",
        "03.mdx",
        "--reuse-from",
        "03__DX_20260508025134",
        "--override-layout",
        "horizontal-2",
        "--override-zone-geometry",
        "top=0,0,1,0.3",
        "--override-image",
        "img-abc=10,15,30,25",
    ]
    recorded: dict[str, Any] = {}
    _redirect_overrides_root(tmp_path, monkeypatch)

    with pytest.raises(SystemExit) as exit_info:
        _exec_main_block(recorded, cli_argv, monkeypatch)

    assert exit_info.value.code == 2
    stderr_text = capsys.readouterr().err
    # One assertion per axis supplied on the command line above.
    assert "layout" in stderr_text
    assert "zone_geometry" in stderr_text
    assert "image" in stderr_text
    assert recorded.get("called") is not True
# -- guard inactive when --reuse-from absent ------------------------------
def test_no_reuse_from_layout_override_still_dispatches(
    tmp_path, monkeypatch
):
    """The guard stays silent when ``--reuse-from`` is absent, so a layout
    override is forwarded to dispatch exactly as before."""
    cli_argv = [
        "src.phase_z2_pipeline",
        "03.mdx",
        "--override-layout",
        "horizontal-2",
    ]
    recorded: dict[str, Any] = {}

    _redirect_overrides_root(tmp_path, monkeypatch)
    _exec_main_block(recorded, cli_argv, monkeypatch)

    assert recorded.get("called") is True
    assert recorded["override_layout"] == "horizontal-2"
# -- fail-closed honours persisted overrides ------------------------------
def test_reuse_from_with_persisted_layout_override_exits(
    tmp_path, monkeypatch, capsys
):
    """A layout persisted on disk (not given on the CLI) is still rejected.

    The guard runs AFTER the user_overrides.json merge, so the persisted
    axis must trip it when ``--reuse-from`` is set. This locks the Stage 2
    placement rule.
    """
    _redirect_overrides_root(tmp_path, monkeypatch)

    # Persist a layout override keyed by the MDX stem ``03``.
    persisted_root = tmp_path
    persisted_root.mkdir(parents=True, exist_ok=True)
    persisted_file = persisted_root / "03.json"
    persisted_file.write_text('{"layout": "vertical-2"}', encoding="utf-8")

    cli_argv = [
        "src.phase_z2_pipeline",
        "03.mdx",
        "--reuse-from",
        "03__DX_20260508025134",
    ]
    recorded: dict[str, Any] = {}
    with pytest.raises(SystemExit) as exit_info:
        _exec_main_block(recorded, cli_argv, monkeypatch)

    assert exit_info.value.code == 2
    stderr_text = capsys.readouterr().err
    assert "--reuse-from incompatible with override axes" in stderr_text
    assert "layout" in stderr_text
    assert recorded.get("called") is not True

View File

@@ -0,0 +1,555 @@
"""IMP-43 (#72) u4 — focused tests for the --reuse-from entry helpers.
u4 scope (per the Stage 2 Exit Report):
- Pure path resolution, file copy, snapshot load+validate, MdxSection +
CompositionUnit rehydration, and reuse-marker writing.
- Helpers RAISE on missing artifacts / corrupt snapshot / mdx_sha256
mismatch — u4b adds the stderr + sys.exit(2) translation and the
prev_run_dir == new_run_dir accidental-write guard around them.
- The kwarg threading + the in-``run_phase_z2_mvp1`` branch that
invokes these helpers land in u5.
Tested helpers (``src/phase_z2_pipeline.py``):
* ``_resolve_reuse_from_prev_run_dir``
* ``_copy_reuse_artifacts_from_prev_run``
* ``_load_and_validate_reuse_snapshot``
* ``_rehydrate_mdx_sections_from_snapshot``
* ``_rehydrate_composition_units_from_snapshot``
* ``_write_reuse_marker``
* ``_RehydratedV4Candidate`` (V4Match-shape duck type)
* ``_REUSE_STEP_ARTIFACTS`` / ``REUSE_MARKER_FILENAME`` /
``REUSE_MARKER_SCHEMA_VERSION``
"""
from __future__ import annotations
import hashlib
import json
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Optional
import pytest
import src.phase_z2_pipeline as _pz2
from src.phase_z2_composition import CompositionUnit
from src.phase_z2_reuse_snapshot import (
SNAPSHOT_FILENAME,
SNAPSHOT_VERSION,
SnapshotValidationError,
build_snapshot,
)
# -- synthetic duck-typed inputs (mirror u3 test fixture) -----------------
@dataclass
class _Section:
    """Synthetic duck-typed section passed to ``build_snapshot`` as an
    element of ``sections``."""

    # Identifier such as "03-1"; the fixture unit refers to it through
    # ``source_section_ids``.
    section_id: str
    section_num: int
    title: str
    raw_content: str
    heading_number: Optional[str] = None
    # ``default_factory`` gives every instance its own list.
    v4_alias_keys: list = field(default_factory=list)
    sub_sections: list = field(default_factory=list)
@dataclass
class _V4Candidate:
    """Synthetic duck-typed V4 candidate placed in ``_Unit.v4_candidates``."""

    template_id: str
    frame_id: str
    frame_number: int
    confidence: float
    label: str
@dataclass
class _Unit:
    """Synthetic duck-typed composition unit passed to ``build_snapshot``
    as an element of ``units``."""

    # Required fields: the fixture always supplies these explicitly.
    source_section_ids: list
    merge_type: str
    frame_template_id: str
    frame_id: str
    frame_number: int
    confidence: float
    label: str
    phase_z_status: str
    raw_content: str
    title: str
    score: float
    # Optional fields with defaults.
    v4_rank: Optional[int] = 1
    selection_path: str = "rank_1"
    fallback_reason: Optional[str] = None
    # ``default_factory`` gives every instance its own container.
    rationale: dict = field(default_factory=dict)
    auto_selectable: bool = True
    filter_reasons: list = field(default_factory=list)
    notes: list = field(default_factory=list)
    v4_candidates: list = field(default_factory=list)
    provisional: bool = False
def _mdx_text() -> str:
return "# Slide\n\n## 03-1 DX status\n\n- bullet one\n- bullet two\n"
def _build_canonical_snapshot(
    *,
    mdx_source_text: Optional[str] = None,
    layout_preset: str = "single",
) -> dict:
    """Build a one-section, one-unit snapshot through ``build_snapshot``.

    Args:
        mdx_source_text: Text whose UTF-8 SHA-256 becomes ``mdx_sha256``;
            falls back to ``_mdx_text()`` when ``None``.
        layout_preset: Value passed as ``layout_preset_pre_override``.

    Returns:
        Whatever ``build_snapshot`` returns for the fixture inputs.
    """
    if mdx_source_text is None:
        mdx_source_text = _mdx_text()
    digest = hashlib.sha256(mdx_source_text.encode("utf-8")).hexdigest()

    fixture_candidate = _V4Candidate(
        template_id="tpl_a",
        frame_id="fid_a",
        frame_number=13,
        confidence=0.91,
        label="use_as_is",
    )
    fixture_section = _Section(
        section_id="03-1",
        section_num=1,
        title="DX status",
        raw_content="- bullet one\n- bullet two",
        heading_number="3.1",
        v4_alias_keys=["03-1.1"],
        sub_sections=[],
    )
    fixture_unit = _Unit(
        source_section_ids=["03-1"],
        merge_type="single",
        frame_template_id="tpl_a",
        frame_id="fid_a",
        frame_number=13,
        confidence=0.91,
        label="use_as_is",
        phase_z_status="auto_renderable",
        raw_content="- bullet one\n- bullet two",
        title="DX status",
        score=0.91,
        v4_candidates=[fixture_candidate],
        provisional=False,
        auto_selectable=True,
        filter_reasons=[],
        notes=["a note"],
        rationale={"weight": 1.0},
    )
    # Plain-dict evidence row carrying the same candidate values as above.
    evidence_row = {
        "section_id": "03-1",
        "v4_candidates": [
            {
                "template_id": "tpl_a",
                "frame_id": "fid_a",
                "frame_number": 13,
                "confidence": 0.91,
                "label": "use_as_is",
            }
        ],
        "candidate_status": "ok",
    }

    return build_snapshot(
        mdx_sha256=digest,
        slide_title="Slide",
        slide_footer=None,
        sections=[fixture_section],
        stage0_adapter_diagnostics={"used": True, "fallback_reason": None},
        stage0_normalized_assets={"popups": [], "images": [], "tables": []},
        v4_evidence=[evidence_row],
        layout_preset_pre_override=layout_preset,
        units=[fixture_unit],
        comp_debug={"v4_fallback_summary": {"fallback_used_count": 0}},
        v4_fallback_traces={"03-1": {"selection_path": "rank_1"}},
        ai_preflight={"enabled": False, "skipped": True},
    )
def _seed_prev_run_dir(prev_run_dir: Path, *, snapshot: dict) -> None:
    """Create a minimal previous-run directory for the u4 helpers.

    Writes one placeholder file per ``_pz2._REUSE_STEP_ARTIFACTS`` entry
    under ``prev_run_dir / "steps"`` and the JSON-serialised ``snapshot`` at
    ``prev_run_dir / SNAPSHOT_FILENAME``.
    """
    steps_dir = prev_run_dir / "steps"
    steps_dir.mkdir(parents=True, exist_ok=True)

    for fname in _pz2._REUSE_STEP_ARTIFACTS:
        # The copy helper does not introspect contents; the files only need
        # to exist. JSON names get a tiny JSON body, anything else raw text.
        if fname.endswith(".json"):
            placeholder = f'{{"name": "{fname}"}}'
        else:
            placeholder = "raw mdx body bytes"
        (steps_dir / fname).write_text(placeholder, encoding="utf-8")

    serialized = json.dumps(snapshot, ensure_ascii=False, indent=2)
    (prev_run_dir / SNAPSHOT_FILENAME).write_text(serialized, encoding="utf-8")
# -- _REUSE_STEP_ARTIFACTS constant ---------------------------------------
def test_reuse_step_artifacts_locks_stage2_boundary():
    """Stage 2 boundary lock — Step 0/1/2/5/6 artifacts only.

    Step 3/4 are deliberately absent: step03 / step04 ARE written after
    Step 6 (around src/phase_z2_pipeline.py:5931 / 5964) before the Step 7
    artifact (~6294), but both are emitted with step_status='trace-only' /
    pipeline_path_connected=False — they are diagnostic projections of the
    Step 6 debug_zones, not pipeline-path-connected inputs that Step 7+
    rehydrate from.
    """
    expected_artifacts = (
        "step00_preconditions.json",
        "step01_mdx_upload.json",
        "step01_mdx_source.md",
        "step02_normalized.json",
        "step05_v4_evidence.json",
        "step06_composition_plan.json",
    )
    assert _pz2._REUSE_STEP_ARTIFACTS == expected_artifacts
def test_reuse_marker_filename_is_dotfile_at_run_dir_root():
    """Lock the value of ``REUSE_MARKER_FILENAME``.

    NOTE(review): the test name says "dotfile", but the asserted name starts
    with an underscore rather than a dot — confirm the intended wording.
    """
    assert _pz2.REUSE_MARKER_FILENAME == "_reuse_marker.json"
# -- _resolve_reuse_from_prev_run_dir -------------------------------------
def test_resolve_prev_run_dir_returns_runs_dir_phase_z2_path():
    """The resolved path is ``RUNS_DIR / <run_id> / "phase_z2"``."""
    run_id = "20260524_120000_phase_z2"
    resolved = _pz2._resolve_reuse_from_prev_run_dir(run_id)
    assert resolved == _pz2.RUNS_DIR / run_id / "phase_z2"
def test_resolve_prev_run_dir_does_not_check_existence(tmp_path: Path):
    """Resolution is pure path computation: an unknown run id still yields
    a ``Path`` without raising (u4b handles the missing-prev-run case)."""
    resolved = _pz2._resolve_reuse_from_prev_run_dir("never_existed_run_id")
    assert isinstance(resolved, Path)
    # The path does not actually exist; helper still returned cleanly.
    assert not resolved.exists()
# -- _copy_reuse_artifacts_from_prev_run ----------------------------------
def test_copy_reuse_artifacts_copies_all_step_files(tmp_path: Path):
    """Every step artifact lands under ``steps/`` and is reported in the
    returned mapping as ``steps/<name>``."""
    prev_dir = tmp_path / "prev" / "phase_z2"
    new_dir = tmp_path / "new" / "phase_z2"
    _seed_prev_run_dir(prev_dir, snapshot=_build_canonical_snapshot())

    copied_map = _pz2._copy_reuse_artifacts_from_prev_run(prev_dir, new_dir)

    for fname in _pz2._REUSE_STEP_ARTIFACTS:
        assert (new_dir / "steps" / fname).exists(), f"missing copy: {fname}"
        assert copied_map[fname] == f"steps/{fname}"
def test_copy_reuse_artifacts_copies_snapshot_to_run_dir_root(tmp_path: Path):
    """The snapshot is copied to the new run directory's root."""
    prev_dir = tmp_path / "prev" / "phase_z2"
    new_dir = tmp_path / "new" / "phase_z2"
    _seed_prev_run_dir(prev_dir, snapshot=_build_canonical_snapshot())

    copied_map = _pz2._copy_reuse_artifacts_from_prev_run(prev_dir, new_dir)

    # Snapshot lives at run_dir root (NOT under steps/) per u3 contract.
    assert (new_dir / SNAPSHOT_FILENAME).exists()
    assert copied_map[SNAPSHOT_FILENAME] == SNAPSHOT_FILENAME
def test_copy_reuse_artifacts_creates_steps_subdir_if_absent(tmp_path: Path):
    """The copy helper creates ``<new_run_dir>/steps`` when it is missing."""
    prev_dir = tmp_path / "prev" / "phase_z2"
    new_dir = tmp_path / "new" / "phase_z2"
    new_steps = new_dir / "steps"
    _seed_prev_run_dir(prev_dir, snapshot=_build_canonical_snapshot())

    # new_run_dir / steps does not yet exist
    assert not new_steps.exists()
    _pz2._copy_reuse_artifacts_from_prev_run(prev_dir, new_dir)
    assert new_steps.is_dir()
def test_copy_reuse_artifacts_missing_step_raises_filenotfound(
    tmp_path: Path,
):
    """A missing step artifact raises ``FileNotFoundError`` whose message
    contains the file name and the text ``prev_run_dir``."""
    prev_dir = tmp_path / "prev" / "phase_z2"
    new_dir = tmp_path / "new" / "phase_z2"
    _seed_prev_run_dir(prev_dir, snapshot=_build_canonical_snapshot())

    # Delete one of the required step artifacts.
    (prev_dir / "steps" / "step05_v4_evidence.json").unlink()

    with pytest.raises(FileNotFoundError) as raised:
        _pz2._copy_reuse_artifacts_from_prev_run(prev_dir, new_dir)

    message = str(raised.value)
    assert "step05_v4_evidence.json" in message
    assert "prev_run_dir" in message
def test_copy_reuse_artifacts_missing_snapshot_raises_filenotfound(
    tmp_path: Path,
):
    """A missing snapshot file raises ``FileNotFoundError`` whose message
    contains the snapshot filename."""
    prev_dir = tmp_path / "prev" / "phase_z2"
    new_dir = tmp_path / "new" / "phase_z2"
    _seed_prev_run_dir(prev_dir, snapshot=_build_canonical_snapshot())
    (prev_dir / SNAPSHOT_FILENAME).unlink()

    with pytest.raises(FileNotFoundError) as raised:
        _pz2._copy_reuse_artifacts_from_prev_run(prev_dir, new_dir)

    assert SNAPSHOT_FILENAME in str(raised.value)
def test_copy_reuse_artifacts_byte_identical_copy(tmp_path: Path):
    """Bytes must match exactly — copy, not transform."""
    prev_dir = tmp_path / "prev" / "phase_z2"
    new_dir = tmp_path / "new" / "phase_z2"
    _seed_prev_run_dir(prev_dir, snapshot=_build_canonical_snapshot())

    _pz2._copy_reuse_artifacts_from_prev_run(prev_dir, new_dir)

    # (source, copy) pairs: every step artifact first, then the snapshot.
    pairs = [
        (prev_dir / "steps" / fname, new_dir / "steps" / fname)
        for fname in _pz2._REUSE_STEP_ARTIFACTS
    ]
    pairs.append((prev_dir / SNAPSHOT_FILENAME, new_dir / SNAPSHOT_FILENAME))

    for source_file, copied_file in pairs:
        assert source_file.read_bytes() == copied_file.read_bytes()
# -- _load_and_validate_reuse_snapshot ------------------------------------
def test_load_and_validate_returns_snapshot_dict(tmp_path: Path):
    """A valid snapshot on disk is returned as a dict whose wrapped entries
    are readable."""
    mdx_body = _mdx_text()
    payload = json.dumps(
        _build_canonical_snapshot(mdx_source_text=mdx_body),
        ensure_ascii=False,
        indent=2,
    )
    (tmp_path / SNAPSHOT_FILENAME).write_text(payload, encoding="utf-8")

    result = _pz2._load_and_validate_reuse_snapshot(
        tmp_path, mdx_source_text=mdx_body
    )

    assert result["schema_version"] == SNAPSHOT_VERSION
    assert result["slide_title"]["value"] == "Slide"
def test_load_and_validate_mdx_sha256_mismatch_raises(tmp_path: Path):
    """A snapshot built for one MDX text is rejected when loaded against a
    different text.

    The u2 validator raises ``SnapshotValidationError`` (a ``ValueError``
    subclass). u4b translates that into exit 2; only the raise is asserted
    here.
    """
    seeded_text = "# Slide A\n"
    presented_text = "# Slide B (different bytes)\n"
    payload = json.dumps(
        _build_canonical_snapshot(mdx_source_text=seeded_text),
        ensure_ascii=False,
        indent=2,
    )
    (tmp_path / SNAPSHOT_FILENAME).write_text(payload, encoding="utf-8")

    with pytest.raises(SnapshotValidationError) as raised:
        _pz2._load_and_validate_reuse_snapshot(
            tmp_path, mdx_source_text=presented_text
        )

    assert "mdx_sha256 mismatch" in str(raised.value)
def test_load_and_validate_corrupt_json_raises(tmp_path: Path):
    """Unparseable snapshot content surfaces as ``json.JSONDecodeError``."""
    snapshot_file = tmp_path / SNAPSHOT_FILENAME
    snapshot_file.write_text("{ not valid json", encoding="utf-8")

    with pytest.raises(json.JSONDecodeError):
        _pz2._load_and_validate_reuse_snapshot(
            tmp_path, mdx_source_text=_mdx_text()
        )
def test_load_and_validate_missing_snapshot_file_raises(tmp_path: Path):
    """With no snapshot file in the directory the loader raises
    ``FileNotFoundError``; u4b translates this to exit 2 with a provenance
    message."""
    mdx_body = _mdx_text()
    with pytest.raises(FileNotFoundError):
        _pz2._load_and_validate_reuse_snapshot(
            tmp_path, mdx_source_text=mdx_body
        )
def test_load_and_validate_schema_version_mismatch_raises(tmp_path: Path):
    """A snapshot whose ``schema_version`` differs from ``SNAPSHOT_VERSION``
    is rejected with a message that mentions ``schema_version``."""
    mdx_body = _mdx_text()
    tampered = _build_canonical_snapshot(mdx_source_text=mdx_body)
    tampered["schema_version"] = SNAPSHOT_VERSION + 1  # force mismatch
    payload = json.dumps(tampered, ensure_ascii=False, indent=2)
    (tmp_path / SNAPSHOT_FILENAME).write_text(payload, encoding="utf-8")

    with pytest.raises(SnapshotValidationError) as raised:
        _pz2._load_and_validate_reuse_snapshot(
            tmp_path, mdx_source_text=mdx_body
        )

    assert "schema_version" in str(raised.value)
# -- _rehydrate_mdx_sections_from_snapshot --------------------------------
def test_rehydrate_sections_returns_mdxsection_instances():
    """Rehydration yields real ``MdxSection`` objects carrying the fixture's
    id, title and raw content."""
    restored = _pz2._rehydrate_mdx_sections_from_snapshot(
        _build_canonical_snapshot()
    )
    assert len(restored) == 1

    first = restored[0]
    assert isinstance(first, _pz2.MdxSection)
    assert first.section_id == "03-1"
    assert first.title == "DX status"
    assert first.raw_content == "- bullet one\n- bullet two"
def test_rehydrate_sections_preserves_heading_number_and_aliases():
    """Heading number, alias keys and sub-sections survive rehydration."""
    first = _pz2._rehydrate_mdx_sections_from_snapshot(
        _build_canonical_snapshot()
    )[0]
    assert first.heading_number == "3.1"
    assert first.v4_alias_keys == ["03-1.1"]
    assert first.sub_sections == []
# -- _rehydrate_composition_units_from_snapshot ---------------------------
def test_rehydrate_units_returns_composition_unit_instances():
    """Rehydration yields exactly one real ``CompositionUnit`` for the
    one-unit fixture."""
    restored = _pz2._rehydrate_composition_units_from_snapshot(
        _build_canonical_snapshot()
    )
    assert len(restored) == 1
    assert isinstance(restored[0], CompositionUnit)
def test_rehydrate_units_preserves_core_fields():
    """The restored unit carries the fixture's core field values."""
    unit = _pz2._rehydrate_composition_units_from_snapshot(
        _build_canonical_snapshot()
    )[0]

    # Fields compared with plain equality.
    exact_expectations = {
        "source_section_ids": ["03-1"],
        "merge_type": "single",
        "frame_template_id": "tpl_a",
        "frame_id": "fid_a",
        "frame_number": 13,
        "label": "use_as_is",
        "phase_z_status": "auto_renderable",
        "title": "DX status",
    }
    for attr_name, wanted in exact_expectations.items():
        assert getattr(unit, attr_name) == wanted

    # Float fields compared with a tolerance.
    assert unit.confidence == pytest.approx(0.91)
    assert unit.score == pytest.approx(0.91)
def test_rehydrate_units_preserves_provisional_and_auto_selectable():
    """Flags, filter reasons, notes and rationale survive rehydration."""
    unit = _pz2._rehydrate_composition_units_from_snapshot(
        _build_canonical_snapshot()
    )[0]
    assert unit.provisional is False
    assert unit.auto_selectable is True
    assert unit.filter_reasons == []
    assert unit.notes == ["a note"]
    assert unit.rationale == {"weight": 1.0}
def test_rehydrate_units_v4_candidates_expose_attribute_access():
    """Restored candidates must support attribute access, not dict access.

    ``_apply_frame_override_to_unit`` reads ``cand.template_id`` /
    ``cand.frame_id`` / etc. off ``unit.v4_candidates``.
    """
    restored_units = _pz2._rehydrate_composition_units_from_snapshot(
        _build_canonical_snapshot()
    )
    candidates = restored_units[0].v4_candidates
    assert len(candidates) == 1

    candidate = candidates[0]
    assert isinstance(candidate, _pz2._RehydratedV4Candidate)
    assert candidate.template_id == "tpl_a"
    assert candidate.frame_id == "fid_a"
    assert candidate.frame_number == 13
    assert candidate.confidence == pytest.approx(0.91)
    assert candidate.label == "use_as_is"
def test_rehydrate_units_empty_v4_candidates_yields_empty_list():
    """An empty ``v4_candidates`` list in the snapshot stays an empty list."""
    snapshot = _build_canonical_snapshot()
    first_unit_payload = snapshot["units"]["value"][0]
    first_unit_payload["v4_candidates"] = []

    restored = _pz2._rehydrate_composition_units_from_snapshot(snapshot)
    assert restored[0].v4_candidates == []
# -- _write_reuse_marker --------------------------------------------------
def test_write_reuse_marker_writes_json_with_prev_run_id(tmp_path: Path):
    """The marker is written at the run-dir root and records the schema
    version, the previous run id and the snapshot filename."""
    prev_run_id = "20260524_010101_phase_z2"
    copied_map = {
        "step00_preconditions.json": "steps/step00_preconditions.json",
        SNAPSHOT_FILENAME: SNAPSHOT_FILENAME,
    }

    marker_path = _pz2._write_reuse_marker(
        tmp_path,
        prev_run_id=prev_run_id,
        copied_artifacts=copied_map,
    )
    assert marker_path == tmp_path / _pz2.REUSE_MARKER_FILENAME

    marker = json.loads(marker_path.read_text(encoding="utf-8"))
    assert marker["schema_version"] == _pz2.REUSE_MARKER_SCHEMA_VERSION
    assert marker["reuse_from_prev_run_id"] == prev_run_id
    assert marker["snapshot_filename"] == SNAPSHOT_FILENAME
def test_write_reuse_marker_records_copied_artifacts_and_boundary(
    tmp_path: Path,
):
    """The marker echoes the copied-artifact mapping, lists the boundary
    step artifacts and records step 7 as the resume point."""
    copied_map = {}
    for fname in _pz2._REUSE_STEP_ARTIFACTS:
        copied_map[fname] = f"steps/{fname}"
    copied_map[SNAPSHOT_FILENAME] = SNAPSHOT_FILENAME

    _pz2._write_reuse_marker(
        tmp_path,
        prev_run_id="20260524_010101_phase_z2",
        copied_artifacts=copied_map,
    )

    marker_file = tmp_path / _pz2.REUSE_MARKER_FILENAME
    marker = json.loads(marker_file.read_text(encoding="utf-8"))
    assert marker["copied_artifacts"] == copied_map
    assert marker["boundary_steps"] == list(_pz2._REUSE_STEP_ARTIFACTS)
    assert marker["resume_at_step"] == 7
# -- module surface anchors -----------------------------------------------
def test_pipeline_exposes_all_u4_helpers():
    """Every u4 helper and constant stays reachable as a module-level
    attribute of ``phase_z2_pipeline`` (u5 wires them into
    ``run_phase_z2_mvp1``)."""
    required_surface = (
        "_resolve_reuse_from_prev_run_dir",
        "_copy_reuse_artifacts_from_prev_run",
        "_load_and_validate_reuse_snapshot",
        "_rehydrate_mdx_sections_from_snapshot",
        "_rehydrate_composition_units_from_snapshot",
        "_write_reuse_marker",
        "_RehydratedV4Candidate",
        "_REUSE_STEP_ARTIFACTS",
        "REUSE_MARKER_FILENAME",
        "REUSE_MARKER_SCHEMA_VERSION",
    )
    for name in required_surface:
        assert hasattr(_pz2, name), f"u4 surface missing: {name}"
def test_pipeline_run_signature_reuse_from_is_kw_only_optional_none():
    """u5 — forward-direction lock on the ``reuse_from`` parameter.

    ``reuse_from`` is part of ``run_phase_z2_mvp1``'s public signature. It
    MUST be keyword-only (after the ``*`` barrier) and default to ``None``
    so an absent flag preserves the pre-u5 behaviour. The earlier
    ``until_u5`` regression has flipped; this assertion stays so future
    drift (a positional promotion, a changed default) trips loudly.
    """
    import inspect

    parameters = inspect.signature(_pz2.run_phase_z2_mvp1).parameters
    assert "reuse_from" in parameters, (
        "u5 must thread reuse_from into run_phase_z2_mvp1 — kwarg missing. "
        f"current params: {list(parameters)}"
    )

    reuse_param = parameters["reuse_from"]
    assert reuse_param.kind is inspect.Parameter.KEYWORD_ONLY, (
        f"reuse_from must be keyword-only (after the ``*`` barrier); "
        f"got kind={reuse_param.kind}"
    )
    assert reuse_param.default is None, (
        f"reuse_from must default to None to preserve pre-u5 behaviour; "
        f"got default={reuse_param.default!r}"
    )

View File

@@ -0,0 +1,261 @@
"""IMP-43 (#72) u7b — Opt-in sweep equivalence test for full rerun vs
``--reuse-from`` across 3 layouts × 3 mdx samples × per-baseline frame pins.
u7b scope (per the Stage 2 Exit Report):
* Three mdx samples — ``01.mdx``, ``02.mdx``, ``03.mdx`` (the baseline
full run for each must exit 0 to give step13 equivalence something
to compare; ``04.mdx`` / ``05.mdx`` are deliberately excluded per
the u7a docstring — adapter_needed / EMPTY_SHELL_NO_CONTENT).
* Three ``--override-layout`` axes — ``None`` (auto), ``horizontal-2``,
``vertical-2``. ``None`` exercises the natural layout for that mdx;
the explicit pins exercise the layout-locked branch (Step 7-B
``select_layout_preset`` honors ``--override-layout`` per
``src/phase_z2_pipeline.py:5210``). The reuse path (C) inherits the
locked layout via the Step 6 snapshot ``layout_preset_pre_override``
(u2) — it MUST NOT pass ``--override-layout`` itself (u1 fail-closed
guard at ``src/phase_z2_pipeline.py:8181-8199`` rejects layout
overrides combined with ``--reuse-from``).
* "All 32 frames" coverage axis — each test case discovers ALL pinnable
``(unit_id, frame_template_id)`` pairs from its baseline ``step06_
composition_plan.json`` and uses every pin in (B) and (C). Union of
pins across the 9 (mdx, layout) cases approximates the V4 catalog
coverage; pure Cartesian 3×3×32 = 288 parametrize combos × 3
subprocess runs ≈ 864 pipeline runs is impractical even opt-in.
Three subprocess pipeline runs per case (same shape as u7a):
(A) baseline full run — no frame overrides — reuse seed.
(B) full rerun with the discovered frame overrides — independent
control path that does NOT touch ``--reuse-from``.
(C) ``--reuse-from <seed_id>`` with the same frame overrides — the
reuse path.
Assert: ``step13_render.json`` from (B) and (C) is byte-equal modulo the
Stage 2 whitelist (only ``run_id`` substring inside
``data.final_html_path`` is normalized — see u7a docstring for the full
whitelist rationale).
Opt-in:
* ``@pytest.mark.sweep`` — marker registered in ``pyproject.toml``.
Default CI must run ``pytest -m 'not sweep'``; explicit opt-in is
``pytest -m sweep tests/test_phase_z2_reuse_from_equivalence_sweep.py``.
* If an mdx / layout combo's baseline (A) returns non-zero (e.g., a
layout pin incompatible with the mdx's natural unit_count produces
a pipeline error), the case is skipped — u7b is a reuse-equivalence
test, not a baseline-correctness test (those live elsewhere).
Persisted ``data/user_overrides/<stem>.json`` isolation:
IMP-52 (#80) u2 introduced an MDX-keyed persistence fallback at
``src/phase_z2_pipeline.py:8075-8168`` that merges the on-disk file
into the subprocess overrides regardless of CLI flags. For mdx stems
whose persistence file carries non-frame axes (e.g.,
``data/user_overrides/03.json`` holds ``layout`` + ``zone_geometries``),
two orthogonality problems break u7b:
1. (A) and (B) absorb the persisted ``layout`` / ``zone_geometries``
independent of the ``layout_pin`` parameter, collapsing the test
matrix — the parametrized layout axis stops being a real axis.
2. (C) on the reuse path receives the persisted non-frame axes via
the same merge, which the u1 fail-closed guard at
``src/phase_z2_pipeline.py:8181-8199`` rejects with exit code 2
before step13 equivalence can be measured.
The ``_isolated_persisted_overrides`` context manager renames the
persistence file out of the way for the duration of each parametrized
case (try/finally restore; crash-resistant via a startup recovery
branch). The hidden backup filename starts with ``.`` so
``user_overrides_io.validate_key`` (``src/user_overrides_io.py:72``)
cannot accidentally re-load it mid-run. The pipeline subprocess does
not write the persistence file (writes are gated to the Vite
``/api/user-overrides`` endpoint), so the rename is safe across the
three subprocess spawns. The real-world reuse-from × persistence
interaction (where ``--reuse-from`` should arguably suppress
non-frame persistence injection rather than fail closed) is a
follow-up issue candidate, surfaced in this unit's unit_executed
Gitea comment.
"""
from __future__ import annotations
import contextlib
import json
import os
import subprocess
import sys
import uuid
from pathlib import Path
import pytest
from tests.test_phase_z2_reuse_from_equivalence_unit import (
_assert_run_ok,
_frame_override_args,
_normalize_step13,
_read_step_artifact,
_spawn_pipeline,
)
REPO_ROOT = Path(__file__).resolve().parents[1]
SAMPLES_DIR = REPO_ROOT / "samples" / "mdx_batch"
RUNS_DIR = REPO_ROOT / "data" / "runs"
OVERRIDES_DIR = REPO_ROOT / "data" / "user_overrides"
MDX_FILES = ("01.mdx", "02.mdx", "03.mdx")
LAYOUT_PINS = (None, "horizontal-2", "vertical-2")
def _unique(prefix: str) -> str:
return f"{prefix}_imp43_u7b_{uuid.uuid4().hex[:8]}"
@contextlib.contextmanager
def _isolated_persisted_overrides(mdx_name: str):
    """Park ``data/user_overrides/<stem>.json`` for the duration of the block.

    While the ``with`` body runs, the persistence file for ``mdx_name`` is
    renamed to the hidden sibling ``.<stem>.imp43_u7b_isolation.bak`` so
    the pipeline subprocesses spawned inside the block do not find it.
    The rationale is given in the module docstring's
    "Persisted ... isolation" section.

    Crash recovery: if a previous invocation died between park and
    restore, the backup exists while the live file does not. In that case
    the backup is put back first, so the original is never lost on a
    second invocation.

    The restore runs in ``finally`` and only when this invocation actually
    parked a file and the backup is still present.
    """
    key = Path(mdx_name).stem
    live_file = OVERRIDES_DIR / f"{key}.json"
    parked_file = OVERRIDES_DIR / f".{key}.imp43_u7b_isolation.bak"

    # Recover from an earlier interrupted run before doing anything else.
    if parked_file.is_file() and not live_file.is_file():
        os.replace(parked_file, live_file)

    parked = live_file.is_file()
    if parked:
        OVERRIDES_DIR.mkdir(parents=True, exist_ok=True)
        os.replace(live_file, parked_file)

    try:
        yield
    finally:
        if parked and parked_file.is_file():
            os.replace(parked_file, live_file)
def _discover_all_frame_pins(seed_run_id: str) -> list[tuple[str, str]]:
    """Collect every ``(unit_id, frame_template_id)`` pin from the seed run.

    Reads ``step06_composition_plan.json`` of ``seed_run_id`` and walks
    ``data.selected_units``. For each unit that carries a non-empty list
    of ``source_section_ids`` and a non-empty string ``frame_template_id``,
    the unit id is the section ids joined with ``"+"``.

    Unlike the u7a helper, no cap is applied: all pins are returned so the
    sweep exercises whatever frame templates the baseline produced.

    Returns:
        The discovered pins in plan order; empty when none qualify.
    """
    plan = _read_step_artifact(seed_run_id, "step06_composition_plan.json")
    units = plan.get("data", {}).get("selected_units") or []

    discovered: list[tuple[str, str]] = []
    for unit in units:
        section_ids = unit.get("source_section_ids") or []
        template_id = unit.get("frame_template_id")
        has_sections = isinstance(section_ids, list) and bool(section_ids)
        has_template = isinstance(template_id, str) and bool(template_id)
        if not (has_sections and has_template):
            continue
        joined_id = "+".join(str(part) for part in section_ids)
        if joined_id:
            discovered.append((joined_id, template_id))
    return discovered
@pytest.mark.sweep
@pytest.mark.parametrize("layout_pin", LAYOUT_PINS)
@pytest.mark.parametrize("mdx_name", MDX_FILES)
def test_full_rerun_vs_reuse_from_step13_equivalence_sweep(
    mdx_name: str, layout_pin: str | None
) -> None:
    """u7b sweep: (B) full rerun and (C) ``--reuse-from`` must agree on step13.

    For each (mdx, layout) pair the test spawns three pipeline runs:
    (A) a baseline seed, (B) a full rerun with every frame pin discovered
    from (A), and (C) a ``--reuse-from`` run with the same pins. The
    normalized ``step13_render.json`` of (B) and (C) must be equal.

    The case is skipped, not failed, when the sample is absent, when the
    baseline exits non-zero, or when the baseline exposes no pinnable
    units. Baseline correctness is not what this test measures.
    """
    sample = SAMPLES_DIR / mdx_name
    if not sample.is_file():
        pytest.skip(f"sample missing: {sample}")

    layout_args: list[str] = []
    if layout_pin is not None:
        layout_args = ["--override-layout", layout_pin]

    # Keep any persisted data/user_overrides/<stem>.json out of the way
    # for all three spawns (see the module docstring for the rationale).
    with _isolated_persisted_overrides(mdx_name):
        # (A) baseline: no frame overrides; acts as the reuse seed.
        seed_id = _unique("seed")
        seed_proc = _spawn_pipeline([str(sample), seed_id, *layout_args])
        if seed_proc.returncode != 0:
            pytest.skip(
                f"baseline (A) non-zero for mdx={mdx_name} layout={layout_pin} "
                f"(returncode={seed_proc.returncode}); not a reuse-equivalence axis. "
                f"stderr tail: {seed_proc.stderr[-400:]}"
            )

        pins = _discover_all_frame_pins(seed_id)
        if not pins:
            pytest.skip(
                f"no pinnable (unit_id, frame_template_id) pairs in baseline "
                f"step06 for mdx={mdx_name} layout={layout_pin}; nothing to "
                f"exercise on the override-frame surface"
            )
        override_args = _frame_override_args(pins)

        # (B) independent control: full rerun with the discovered pins.
        full_id = _unique("full")
        full_proc = _spawn_pipeline(
            [str(sample), full_id, *layout_args, *override_args]
        )
        _assert_run_ok(
            f"full rerun (B) mdx={mdx_name} layout={layout_pin} pins={len(pins)}",
            full_proc,
        )

        # (C) reuse path. --override-layout is deliberately NOT passed:
        # the u1 guard rejects layout together with --reuse-from, and the
        # layout is restored from the Step 6 snapshot instead.
        reuse_id = _unique("reuse")
        reuse_cli = [str(sample), reuse_id, "--reuse-from", seed_id]
        reuse_cli.extend(override_args)
        reuse_proc = _spawn_pipeline(reuse_cli)
        _assert_run_ok(
            f"reuse rerun (C) mdx={mdx_name} layout={layout_pin} pins={len(pins)}",
            reuse_proc,
        )

        # Compare step13 after applying the whitelist normalization.
        full_norm = _normalize_step13(
            _read_step_artifact(full_id, "step13_render.json"), full_id
        )
        reuse_norm = _normalize_step13(
            _read_step_artifact(reuse_id, "step13_render.json"), reuse_id
        )
        assert full_norm == reuse_norm, (
            f"step13_render.json equivalence violated for IMP-43 #72 u7b "
            f"(mdx={mdx_name}, layout={layout_pin}, full={full_id}, "
            f"reuse={reuse_id}, seed={seed_id}, pins={pins}):\n"
            f"--- full (normalized) ---\n"
            f"{json.dumps(full_norm, ensure_ascii=False, indent=2)}\n"
            f"--- reuse (normalized) ---\n"
            f"{json.dumps(reuse_norm, ensure_ascii=False, indent=2)}"
        )

View File

@@ -0,0 +1,204 @@
"""IMP-43 (#72) u7a — Fast CI equivalence test for full rerun vs ``--reuse-from``.
u7a scope (per the Stage 2 Exit Report):
* One mdx (``samples/mdx_batch/02.mdx``), one layout (auto), two
``--override-frame`` pins self-discovered from the baseline's
``step06_composition_plan.json`` (each pin re-states the unit's
own ``frame_template_id`` — semantically a no-op, but it
exercises the full ``--override-frame`` CLI surface through both
paths, satisfying the "two frames" axis of the Stage 2 plan).
* Three subprocess pipeline runs:
(A) baseline full run — no overrides — reuse seed
(B) full rerun with the two ``--override-frame`` pins — the
independent control path that does NOT touch ``--reuse-from``
(C) ``--reuse-from <seed_id>`` with the same two
``--override-frame`` pins — the reuse path
* Assert: ``step13_render.json`` from (B) and (C) is byte-equal modulo
the Stage 2 whitelist — only ``run_id`` (as a substring of
``data.final_html_path``), ``timestamps``, and ``prev_run_id`` may
legitimately differ. ``step13_render.json`` has no timestamps and
no ``prev_run_id`` field (the latter surfaces via the separate
``_reuse_marker.json`` sidecar instead — out of scope for this
step13 equivalence axis), so the only effective normalization
target is the ``run_id`` substring inside ``data.final_html_path``.
Per Stage 2 plan: the sweep equivalence coverage (3 layouts × 3 mdx ×
all 32 frames) lives in u7b under ``pytest.mark.sweep`` — u7a stays
fast (3 pipeline runs on a single small mdx) so it can run in default
CI without an opt-in marker.
Why mdx02:
* ``test_pipeline_smoke_imp85.py::test_non_vp_smoke_runs_clean`` already
pins mdx02 as a non-VP exit-0 path (the baseline (A) run must
exit 0 for the equivalence axis to even have something to
compare against).
* mdx04 / mdx05 are deliberately excluded — mdx04 routes zones to
``adapter_needed`` per IMP-#85 u1 and mdx05 exits 1 with
``EMPTY_SHELL_NO_CONTENT`` per IMP-#87 u3, neither of which gives
a stable step13 equivalence surface for a fast CI lock.
"""
from __future__ import annotations
import json
import subprocess
import sys
import uuid
from pathlib import Path
import pytest
REPO_ROOT = Path(__file__).resolve().parents[1]
SAMPLES_DIR = REPO_ROOT / "samples" / "mdx_batch"
RUNS_DIR = REPO_ROOT / "data" / "runs"
MDX_FILENAME = "02.mdx"
def _unique_run_id(prefix: str) -> str:
return f"{prefix}_imp43_u7a_{uuid.uuid4().hex[:8]}"
def _spawn_pipeline(extra_args: list[str], timeout: int = 600) -> subprocess.CompletedProcess:
    """Run ``python -m src.phase_z2_pipeline <extra_args>`` and capture its output.

    The child is started with the current interpreter, from the
    repository root, with stdout/stderr captured as text.

    Args:
        extra_args: CLI arguments appended after the module name.
        timeout: Seconds before ``subprocess.run`` gives up on the child.

    Returns:
        The completed process, including return code and captured streams.
    """
    command = [sys.executable, "-m", "src.phase_z2_pipeline"]
    command.extend(extra_args)
    return subprocess.run(
        command,
        cwd=str(REPO_ROOT),
        timeout=timeout,
        text=True,
        capture_output=True,
    )
def _assert_run_ok(label: str, cp: subprocess.CompletedProcess) -> None:
assert cp.returncode == 0, (
f"{label} pipeline returncode={cp.returncode}\n"
f"--- stderr tail ---\n{cp.stderr[-2000:]}\n"
f"--- stdout tail ---\n{cp.stdout[-2000:]}"
)
def _read_step_artifact(run_id: str, fname: str) -> dict:
    """Load a JSON step artifact of a finished run.

    The file is looked up at ``RUNS_DIR/<run_id>/phase_z2/steps/<fname>``
    and decoded as UTF-8 JSON.

    Raises:
        AssertionError: If the artifact file does not exist.
    """
    artifact = RUNS_DIR / run_id / "phase_z2" / "steps" / fname
    assert artifact.is_file(), f"missing artifact: {artifact}"
    with artifact.open(encoding="utf-8") as handle:
        return json.load(handle)
def _discover_two_frame_pins(seed_run_id: str) -> list[tuple[str, str]]:
    """Self-discover two ``(unit_id, frame_template_id)`` pins from the
    baseline's ``step06_composition_plan.json``.

    The artifact is expected to expose
    ``data.selected_units[*].{source_section_ids, frame_template_id}``;
    ``unit_id`` is ``"+".join(source_section_ids)``. Pinning a unit to its
    own template is semantically a no-op, but it drives the
    ``--override-frame`` CLI surface through both the (B) and (C) runs.

    A malformed artifact is tolerated: a null or non-dict ``data`` value
    and non-dict unit entries are skipped rather than crashing with
    ``AttributeError``. The explicit assertion below then reports the
    problem together with the pins that were found.

    Args:
        seed_run_id: Run id of the baseline (A) run to inspect.

    Returns:
        Exactly two pins, in plan order.

    Raises:
        AssertionError: If fewer than two pinnable units are found.
    """
    step06 = _read_step_artifact(seed_run_id, "step06_composition_plan.json")

    # ``dict.get(key, default)`` returns None when the key exists with a
    # null value, so guard on the type instead of relying on the default.
    data = step06.get("data")
    selected_units = data.get("selected_units") if isinstance(data, dict) else None

    pinnable: list[tuple[str, str]] = []
    for u in selected_units or []:
        if not isinstance(u, dict):
            continue
        sids = u.get("source_section_ids") or []
        tpl_id = u.get("frame_template_id")
        if not isinstance(sids, list) or not sids:
            continue
        if not isinstance(tpl_id, str) or not tpl_id:
            continue
        unit_id = "+".join(str(s) for s in sids)
        if not unit_id:
            continue
        pinnable.append((unit_id, tpl_id))
        if len(pinnable) >= 2:
            break

    assert len(pinnable) >= 2, (
        f"baseline {seed_run_id} step06_composition_plan.json must expose "
        f">= 2 (unit_id, frame_template_id) pairs for the u7a two-frames "
        f"axis; got {pinnable}"
    )
    return pinnable
def _frame_override_args(pins: list[tuple[str, str]]) -> list[str]:
out: list[str] = []
for unit_id, tpl_id in pins:
out.extend(["--override-frame", f"{unit_id}={tpl_id}"])
return out
def _normalize_step13(payload: dict, run_id: str) -> dict:
"""Apply the Stage 2 equivalence whitelist to step13_render.json.
Whitelist axes (Stage 2 plan §u7a):
* ``run_id`` — appears only as a substring of
``data.final_html_path`` in the step13 schema
(``src/phase_z2_pipeline.py:7174-7192``).
* ``timestamps`` — ``_write_step_artifact``
(``src/phase_z2_pipeline.py:3826``) does not
stamp a timestamp on the payload, so no
normalization is needed for this axis.
* ``prev_run_id`` — surfaces via ``_reuse_marker.json`` (separate
sidecar), NOT via step13_render.json. No
normalization needed on the step13 surface.
Returns a deep copy of ``payload`` with the ``run_id`` substring of
``data.final_html_path`` replaced by the sentinel ``<RUN_ID>`` so
the (B) and (C) step13 payloads can be compared byte-for-byte.
"""
normalized = json.loads(json.dumps(payload, ensure_ascii=False))
data = normalized.get("data")
if isinstance(data, dict):
fhp = data.get("final_html_path")
if isinstance(fhp, str) and run_id in fhp:
data["final_html_path"] = fhp.replace(run_id, "<RUN_ID>")
return normalized
def test_full_rerun_vs_reuse_from_step13_equivalence_one_mdx_two_frames() -> None:
    """u7a fast lock: (B) full rerun and (C) ``--reuse-from`` agree on step13.

    Three pipeline runs on one sample: (A) a baseline seed, (B) a full
    rerun with two self-discovered ``--override-frame`` pins, and (C) a
    ``--reuse-from`` run with the same pins. The normalized
    ``step13_render.json`` of (B) and (C) must be equal.
    """
    sample = SAMPLES_DIR / MDX_FILENAME
    assert sample.is_file(), f"sample missing: {sample}"

    # (A) baseline: no overrides; acts as the reuse seed.
    seed_id = _unique_run_id("seed")
    _assert_run_ok("baseline (A)", _spawn_pipeline([str(sample), seed_id]))

    # Two pins taken from the seed's own composition plan.
    pins = _discover_two_frame_pins(seed_id)
    override_args = _frame_override_args(pins)

    # (B) independent control: full rerun with the two pins.
    full_id = _unique_run_id("full")
    full_proc = _spawn_pipeline([str(sample), full_id, *override_args])
    _assert_run_ok("full rerun (B)", full_proc)

    # (C) reuse path: same pins, resumed from the seed.
    reuse_id = _unique_run_id("reuse")
    reuse_cli = [str(sample), reuse_id, "--reuse-from", seed_id]
    reuse_cli.extend(override_args)
    _assert_run_ok("reuse rerun (C)", _spawn_pipeline(reuse_cli))

    # Compare step13 after applying the whitelist normalization.
    full_norm = _normalize_step13(
        _read_step_artifact(full_id, "step13_render.json"), full_id
    )
    reuse_norm = _normalize_step13(
        _read_step_artifact(reuse_id, "step13_render.json"), reuse_id
    )
    assert full_norm == reuse_norm, (
        "step13_render.json equivalence violated for IMP-43 #72 u7a "
        f"(full={full_id}, reuse={reuse_id}, seed={seed_id}, pins={pins}):\n"
        f"--- full (normalized) ---\n"
        f"{json.dumps(full_norm, ensure_ascii=False, indent=2)}\n"
        f"--- reuse (normalized) ---\n"
        f"{json.dumps(reuse_norm, ensure_ascii=False, indent=2)}"
    )

View File

@@ -0,0 +1,748 @@
"""IMP-43 (#72) u4b — fail-closed wrapper tests for ``--reuse-from``.
u4b scope (per the Stage 2 Exit Report):
- Translate the u4 raise surface (``FileNotFoundError`` /
``SnapshotValidationError`` / ``json.JSONDecodeError`` / ``OSError``)
into the CLI fail-closed contract: stderr message + ``sys.exit(2)``.
- Add the ``prev_run_dir == new_run_dir`` accidental-write guard BEFORE
any copy attempt (prev_run_dir must stay read-only).
- Add the missing-prev-run-dir surface (clean axis, not raw stack).
- Surface ``mdx_sha256 mismatch`` as its OWN axis (distinct from
generic snapshot validation failures).
The signature threading + the in-``run_phase_z2_mvp1`` branch that
invokes the wrapper land in u5. u4b adds the wrapper function only.
Tested surface (``src/phase_z2_pipeline.py``):
* ``execute_reuse_from_or_fail_closed``
* ``_abort_reuse_from``
* ``_paths_equivalent``
* ``REUSE_FAIL_CLOSED_AXES`` (closed enum)
"""
from __future__ import annotations
import hashlib
import json
from dataclasses import dataclass, field
from pathlib import Path
from typing import Optional
import pytest
import src.phase_z2_pipeline as _pz2
from src.phase_z2_reuse_snapshot import (
SNAPSHOT_FILENAME,
SNAPSHOT_VERSION,
build_snapshot,
)
# -- synthetic snapshot inputs (mirror u4 test fixture) ------------------
@dataclass
class _Section:
    """Synthetic section record used only to feed ``build_snapshot`` in tests.

    An instance is passed as ``sections=[...]`` by
    ``_build_canonical_snapshot``.
    NOTE(review): the field set presumably mirrors the pipeline's real
    section type; confirm against ``src/phase_z2_pipeline.py``.
    """
    section_id: str
    section_num: int
    title: str
    raw_content: str
    # Optional heading number; the fixture sets it to e.g. "3.1".
    heading_number: Optional[str] = None
    # Fresh list per instance (default_factory avoids a shared mutable default).
    v4_alias_keys: list = field(default_factory=list)
    sub_sections: list = field(default_factory=list)
@dataclass
class _V4Candidate:
    """Synthetic frame-candidate record used by the snapshot fixture.

    An instance is placed in ``_Unit.v4_candidates`` by
    ``_build_canonical_snapshot``. All fields are required.
    """
    template_id: str
    frame_id: str
    frame_number: int
    confidence: float
    label: str
@dataclass
class _Unit:
    """Synthetic composition-unit record used only to feed ``build_snapshot``.

    An instance is passed as ``units=[...]`` by
    ``_build_canonical_snapshot``.
    NOTE(review): the field set presumably mirrors the pipeline's real
    unit type; confirm against ``src/phase_z2_pipeline.py``.
    """
    # Required fields: the fixture supplies every one of these explicitly.
    source_section_ids: list
    merge_type: str
    frame_template_id: str
    frame_id: str
    frame_number: int
    confidence: float
    label: str
    phase_z_status: str
    raw_content: str
    title: str
    score: float
    # Defaulted fields; the fixture overrides only ``v4_candidates``.
    v4_rank: Optional[int] = 1
    selection_path: str = "rank_1"
    fallback_reason: Optional[str] = None
    # Mutable defaults use default_factory so instances never share state.
    rationale: dict = field(default_factory=dict)
    auto_selectable: bool = True
    filter_reasons: list = field(default_factory=list)
    notes: list = field(default_factory=list)
    v4_candidates: list = field(default_factory=list)
    provisional: bool = False
def _mdx_text() -> str:
return "# Slide\n\n## 03-1 DX status\n\n- bullet one\n- bullet two\n"
def _build_canonical_snapshot(*, mdx_source_text: Optional[str] = None) -> dict:
    """Build a minimal valid Step 6 snapshot via ``build_snapshot``.

    The snapshot describes one section ("03-1"), one unit pinned to
    template ``tpl_a`` and one matching v4 candidate. Its ``mdx_sha256``
    is the SHA-256 of ``mdx_source_text`` encoded as UTF-8.

    Args:
        mdx_source_text: MDX body to hash; ``_mdx_text()`` when ``None``.

    Returns:
        Whatever ``build_snapshot`` returns for these inputs.
    """
    body = _mdx_text() if mdx_source_text is None else mdx_source_text
    digest = hashlib.sha256(body.encode("utf-8")).hexdigest()
    bullets = "- bullet one\n- bullet two"

    candidate = _V4Candidate(
        template_id="tpl_a",
        frame_id="fid_a",
        frame_number=13,
        confidence=0.91,
        label="use_as_is",
    )
    only_section = _Section(
        section_id="03-1",
        section_num=1,
        title="DX status",
        raw_content=bullets,
        heading_number="3.1",
        v4_alias_keys=["03-1.1"],
    )
    only_unit = _Unit(
        source_section_ids=["03-1"],
        merge_type="single",
        frame_template_id="tpl_a",
        frame_id="fid_a",
        frame_number=13,
        confidence=0.91,
        label="use_as_is",
        phase_z_status="auto_renderable",
        raw_content=bullets,
        title="DX status",
        score=0.91,
        v4_candidates=[candidate],
    )

    return build_snapshot(
        mdx_sha256=digest,
        slide_title="Slide",
        slide_footer=None,
        sections=[only_section],
        stage0_adapter_diagnostics={"used": True, "fallback_reason": None},
        stage0_normalized_assets={"popups": [], "images": [], "tables": []},
        v4_evidence=[],
        layout_preset_pre_override="single",
        units=[only_unit],
        comp_debug={},
        v4_fallback_traces={},
        ai_preflight={"enabled": False, "skipped": True},
    )
def _seed_prev_run_dir(prev_run_dir: Path, *, snapshot: dict) -> None:
    """Populate ``prev_run_dir`` so it looks like a finished seed run.

    Writes one placeholder file per name in ``_pz2._REUSE_STEP_ARTIFACTS``
    under ``steps/``. ``.json`` names get a tiny ``{"name": ...}``
    document and all other names get a fixed raw text body. The snapshot
    is then written as pretty-printed JSON next to ``steps/``.
    """
    steps_dir = prev_run_dir / "steps"
    steps_dir.mkdir(parents=True, exist_ok=True)

    for artifact_name in _pz2._REUSE_STEP_ARTIFACTS:
        if artifact_name.endswith(".json"):
            body = f'{{"name": "{artifact_name}"}}'
        else:
            body = "raw mdx body bytes"
        (steps_dir / artifact_name).write_text(body, encoding="utf-8")

    snapshot_text = json.dumps(snapshot, ensure_ascii=False, indent=2)
    (prev_run_dir / SNAPSHOT_FILENAME).write_text(snapshot_text, encoding="utf-8")
# -- REUSE_FAIL_CLOSED_AXES vocab lock ------------------------------------
def test_fail_closed_axes_is_closed_enum():
    """Lock the fail-closed vocabulary to exactly these nine axes.

    Adding or removing an axis without updating this set breaks the test,
    which forces the test coverage to be revisited at the same time.
    ``reuse_copy_os_error`` and ``snapshot_read_os_error`` cover OSError
    cases other than FileNotFoundError that previously escaped as raw
    tracebacks.
    """
    expected_axes = frozenset((
        "prev_run_dir_missing",
        "prev_run_dir_equals_new_run_dir",
        "reuse_artifact_missing",
        "reuse_copy_os_error",
        "snapshot_missing_after_copy",
        "snapshot_corrupt_json",
        "snapshot_read_os_error",
        "mdx_sha256_mismatch",
        "snapshot_validation_failed",
    ))
    assert _pz2.REUSE_FAIL_CLOSED_AXES == expected_axes
# -- _abort_reuse_from -----------------------------------------------------
def test_abort_reuse_from_exits_with_code_two(capsys):
    """``_abort_reuse_from`` terminates via ``SystemExit`` with code 2."""
    abort_kwargs = {
        "axis": "prev_run_dir_missing",
        "value": "never_existed",
        "path": "D:/nope",
        "upstream": "--reuse-from CLI argument",
    }
    with pytest.raises(SystemExit) as caught:
        _pz2._abort_reuse_from(**abort_kwargs)
    assert caught.value.code == 2
def test_abort_reuse_from_stderr_contains_value_path_upstream(capsys):
    """The abort diagnostic names the axis and echoes value/path/upstream."""
    with pytest.raises(SystemExit):
        _pz2._abort_reuse_from(
            axis="prev_run_dir_missing",
            value="never_existed",
            path="D:/nope",
            upstream="--reuse-from CLI argument",
        )
    err = capsys.readouterr().err
    # Field labels first, then the concrete values that were passed in.
    expected_fragments = (
        "prev_run_dir_missing",
        "value:",
        "path:",
        "upstream:",
        "never_existed",
        "D:/nope",
        "--reuse-from CLI argument",
    )
    for fragment in expected_fragments:
        assert fragment in err
def test_abort_reuse_from_includes_reason_when_exc_passed(capsys):
    """Passing ``exc`` adds a ``reason:`` line with the exception type and text.

    This lets an operator tell a specific underlying failure apart from a
    generic "snapshot broken" message.
    """
    try:
        raise ValueError("schema_version mismatch: expected 1, got 99")
    except ValueError as underlying:
        # Abort from inside the handler, so the exception object is the
        # one actually raised.
        with pytest.raises(SystemExit):
            _pz2._abort_reuse_from(
                axis="snapshot_validation_failed",
                value=str(underlying),
                path="D:/some/path",
                upstream="validate_snapshot",
                exc=underlying,
            )
    err = capsys.readouterr().err
    for fragment in ("reason:", "ValueError", "schema_version mismatch"):
        assert fragment in err
def test_abort_reuse_from_rejects_unknown_axis():
    """An axis outside the closed enum is a programmer error.

    It must raise ``AssertionError`` instead of emitting a malformed
    stderr line.
    """
    bogus_call = {
        "axis": "totally_made_up_axis",
        "value": "x",
        "path": "y",
        "upstream": "z",
    }
    with pytest.raises(AssertionError):
        _pz2._abort_reuse_from(**bogus_call)
# -- _paths_equivalent -----------------------------------------------------
def test_paths_equivalent_same_path_returns_true(tmp_path: Path):
    """An existing directory is equivalent to itself."""
    existing = tmp_path / "x" / "y"
    existing.mkdir(parents=True)
    verdict = _pz2._paths_equivalent(existing, existing)
    assert verdict is True
def test_paths_equivalent_different_paths_returns_false(tmp_path: Path):
    """Two distinct existing sibling directories are not equivalent."""
    first = tmp_path / "alpha"
    second = tmp_path / "beta"
    first.mkdir()
    second.mkdir()
    verdict = _pz2._paths_equivalent(first, second)
    assert verdict is False
def test_paths_equivalent_handles_nonexistent_paths(tmp_path: Path):
    """``Path.resolve(strict=False)`` should still normalize ``..``
    even when the leaf does not yet exist (new_run_dir before mkdir).

    Two cases are checked, and nothing under ``tmp_path / "new_run"`` is
    created on disk:

    * identical spellings of a non-existent path compare equal;
    * a spelling that detours through ``<dir>/..`` compares equal to the
      direct spelling. Without this case a plain ``a == b`` comparison
      would satisfy the test and the stated ``..`` contract would go
      unverified.
    """
    a = tmp_path / "new_run" / "phase_z2"
    b = tmp_path / "new_run" / "phase_z2"
    assert _pz2._paths_equivalent(a, b) is True

    # pathlib does not collapse ".." when joining, so this path object
    # really carries the detour until it is resolved.
    detour = tmp_path / "new_run" / "scratch" / ".." / "phase_z2"
    assert _pz2._paths_equivalent(a, detour) is True
# -- execute_reuse_from_or_fail_closed: happy path -----------------------
def test_happy_path_returns_prev_run_dir_copied_snapshot(
    tmp_path: Path, monkeypatch
):
    """A well-formed seed run yields ``(prev_run_dir, copied, snapshot)``."""
    mdx_body = _mdx_text()
    runs_root = tmp_path / "runs"
    seed_run = "prev_run_id_001"
    seed_dir = runs_root / seed_run / "phase_z2"
    target_dir = tmp_path / "new" / "phase_z2"

    _seed_prev_run_dir(
        seed_dir,
        snapshot=_build_canonical_snapshot(mdx_source_text=mdx_body),
    )
    monkeypatch.setattr(_pz2, "RUNS_DIR", runs_root)

    returned_dir, copied_names, restored = _pz2.execute_reuse_from_or_fail_closed(
        reuse_from=seed_run,
        new_run_dir=target_dir,
        mdx_source_text=mdx_body,
    )

    assert returned_dir == seed_dir
    assert SNAPSHOT_FILENAME in copied_names
    assert restored["schema_version"] == SNAPSHOT_VERSION
    # The per-axis provenance wrapper must survive the round trip.
    assert restored["slide_title"]["value"] == "Slide"
# -- prev_run_dir_missing axis --------------------------------------------
def test_prev_run_dir_missing_aborts(tmp_path: Path, monkeypatch, capsys):
    """An unknown ``--reuse-from`` id aborts on the ``prev_run_dir_missing`` axis."""
    empty_runs_root = tmp_path / "runs"
    empty_runs_root.mkdir()
    monkeypatch.setattr(_pz2, "RUNS_DIR", empty_runs_root)

    with pytest.raises(SystemExit) as caught:
        _pz2.execute_reuse_from_or_fail_closed(
            reuse_from="does_not_exist_anywhere",
            new_run_dir=tmp_path / "new" / "phase_z2",
            mdx_source_text=_mdx_text(),
        )

    assert caught.value.code == 2
    err = capsys.readouterr().err
    for fragment in ("prev_run_dir_missing", "does_not_exist_anywhere"):
        assert fragment in err
# -- prev_run_dir_equals_new_run_dir axis ---------------------------------
def test_prev_run_dir_equals_new_run_dir_aborts(
    tmp_path: Path, monkeypatch, capsys
):
    """Reject a new run dir that is the same directory as the seed's.

    Copying into the same ``phase_z2`` directory would overwrite the seed
    in place, so the wrapper must abort on its own axis.
    """
    runs_root = tmp_path / "runs"
    shared_id = "shared_run_id"
    seed_dir = runs_root / shared_id / "phase_z2"
    _seed_prev_run_dir(seed_dir, snapshot=_build_canonical_snapshot())
    monkeypatch.setattr(_pz2, "RUNS_DIR", runs_root)

    # The "new" directory deliberately aliases the seed directory.
    with pytest.raises(SystemExit) as caught:
        _pz2.execute_reuse_from_or_fail_closed(
            reuse_from=shared_id,
            new_run_dir=seed_dir,
            mdx_source_text=_mdx_text(),
        )

    assert caught.value.code == 2
    assert "prev_run_dir_equals_new_run_dir" in capsys.readouterr().err
def test_prev_run_dir_equals_new_run_dir_does_not_mutate_prev(
    tmp_path: Path, monkeypatch
):
    """Read-only guarantee: the collision abort leaves the seed bytes untouched.

    The abort has to fire before the copy step runs, so a seeded artifact
    must read back identically afterwards.
    """
    runs_root = tmp_path / "runs"
    shared_id = "shared_run_id"
    seed_dir = runs_root / shared_id / "phase_z2"
    _seed_prev_run_dir(seed_dir, snapshot=_build_canonical_snapshot())
    monkeypatch.setattr(_pz2, "RUNS_DIR", runs_root)

    probe = seed_dir / "steps" / "step02_normalized.json"
    seeded_bytes = '{"name": "step02_normalized.json"}'
    assert probe.read_text(encoding="utf-8") == seeded_bytes

    with pytest.raises(SystemExit):
        _pz2.execute_reuse_from_or_fail_closed(
            reuse_from=shared_id,
            new_run_dir=seed_dir,
            mdx_source_text=_mdx_text(),
        )

    # The seed artifact is still exactly what was seeded.
    assert probe.read_text(encoding="utf-8") == seeded_bytes
# -- reuse_artifact_missing axis ------------------------------------------
def test_reuse_artifact_missing_aborts(tmp_path: Path, monkeypatch, capsys):
    """A missing step artifact aborts on ``reuse_artifact_missing`` and names the file."""
    runs_root = tmp_path / "runs"
    seed_run = "prev_run_001"
    seed_dir = runs_root / seed_run / "phase_z2"
    _seed_prev_run_dir(seed_dir, snapshot=_build_canonical_snapshot())

    # Delete one required step file so the copy step cannot find it.
    missing_artifact = seed_dir / "steps" / "step05_v4_evidence.json"
    missing_artifact.unlink()
    monkeypatch.setattr(_pz2, "RUNS_DIR", runs_root)

    with pytest.raises(SystemExit) as caught:
        _pz2.execute_reuse_from_or_fail_closed(
            reuse_from=seed_run,
            new_run_dir=tmp_path / "new" / "phase_z2",
            mdx_source_text=_mdx_text(),
        )

    assert caught.value.code == 2
    err = capsys.readouterr().err
    expected_fragments = (
        "reuse_artifact_missing",
        "step05_v4_evidence.json",
        "reason:",
        "FileNotFoundError",
    )
    for fragment in expected_fragments:
        assert fragment in err
def test_reuse_artifact_missing_snapshot_sidecar(
    tmp_path: Path, monkeypatch, capsys
):
    """A seed run without its snapshot sidecar aborts on ``reuse_artifact_missing``."""
    runs_root = tmp_path / "runs"
    seed_run = "prev_run_002"
    seed_dir = runs_root / seed_run / "phase_z2"
    _seed_prev_run_dir(seed_dir, snapshot=_build_canonical_snapshot())

    # Remove only the snapshot; all step artifacts stay in place.
    (seed_dir / SNAPSHOT_FILENAME).unlink()
    monkeypatch.setattr(_pz2, "RUNS_DIR", runs_root)

    with pytest.raises(SystemExit) as caught:
        _pz2.execute_reuse_from_or_fail_closed(
            reuse_from=seed_run,
            new_run_dir=tmp_path / "new" / "phase_z2",
            mdx_source_text=_mdx_text(),
        )

    assert caught.value.code == 2
    err = capsys.readouterr().err
    assert "reuse_artifact_missing" in err
    assert SNAPSHOT_FILENAME in err
# -- snapshot_corrupt_json axis -------------------------------------------
def test_snapshot_corrupt_json_aborts(tmp_path: Path, monkeypatch, capsys):
    """An unparseable snapshot file aborts on ``snapshot_corrupt_json``."""
    runs_root = tmp_path / "runs"
    seed_run = "prev_run_corrupt"
    seed_dir = runs_root / seed_run / "phase_z2"
    _seed_prev_run_dir(seed_dir, snapshot=_build_canonical_snapshot())

    # Replace the snapshot with text that is not JSON. The copy step is
    # expected to succeed and the later parse to fail.
    broken_snapshot = seed_dir / SNAPSHOT_FILENAME
    broken_snapshot.write_text("{ not valid json", encoding="utf-8")
    monkeypatch.setattr(_pz2, "RUNS_DIR", runs_root)

    with pytest.raises(SystemExit) as caught:
        _pz2.execute_reuse_from_or_fail_closed(
            reuse_from=seed_run,
            new_run_dir=tmp_path / "new" / "phase_z2",
            mdx_source_text=_mdx_text(),
        )

    assert caught.value.code == 2
    err = capsys.readouterr().err
    assert "snapshot_corrupt_json" in err
    assert SNAPSHOT_FILENAME in err
    assert "JSONDecodeError" in err
# -- mdx_sha256_mismatch axis (own surface) -------------------------------
def test_mdx_sha256_mismatch_aborts_with_own_axis(
    tmp_path: Path, monkeypatch, capsys
):
    """An MDX whose hash differs from the snapshot's aborts on its own axis.

    The operator must be able to tell "wrong MDX for this seed run" apart
    from "snapshot file is broken", so the generic validation axis must
    not appear in the output.
    """
    runs_root = tmp_path / "runs"
    seed_run = "prev_run_diff_mdx"
    seed_dir = runs_root / seed_run / "phase_z2"
    seeded_mdx = "# Slide A\n"
    supplied_mdx = "# Slide B (different bytes)\n"

    _seed_prev_run_dir(
        seed_dir,
        snapshot=_build_canonical_snapshot(mdx_source_text=seeded_mdx),
    )
    monkeypatch.setattr(_pz2, "RUNS_DIR", runs_root)

    with pytest.raises(SystemExit) as caught:
        _pz2.execute_reuse_from_or_fail_closed(
            reuse_from=seed_run,
            new_run_dir=tmp_path / "new" / "phase_z2",
            mdx_source_text=supplied_mdx,
        )

    assert caught.value.code == 2
    err = capsys.readouterr().err
    assert "mdx_sha256_mismatch" in err
    # The dedicated axis must not be reported as the generic failure.
    assert "snapshot_validation_failed" not in err
    assert any(token in err for token in ("mdx_source_text", "mdx_sha256"))
# -- snapshot_validation_failed axis --------------------------------------
def test_snapshot_validation_failed_schema_version_aborts(
    tmp_path: Path, monkeypatch, capsys
):
    """A snapshot with an unexpected ``schema_version`` aborts on the validation axis."""
    runs_root = tmp_path / "runs"
    seed_run = "prev_run_schema_mismatch"
    seed_dir = runs_root / seed_run / "phase_z2"
    mdx_body = _mdx_text()

    # Matching MDX hash, but a schema version one above the supported one.
    tampered = _build_canonical_snapshot(mdx_source_text=mdx_body)
    tampered["schema_version"] = SNAPSHOT_VERSION + 1
    _seed_prev_run_dir(seed_dir, snapshot=tampered)
    monkeypatch.setattr(_pz2, "RUNS_DIR", runs_root)

    with pytest.raises(SystemExit) as caught:
        _pz2.execute_reuse_from_or_fail_closed(
            reuse_from=seed_run,
            new_run_dir=tmp_path / "new" / "phase_z2",
            mdx_source_text=mdx_body,
        )

    assert caught.value.code == 2
    err = capsys.readouterr().err
    assert "snapshot_validation_failed" in err
    assert "schema_version" in err
    # The MDX hash matches, so the hash axis must stay silent.
    assert "mdx_sha256_mismatch" not in err
def test_snapshot_validation_failed_missing_required_key_aborts(
    tmp_path: Path, monkeypatch, capsys
):
    """A snapshot without ``units`` aborts on the validation axis and names the key."""
    runs_root = tmp_path / "runs"
    seed_run = "prev_run_missing_key"
    seed_dir = runs_root / seed_run / "phase_z2"
    mdx_body = _mdx_text()

    # Drop a required top-level key from an otherwise valid snapshot.
    incomplete = _build_canonical_snapshot(mdx_source_text=mdx_body)
    incomplete.pop("units")
    _seed_prev_run_dir(seed_dir, snapshot=incomplete)
    monkeypatch.setattr(_pz2, "RUNS_DIR", runs_root)

    with pytest.raises(SystemExit) as caught:
        _pz2.execute_reuse_from_or_fail_closed(
            reuse_from=seed_run,
            new_run_dir=tmp_path / "new" / "phase_z2",
            mdx_source_text=mdx_body,
        )

    assert caught.value.code == 2
    err = capsys.readouterr().err
    assert "snapshot_validation_failed" in err
    assert "units" in err
# -- reuse_copy_os_error axis (OSError != FileNotFoundError) -------------
def test_copy_os_error_aborts_with_own_axis(
    tmp_path: Path, monkeypatch, capsys
):
    """A non-FNF ``OSError`` during the copy step aborts on ``reuse_copy_os_error``.

    ``shutil.copyfile`` is patched to raise ``PermissionError``. The
    wrapper must turn that into the fail-closed contract (stderr
    diagnostic plus ``SystemExit(2)``) instead of letting a raw traceback
    escape.

    ``FileNotFoundError`` is itself an ``OSError`` subclass, so the
    implementation has to handle it before the generic ``OSError`` case.
    This test therefore also checks that the missing-artifact axis is not
    reported.
    """
    runs_root = tmp_path / "runs"
    seed_run = "prev_run_perm_denied"
    seed_dir = runs_root / seed_run / "phase_z2"
    _seed_prev_run_dir(seed_dir, snapshot=_build_canonical_snapshot())
    monkeypatch.setattr(_pz2, "RUNS_DIR", runs_root)

    def _deny_copy(src, dst, *args, **kwargs):
        raise PermissionError(f"simulated permission denied: {dst}")

    monkeypatch.setattr(_pz2.shutil, "copyfile", _deny_copy)

    with pytest.raises(SystemExit) as caught:
        _pz2.execute_reuse_from_or_fail_closed(
            reuse_from=seed_run,
            new_run_dir=tmp_path / "new" / "phase_z2",
            mdx_source_text=_mdx_text(),
        )

    assert caught.value.code == 2
    err = capsys.readouterr().err
    expected_fragments = (
        "reuse_copy_os_error",
        "value:",
        "path:",
        "upstream:",
        "reason:",
        "PermissionError",
        "simulated permission denied",
    )
    for fragment in expected_fragments:
        assert fragment in err
    # A permission failure is not a missing artifact.
    assert "reuse_artifact_missing" not in err
def test_copy_filenotfounderror_still_uses_artifact_missing_axis(
    tmp_path: Path, monkeypatch, capsys
):
    """Subclass-ordering regression guard for the copy surface.

    ``FileNotFoundError`` IS an ``OSError`` subclass. If the bare-OSError
    handler ever moves above the FNF handler, a missing artifact would
    be mis-bucketed under ``reuse_copy_os_error``; this test pins the
    dispatch by deleting one seeded step artifact before the reuse call.

    The exit code is asserted as well, matching the sibling fail-closed
    tests in this file that expect ``SystemExit(2)``.
    """
    runs_root = tmp_path / "runs"
    prev_run_id = "prev_run_fnf_ordering"
    prev_run_dir = runs_root / prev_run_id / "phase_z2"
    new_run_dir = tmp_path / "new" / "phase_z2"
    snap = _build_canonical_snapshot()
    _seed_prev_run_dir(prev_run_dir, snapshot=snap)
    # Remove one artifact so the copy hits a genuinely missing file.
    (prev_run_dir / "steps" / "step05_v4_evidence.json").unlink()
    monkeypatch.setattr(_pz2, "RUNS_DIR", runs_root)
    with pytest.raises(SystemExit) as ei:
        _pz2.execute_reuse_from_or_fail_closed(
            reuse_from=prev_run_id,
            new_run_dir=new_run_dir,
            mdx_source_text=_mdx_text(),
        )
    # Same exit code the other fail-closed axes in this suite assert.
    assert ei.value.code == 2
    err = capsys.readouterr().err
    assert "reuse_artifact_missing" in err
    assert "reuse_copy_os_error" not in err
# -- snapshot_read_os_error axis (OSError != FileNotFoundError) ----------
def test_snapshot_read_os_error_aborts_with_own_axis(
    tmp_path: Path, monkeypatch, capsys
):
    """A non-FNF ``OSError`` from the snapshot loader must fail closed.

    ``_load_and_validate_reuse_snapshot`` is replaced with a stub that
    raises ``PermissionError`` (standing in for e.g. a denied
    ``Path.read_text`` or an ``IsADirectoryError`` on the snapshot path).
    The entry point is expected to report ``snapshot_read_os_error`` on
    stderr and exit with code 2 instead of leaking a raw traceback.
    """
    prev_run_id = "prev_run_snapshot_perm"
    runs_root = tmp_path / "runs"
    prev_run_dir = runs_root / prev_run_id / "phase_z2"
    _seed_prev_run_dir(prev_run_dir, snapshot=_build_canonical_snapshot())
    monkeypatch.setattr(_pz2, "RUNS_DIR", runs_root)

    def _deny_read(*args, **kwargs):
        raise PermissionError("simulated read denied on snapshot")

    monkeypatch.setattr(
        _pz2, "_load_and_validate_reuse_snapshot", _deny_read
    )

    with pytest.raises(SystemExit) as exit_info:
        _pz2.execute_reuse_from_or_fail_closed(
            reuse_from=prev_run_id,
            new_run_dir=tmp_path / "new" / "phase_z2",
            mdx_source_text=_mdx_text(),
        )
    assert exit_info.value.code == 2

    stderr_text = capsys.readouterr().err
    for expected in (
        "snapshot_read_os_error",
        "value:",
        "path:",
        "upstream:",
        "reason:",
        "PermissionError",
        "simulated read denied on snapshot",
    ):
        assert expected in stderr_text
    # Must NOT be mis-bucketed as missing-after-copy or corrupt-json.
    for forbidden in ("snapshot_missing_after_copy", "snapshot_corrupt_json"):
        assert forbidden not in stderr_text
def test_snapshot_filenotfounderror_still_uses_missing_after_copy_axis(
    tmp_path: Path, monkeypatch, capsys
):
    """Subclass-ordering regression guard for the load surface.

    ``FileNotFoundError`` must keep its own
    ``snapshot_missing_after_copy`` axis even though the bare-OSError
    branch sits below it. The loader is stubbed to raise FNF directly.

    The exit code is asserted as well, matching the sibling fail-closed
    tests in this file that expect ``SystemExit(2)``.
    """
    runs_root = tmp_path / "runs"
    prev_run_id = "prev_run_load_fnf_ordering"
    prev_run_dir = runs_root / prev_run_id / "phase_z2"
    new_run_dir = tmp_path / "new" / "phase_z2"
    snap = _build_canonical_snapshot()
    _seed_prev_run_dir(prev_run_dir, snapshot=snap)
    monkeypatch.setattr(_pz2, "RUNS_DIR", runs_root)

    def _raise_fnf(*args, **kwargs):
        raise FileNotFoundError("simulated FNF on snapshot read")

    monkeypatch.setattr(
        _pz2, "_load_and_validate_reuse_snapshot", _raise_fnf
    )
    with pytest.raises(SystemExit) as ei:
        _pz2.execute_reuse_from_or_fail_closed(
            reuse_from=prev_run_id,
            new_run_dir=new_run_dir,
            mdx_source_text=_mdx_text(),
        )
    # Same exit code the other fail-closed axes in this suite assert.
    assert ei.value.code == 2
    err = capsys.readouterr().err
    assert "snapshot_missing_after_copy" in err
    assert "snapshot_read_os_error" not in err
# -- module surface anchor ------------------------------------------------
def test_pipeline_exposes_u4b_surface():
    """Pin the u4b names as module-level attributes of the pipeline.

    u5 wires ``execute_reuse_from_or_fail_closed`` into the entry point,
    so the public callable and the closed-axis vocabulary have to stay
    reachable on the module.
    """
    u4b_surface = (
        "execute_reuse_from_or_fail_closed",
        "_abort_reuse_from",
        "_paths_equivalent",
        "REUSE_FAIL_CLOSED_AXES",
    )
    for name in u4b_surface:
        assert hasattr(_pz2, name), f"u4b surface missing: {name}"
def test_pipeline_run_signature_reuse_from_threaded_after_u5():
    """Forward-direction lock on the ``reuse_from`` kwarg.

    u5 threaded ``reuse_from`` into ``run_phase_z2_mvp1`` as a
    keyword-only parameter defaulting to ``None`` (the earlier
    ``until_u5`` lock has flipped). This guards against silent drift:
    positional promotion, a default changed to a string, or a kind
    change. It mirrors the equivalent locks in
    test_phase_z2_reuse_from_entry.py and test_phase_z2_cli_reuse_from.py
    and is repeated here so the fail-closed regression suite is
    self-contained.
    """
    import inspect

    params = inspect.signature(_pz2.run_phase_z2_mvp1).parameters
    assert "reuse_from" in params, (
        "u5 must thread reuse_from into run_phase_z2_mvp1 — kwarg missing. "
        f"current params: {list(params)}"
    )

    reuse_param = params["reuse_from"]
    assert reuse_param.kind is inspect.Parameter.KEYWORD_ONLY, (
        f"reuse_from must be keyword-only (after the ``*`` barrier); "
        f"got kind={reuse_param.kind}"
    )
    assert reuse_param.default is None, (
        f"reuse_from must default to None to preserve pre-u5 behaviour; "
        f"got default={reuse_param.default!r}"
    )

View File

@@ -0,0 +1,493 @@
"""IMP-43 (#72) u2 — unit tests for ``src.phase_z2_reuse_snapshot``.
Scope mirror of the production module (Stage 2 u2):
* ``build_snapshot`` shape, provenance, JSON round-trip, required keys.
* ``serialize_section`` / ``serialize_unit`` field preservation, including
the duck-typed ``v4_candidates`` shape (template_id / frame_id /
frame_number / confidence / label).
* ``validate_snapshot`` fail-closed paths: non-dict input, schema
version mismatch, missing/empty/non-string ``mdx_sha256``, sha
mismatch, missing required keys, unwrapped wrapper, wrapper missing
a provenance field.
* Module-level constants exposed for u3 / u4 / u4b consumers.
The tests use synthetic duck-typed dataclasses so the snapshot module's
external surface is exercised without coupling to the production
``MdxSection`` / ``CompositionUnit`` / ``V4Match`` dataclass layouts.
That mirrors the production module's intentional duck-typing (no
imports from ``phase_z2_pipeline`` / ``phase_z2_composition``).
"""
from __future__ import annotations
import json
from dataclasses import dataclass, field
from typing import Any, Optional
import pytest
from src.phase_z2_reuse_snapshot import (
REQUIRED_TOP_LEVEL_KEYS,
SNAPSHOT_FILENAME,
SNAPSHOT_VERSION,
SnapshotValidationError,
build_snapshot,
serialize_section,
serialize_unit,
validate_snapshot,
)
# -- synthetic duck-typed inputs ------------------------------------------
@dataclass
class _Section:
section_id: str
section_num: int
title: str
raw_content: str
heading_number: Optional[str] = None
v4_alias_keys: list = field(default_factory=list)
sub_sections: list = field(default_factory=list)
@dataclass
class _V4Candidate:
template_id: str
frame_id: str
frame_number: int
confidence: float
label: str
v4_rank: Optional[int] = None
@dataclass
class _Unit:
source_section_ids: list
merge_type: str
frame_template_id: str
frame_id: str
frame_number: int
confidence: float
label: str
phase_z_status: str
raw_content: str
title: str
score: float
v4_rank: Optional[int] = 1
selection_path: str = "rank_1"
fallback_reason: Optional[str] = None
rationale: dict = field(default_factory=dict)
auto_selectable: bool = True
filter_reasons: list = field(default_factory=list)
notes: list = field(default_factory=list)
v4_candidates: list = field(default_factory=list)
provisional: bool = False
def _make_section(**overrides: Any) -> _Section:
    """Build a ``_Section`` from fixed defaults, with ``overrides`` winning."""
    section_fields = {
        "section_id": "03-1",
        "section_num": 1,
        "title": "DX status",
        "raw_content": "- bullet one\n- bullet two",
        **overrides,
    }
    return _Section(**section_fields)
def _make_unit(**overrides: Any) -> _Unit:
    """Build a ``_Unit`` carrying one default V4 candidate.

    Any keyword in ``overrides`` replaces the matching default field,
    including ``v4_candidates``.
    """
    default_candidate = _V4Candidate(
        template_id="tpl_a",
        frame_id="fid_a",
        frame_number=13,
        confidence=0.91,
        label="use_as_is",
    )
    unit_fields: dict[str, Any] = {
        "source_section_ids": ["03-1"],
        "merge_type": "single",
        "frame_template_id": "tpl_a",
        "frame_id": "fid_a",
        "frame_number": 13,
        "confidence": 0.91,
        "label": "use_as_is",
        "phase_z_status": "auto_renderable",
        "raw_content": "- bullet one\n- bullet two",
        "title": "DX status",
        "score": 0.91,
        "v4_candidates": [default_candidate],
        **overrides,
    }
    return _Unit(**unit_fields)
def _make_build_kwargs(**overrides: Any) -> dict[str, Any]:
    """Return a complete keyword set for ``build_snapshot``.

    Defaults describe one section and one unit; ``overrides`` replace
    individual entries by key.
    """
    defaults: dict[str, Any] = {
        "mdx_sha256": "a" * 64,
        "slide_title": "Title",
        "slide_footer": "Footer",
        "sections": [_make_section()],
        "stage0_adapter_diagnostics": {"used": True, "fallback_reason": None},
        "stage0_normalized_assets": {"popups": [], "images": [], "tables": []},
        "v4_evidence": [{"section_id": "03-1", "v4_candidates": []}],
        "layout_preset_pre_override": "horizontal-2",
        "units": [_make_unit()],
        "comp_debug": {"v4_fallback_summary": {"fallback_used_count": 0}},
        "v4_fallback_traces": {"03-1": {"selection_path": "rank_1"}},
        "ai_preflight": {"enabled": False, "skipped": True},
    }
    return {**defaults, **overrides}
# -- module constants -----------------------------------------------------
def test_snapshot_filename_constant():
    """The sidecar filename constant is pinned to its literal value."""
    expected_name = "_reuse_snapshot.json"
    assert SNAPSHOT_FILENAME == expected_name
def test_snapshot_version_is_positive_int():
    """``SNAPSHOT_VERSION`` must be an ``int`` no smaller than 1."""
    version = SNAPSHOT_VERSION
    assert isinstance(version, int)
    assert version >= 1
def test_required_keys_include_contract_and_payload():
    """Both the bare contract keys and every payload axis are required."""
    # Bare contract / integrity keys.
    for bare_key in ("schema_version", "mdx_sha256"):
        assert bare_key in REQUIRED_TOP_LEVEL_KEYS

    # Payload axes per Stage 2 plan.
    payload_axes = (
        "slide_title",
        "slide_footer",
        "sections",
        "stage0_adapter_diagnostics",
        "stage0_normalized_assets",
        "v4_evidence",
        "layout_preset_pre_override",
        "units",
        "comp_debug",
        "v4_fallback_traces",
        "ai_preflight",
    )
    for k in payload_axes:
        assert k in REQUIRED_TOP_LEVEL_KEYS, f"missing from REQUIRED_TOP_LEVEL_KEYS: {k}"
# -- build_snapshot -------------------------------------------------------
def test_build_snapshot_round_trips_through_json():
    """A built snapshot survives ``json.dumps`` / ``json.loads``."""
    snapshot = build_snapshot(**_make_build_kwargs())
    reloaded = json.loads(json.dumps(snapshot))
    assert reloaded["schema_version"] == SNAPSHOT_VERSION
    assert reloaded["mdx_sha256"] == "a" * 64
def test_build_snapshot_has_all_required_keys():
    """Every entry of ``REQUIRED_TOP_LEVEL_KEYS`` appears in the output."""
    snapshot = build_snapshot(**_make_build_kwargs())
    for key in REQUIRED_TOP_LEVEL_KEYS:
        assert key in snapshot, f"build_snapshot missing required key: {key}"
def test_build_snapshot_bare_keys_are_unwrapped_scalars():
    """``schema_version`` and ``mdx_sha256`` are stored as plain scalars."""
    snapshot = build_snapshot(**_make_build_kwargs())
    version_entry = snapshot["schema_version"]
    sha_entry = snapshot["mdx_sha256"]
    assert version_entry == SNAPSHOT_VERSION
    assert sha_entry == "a" * 64
    # bare keys MUST NOT be wrapped — u4b mdx_sha256 check reads directly.
    assert not isinstance(version_entry, dict)
    assert not isinstance(sha_entry, dict)
def test_build_snapshot_provenance_wrapper_shape():
    """Every non-bare key is a ``{value, source_path, upstream_step}`` dict."""
    snapshot = build_snapshot(**_make_build_kwargs())
    bare = {"schema_version", "mdx_sha256"}
    wrapped_items = [
        (key, entry) for key, entry in snapshot.items() if key not in bare
    ]
    for key, entry in wrapped_items:
        assert isinstance(entry, dict), f"{key} is not wrapped"
        assert set(entry.keys()) == {"value", "source_path", "upstream_step"}, key
        # source_path must be a non-empty string.
        assert isinstance(entry["source_path"], str) and entry["source_path"]
        assert isinstance(entry["upstream_step"], str)
        assert entry["upstream_step"].startswith("step"), entry["upstream_step"]
def test_build_snapshot_upstream_steps_stay_inside_reuse_boundary():
    """Each ``upstream_step`` must lie inside the Step 0/2/5/6 boundary.

    Per the Stage 1 root_cause, a drift to e.g. ``step09`` would
    silently invite work outside the reuse window, so it has to fail
    loudly here.

    Step 01 contributes the ``mdx_sha256`` integrity key, which is a
    bare contract scalar with no wrapper; that is why ``step01`` is not
    part of the allowed payload provenance set.
    """
    snapshot = build_snapshot(**_make_build_kwargs())
    allowed = {"step00", "step02", "step05", "step06"}
    bare_keys = {"schema_version", "mdx_sha256"}
    for key, entry in snapshot.items():
        if key not in bare_keys:
            assert entry["upstream_step"] in allowed, (
                f"key {key!r}: upstream_step {entry['upstream_step']!r} outside reuse boundary"
            )
def test_build_snapshot_units_carry_v4_candidates():
    """The serialized unit keeps its V4 candidate's identifying fields."""
    snapshot = build_snapshot(**_make_build_kwargs())
    serialized_units = snapshot["units"]["value"]
    assert len(serialized_units) == 1
    first_candidate = serialized_units[0]["v4_candidates"][0]
    assert first_candidate["template_id"] == "tpl_a"
    assert first_candidate["frame_number"] == 13
    assert first_candidate["confidence"] == pytest.approx(0.91)
def test_build_snapshot_sections_preserve_alias_keys_and_subsections():
    """Alias keys, sub-sections and heading number survive serialization."""
    custom_section = _make_section(
        section_id="04-2",
        v4_alias_keys=["04-2.1"],
        sub_sections=[{"id": "04-2-sub-1"}],
        heading_number="2.1",
    )
    snapshot = build_snapshot(**_make_build_kwargs(sections=[custom_section]))
    first_section = snapshot["sections"]["value"][0]
    assert first_section["section_id"] == "04-2"
    assert first_section["v4_alias_keys"] == ["04-2.1"]
    assert first_section["sub_sections"] == [{"id": "04-2-sub-1"}]
    assert first_section["heading_number"] == "2.1"
def test_build_snapshot_units_provenance_points_at_step06():
    """``units`` provenance names the step06 composition-plan artifact."""
    units_entry = build_snapshot(**_make_build_kwargs())["units"]
    assert "step06_composition_plan.json" in units_entry["source_path"]
    assert units_entry["upstream_step"] == "step06"
def test_build_snapshot_v4_evidence_provenance_points_at_step05():
    """``v4_evidence`` provenance names the step05 evidence artifact."""
    evidence_entry = build_snapshot(**_make_build_kwargs())["v4_evidence"]
    assert "step05_v4_evidence.json" in evidence_entry["source_path"]
    assert evidence_entry["upstream_step"] == "step05"
def test_build_snapshot_ai_preflight_provenance_points_at_step00():
    """``ai_preflight`` provenance names the step00 preconditions artifact."""
    preflight_entry = build_snapshot(**_make_build_kwargs())["ai_preflight"]
    assert "step00_preconditions.json" in preflight_entry["source_path"]
    assert preflight_entry["upstream_step"] == "step00"
def test_build_snapshot_rejects_unjsonable_input():
    """A unit holding a non-JSON-safe value makes ``build_snapshot`` raise ``TypeError``."""
    # A bare ``object()`` is not JSON-safe.
    poisoned_unit = _make_unit(notes=[object()])
    build_kwargs = _make_build_kwargs(units=[poisoned_unit])
    with pytest.raises(TypeError):
        build_snapshot(**build_kwargs)
def test_build_snapshot_handles_none_optional_fields():
    """``None`` for the optional inputs never raises.

    Title and footer stay ``None``; the dict-shaped axes come back as
    ``{}``.
    """
    stays_none = ("slide_title", "slide_footer")
    becomes_empty_dict = (
        "stage0_adapter_diagnostics",
        "stage0_normalized_assets",
        "comp_debug",
        "v4_fallback_traces",
        "ai_preflight",
    )
    none_overrides = dict.fromkeys(stays_none + becomes_empty_dict, None)
    snapshot = build_snapshot(**_make_build_kwargs(**none_overrides))

    # None inputs land as None / {} consistently — never raise.
    for key in stays_none:
        assert snapshot[key]["value"] is None
    for key in becomes_empty_dict:
        assert snapshot[key]["value"] == {}
# -- serializer helpers ---------------------------------------------------
def test_serialize_section_preserves_all_documented_fields():
    """``serialize_section`` carries every documented section field across."""
    section = _make_section(
        heading_number="1.1",
        v4_alias_keys=["03-1.x"],
        sub_sections=[{"id": "s"}],
    )
    serialized = serialize_section(section)

    exact_fields = {
        "section_id": "03-1",
        "section_num": 1,
        "title": "DX status",
    }
    for field_name, expected in exact_fields.items():
        assert serialized[field_name] == expected
    assert serialized["raw_content"].startswith("- bullet")
    assert serialized["heading_number"] == "1.1"
    assert serialized["v4_alias_keys"] == ["03-1.x"]
    assert serialized["sub_sections"] == [{"id": "s"}]
def test_serialize_section_works_with_missing_optional_attrs():
    """An object lacking the optional attributes still serializes.

    Missing ``heading_number`` becomes ``None``; the two list fields
    become empty lists.
    """

    class _Minimal:
        # Only the four required attributes; no optional ones at all.
        section_id = "x"
        section_num = 0
        title = "t"
        raw_content = "r"

    serialized = serialize_section(_Minimal())
    assert serialized["heading_number"] is None
    assert serialized["v4_alias_keys"] == []
    assert serialized["sub_sections"] == []
def test_serialize_unit_v4_candidates_unwrap_to_named_attrs():
    """A serialized candidate is a plain dict of its named attributes."""
    first_candidate = serialize_unit(_make_unit())["v4_candidates"][0]
    expected_candidate = {
        "template_id": "tpl_a",
        "frame_id": "fid_a",
        "frame_number": 13,
        "confidence": pytest.approx(0.91),
        "label": "use_as_is",
        # u4 follow-up — Step 9 application-plan payload reads
        # ``c.v4_rank`` off each rehydrated candidate. Snapshot
        # serializer persists it via ``getattr(c, 'v4_rank', None)`` so
        # legacy duck types (no v4_rank attr) get None and modern V4Match
        # instances carry their rank (1/2/3/...).
        "v4_rank": None,
    }
    assert first_candidate == expected_candidate
def test_serialize_unit_v4_candidates_persist_v4_rank_when_present():
    """A candidate with ``v4_rank=2`` (V4Match-shape duck type) keeps its rank."""
    candidate_with_rank = _V4Candidate(
        template_id="tpl_b",
        frame_id="fid_b",
        frame_number=14,
        confidence=0.82,
        label="light_edit",
        v4_rank=2,
    )
    serialized = serialize_unit(_make_unit(v4_candidates=[candidate_with_rank]))
    assert serialized["v4_candidates"][0]["v4_rank"] == 2
def test_serialize_unit_handles_empty_v4_candidates():
    """An empty candidate list serializes to an empty list."""
    serialized = serialize_unit(_make_unit(v4_candidates=[]))
    assert serialized["v4_candidates"] == []
def test_serialize_unit_provisional_default_false():
    """A default unit serializes ``provisional`` as ``False``."""
    serialized = serialize_unit(_make_unit())
    assert serialized["provisional"] is False
def test_serialize_unit_provisional_true_preserved():
    """``provisional=True`` on the unit is kept in the serialized form."""
    serialized = serialize_unit(_make_unit(provisional=True))
    assert serialized["provisional"] is True
def test_serialize_unit_round_trips_through_json():
    """A serialized unit survives a JSON encode/decode cycle."""
    encoded = json.dumps(serialize_unit(_make_unit()))
    decoded = json.loads(encoded)
    assert decoded["source_section_ids"] == ["03-1"]
    assert decoded["frame_template_id"] == "tpl_a"
# -- validate_snapshot ----------------------------------------------------
def test_validate_snapshot_accepts_well_formed():
    """A freshly built snapshot validates against its own sha without raising."""
    well_formed = build_snapshot(**_make_build_kwargs())
    validate_snapshot(well_formed, expected_mdx_sha256="a" * 64)
def test_validate_snapshot_rejects_non_dict_input():
    """A non-dict snapshot is rejected with ``SnapshotValidationError``."""
    not_a_snapshot = "not a dict"
    with pytest.raises(SnapshotValidationError):
        validate_snapshot(not_a_snapshot, expected_mdx_sha256="a" * 64)
def test_validate_snapshot_rejects_version_mismatch():
    """A foreign ``schema_version`` is rejected and named in the error."""
    snapshot = build_snapshot(**_make_build_kwargs())
    snapshot.update(schema_version=SNAPSHOT_VERSION + 999)
    with pytest.raises(SnapshotValidationError) as excinfo:
        validate_snapshot(snapshot, expected_mdx_sha256="a" * 64)
    assert "schema_version" in str(excinfo.value)
def test_validate_snapshot_rejects_missing_sha():
    """A snapshot without ``mdx_sha256`` is rejected and names the key."""
    snapshot = build_snapshot(**_make_build_kwargs())
    snapshot.pop("mdx_sha256")
    with pytest.raises(SnapshotValidationError) as excinfo:
        validate_snapshot(snapshot, expected_mdx_sha256="a" * 64)
    assert "mdx_sha256" in str(excinfo.value)
def test_validate_snapshot_rejects_empty_sha():
    """An empty-string ``mdx_sha256`` is rejected and names the key."""
    snapshot = build_snapshot(**_make_build_kwargs())
    snapshot.update(mdx_sha256="")
    with pytest.raises(SnapshotValidationError) as excinfo:
        validate_snapshot(snapshot, expected_mdx_sha256="a" * 64)
    assert "mdx_sha256" in str(excinfo.value)
def test_validate_snapshot_rejects_non_string_sha():
    """A non-string ``mdx_sha256`` (here an int) is rejected and names the key."""
    snapshot = build_snapshot(**_make_build_kwargs())
    snapshot.update(mdx_sha256=12345)
    with pytest.raises(SnapshotValidationError) as excinfo:
        validate_snapshot(snapshot, expected_mdx_sha256="a" * 64)
    assert "mdx_sha256" in str(excinfo.value)
def test_validate_snapshot_rejects_sha_mismatch():
    """A different expected sha raises with an ``mdx_sha256 mismatch`` message."""
    snapshot = build_snapshot(**_make_build_kwargs())
    different_sha = "b" * 64
    with pytest.raises(SnapshotValidationError) as excinfo:
        validate_snapshot(snapshot, expected_mdx_sha256=different_sha)
    assert "mdx_sha256 mismatch" in str(excinfo.value)
def test_validate_snapshot_rejects_missing_required_key():
    """Dropping the required ``units`` key is rejected and names the key."""
    snapshot = build_snapshot(**_make_build_kwargs())
    snapshot.pop("units")
    with pytest.raises(SnapshotValidationError) as excinfo:
        validate_snapshot(snapshot, expected_mdx_sha256="a" * 64)
    assert "units" in str(excinfo.value)
def test_validate_snapshot_rejects_unwrapped_payload_key():
    """A payload key holding a bare string instead of a wrapper is rejected."""
    snapshot = build_snapshot(**_make_build_kwargs())
    snapshot.update(units="not a dict")
    with pytest.raises(SnapshotValidationError) as excinfo:
        validate_snapshot(snapshot, expected_mdx_sha256="a" * 64)
    assert "units" in str(excinfo.value)
def test_validate_snapshot_rejects_wrapper_missing_value():
    """A wrapper without ``value`` is rejected and the error names ``value``."""
    snapshot = build_snapshot(**_make_build_kwargs())
    wrapper_without_value = {"source_path": "x", "upstream_step": "step06"}
    snapshot["units"] = wrapper_without_value
    with pytest.raises(SnapshotValidationError) as excinfo:
        validate_snapshot(snapshot, expected_mdx_sha256="a" * 64)
    assert "value" in str(excinfo.value)
def test_validate_snapshot_rejects_wrapper_missing_source_path():
    """A wrapper without ``source_path`` is rejected and names the field."""
    snapshot = build_snapshot(**_make_build_kwargs())
    wrapper_without_path = {"value": [], "upstream_step": "step06"}
    snapshot["units"] = wrapper_without_path
    with pytest.raises(SnapshotValidationError) as excinfo:
        validate_snapshot(snapshot, expected_mdx_sha256="a" * 64)
    assert "source_path" in str(excinfo.value)
def test_validate_snapshot_rejects_wrapper_missing_upstream_step():
    """A wrapper without ``upstream_step`` is rejected and names the field."""
    snapshot = build_snapshot(**_make_build_kwargs())
    wrapper_without_step = {"value": [], "source_path": "x"}
    snapshot["units"] = wrapper_without_step
    with pytest.raises(SnapshotValidationError) as excinfo:
        validate_snapshot(snapshot, expected_mdx_sha256="a" * 64)
    assert "upstream_step" in str(excinfo.value)
def test_validate_snapshot_error_subclasses_value_error():
    """A validation failure must be catchable as ``ValueError``.

    The bad version is derived from ``SNAPSHOT_VERSION`` (as in the
    version-mismatch test) rather than hard-coded, so it can never
    coincide with the real schema version and make the test fail for the
    wrong reason.
    """
    snap = build_snapshot(**_make_build_kwargs())
    snap["schema_version"] = SNAPSHOT_VERSION + 999
    # u4b will pre-catch SnapshotValidationError, but the broader
    # `except ValueError` net must still pick this up.
    with pytest.raises(ValueError):
        validate_snapshot(snap, expected_mdx_sha256="a" * 64)

View File

@@ -0,0 +1,282 @@
"""IMP-43 (#72) u3 — focused tests for the Step 6 reuse snapshot writer.
u3 scope (per the Stage 2 Exit Report):
- ``_write_reuse_snapshot`` writes ``run_dir/_reuse_snapshot.json`` *after*
the Step 6 artifact lands; failure WARNS and CONTINUES (the helper does
NOT raise out of the main pipeline run).
- The Step 6 artifact data dict records the run_dir-relative sidecar path
as ``data.reuse_snapshot_path`` (additive informational field, always
set to ``SNAPSHOT_FILENAME`` regardless of write success — u4 will
fail-closed on missing / invalid sidecar via u2's ``validate_snapshot``).
The helper is tested in isolation (no full pipeline run) — pipeline call
site presence is asserted structurally so we exercise behaviour without
re-running Step 0~6 inside the test process. End-to-end equivalence under
``--reuse-from`` is u7a / u7b scope.
"""
from __future__ import annotations
import json
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Optional
import pytest
import src.phase_z2_pipeline as _pz2
from src.phase_z2_reuse_snapshot import (
SNAPSHOT_FILENAME,
SNAPSHOT_VERSION,
SnapshotValidationError,
validate_snapshot,
)
# -- synthetic duck-typed inputs ------------------------------------------
@dataclass
class _Section:
section_id: str
section_num: int
title: str
raw_content: str
heading_number: Optional[str] = None
v4_alias_keys: list = field(default_factory=list)
sub_sections: list = field(default_factory=list)
@dataclass
class _V4Candidate:
template_id: str
frame_id: str
frame_number: int
confidence: float
label: str
@dataclass
class _Unit:
source_section_ids: list
merge_type: str
frame_template_id: str
frame_id: str
frame_number: int
confidence: float
label: str
phase_z_status: str
raw_content: str
title: str
score: float
v4_rank: Optional[int] = 1
selection_path: str = "rank_1"
fallback_reason: Optional[str] = None
rationale: dict = field(default_factory=dict)
auto_selectable: bool = True
filter_reasons: list = field(default_factory=list)
notes: list = field(default_factory=list)
v4_candidates: list = field(default_factory=list)
provisional: bool = False
def _make_kwargs(**overrides: Any) -> dict[str, Any]:
    """Return a complete keyword set for ``_write_reuse_snapshot``.

    Defaults describe one section, one unit and one V4 evidence row;
    ``overrides`` replace individual entries by key.
    """
    default_candidate = _V4Candidate(
        template_id="tpl_a",
        frame_id="fid_a",
        frame_number=13,
        confidence=0.91,
        label="use_as_is",
    )
    default_section = _Section(
        section_id="03-1",
        section_num=1,
        title="DX status",
        raw_content="- bullet one\n- bullet two",
    )
    default_unit = _Unit(
        source_section_ids=["03-1"],
        merge_type="single",
        frame_template_id="tpl_a",
        frame_id="fid_a",
        frame_number=13,
        confidence=0.91,
        label="use_as_is",
        phase_z_status="auto_renderable",
        raw_content="- bullet one\n- bullet two",
        title="DX status",
        score=0.91,
        v4_candidates=[default_candidate],
    )
    # The evidence row repeats the candidate as a plain dict.
    evidence_row = {
        "section_id": "03-1",
        "v4_candidates": [
            {
                "template_id": "tpl_a",
                "frame_id": "fid_a",
                "frame_number": 13,
                "confidence": 0.91,
                "label": "use_as_is",
            }
        ],
        "candidate_status": "ok",
    }
    defaults: dict[str, Any] = {
        "mdx_source_text": "# Slide\n\n## 03-1 DX status\n\n- bullet one\n- bullet two\n",
        "slide_title": "Slide",
        "slide_footer": None,
        "sections": [default_section],
        "stage0_adapter_diagnostics": {"used": True, "fallback_reason": None},
        "stage0_normalized_assets": {"popups": [], "images": [], "tables": []},
        "v4_evidence": [evidence_row],
        "layout_preset_pre_override": "single",
        "units": [default_unit],
        "comp_debug": {"v4_fallback_summary": {"fallback_used_count": 0}},
        "v4_fallback_traces": {"03-1": {"selection_path": "rank_1"}},
        "ai_preflight": {"enabled": False, "skipped": True},
    }
    return {**defaults, **overrides}
# -- success path ---------------------------------------------------------
def test_writes_snapshot_file_at_run_dir_root(tmp_path: Path):
    """The helper returns the sidecar filename and creates it in ``run_dir``."""
    returned_name = _pz2._write_reuse_snapshot(tmp_path, **_make_kwargs())
    assert returned_name == SNAPSHOT_FILENAME
    fpath = tmp_path / SNAPSHOT_FILENAME
    assert fpath.exists(), f"snapshot not written at {fpath}"
def test_written_snapshot_validates(tmp_path: Path):
    """The written sidecar passes ``validate_snapshot`` for the same source.

    ``mdx_sha256`` is derived from ``mdx_source_text``, so the expected
    digest is recomputed here from the UTF-8 bytes of the text that was
    passed in.
    """
    import hashlib as _hl

    kwargs = _make_kwargs()
    assert _pz2._write_reuse_snapshot(tmp_path, **kwargs) == SNAPSHOT_FILENAME

    sidecar_text = (tmp_path / SNAPSHOT_FILENAME).read_text(encoding="utf-8")
    written_snapshot = json.loads(sidecar_text)
    source_bytes = kwargs["mdx_source_text"].encode("utf-8")
    validate_snapshot(
        written_snapshot,
        expected_mdx_sha256=_hl.sha256(source_bytes).hexdigest(),
    )
def test_snapshot_has_correct_schema_version(tmp_path: Path):
    """The written sidecar records the current ``SNAPSHOT_VERSION``."""
    _pz2._write_reuse_snapshot(tmp_path, **_make_kwargs())
    sidecar_text = (tmp_path / SNAPSHOT_FILENAME).read_text(encoding="utf-8")
    assert json.loads(sidecar_text)["schema_version"] == SNAPSHOT_VERSION
def test_snapshot_records_layout_preset_pre_override(tmp_path: Path):
    """The passed ``layout_preset_pre_override`` lands in the sidecar value."""
    writer_kwargs = _make_kwargs(layout_preset_pre_override="horizontal-2")
    _pz2._write_reuse_snapshot(tmp_path, **writer_kwargs)
    sidecar_text = (tmp_path / SNAPSHOT_FILENAME).read_text(encoding="utf-8")
    written_snapshot = json.loads(sidecar_text)
    assert written_snapshot["layout_preset_pre_override"]["value"] == "horizontal-2"
def test_snapshot_is_utf8_encoded_with_non_ascii_content(tmp_path: Path):
    """Non-ASCII (Korean) text round-trips through the UTF-8 sidecar."""
    korean_kwargs = _make_kwargs(
        slide_title="설계 방식의 왜곡",
        mdx_source_text="# 설계 방식\n\n- 한글 bullet\n",
    )
    _pz2._write_reuse_snapshot(tmp_path, **korean_kwargs)
    # ensure_ascii=False is intentional so Korean text round-trips
    # readable; if a future refactor drops it the bytes change but the
    # JSON still parses — we assert the file is decodable AS utf-8 and
    # the value survives the round trip.
    sidecar_text = (tmp_path / SNAPSHOT_FILENAME).read_text(encoding="utf-8")
    written_snapshot = json.loads(sidecar_text)
    assert written_snapshot["slide_title"]["value"] == "설계 방식의 왜곡"
# -- failure path ---------------------------------------------------------
def test_failure_warns_and_returns_none(tmp_path: Path, monkeypatch, capsys):
    """A ``build_snapshot`` failure is reported, not propagated.

    With ``build_snapshot`` patched to raise, the helper must return
    ``None`` and write a warning to stderr so the main pipeline run can
    continue; no sidecar file may be left behind.
    """

    def _failing_build(**_kwargs):
        raise RuntimeError("synthetic build failure")

    monkeypatch.setattr(_pz2, "build_snapshot", _failing_build)

    result = _pz2._write_reuse_snapshot(tmp_path, **_make_kwargs())
    assert result is None

    stderr_text = capsys.readouterr().err
    for expected in ("reuse-snapshot", "WARN", "RuntimeError"):
        assert expected in stderr_text
    # File MUST NOT exist on failure (no partial JSON on disk).
    assert not (tmp_path / SNAPSHOT_FILENAME).exists()
def test_failure_on_unwritable_run_dir_warns_and_returns_none(
    tmp_path: Path, monkeypatch, capsys
):
    """A disk-write failure is reported, not propagated.

    The target directory does not exist, so the write is expected to
    fail with ``FileNotFoundError``. The helper must warn on stderr and
    return ``None`` instead of raising to the caller (Stage 2 guardrail:
    the sidecar is optional).
    """
    missing_run_dir = tmp_path / "does" / "not" / "exist"
    result = _pz2._write_reuse_snapshot(missing_run_dir, **_make_kwargs())
    assert result is None

    stderr_text = capsys.readouterr().err
    assert "reuse-snapshot" in stderr_text
    assert "WARN" in stderr_text
    # FileNotFoundError specifically — sanity-check the type surfaces in
    # the warning so debugging is not blind.
    assert "FileNotFoundError" in stderr_text
# -- pipeline integration anchors -----------------------------------------
def test_pipeline_imports_helper_and_constant():
    """The pipeline exposes the writer helper and the filename constant.

    The helper is needed for the post-Step-6 call site; the constant is
    expected to be the same value the snapshot module defines (single
    source of truth).
    """
    helper = getattr(_pz2, "_write_reuse_snapshot", None)
    assert hasattr(_pz2, "_write_reuse_snapshot")
    assert callable(helper)
    assert _pz2.SNAPSHOT_FILENAME == "_reuse_snapshot.json"
def test_pipeline_call_site_follows_step06_artifact_write():
    """Structural guard: the helper is invoked AFTER the Step 6 write.

    The sidecar is meant to land next to
    ``steps/step06_composition_plan.json`` (Stage 2 spec), so a call to
    ``_write_reuse_snapshot`` must appear in the pipeline source after
    the step06 artifact write.

    The search skips the helper's own ``def _write_reuse_snapshot(``
    line. A plain substring search would also match that definition, so
    the guard could pass with no call site left if the helper happened
    to be defined below the marker.
    """
    import re

    source = Path(_pz2.__file__).read_text(encoding="utf-8")
    # Locate the step06 artifact write call site by its locked name arg.
    step06_marker = '6, "composition_plan"'
    idx_step06 = source.find(step06_marker)
    assert idx_step06 != -1, "step06 artifact write call site missing"
    # The helper call must appear AFTER the step06 marker; the negative
    # lookbehind rejects the function definition itself.
    call_site = re.compile(r"(?<!def )_write_reuse_snapshot\(")
    assert call_site.search(source, idx_step06) is not None, (
        "u3 helper call missing after step06 write"
    )
def test_pipeline_step06_artifact_data_records_snapshot_path():
    """Structural guard: Step 6 artifact data names the sidecar path.

    The pipeline source must mention the ``reuse_snapshot_path`` key and
    ``SNAPSHOT_FILENAME`` shortly after the step06 write, so a future
    ``--reuse-from`` consumer can locate the expected sidecar through
    the canonical step artifact (Stage 2 spec — informational; a missing
    file is u4's fail-closed concern).
    """
    pipeline_source = Path(_pz2.__file__).read_text(encoding="utf-8")
    marker_pos = pipeline_source.find('6, "composition_plan"')
    assert marker_pos != -1
    # Search a generous window after the marker for the field key.
    window_end = marker_pos + 8000
    nearby_source = pipeline_source[marker_pos:window_end]
    assert '"reuse_snapshot_path"' in nearby_source
    assert "SNAPSHOT_FILENAME" in nearby_source