feat(#72): IMP-43 u1~u8 --reuse-from incremental rerun (Step 0/1/2/5/6 reuse + Step 7+ re-execute)
Some checks failed
Multi-MDX Regression (IMP-91) / multi-mdx-regression (push) Failing after 25s
Some checks failed
Multi-MDX Regression (IMP-91) / multi-mdx-regression (push) Failing after 25s
u1 argparse --reuse-from PREV_RUN_ID + post-merge fail-closed guard (rejects
layout/zone_geometry/zone_section/image override axes by name; only
--override-frame is preserved).
u2 src/phase_z2_reuse_snapshot.py — JSON-only Step 6 snapshot with mdx_sha256
integrity key and {value, source_path, upstream_step} provenance per axis
(pickle forbidden per Stage 2 guardrail).
u3 _write_reuse_snapshot at the Step 6 boundary; soft-fails to stderr without
aborting the seed run.
u4 prev_run_dir RO copy of step00/01/02/05/06 + _reuse_snapshot.json into
new run_dir, state rehydration, reuse marker, frame-override application on
restored units, Step 7+ resume.
u4b fail-closed for missing prev_run_dir / missing/corrupt/invalid snapshot /
mdx_sha256 mismatch / accidental new==prev write, with value+path+upstream
diagnostics per axis.
u5 reuse_from Optional[str] threaded through run_phase_z2_mvp1 signature and
CLI dispatch; default None preserves byte-identical pre-IMP-43 behavior.
u6 Front /api/run optional reuseFromRunId forwarding (vite.config.ts +
designAgentApi.ts + run_pipeline_reuse_from.test.ts).
u7a fast CI equivalence (1 mdx × 1 layout × 2 frames); step13 whitelist =
run_id/timestamps/prev_run_id only. u7b 3 layouts × 3 mdx × 32 frames
sweep gated by pytest.mark.sweep (registered in pyproject.toml; default CI
must use -m 'not sweep').
u8 scripts/measure_reuse_savings.py argv-driven A/B/C harness with frame
pin self-discovery + seed-time exclusion; status board §8 TBD anchor
(issue-body 50-70% / 10-20s→3-8s claim explicitly unverified, not mirrored).
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -345,13 +345,25 @@ export interface PipelineOverrides {
|
||||
|
||||
export async function runPipeline(
|
||||
file: File,
|
||||
overrides?: PipelineOverrides
|
||||
overrides?: PipelineOverrides,
|
||||
// IMP-43 (#72) u6 — optional prev RUN_ID for incremental rerun. When set,
|
||||
// the vite plugin forwards `--reuse-from <PREV_RUN_ID>` to the backend
|
||||
// and the pipeline resumes at Step 7 (Step 0/1/2/5/6 artifacts copied
|
||||
// from the prior run). When omitted / empty, the POST body is
|
||||
// byte-identical to pre-u6 (no reuseFromRunId key → no flag forwarded).
|
||||
reuseFromRunId?: string,
|
||||
): Promise<RunPipelineResult> {
|
||||
const content = await file.text();
|
||||
const body: Record<string, unknown> = {
|
||||
filename: file.name,
|
||||
content,
|
||||
overrides,
|
||||
};
|
||||
if (reuseFromRunId) body.reuseFromRunId = reuseFromRunId;
|
||||
const res = await fetch("/api/run", {
|
||||
method: "POST",
|
||||
headers: { "Content-Type": "application/json" },
|
||||
body: JSON.stringify({ filename: file.name, content, overrides }),
|
||||
body: JSON.stringify(body),
|
||||
});
|
||||
const data = (await res.json()) as RunPipelineResult;
|
||||
if (!res.ok && !data.run_id) {
|
||||
|
||||
250
Front/client/tests/run_pipeline_reuse_from.test.ts
Normal file
250
Front/client/tests/run_pipeline_reuse_from.test.ts
Normal file
@@ -0,0 +1,250 @@
|
||||
// IMP-43 (#72) u6 — /api/run reuseFromRunId forwarding coverage.
|
||||
//
|
||||
// Stage 2 unit scope:
|
||||
// 1) Front/client/src/services/designAgentApi.ts `runPipeline`:
|
||||
// • accepts an optional 3rd arg `reuseFromRunId: string`.
|
||||
// • includes `reuseFromRunId` in the POST body when truthy.
|
||||
// • OMITS `reuseFromRunId` from the body when absent / empty / undefined
|
||||
// → byte-identical to the pre-u6 POST contract (absent flag = full
|
||||
// pipeline; backend u1 guard never sees an empty PREV_RUN_ID).
|
||||
// • leaves `filename`, `content`, and `overrides` untouched alongside
|
||||
// the new field (no payload-shape regression).
|
||||
// 2) Front/vite.config.ts `/api/run` handler:
|
||||
// • declares `reuseFromRunId?: string` in the payload type so a typed
|
||||
// client cannot send a payload the server silently drops.
|
||||
// • destructures `reuseFromRunId` from `payload` (sibling of
|
||||
// `overrides`, NOT nested under it — the backend u1 post-merge
|
||||
// guard treats reuse as a pipeline mode, not an override).
|
||||
// • forwards `--reuse-from <PREV_RUN_ID>` to spawn cliArgs guarded by
|
||||
// a truthy check (empty string / undefined ⇒ no flag, per Stage 2
|
||||
// contract: invalid CLI args must never reach argparse).
|
||||
// • places the forward block AFTER the `--override-section-assignment`
|
||||
// loop so the spawn argv preserves backend argparse's no-positional-
|
||||
// before-flag expectation and so `--override-frame` (still allowed
|
||||
// by the u1 guard) is positioned ahead of `--reuse-from`.
|
||||
//
|
||||
// runPipeline is exercised with a duck-typed `File` plus a `vi.stubGlobal`
|
||||
// fetch mock — mirrors the user_overrides_service.test.ts pattern. The
|
||||
// vite handler is source-sliced (mirrors handle_generate_diag.test.ts)
|
||||
// because the handler spawns python and a real /api/run round-trip is
|
||||
// out of unit-test scope.
|
||||
|
||||
import { afterEach, beforeEach, describe, expect, it, vi, type Mock } from "vitest";
|
||||
import { readFileSync } from "node:fs";
|
||||
import { resolve } from "node:path";
|
||||
import { runPipeline } from "../src/services/designAgentApi";
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// vite.config.ts source — read once for the handler source-slice assertions.
|
||||
// Path: Front/client/tests/ → Front/vite.config.ts (two levels up).
|
||||
// ---------------------------------------------------------------------------
|
||||
const VITE_CONFIG_PATH = resolve(__dirname, "..", "..", "vite.config.ts");
|
||||
const VITE_CONFIG_SOURCE = readFileSync(VITE_CONFIG_PATH, "utf-8");
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// fetch mock — minimal Response stub mirroring runPipeline's `.ok` + `.json()`
|
||||
// + `.status` surface. Same shape as the user_overrides_service.test.ts
|
||||
// helper so the two test files stay drift-free.
|
||||
// ---------------------------------------------------------------------------
|
||||
/** Minimal slice of the fetch `Response` surface that this stub provides. */
type MockResponse = {
  ok: boolean;
  status: number;
  json: () => Promise<unknown>;
};

/**
 * Build a stubbed fetch response.
 *
 * @param body   Value the stub's `json()` resolves to.
 * @param ok     Value for the `ok` flag (defaults to `true`).
 * @param status HTTP status code to report (defaults to `200`).
 * @returns An object exposing `ok`, `status` and an async `json()`.
 */
function mockResponse(body: unknown, ok = true, status = 200): MockResponse {
  const json = async (): Promise<unknown> => body;
  const stub: MockResponse = { ok, status, json };
  return stub;
}
|
||||
|
||||
const SUCCESS_BODY = {
|
||||
success: true,
|
||||
run_id: "test_run_id_20260524",
|
||||
exit_code: 0,
|
||||
final_html_exists: true,
|
||||
preview_exists: true,
|
||||
stdout: "",
|
||||
stderr: "",
|
||||
};
|
||||
|
||||
// Duck-typed File — runPipeline reads only `.name` and `.text()`. Avoids a
|
||||
// hard dependency on the global File constructor (varies across node /
|
||||
// jsdom / happy-dom test environments).
|
||||
/**
 * Build a duck-typed stand-in for `File` that carries only a `name`
 * property and an async `text()` method resolving to `content`.
 *
 * The double cast is deliberate: the stub intentionally implements only
 * part of the `File` interface.
 */
function makeFakeFile(name: string, content: string): File {
  const stub = {
    name,
    text: async (): Promise<string> => content,
  };
  return stub as unknown as File;
}
|
||||
|
||||
let fetchMock: Mock;
|
||||
|
||||
beforeEach(() => {
|
||||
fetchMock = vi.fn();
|
||||
vi.stubGlobal("fetch", fetchMock);
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
vi.unstubAllGlobals();
|
||||
});
|
||||
|
||||
/**
 * Return the JSON-decoded request body of the most recent call recorded
 * on `fetchMock`.
 *
 * @throws Error when `fetchMock` has no recorded calls, or when the most
 *         recent call carried no (or an empty) `body` in its init argument.
 */
function lastPostBody(): Record<string, unknown> {
  const recorded = fetchMock.mock.calls;
  const mostRecent = recorded[recorded.length - 1];
  if (!mostRecent) {
    throw new Error("fetch was not called");
  }
  // Second positional argument of fetch(url, init).
  const init = mostRecent[1] as RequestInit | undefined;
  const rawBody = init?.body;
  if (!rawBody) {
    throw new Error("fetch was called without a body");
  }
  return JSON.parse(String(rawBody));
}
|
||||
|
||||
// ============================================================================
|
||||
// runPipeline (designAgentApi.ts) — forwarding/omission coverage
|
||||
// ============================================================================
|
||||
|
||||
describe("runPipeline reuseFromRunId forwarding (IMP-43 #72 u6)", () => {
|
||||
it("posts to /api/run via POST with JSON content-type", async () => {
|
||||
fetchMock.mockResolvedValueOnce(mockResponse(SUCCESS_BODY));
|
||||
await runPipeline(makeFakeFile("03.mdx", "# title"));
|
||||
expect(fetchMock).toHaveBeenCalledTimes(1);
|
||||
const [url, init] = fetchMock.mock.calls[0];
|
||||
expect(url).toBe("/api/run");
|
||||
expect((init as RequestInit).method).toBe("POST");
|
||||
expect((init as RequestInit).headers).toMatchObject({
|
||||
"Content-Type": "application/json",
|
||||
});
|
||||
});
|
||||
|
||||
it("includes reuseFromRunId in the POST body when provided", async () => {
|
||||
fetchMock.mockResolvedValueOnce(mockResponse(SUCCESS_BODY));
|
||||
await runPipeline(
|
||||
makeFakeFile("03.mdx", "# title"),
|
||||
undefined,
|
||||
"mdx03_20260524080000",
|
||||
);
|
||||
const body = lastPostBody();
|
||||
expect(body.reuseFromRunId).toBe("mdx03_20260524080000");
|
||||
expect(body.filename).toBe("03.mdx");
|
||||
expect(body.content).toBe("# title");
|
||||
});
|
||||
|
||||
it("omits reuseFromRunId when 3rd arg is undefined (pre-u6 byte-identical)", async () => {
|
||||
fetchMock.mockResolvedValueOnce(mockResponse(SUCCESS_BODY));
|
||||
await runPipeline(makeFakeFile("03.mdx", "# title"));
|
||||
const body = lastPostBody();
|
||||
expect("reuseFromRunId" in body).toBe(false);
|
||||
// Pre-u6 contract: filename/content are the only keys when overrides
|
||||
// is undefined (JSON.stringify drops undefined values; pre-u6 emitted
|
||||
// `JSON.stringify({filename, content, overrides})` with the same
|
||||
// drop-undefined behaviour, so the wire body is byte-identical).
|
||||
expect(Object.keys(body).sort()).toEqual(["content", "filename"]);
|
||||
});
|
||||
|
||||
it("omits reuseFromRunId but keeps overrides when only overrides provided", async () => {
|
||||
fetchMock.mockResolvedValueOnce(mockResponse(SUCCESS_BODY));
|
||||
await runPipeline(makeFakeFile("03.mdx", "# title"), {
|
||||
frames: { "03-1": "frame_07" },
|
||||
});
|
||||
const body = lastPostBody();
|
||||
expect("reuseFromRunId" in body).toBe(false);
|
||||
expect(Object.keys(body).sort()).toEqual([
|
||||
"content",
|
||||
"filename",
|
||||
"overrides",
|
||||
]);
|
||||
expect(body.overrides).toEqual({ frames: { "03-1": "frame_07" } });
|
||||
});
|
||||
|
||||
it("omits reuseFromRunId when passed an empty string (truthy guard)", async () => {
|
||||
fetchMock.mockResolvedValueOnce(mockResponse(SUCCESS_BODY));
|
||||
await runPipeline(makeFakeFile("03.mdx", "# title"), undefined, "");
|
||||
const body = lastPostBody();
|
||||
expect("reuseFromRunId" in body).toBe(false);
|
||||
});
|
||||
|
||||
it("forwards reuseFromRunId alongside frame overrides (the only u1-permitted combo)", async () => {
|
||||
fetchMock.mockResolvedValueOnce(mockResponse(SUCCESS_BODY));
|
||||
await runPipeline(
|
||||
makeFakeFile("03.mdx", "# title"),
|
||||
{ frames: { "03-1+03-2": "frame_07" } },
|
||||
"mdx03_20260524080000",
|
||||
);
|
||||
const body = lastPostBody();
|
||||
expect(body.overrides).toEqual({ frames: { "03-1+03-2": "frame_07" } });
|
||||
expect(body.reuseFromRunId).toBe("mdx03_20260524080000");
|
||||
});
|
||||
|
||||
it("returns the parsed RunPipelineResult on success", async () => {
|
||||
fetchMock.mockResolvedValueOnce(mockResponse(SUCCESS_BODY));
|
||||
const res = await runPipeline(
|
||||
makeFakeFile("03.mdx", "# title"),
|
||||
undefined,
|
||||
"mdx03_20260524080000",
|
||||
);
|
||||
expect(res.success).toBe(true);
|
||||
expect(res.run_id).toBe("test_run_id_20260524");
|
||||
});
|
||||
});
|
||||
|
||||
// ============================================================================
|
||||
// /api/run handler (vite.config.ts) — source-slice forwarding contract
|
||||
// ============================================================================
|
||||
|
||||
describe("/api/run handler reuseFromRunId source-slice (IMP-43 #72 u6)", () => {
|
||||
it("declares reuseFromRunId?: string on the /api/run payload type", () => {
|
||||
// Payload type at the top of the /api/run handler body. The
|
||||
// optional-string declaration is the single source-of-truth for what
|
||||
// shape the handler accepts; a typed frontend client (u5 saveUserOverrides
|
||||
// sibling pattern) cannot silently send a payload the server drops.
|
||||
expect(VITE_CONFIG_SOURCE).toMatch(/reuseFromRunId\?:\s*string\s*;/);
|
||||
});
|
||||
|
||||
it("destructures reuseFromRunId from payload alongside filename/content/overrides", () => {
|
||||
expect(VITE_CONFIG_SOURCE).toMatch(
|
||||
/const\s*\{\s*filename\s*,\s*content\s*,\s*overrides\s*,\s*reuseFromRunId\s*\}\s*=\s*payload\s*;/,
|
||||
);
|
||||
});
|
||||
|
||||
it("forwards --reuse-from <PREV_RUN_ID> after the override-section-assignment loop", () => {
|
||||
// Stage 2 contract: reuse_from is a pipeline mode, not an override.
|
||||
// The forward block must sit AFTER the last override loop so the spawn
|
||||
// argv preserves the order documented in the u1 backend post-merge
|
||||
// guard (overrides parsed first; reuse_from precondition runs against
|
||||
// the merged overrides view).
|
||||
const reuseFromIdx = VITE_CONFIG_SOURCE.indexOf('"--reuse-from"');
|
||||
const zoneSectionsIdx = VITE_CONFIG_SOURCE.indexOf(
|
||||
'"--override-section-assignment"',
|
||||
);
|
||||
expect(reuseFromIdx).toBeGreaterThan(-1);
|
||||
expect(zoneSectionsIdx).toBeGreaterThan(-1);
|
||||
expect(reuseFromIdx).toBeGreaterThan(zoneSectionsIdx);
|
||||
});
|
||||
|
||||
it("guards the forward with a truthy check on reuseFromRunId", () => {
|
||||
// Empty string / undefined ⇒ no flag pushed (Stage 2 contract: invalid
|
||||
// CLI args must never reach argparse — the backend u1 guard would
|
||||
// fail-closed with `reuse_artifact_missing` on the empty PREV_RUN_ID).
|
||||
const reuseFromIdx = VITE_CONFIG_SOURCE.indexOf('"--reuse-from"');
|
||||
expect(reuseFromIdx).toBeGreaterThan(-1);
|
||||
const preface = VITE_CONFIG_SOURCE.slice(
|
||||
Math.max(0, reuseFromIdx - 200),
|
||||
reuseFromIdx,
|
||||
);
|
||||
expect(preface).toMatch(/if\s*\(\s*reuseFromRunId/);
|
||||
expect(preface).toMatch(/typeof\s+reuseFromRunId\s*===\s*"string"/);
|
||||
});
|
||||
|
||||
it("pushes reuseFromRunId as the --reuse-from argument value (no string interpolation)", () => {
|
||||
// The CLI value must be the raw PREV_RUN_ID — no `=` join, no quoting
|
||||
// (spawn is shell:false). Mirrors the `--override-layout` shape.
|
||||
const reuseFromIdx = VITE_CONFIG_SOURCE.indexOf('"--reuse-from"');
|
||||
expect(reuseFromIdx).toBeGreaterThan(-1);
|
||||
// Window spans both before (`cliArgs.push(`) and after
|
||||
// (`reuseFromRunId)`) the literal so the full push expression is
|
||||
// captured.
|
||||
const window = VITE_CONFIG_SOURCE.slice(
|
||||
Math.max(0, reuseFromIdx - 100),
|
||||
reuseFromIdx + 200,
|
||||
);
|
||||
expect(window).toMatch(
|
||||
/cliArgs\.push\(\s*"--reuse-from"\s*,\s*reuseFromRunId\s*\)/,
|
||||
);
|
||||
});
|
||||
});
|
||||
@@ -543,6 +543,13 @@ function vitePluginPhaseZApi(): Plugin {
|
||||
// (e.g., "top": ["03-1-sub-1"]). Forwarded as --override-section-assignment.
|
||||
zoneSections?: Record<string, string[]>;
|
||||
};
|
||||
// IMP-43 (#72) u6 — optional PREV_RUN_ID to reuse Step 0/1/2/5/6
|
||||
// artifacts from a prior run and resume execution at Step 7.
|
||||
// Lives at the payload root (NOT under `overrides`) because the
|
||||
// backend u1 post-merge guard rejects most override axes when
|
||||
// --reuse-from is supplied. Absent / empty = full pipeline
|
||||
// (byte-identical to pre-u6 spawn).
|
||||
reuseFromRunId?: string;
|
||||
};
|
||||
try {
|
||||
payload = JSON.parse(body);
|
||||
@@ -554,7 +561,7 @@ function vitePluginPhaseZApi(): Plugin {
|
||||
return;
|
||||
}
|
||||
|
||||
const { filename, content, overrides } = payload;
|
||||
const { filename, content, overrides, reuseFromRunId } = payload;
|
||||
if (!filename || typeof content !== "string") {
|
||||
res.writeHead(400, { "Content-Type": "application/json" });
|
||||
res.end(
|
||||
@@ -638,6 +645,19 @@ function vitePluginPhaseZApi(): Plugin {
|
||||
);
|
||||
}
|
||||
}
|
||||
// IMP-43 (#72) u6 — --reuse-from <PREV_RUN_ID> forward. Backend
|
||||
// (u1) parses this flag, validates the snapshot, copies Step
|
||||
// 0/1/2/5/6 artifacts from data/runs/<PREV_RUN_ID>/phase_z2 into
|
||||
// the new run_dir, and resumes execution at Step 7. The post-merge
|
||||
// guard at the same site rejects --override-layout /
|
||||
// --override-zone-geometry / --override-section-assignment /
|
||||
// --override-image with axis-named fail-closed exit; only
|
||||
// --override-frame (above) is preserved. Truthy check excludes
|
||||
// empty string + undefined so an invalid argument never reaches
|
||||
// argparse.
|
||||
if (reuseFromRunId && typeof reuseFromRunId === "string") {
|
||||
cliArgs.push("--reuse-from", reuseFromRunId);
|
||||
}
|
||||
console.log(
|
||||
`[phase-z-api] spawn pipeline: run_id=${runId}, mdx=${mdxPath}, args=${JSON.stringify(cliArgs.slice(2))}`
|
||||
);
|
||||
|
||||
@@ -182,6 +182,27 @@ Step 0 (사전 준비) 의 Figma → HTML 변환은 *precondition phase 의 작
|
||||
|
||||
---
|
||||
|
||||
## 8. IMP-43 (#72) `--reuse-from` measured savings
|
||||
|
||||
> Stage 2 §u8 binding contract: the issue-body 50–70% / 10–20s → 3–8s claim is **unverified** and is **not** mirrored here. Numbers below come from `scripts/measure_reuse_savings.py` on the project reference host; until that script is run and the values committed, every cell stays `TBD`.
|
||||
|
||||
| axis | value |
|
||||
|---|---|
|
||||
| measurement script | `scripts/measure_reuse_savings.py` |
|
||||
| reuse boundary (Stage 1 lock) | Step 0 / 1 / 2 / 5 / 6 only; Step 7+ re-executes |
|
||||
| full rerun seconds (p50) | TBD |
|
||||
| full rerun seconds (p95) | TBD |
|
||||
| reuse seconds (p50) | TBD |
|
||||
| reuse seconds (p95) | TBD |
|
||||
| reuse / full ratio (p50) | TBD |
|
||||
| last measured | TBD (date / host / mdx / iterations) |
|
||||
|
||||
Run protocol (per iteration): `(A)` seed → `(B)` full rerun with one self-discovered `--override-frame` pin → `(C)` `--reuse-from <seed>` with the same pin. The `(A)` seed time is reported separately and **not** included in the B-vs-C comparison — the reuse path's whole point is that the seed already exists from a prior interactive run.
|
||||
|
||||
Invocation: `python -m scripts.measure_reuse_savings samples/mdx_batch/02.mdx --iterations 5` (mdx is argv-driven; the script does not pin a sample internally).
|
||||
|
||||
---
|
||||
|
||||
## 사용 방법
|
||||
|
||||
- 새 작업 들어오면 → 본 board 의 *어느 step* 의 status 를 바꾸는 작업인지 식별
|
||||
|
||||
@@ -34,4 +34,5 @@ target-version = "py310"
|
||||
asyncio_mode = "auto"
|
||||
markers = [
|
||||
"integration: end-to-end pipeline integration tests (heavy; invoke Selenium)",
|
||||
"sweep: opt-in heavyweight sweep tests (IMP-43 u7b: 3 layouts × 3 mdx × frame-pin coverage). Invoke explicitly via `pytest -m sweep`; default CI must use `-m 'not sweep'`.",
|
||||
]
|
||||
|
||||
178
scripts/measure_reuse_savings.py
Normal file
178
scripts/measure_reuse_savings.py
Normal file
@@ -0,0 +1,178 @@
|
||||
"""IMP-43 (#72) u8 — measure ``--reuse-from`` wall-clock savings.
|
||||
|
||||
Argv-driven measurement helper for the Stage 2 §u8 binding contract:
|
||||
re-derive a realistic savings target instead of mirroring the
|
||||
unverified issue-body 50–70% / 10–20s → 3–8s claim.
|
||||
|
||||
Per-iteration measurement protocol (mirrors the u7a equivalence
|
||||
harness, ``tests/test_phase_z2_reuse_from_equivalence_unit.py``):
|
||||
|
||||
(A) baseline full run, no overrides — reuse seed
|
||||
(B) full rerun full run + one --override-frame pin — control path
|
||||
(C) reuse --reuse-from <seed> + same pin — reuse path
|
||||
|
||||
Wall-clock = ``time.perf_counter()`` around the subprocess.run call.
|
||||
The (A) seed run time is reported separately and NOT included in the
|
||||
B-vs-C comparison (the reuse path's whole point is that the seed
|
||||
already exists from a prior interactive run).
|
||||
|
||||
For each iteration the frame pin is self-discovered from the seed
|
||||
run's ``step06_composition_plan.json``: the first unit's
|
||||
``frame_template_id`` is re-pinned to itself, exercising the
|
||||
``--override-frame`` CLI surface end-to-end without changing the
|
||||
semantic frame assignment (same approach the u7a/u7b equivalence
|
||||
tests already lock).
|
||||
|
||||
Output: a JSON document to stdout with per-iteration timings,
|
||||
B/C p50 + p95, and the ratio C/B. Stderr carries the subprocess
|
||||
stdout/stderr tails on non-zero exits.
|
||||
|
||||
Guardrails (Stage 2):
|
||||
* argv-driven, no hardcoded mdx — caller picks the sample
|
||||
* no hardcoded savings target — TBD until measured
|
||||
* value + path + upstream provenance lives in the printed JSON
|
||||
* does NOT mutate prev_run_dir; new runs land under fresh run_ids
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import statistics
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
import uuid
|
||||
from pathlib import Path
|
||||
|
||||
REPO_ROOT = Path(__file__).resolve().parents[1]
|
||||
RUNS_DIR = REPO_ROOT / "data" / "runs"
|
||||
|
||||
|
||||
def _unique_run_id(prefix: str) -> str:
|
||||
return f"{prefix}_imp43_u8_{uuid.uuid4().hex[:8]}"
|
||||
|
||||
|
||||
def _spawn(extra_args: list[str], timeout: int) -> tuple[subprocess.CompletedProcess, float]:
    """Run ``python -m src.phase_z2_pipeline`` with ``extra_args`` and time it.

    The child is started with the current interpreter, ``cwd`` set to
    ``REPO_ROOT``, and stdout/stderr captured as text.

    Args:
        extra_args: Arguments appended after the module name.
        timeout: Per-run limit in seconds, passed to ``subprocess.run``.

    Returns:
        ``(completed_process, elapsed_seconds)`` where the elapsed time is
        the ``time.perf_counter()`` delta around the ``subprocess.run`` call.

    Raises:
        SystemExit: With code 2 when the child exceeds ``timeout``. A tagged
            diagnostic is written to stderr first, instead of letting
            ``subprocess.TimeoutExpired`` surface as a bare traceback.
    """
    start = time.perf_counter()
    try:
        cp = subprocess.run(
            [sys.executable, "-m", "src.phase_z2_pipeline", *extra_args],
            capture_output=True,
            text=True,
            timeout=timeout,
            cwd=str(REPO_ROOT),
        )
    except subprocess.TimeoutExpired as exc:
        elapsed = time.perf_counter() - start
        sys.stderr.write(
            f"[measure_reuse_savings] pipeline run timed out after "
            f"{elapsed:.1f}s (limit={timeout}s); args={extra_args}\n"
        )
        raise SystemExit(2) from exc
    return cp, time.perf_counter() - start
|
||||
|
||||
|
||||
def _assert_ok(label: str, cp: subprocess.CompletedProcess) -> None:
|
||||
if cp.returncode != 0:
|
||||
sys.stderr.write(
|
||||
f"[measure_reuse_savings] {label} failed rc={cp.returncode}\n"
|
||||
f"--- stderr tail ---\n{cp.stderr[-2000:]}\n"
|
||||
f"--- stdout tail ---\n{cp.stdout[-2000:]}\n"
|
||||
)
|
||||
raise SystemExit(2)
|
||||
|
||||
|
||||
def _discover_first_frame_pin(seed_run_id: str) -> tuple[str, str]:
    """Find the first pinnable ``(unit_id, frame_template_id)`` of a seed run.

    Reads ``RUNS_DIR/<seed_run_id>/phase_z2/steps/step06_composition_plan.json``
    and scans ``data.selected_units`` in order. A unit is pinnable when its
    ``source_section_ids`` is a non-empty list and its ``frame_template_id``
    is a non-empty string.

    Returns:
        ``(unit_id, frame_template_id)`` where ``unit_id`` is the unit's
        ``source_section_ids`` joined with ``"+"``.

    Raises:
        SystemExit: With a tagged message when the step06 file cannot be
            read or decoded, or when no unit is pinnable. Unexpected shapes
            (non-dict payload / ``data`` / unit, non-list ``selected_units``)
            are treated as "no pinnable unit" rather than crashing.
    """
    p = RUNS_DIR / seed_run_id / "phase_z2" / "steps" / "step06_composition_plan.json"
    try:
        payload = json.loads(p.read_text(encoding="utf-8"))
    except (OSError, ValueError) as exc:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        raise SystemExit(
            f"[measure_reuse_savings] seed {seed_run_id} step06 unreadable "
            f"({type(exc).__name__}: {exc}); path={p}"
        ) from exc

    data = payload.get("data") if isinstance(payload, dict) else None
    units = data.get("selected_units") if isinstance(data, dict) else None
    if not isinstance(units, list):
        units = []

    for u in units:
        if not isinstance(u, dict):
            continue
        sids = u.get("source_section_ids") or []
        tpl = u.get("frame_template_id")
        if isinstance(sids, list) and sids and isinstance(tpl, str) and tpl:
            return ("+".join(str(s) for s in sids), tpl)
    raise SystemExit(
        f"[measure_reuse_savings] seed {seed_run_id} step06 has no pinnable "
        f"(unit_id, frame_template_id); path={p}"
    )
|
||||
|
||||
|
||||
def _percentile(values: list[float], pct: float) -> float:
|
||||
if not values:
|
||||
return float("nan")
|
||||
if len(values) == 1:
|
||||
return values[0]
|
||||
s = sorted(values)
|
||||
k = (len(s) - 1) * pct
|
||||
lo = int(k)
|
||||
hi = min(lo + 1, len(s) - 1)
|
||||
return s[lo] + (s[hi] - s[lo]) * (k - lo)
|
||||
|
||||
|
||||
def main() -> int:
    """CLI entry point: run the A/B/C measurement and print a JSON summary.

    For each iteration it runs (A) a seed run, (B) a full rerun with one
    self-discovered ``--override-frame`` pin, and (C) a ``--reuse-from`` run
    with the same pin, then reports per-iteration timings plus B/C p50/p95
    and the C/B p50 ratio on stdout.

    Returns:
        0 on success, 2 when the mdx path is not a file.

    Raises:
        SystemExit: Code 2 from argparse when ``--iterations`` is below 1 or
            ``--timeout`` is not positive; also propagated from the helpers
            when a pipeline run fails.
    """
    ap = argparse.ArgumentParser(
        prog="python -m scripts.measure_reuse_savings",
        description="Measure IMP-43 --reuse-from wall-clock savings.",
    )
    ap.add_argument("mdx_path", type=Path, help="MDX sample to measure against")
    ap.add_argument("--iterations", type=int, default=3, help="trials (default 3)")
    ap.add_argument("--timeout", type=int, default=900, help="per-run timeout seconds")
    args = ap.parse_args()

    # With zero iterations every percentile is NaN, which json.dumps would
    # emit as the non-standard ``NaN`` token; reject the input up front.
    if args.iterations < 1:
        ap.error("--iterations must be >= 1")
    if args.timeout <= 0:
        ap.error("--timeout must be a positive number of seconds")

    if not args.mdx_path.is_file():
        sys.stderr.write(f"[measure_reuse_savings] mdx not found: {args.mdx_path}\n")
        return 2

    iterations: list[dict] = []
    for i in range(args.iterations):
        # (A) seed run — timed but excluded from the B-vs-C comparison.
        seed_id = _unique_run_id(f"seed{i}")
        cp_a, t_a = _spawn([str(args.mdx_path), seed_id], args.timeout)
        _assert_ok(f"(A) seed iter={i}", cp_a)

        unit_id, tpl_id = _discover_first_frame_pin(seed_id)
        override = ["--override-frame", f"{unit_id}={tpl_id}"]

        # (B) control path: full rerun with the pin.
        full_id = _unique_run_id(f"full{i}")
        cp_b, t_b = _spawn([str(args.mdx_path), full_id, *override], args.timeout)
        _assert_ok(f"(B) full rerun iter={i}", cp_b)

        # (C) reuse path: same pin, resuming from the seed run.
        reuse_id = _unique_run_id(f"reuse{i}")
        cp_c, t_c = _spawn(
            [str(args.mdx_path), reuse_id, "--reuse-from", seed_id, *override],
            args.timeout,
        )
        _assert_ok(f"(C) reuse iter={i}", cp_c)

        iterations.append({
            "iter": i,
            "seed_run_id": seed_id,
            "full_run_id": full_id,
            "reuse_run_id": reuse_id,
            "override_frame": f"{unit_id}={tpl_id}",
            "seed_seconds": t_a,
            "full_rerun_seconds": t_b,
            "reuse_seconds": t_c,
        })

    full_times = [it["full_rerun_seconds"] for it in iterations]
    reuse_times = [it["reuse_seconds"] for it in iterations]

    # Compute each p50 once so the zero-guard and the division use the very
    # same number.
    full_p50 = _percentile(full_times, 0.50)
    reuse_p50 = _percentile(reuse_times, 0.50)

    summary = {
        "mdx_path": str(args.mdx_path),
        "iterations_count": len(iterations),
        "full_rerun_seconds_p50": full_p50,
        "full_rerun_seconds_p95": _percentile(full_times, 0.95),
        "reuse_seconds_p50": reuse_p50,
        "reuse_seconds_p95": _percentile(reuse_times, 0.95),
        "reuse_over_full_ratio_p50": (
            reuse_p50 / full_p50 if full_p50 > 0 else float("nan")
        ),
        "iterations": iterations,
        "note": (
            "IMP-43 (#72) u8 measurement. Issue-body 50–70% / 10–20s → 3–8s "
            "claim is NOT honored here — actual numbers depend on host, "
            "Selenium cold-start, and AI cache state. Update "
            "docs/architecture/PHASE-Z-PIPELINE-STATUS-BOARD.md §8 with the "
            "p50/p95 reported here when run on the project's reference host."
        ),
    }
    sys.stdout.write(json.dumps(summary, ensure_ascii=False, indent=2))
    sys.stdout.write("\n")
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
raise SystemExit(main())
|
||||
@@ -25,6 +25,7 @@ MVP-1.5b spec :
|
||||
- mvp1.5b_test* : 본 모듈, 원래 설계 라인 합류
|
||||
"""
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
@@ -33,7 +34,7 @@ import sys
|
||||
import time
|
||||
from dataclasses import asdict, dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
from typing import Any, Optional
|
||||
|
||||
import yaml
|
||||
from jinja2 import Environment, FileSystemLoader, select_autoescape
|
||||
@@ -99,6 +100,15 @@ from src.phase_z2_ai_fallback.step12 import gather_step12_ai_repair_proposals
|
||||
# idempotent ``has_popup`` marker onto retry_trace per unit. No AI call.
|
||||
from src.phase_z2_ai_fallback.step17 import run_step17_popup_gate
|
||||
|
||||
# IMP-43 (#72) u3 — Step 6 reuse snapshot sidecar (JSON-only). Schema +
|
||||
# serializers + validator live in u2 (``src.phase_z2_reuse_snapshot``);
|
||||
# this module's call site at the Step 6 boundary writes the sidecar
|
||||
# alongside ``steps/step06_composition_plan.json`` so that future
|
||||
# ``--reuse-from`` runs (u4) can resume at Step 7 without re-deriving
|
||||
# Step 0/1/2/5/6 state. ``--reuse-from`` is u4 scope; here we only
|
||||
# WRITE the snapshot — restore wiring lands in u4.
|
||||
from src.phase_z2_reuse_snapshot import build_snapshot, SNAPSHOT_FILENAME
|
||||
|
||||
|
||||
# ─── Constants ──────────────────────────────────────────────────
|
||||
|
||||
@@ -3853,6 +3863,564 @@ def _write_step_artifact(
|
||||
return fpath
|
||||
|
||||
|
||||
# IMP-43 (#72) u3 — Step 6 reuse snapshot sidecar writer.
|
||||
#
|
||||
# Scope (u3 only — Stage 2 unit split):
|
||||
# * Writes ``run_dir/_reuse_snapshot.json`` *after* the Step 6 artifact.
|
||||
# * JSON-only (per Stage 2 guardrail — pickle forbidden); schema +
|
||||
# ``build_snapshot`` live in u2 (``src.phase_z2_reuse_snapshot``).
|
||||
# * Write failure WARNS and CONTINUES — the snapshot is an OPTIONAL
|
||||
# sidecar; absence means future ``--reuse-from`` (u4) will fail
|
||||
# closed when it cannot find / load the file. The main pipeline
|
||||
# run must not abort on snapshot write failure.
|
||||
# * Returns the run_dir-relative path (``"_reuse_snapshot.json"``) on
|
||||
# success, ``None`` on failure. The caller stamps the returned value
|
||||
# (or the constant when known ahead of time) into the Step 6 artifact.
|
||||
def _write_reuse_snapshot(
    run_dir: Path,
    *,
    mdx_source_text: str,
    slide_title: Optional[str],
    slide_footer: Optional[str],
    sections: list,
    stage0_adapter_diagnostics: Optional[dict],
    stage0_normalized_assets: Optional[dict],
    v4_evidence: list,
    layout_preset_pre_override: Optional[str],
    units: list,
    comp_debug: Optional[dict],
    v4_fallback_traces: Optional[dict],
    ai_preflight: Optional[dict],
) -> Optional[str]:
    """Write the JSON reuse snapshot sidecar into ``run_dir``.

    Hashes ``mdx_source_text`` (UTF-8 bytes, SHA-256), passes the hash and
    the remaining keyword arguments to ``build_snapshot``, and serializes
    the result to ``run_dir / SNAPSHOT_FILENAME``.

    The file is written to a sibling ``.tmp`` path and moved into place
    with ``os.replace``, so a failure part-way through the write cannot
    leave a truncated snapshot behind while this function reports that no
    snapshot exists.

    Returns:
        ``SNAPSHOT_FILENAME`` on success; ``None`` on any failure. Failures
        are reported as a warning on stderr and never propagate — the
        snapshot is best-effort and must not abort the calling run.
    """
    tmp_path: Optional[Path] = None
    try:
        mdx_sha256 = hashlib.sha256(mdx_source_text.encode("utf-8")).hexdigest()
        snapshot = build_snapshot(
            mdx_sha256=mdx_sha256,
            slide_title=slide_title,
            slide_footer=slide_footer,
            sections=sections,
            stage0_adapter_diagnostics=stage0_adapter_diagnostics,
            stage0_normalized_assets=stage0_normalized_assets,
            v4_evidence=v4_evidence,
            layout_preset_pre_override=layout_preset_pre_override,
            units=units,
            comp_debug=comp_debug,
            v4_fallback_traces=v4_fallback_traces,
            ai_preflight=ai_preflight,
        )
        # Serialize before touching the filesystem so an unserializable
        # snapshot never creates any file at all.
        serialized = json.dumps(snapshot, ensure_ascii=False, indent=2)
        fpath = run_dir / SNAPSHOT_FILENAME
        tmp_path = fpath.with_name(fpath.name + ".tmp")
        tmp_path.write_text(serialized, encoding="utf-8")
        os.replace(tmp_path, fpath)
        return SNAPSHOT_FILENAME
    except Exception as exc:
        if tmp_path is not None:
            try:
                tmp_path.unlink()
            except OSError:
                # Temp file was never created or is already gone; the
                # warning below is the only signal the caller needs.
                pass
        print(
            f"  [reuse-snapshot] WARN — failed to write {SNAPSHOT_FILENAME} "
            f"(reason={type(exc).__name__}: {exc}); --reuse-from will not be "
            f"available from this run.",
            file=sys.stderr,
        )
        return None
|
||||
|
||||
|
||||
# IMP-43 (#72) u4 — --reuse-from copy + restore + entry helpers.
|
||||
#
|
||||
# Scope (u4 only — Stage 2 unit split):
|
||||
# * Pure path resolution / file copy / snapshot load+validate /
|
||||
# section + unit rehydration / marker writing.
|
||||
# * NO edits to ``run_phase_z2_mvp1`` body — the kwarg threading and
|
||||
# the entry-point branch that invokes these helpers land in u5.
|
||||
# * NO sys.exit(2) translation — helpers RAISE
|
||||
# (FileNotFoundError / SnapshotValidationError / OSError); u4b adds
|
||||
# the stderr + exit-code-2 wrapper, the prev_run_dir == new_run_dir
|
||||
# accidental-write guard, and the mdx_sha256 mismatch surface
|
||||
# fingerprint.
|
||||
#
|
||||
# Restore contract (Stage 2 boundary): Step 0/1/2/5/6 artifacts +
|
||||
# ``_reuse_snapshot.json``. Step numbers 3 / 4 are deliberately absent
|
||||
# — the pipeline DOES write ``step03_content_objects.json`` and
|
||||
# ``step04_internal_composition.json`` AFTER the Step 6 artifact and
|
||||
# BEFORE the Step 7 artifact (see ``_write_step_artifact`` call sites
|
||||
# for ``run_dir, 3`` and ``run_dir, 4`` above the ``run_dir, 7`` call
|
||||
# in this file), but both are emitted with
|
||||
# ``step_status="trace-only"`` and ``pipeline_path_connected=False``:
|
||||
# they are diagnostic projections derived from the Step 6
|
||||
# ``debug_zones`` snapshot, not deterministic inputs that Step 7+
|
||||
# consume. Restoring them is unnecessary because downstream code
|
||||
# reads ``debug_zones`` directly (rehydrated from the snapshot), and
|
||||
# copying trace-only files would muddle the boundary audit. Stage 2
|
||||
# boundary lock = pipeline-path-connected pre-Step 7 artifacts only.
|
||||
|
||||
_REUSE_STEP_ARTIFACTS: tuple[str, ...] = (
|
||||
"step00_preconditions.json",
|
||||
"step01_mdx_upload.json",
|
||||
"step01_mdx_source.md",
|
||||
"step02_normalized.json",
|
||||
"step05_v4_evidence.json",
|
||||
"step06_composition_plan.json",
|
||||
)
|
||||
|
||||
REUSE_MARKER_FILENAME = "_reuse_marker.json"
|
||||
|
||||
|
||||
def _resolve_reuse_from_prev_run_dir(reuse_from: str) -> Path:
    """Map a ``--reuse-from`` run id onto that run's ``phase_z2`` directory.

    Only joins path segments under ``RUNS_DIR``; the filesystem is never
    consulted, so the returned path may not exist. Existence is checked by
    the fail-closed wrapper that calls this helper.

    NOTE(review): ``reuse_from`` is joined verbatim. An absolute path or a
    value containing ``..`` would resolve outside ``RUNS_DIR`` — confirm that
    callers only ever pass plain run ids.
    """
    prev_run_root = RUNS_DIR / reuse_from
    return prev_run_root / "phase_z2"
|
||||
|
||||
|
||||
def _copy_reuse_artifacts_from_prev_run(
    prev_run_dir: Path, new_run_dir: Path
) -> dict[str, str]:
    """Duplicate the reusable step artifacts and the snapshot into a new run.

    Every name in ``_REUSE_STEP_ARTIFACTS`` is copied from
    ``prev_run_dir/steps/`` to ``new_run_dir/steps/`` (created if needed),
    then the snapshot file is copied from the root of ``prev_run_dir`` to the
    root of ``new_run_dir``.

    Returns:
        Mapping of artifact name to its path relative to ``new_run_dir``.

    Raises:
        FileNotFoundError: a required source file does not exist. Files that
            were copied before the missing one are left in place.
        OSError: propagated from ``mkdir`` / ``shutil.copyfile``.
    """
    dest_steps_dir = new_run_dir / "steps"
    dest_steps_dir.mkdir(parents=True, exist_ok=True)
    source_steps_dir = prev_run_dir / "steps"

    copied_map: dict[str, str] = {}
    for artifact_name in _REUSE_STEP_ARTIFACTS:
        artifact_src = source_steps_dir / artifact_name
        if not artifact_src.exists():
            raise FileNotFoundError(
                f"reuse artifact missing in prev_run_dir: steps/{artifact_name} "
                f"(expected at {artifact_src})"
            )
        shutil.copyfile(artifact_src, dest_steps_dir / artifact_name)
        copied_map[artifact_name] = f"steps/{artifact_name}"

    # The snapshot sits at the run_dir root, not under steps/.
    snapshot_src = prev_run_dir / SNAPSHOT_FILENAME
    if not snapshot_src.exists():
        raise FileNotFoundError(
            f"reuse snapshot missing in prev_run_dir: {SNAPSHOT_FILENAME} "
            f"(expected at {snapshot_src})"
        )
    shutil.copyfile(snapshot_src, new_run_dir / SNAPSHOT_FILENAME)
    copied_map[SNAPSHOT_FILENAME] = SNAPSHOT_FILENAME
    return copied_map
|
||||
|
||||
|
||||
def _load_and_validate_reuse_snapshot(
    new_run_dir: Path, *, mdx_source_text: str
) -> dict:
    """Parse the snapshot already copied into ``new_run_dir`` and validate it.

    The expected integrity key is the SHA-256 hex digest of
    ``mdx_source_text`` encoded as UTF-8; it is handed to
    ``validate_snapshot`` together with the parsed document.

    Returns:
        The parsed snapshot object.

    Raises:
        OSError: the snapshot file cannot be read.
        json.JSONDecodeError: the file content is not valid JSON.
        SnapshotValidationError: raised by ``validate_snapshot`` (per the
            original note, a ``ValueError`` subclass covering schema-version,
            digest, and shape problems).
    """
    from src.phase_z2_reuse_snapshot import validate_snapshot

    raw_text = (new_run_dir / SNAPSHOT_FILENAME).read_text(encoding="utf-8")
    loaded = json.loads(raw_text)
    mdx_digest = hashlib.sha256(mdx_source_text.encode("utf-8")).hexdigest()
    validate_snapshot(loaded, expected_mdx_sha256=mdx_digest)
    return loaded
|
||||
|
||||
|
||||
@dataclass
class _RehydratedV4Candidate:
    """Attribute-access stand-in for a V4 match restored from a snapshot.

    Instances are built from the snapshot's per-unit ``v4_candidates``
    entries so that code reading ``cand.template_id`` and friends keeps
    working on the reuse path.

    Per the original notes, the consumers are:
      * ``_apply_frame_override_to_unit`` — reads template_id / frame_id /
        frame_number / confidence / label when swapping frames.
      * ``_build_application_plan_unit`` — reads ``v4_rank`` for the Step 9
        payload.

    The production ``V4Match`` dataclass carries additional fields
    (section_id / selection_path / fallback_reason / provisional) that are
    deliberately not reproduced here; keeping this class local also avoids a
    circular dependency.
    """

    template_id: str
    frame_id: str
    frame_number: int
    confidence: float
    label: str
    # Defaults to None so snapshots written before per-candidate rank was
    # persisted can still be loaded.
    v4_rank: Optional[int] = None
|
||||
|
||||
|
||||
def _rehydrate_mdx_sections_from_snapshot(snapshot: dict) -> list:
    """Turn the snapshot's ``sections`` wrapper back into ``MdxSection`` objects.

    Reads ``snapshot["sections"]["value"]`` and constructs one ``MdxSection``
    per entry, in order. ``section_id``, ``section_num``, ``title`` and
    ``raw_content`` are required keys; ``heading_number`` falls back to
    ``None`` and the two list fields fall back to empty lists when missing or
    falsy.

    Returns:
        A plain list of ``MdxSection`` instances.

    Raises:
        KeyError: a required key is absent from the snapshot or an entry.
    """
    restored: list = []
    for entry in snapshot["sections"]["value"]:
        restored.append(
            MdxSection(
                section_id=entry["section_id"],
                section_num=entry["section_num"],
                title=entry["title"],
                raw_content=entry["raw_content"],
                heading_number=entry.get("heading_number"),
                # Fresh list copies so the restored objects never alias the
                # parsed snapshot.
                v4_alias_keys=list(entry.get("v4_alias_keys") or []),
                sub_sections=list(entry.get("sub_sections") or []),
            )
        )
    return restored
|
||||
|
||||
|
||||
def _rehydrate_composition_units_from_snapshot(snapshot: dict) -> list:
    """Turn the snapshot's ``units`` wrapper back into ``CompositionUnit`` objects.

    Each entry of ``snapshot["units"]["value"]`` becomes one unit. Its
    ``v4_candidates`` entries are restored as ``_RehydratedV4Candidate``
    instances so attribute access works without the production ``V4Match``
    class.

    Optional keys use these fallbacks when missing or falsy:
    ``selection_path`` -> ``"rank_1"``, ``score`` -> ``0.0``,
    ``rationale`` -> ``{}``, ``filter_reasons`` / ``notes`` -> ``[]``.
    ``auto_selectable`` defaults to ``True`` and ``provisional`` to ``False``
    only when the key is absent.

    ``CompositionUnit`` is imported through the ``src.`` path inside the
    function. Per the original note, the module can be loaded under both
    ``phase_z2_composition`` and ``src.phase_z2_composition``, and using the
    ``src.`` name keeps class identity consistent with tests and downstream
    code.

    Raises:
        KeyError: a required key is absent from the snapshot or an entry.
    """
    from src.phase_z2_composition import CompositionUnit as _CompositionUnit

    def _restore_candidate(raw: dict) -> _RehydratedV4Candidate:
        # Rank is optional in older snapshots; coerce only when present.
        return _RehydratedV4Candidate(
            template_id=raw["template_id"],
            frame_id=raw["frame_id"],
            frame_number=int(raw["frame_number"]),
            confidence=float(raw["confidence"]),
            label=raw["label"],
            v4_rank=(
                None if raw.get("v4_rank") is None else int(raw["v4_rank"])
            ),
        )

    restored_units: list = []
    for entry in snapshot["units"]["value"]:
        candidates = [
            _restore_candidate(raw)
            for raw in (entry.get("v4_candidates") or [])
        ]
        unit = _CompositionUnit(
            source_section_ids=list(entry["source_section_ids"]),
            merge_type=entry["merge_type"],
            frame_template_id=entry["frame_template_id"],
            frame_id=entry["frame_id"],
            frame_number=int(entry["frame_number"]),
            confidence=float(entry["confidence"]),
            label=entry["label"],
            phase_z_status=entry["phase_z_status"],
            raw_content=entry["raw_content"],
            title=entry["title"],
            v4_rank=entry.get("v4_rank"),
            selection_path=entry.get("selection_path") or "rank_1",
            fallback_reason=entry.get("fallback_reason"),
            score=float(entry.get("score") or 0.0),
            rationale=dict(entry.get("rationale") or {}),
            auto_selectable=bool(entry.get("auto_selectable", True)),
            filter_reasons=list(entry.get("filter_reasons") or []),
            notes=list(entry.get("notes") or []),
            v4_candidates=candidates,
            provisional=bool(entry.get("provisional", False)),
        )
        restored_units.append(unit)
    return restored_units
|
||||
|
||||
|
||||
REUSE_MARKER_SCHEMA_VERSION = 1


def _write_reuse_marker(
    new_run_dir: Path,
    *,
    prev_run_id: str,
    copied_artifacts: dict[str, str],
) -> Path:
    """Drop a ``_reuse_marker.json`` audit sidecar into ``new_run_dir``.

    The marker records the schema version, the source run id, the snapshot
    file name, a copy of ``copied_artifacts``, the list of reusable step
    artifact names, and the step at which execution resumes (7).

    Args:
        new_run_dir: Directory of the run being produced; the marker is
            written directly under it.
        prev_run_id: Run id the reused state came from.
        copied_artifacts: Artifact-name to relative-path map from the copy
            step.

    Returns:
        Path of the written marker file.

    Raises:
        OSError: propagated from the file write.
    """
    marker_path = new_run_dir / REUSE_MARKER_FILENAME
    payload = {
        "schema_version": REUSE_MARKER_SCHEMA_VERSION,
        "reuse_from_prev_run_id": prev_run_id,
        "snapshot_filename": SNAPSHOT_FILENAME,
        "copied_artifacts": dict(copied_artifacts),
        "boundary_steps": list(_REUSE_STEP_ARTIFACTS),
        "resume_at_step": 7,
        "note": (
            "IMP-43 (#72) u4 — this run was sourced from prev_run_id via "
            "--reuse-from. Steps 0/1/2/5/6 artifacts copied; Step 7+ "
            "re-executed in this run_dir."
        ),
    }
    # ensure_ascii=False keeps the non-ASCII dash in the note readable.
    serialized = json.dumps(payload, ensure_ascii=False, indent=2)
    marker_path.write_text(serialized, encoding="utf-8")
    return marker_path
|
||||
|
||||
|
||||
# IMP-43 (#72) u4b — fail-closed wrapper around the u4 helpers.
|
||||
#
|
||||
# Scope (u4b only — Stage 2 unit split):
|
||||
# * Translate the u4 raises (FileNotFoundError, SnapshotValidationError,
|
||||
# json.JSONDecodeError, OSError) into the CLI fail-closed contract:
|
||||
# stderr message + ``sys.exit(2)``.
|
||||
# * Add the prev_run_dir == new_run_dir accidental-write guard BEFORE
|
||||
# any copy attempt — otherwise ``_copy_reuse_artifacts_from_prev_run``
|
||||
# would overwrite prev_run_dir's own step files with itself and
|
||||
# mutate the "read-only" reuse source.
|
||||
# * Add the missing-prev-run-dir surface so the user gets a clean
|
||||
# "run id not found" message instead of the raw FileNotFoundError
|
||||
# stack from inside _copy_reuse_artifacts_from_prev_run.
|
||||
# * Surface the mdx_sha256 mismatch as its OWN axis (distinct from
|
||||
# generic snapshot validation failures) so the operator can tell
|
||||
# "wrong --mdx-path for this prev_run_id" apart from "snapshot file
|
||||
# is broken".
|
||||
#
|
||||
# Out of scope: signature threading into ``run_phase_z2_mvp1`` (u5),
|
||||
# the actual call site dispatch into Step 7+ (u5).
|
||||
#
|
||||
# Diagnostic format (factual-verification guardrail):
|
||||
# [error] --reuse-from fail-closed: <axis>
|
||||
# value: <repr>
|
||||
# path: <fs path / resource locator>
|
||||
# upstream: <where the value originated>
|
||||
# reason: <type>: <message> (only when exc != None)
|
||||
#
|
||||
# axis vocabulary (closed enum — tests pin this set):
|
||||
# * prev_run_dir_missing
|
||||
# * prev_run_dir_equals_new_run_dir
|
||||
# * reuse_artifact_missing
|
||||
# * reuse_copy_os_error # OSError != FileNotFoundError during copy
|
||||
# # (PermissionError, IsADirectoryError,
|
||||
# # OSError(errno.EXDEV), full-disk, etc.)
|
||||
# * snapshot_missing_after_copy
|
||||
# * snapshot_corrupt_json
|
||||
# * snapshot_read_os_error # OSError != FileNotFoundError during
|
||||
# # snapshot read (permission denied,
|
||||
# # path-became-dir, lower-level IO)
|
||||
# * mdx_sha256_mismatch
|
||||
# * snapshot_validation_failed
|
||||
|
||||
|
||||
REUSE_FAIL_CLOSED_AXES: frozenset[str] = frozenset({
|
||||
"prev_run_dir_missing",
|
||||
"prev_run_dir_equals_new_run_dir",
|
||||
"reuse_artifact_missing",
|
||||
"reuse_copy_os_error",
|
||||
"snapshot_missing_after_copy",
|
||||
"snapshot_corrupt_json",
|
||||
"snapshot_read_os_error",
|
||||
"mdx_sha256_mismatch",
|
||||
"snapshot_validation_failed",
|
||||
})
|
||||
|
||||
|
||||
def _abort_reuse_from(
|
||||
*,
|
||||
axis: str,
|
||||
value: Any,
|
||||
path: str,
|
||||
upstream: str,
|
||||
exc: Optional[BaseException] = None,
|
||||
) -> "NoReturn":
|
||||
"""Print provenance-tagged stderr message and ``sys.exit(2)``.
|
||||
|
||||
All four `value+path+upstream+axis` fields are mandatory so the
|
||||
operator can pinpoint the failed precondition without grepping the
|
||||
pipeline source. ``exc`` (when supplied) adds the underlying type
|
||||
+ message — useful for ``json.JSONDecodeError`` line/col info or
|
||||
OSError errno.
|
||||
"""
|
||||
if axis not in REUSE_FAIL_CLOSED_AXES:
|
||||
raise AssertionError(
|
||||
f"_abort_reuse_from: unknown axis {axis!r} "
|
||||
f"(expected one of {sorted(REUSE_FAIL_CLOSED_AXES)})"
|
||||
)
|
||||
lines = [
|
||||
f"[error] --reuse-from fail-closed: {axis}",
|
||||
f" value: {value!r}",
|
||||
f" path: {path}",
|
||||
f" upstream: {upstream}",
|
||||
]
|
||||
if exc is not None:
|
||||
lines.append(f" reason: {type(exc).__name__}: {exc}")
|
||||
print("\n".join(lines), file=sys.stderr)
|
||||
sys.exit(2)
|
||||
|
||||
|
||||
def _paths_equivalent(a: Path, b: Path) -> bool:
|
||||
"""Return True when ``a`` and ``b`` resolve to the same filesystem
|
||||
location, falling back to lexical equality when ``a`` doesn't
|
||||
exist yet (Path.resolve(strict=False) still normalizes case + sep
|
||||
on Windows + collapses ``..`` segments).
|
||||
"""
|
||||
try:
|
||||
return a.resolve(strict=False) == b.resolve(strict=False)
|
||||
except (OSError, RuntimeError):
|
||||
return a == b
|
||||
|
||||
|
||||
def execute_reuse_from_or_fail_closed(
    *,
    reuse_from: str,
    new_run_dir: Path,
    mdx_source_text: str,
) -> tuple[Path, dict[str, str], dict]:
    """Orchestrate the u4 helpers under the u4b fail-closed contract.

    Resolves the previous run directory, guards against a missing source and
    against source == destination, copies the reusable artifacts, then loads
    and validates the snapshot.

    Args:
        reuse_from: Previous run id from ``--reuse-from``.
        new_run_dir: Directory of the run being produced.
        mdx_source_text: Current MDX text; its SHA-256 is checked against the
            snapshot's integrity key.

    Returns:
        ``(prev_run_dir, copied_artifacts, snapshot)`` on success.

    Every failure handled below is reported through ``_abort_reuse_from``
    under one of the nine axes in ``REUSE_FAIL_CLOSED_AXES``, which prints to
    stderr and calls ``sys.exit(2)``; the function does not return in that
    case, so the caller does not need its own try/except for them.
    """
    from src.phase_z2_reuse_snapshot import SnapshotValidationError

    prev_run_dir = _resolve_reuse_from_prev_run_dir(reuse_from)

    # Guard 1: prev_run_dir must exist.
    if not prev_run_dir.exists():
        _abort_reuse_from(
            axis="prev_run_dir_missing",
            value=reuse_from,
            path=str(prev_run_dir),
            upstream="--reuse-from CLI argument",
        )

    # Guard 2: prev_run_dir must NOT be the same as new_run_dir.
    # Without this, the copy step would overwrite prev_run_dir's own
    # files with themselves and break the RO guarantee on the reuse
    # source. The check resolves both sides so a relative-vs-absolute
    # or symlinked collision still trips it.
    if _paths_equivalent(prev_run_dir, new_run_dir):
        _abort_reuse_from(
            axis="prev_run_dir_equals_new_run_dir",
            value=reuse_from,
            path=str(prev_run_dir),
            upstream=(
                "_resolve_reuse_from_prev_run_dir(reuse_from) == new_run_dir "
                "(would overwrite prev_run_dir during copy)"
            ),
        )

    # Copy step 0/1/2/5/6 + snapshot from prev_run_dir → new_run_dir.
    # FileNotFoundError MUST be caught before the bare OSError handler —
    # it is a subclass of OSError and the missing-artifact case has its
    # own dedicated axis.
    try:
        copied = _copy_reuse_artifacts_from_prev_run(prev_run_dir, new_run_dir)
    except FileNotFoundError as exc:
        _abort_reuse_from(
            axis="reuse_artifact_missing",
            value=str(exc),
            path=str(prev_run_dir),
            upstream=(
                "Step 0/1/2/5/6 deterministic artifacts + "
                f"{SNAPSHOT_FILENAME} under prev_run_dir/steps/"
            ),
            exc=exc,
        )
    except OSError as exc:
        # PermissionError, IsADirectoryError, OSError(errno.EXDEV) when
        # crossing filesystems with shutil.copyfile, disk-full, etc.
        # Without this branch the raw traceback would escape the wrapper.
        _abort_reuse_from(
            axis="reuse_copy_os_error",
            value=str(exc),
            path=str(prev_run_dir),
            upstream=(
                "_copy_reuse_artifacts_from_prev_run "
                "(OSError != FileNotFoundError; shutil.copyfile or "
                "Path.mkdir surface)"
            ),
            exc=exc,
        )

    # Load + validate snapshot. Exception fan-out below mirrors the
    # u4 helper raise surface; each fail-closed axis is reported
    # separately so operators can tell the cases apart.
    # FileNotFoundError MUST be caught before the bare OSError handler.
    try:
        snapshot = _load_and_validate_reuse_snapshot(
            new_run_dir, mdx_source_text=mdx_source_text,
        )
    except FileNotFoundError as exc:
        # Should not happen — copy step would have failed first — but
        # left explicit to make the contract symmetric.
        _abort_reuse_from(
            axis="snapshot_missing_after_copy",
            value=str(exc),
            path=str(new_run_dir / SNAPSHOT_FILENAME),
            upstream="_copy_reuse_artifacts_from_prev_run side effect",
            exc=exc,
        )
    except json.JSONDecodeError as exc:
        _abort_reuse_from(
            axis="snapshot_corrupt_json",
            value=str(exc),
            path=str(new_run_dir / SNAPSHOT_FILENAME),
            upstream=f"json.loads({SNAPSHOT_FILENAME})",
            exc=exc,
        )
    except UnicodeDecodeError as exc:
        # read_text(encoding="utf-8") raises this (a ValueError, not an
        # OSError) when the file holds bytes that are not valid UTF-8 — e.g.
        # a snapshot truncated in the middle of a multi-byte character.
        # The file content is unusable, so report it on the corrupt axis
        # rather than letting a raw traceback escape the wrapper.
        _abort_reuse_from(
            axis="snapshot_corrupt_json",
            value=str(exc),
            path=str(new_run_dir / SNAPSHOT_FILENAME),
            upstream=f"Path.read_text(encoding='utf-8') on {SNAPSHOT_FILENAME}",
            exc=exc,
        )
    except OSError as exc:
        # Permission denied on the copied snapshot, snap_path turned out
        # to be a directory, lower-level IO error. The ValueError-based
        # branches are independent of OSError, so this branch only needs
        # to follow FileNotFoundError.
        _abort_reuse_from(
            axis="snapshot_read_os_error",
            value=str(exc),
            path=str(new_run_dir / SNAPSHOT_FILENAME),
            upstream=(
                "_load_and_validate_reuse_snapshot "
                "(OSError != FileNotFoundError; Path.read_text surface)"
            ),
            exc=exc,
        )
    except SnapshotValidationError as exc:
        msg = str(exc)
        # The digest mismatch is told apart from other validation failures
        # by message text, so it depends on validate_snapshot's wording.
        if "mdx_sha256 mismatch" in msg:
            _abort_reuse_from(
                axis="mdx_sha256_mismatch",
                value=msg,
                path=str(new_run_dir / SNAPSHOT_FILENAME),
                upstream=(
                    "sha256(mdx_source_text) vs "
                    f"{SNAPSHOT_FILENAME}#/mdx_sha256"
                ),
                exc=exc,
            )
        else:
            _abort_reuse_from(
                axis="snapshot_validation_failed",
                value=msg,
                path=str(new_run_dir / SNAPSHOT_FILENAME),
                upstream="src.phase_z2_reuse_snapshot.validate_snapshot",
                exc=exc,
            )

    return prev_run_dir, copied, snapshot
|
||||
|
||||
|
||||
def _write_step_html(
|
||||
run_dir: Path,
|
||||
step_num: int,
|
||||
@@ -4284,6 +4852,7 @@ def run_phase_z2_mvp1(
|
||||
override_zone_geometries: Optional[dict[str, dict]] = None,
|
||||
override_section_assignments: Optional[dict[str, list[str]]] = None,
|
||||
override_image_overrides: Optional[dict[str, dict]] = None,
|
||||
reuse_from: Optional[str] = None,
|
||||
) -> Path:
|
||||
"""MVP-1.5b entry — single slide + composition planner v0 + 8 preset vocabulary.
|
||||
|
||||
@@ -4306,6 +4875,22 @@ def run_phase_z2_mvp1(
|
||||
backend contract (KNOWN_AXES u1 + Vite allowlist u2 + typed
|
||||
client u3 + stamper u4) end-to-end addressable from CLI without
|
||||
diverging the function signature.
|
||||
|
||||
Incremental rerun (IMP-43 #72, u5) :
|
||||
reuse_from : Optional PREV_RUN_ID. When set, Steps 0/1/2/5/6 artifacts
|
||||
are copied from ``RUNS_DIR / PREV_RUN_ID / phase_z2``
|
||||
and the in-memory state (sections, units, layout_preset,
|
||||
comp_debug, v4_fallback_traces, slide_title/footer,
|
||||
stage0_*, v4_evidence, ai_preflight) is rehydrated
|
||||
from ``_reuse_snapshot.json`` via the u4 helpers,
|
||||
wrapped by u4b's fail-closed contract. Step 7+ then
|
||||
re-executes against ``override_frames`` in this new
|
||||
run_dir. ``None`` preserves the legacy single-pass
|
||||
behaviour (Steps 0-6 derive state from scratch).
|
||||
The post-merge u1 guard at the CLI surface rejects
|
||||
any layout / zone_geometry / zone_section / image
|
||||
override under ``--reuse-from`` so only frame
|
||||
overrides reach this kwarg's reuse branch.
|
||||
"""
|
||||
mdx_path = Path(mdx_path)
|
||||
if run_id is None:
|
||||
@@ -4315,6 +4900,16 @@ def run_phase_z2_mvp1(
|
||||
|
||||
print(f"[Phase Z-2 MVP-1.5b] start — mdx={mdx_path.name}, run_id={run_id}")
|
||||
|
||||
# IMP-43 (#72) u5 — Steps 0/1/2/5/6 entry-point branch.
|
||||
# ``reuse_from is None`` = normal pipeline (Steps 0-6 derive state).
|
||||
# ``reuse_from is not None`` = restore Steps 0/1/2/5/6 state from
|
||||
# prev_run snapshot via the u4 helpers wrapped by u4b's fail-closed
|
||||
# contract, then fall through to the shared Step 7+ block below.
|
||||
# The post-merge u1 guard has already rejected any layout /
|
||||
# zone_geometry / zone_section / image override on the reuse path,
|
||||
# so only --override-frame (handled at the Step 7-A axis below the
|
||||
# branch) survives into this code path.
|
||||
if reuse_from is None:
|
||||
# ─── Step 0: 사전 준비 (precondition snapshot) ───
|
||||
# IMP-92 u4 — boot-time AI fallback preflight (gated on
|
||||
# settings.ai_fallback_enabled; default OFF = skipped, no API call).
|
||||
@@ -4985,11 +5580,19 @@ def run_phase_z2_mvp1(
|
||||
# for IMP-47B (#76) AI handoff. section_assignment_override skip
|
||||
# honors IMP-06 (#6) zoneSections ground truth.
|
||||
"imp48_resplit": _imp48_audit,
|
||||
# IMP-43 (#72) u3 — additive informational field recording the
|
||||
# run_dir-relative location of the ``--reuse-from`` sidecar
|
||||
# (written immediately after this artifact). Path is stamped
|
||||
# unconditionally so that a future ``--reuse-from`` consumer
|
||||
# (u4) can locate the expected sidecar even when its write
|
||||
# failed (u4 then fail-closes on missing/invalid sidecar via
|
||||
# u2's ``validate_snapshot``).
|
||||
"reuse_snapshot_path": SNAPSHOT_FILENAME,
|
||||
},
|
||||
step_status="done",
|
||||
pipeline_path_connected=True,
|
||||
inputs=["step02_normalized.json", "step05_v4_evidence.json"],
|
||||
outputs=["step06_composition_plan.json"],
|
||||
outputs=["step06_composition_plan.json", SNAPSHOT_FILENAME],
|
||||
note=(
|
||||
"composition v0 count-based — sections → candidates → score → greedy select. "
|
||||
"Step 6-A (사용자 lock 2026-05-08): selected_units[i].v4_candidates 추가 "
|
||||
@@ -4999,10 +5602,116 @@ def run_phase_z2_mvp1(
|
||||
"→ per-section singles (each own rank-1 V4 evidence + raw_content 보존). "
|
||||
"guardrails: coverage equality / beneficial split (≥1 non-reject) / "
|
||||
"layout cap (≤4 units). imp48_resplit audit additive. "
|
||||
"logic 무변 — runtime 결과 동일. Step 9 application_plan input."
|
||||
"logic 무변 — runtime 결과 동일. Step 9 application_plan input. "
|
||||
"IMP-43 (#72) u3: _reuse_snapshot.json sidecar written next to "
|
||||
"this artifact (run_dir level) for future --reuse-from (u4) "
|
||||
"consumption. Optional sidecar — write failure warns + continues."
|
||||
),
|
||||
)
|
||||
|
||||
# IMP-43 (#72) u3 — write Step 6 reuse snapshot sidecar AFTER the
|
||||
# step06 artifact. The sidecar captures the in-memory state that
|
||||
# downstream steps need but that the canonical step02 / step05 /
|
||||
# step06 artifacts do not preserve in a deserialize-ready form (e.g.
|
||||
# ``CompositionUnit`` instances, raw ``comp_debug``, untruncated
|
||||
# ``v4_fallback_traces``, pre-override ``layout_preset``). Helper
|
||||
# warns + returns ``None`` on failure — does NOT abort the run.
|
||||
# Restore wiring (``--reuse-from``) lands in u4.
|
||||
_write_reuse_snapshot(
|
||||
run_dir,
|
||||
mdx_source_text=mdx_source_text,
|
||||
slide_title=slide_title,
|
||||
slide_footer=slide_footer,
|
||||
sections=sections,
|
||||
stage0_adapter_diagnostics=stage0_adapter_diagnostics,
|
||||
stage0_normalized_assets=stage0_normalized_assets,
|
||||
v4_evidence=v4_evidence_list,
|
||||
layout_preset_pre_override=layout_preset,
|
||||
units=units,
|
||||
comp_debug=comp_debug,
|
||||
v4_fallback_traces=v4_fallback_traces,
|
||||
ai_preflight=ai_preflight,
|
||||
)
|
||||
else:
|
||||
# IMP-43 (#72) u5 — reuse path: restore Steps 0/1/2/5/6 state
|
||||
# from prev_run snapshot. u4b's execute_reuse_from_or_fail_closed
|
||||
# handles all nine fail-closed axes (prev_run_dir_missing,
|
||||
# snapshot_corrupt_json, mdx_sha256_mismatch, etc.) — on success
|
||||
# it returns ``(prev_run_dir, copied_artifacts, snapshot)``;
|
||||
# any reachable failure terminates the process before this branch
|
||||
# binds a local.
|
||||
#
|
||||
# State variable shape matches the locals produced by Steps 0-6
|
||||
# above so the Step 7+ block reads them transparently:
|
||||
# ai_preflight : Step 0 preflight dict
|
||||
# slide_title / slide_footer : parse_mdx output
|
||||
# sections : list[MdxSection], post-align
|
||||
# stage0_adapter_diagnostics : Stage 0 adapter trace dict
|
||||
# stage0_normalized_assets : Step 3 handoff dict (popups/...)
|
||||
# v4_evidence_list : list[dict] (Step 5 artifact)
|
||||
# layout_preset : Step 6 post-IMP-48 preset
|
||||
# units : list[CompositionUnit]
|
||||
# comp_debug : Step 6 debug dict
|
||||
# v4_fallback_traces : dict[sid -> trace dict]
|
||||
#
|
||||
# NOT serialized (deterministic from external sources or restored
|
||||
# sections — recomputed here):
|
||||
# v4 : load_v4_result() — V4_RESULT_PATH on disk
|
||||
# section_alias_by_id : derived from restored sections
|
||||
#
|
||||
# u1 guard ensures override_layout is None on the reuse path, so
|
||||
# layout_override_applied / auto_layout_preset reflect the
|
||||
# restored Step 6 preset for the Step 7 artifact.
|
||||
mdx_source_text = mdx_path.read_text(encoding="utf-8")
|
||||
(run_dir / "steps").mkdir(exist_ok=True)
|
||||
_prev_run_dir, _copied_artifacts, _snapshot = execute_reuse_from_or_fail_closed(
|
||||
reuse_from=reuse_from,
|
||||
new_run_dir=run_dir,
|
||||
mdx_source_text=mdx_source_text,
|
||||
)
|
||||
ai_preflight = _snapshot["ai_preflight"]["value"]
|
||||
slide_title = _snapshot["slide_title"]["value"]
|
||||
slide_footer = _snapshot["slide_footer"]["value"]
|
||||
sections = _rehydrate_mdx_sections_from_snapshot(_snapshot)
|
||||
stage0_adapter_diagnostics = _snapshot["stage0_adapter_diagnostics"]["value"]
|
||||
stage0_normalized_assets = _snapshot["stage0_normalized_assets"]["value"]
|
||||
v4_evidence_list = _snapshot["v4_evidence"]["value"]
|
||||
layout_preset = _snapshot["layout_preset_pre_override"]["value"]
|
||||
units = _rehydrate_composition_units_from_snapshot(_snapshot)
|
||||
comp_debug = _snapshot["comp_debug"]["value"]
|
||||
v4_fallback_traces = _snapshot["v4_fallback_traces"]["value"]
|
||||
v4 = load_v4_result()
|
||||
section_alias_by_id = {
|
||||
s.section_id: list(getattr(s, "v4_alias_keys", []) or [])
|
||||
for s in sections
|
||||
}
|
||||
auto_layout_preset = layout_preset
|
||||
layout_override_applied = False
|
||||
# IMP-43 (#72) u4 fix — shared Step 7+ block reads
|
||||
# ``section_assignment_plan`` unconditionally at the render_records
|
||||
# gate below, and ``section_assignment_summary`` is mirrored into
|
||||
# comp_debug via the normal-path override branch. Both stay at
|
||||
# their "no override applied" defaults on the reuse path because
|
||||
# u1's fail-closed guard already rejected --override-section-
|
||||
# assignment when --reuse-from is set. Without these explicit
|
||||
# defaults the reuse branch falls through to ``if
|
||||
# section_assignment_plan is not None:`` (line ~5754) with an
|
||||
# unbound local and the run aborts with UnboundLocalError before
|
||||
# Step 7 can begin (see Codex #14 rewind report).
|
||||
section_assignment_plan: Optional[list[dict]] = None
|
||||
section_assignment_summary: Optional[dict] = None
|
||||
_write_reuse_marker(
|
||||
run_dir,
|
||||
prev_run_id=reuse_from,
|
||||
copied_artifacts=_copied_artifacts,
|
||||
)
|
||||
print(
|
||||
f" reuse : sections={len(sections)} "
|
||||
f"({[s.section_id for s in sections]}), "
|
||||
f"units={len(units)}, layout={layout_preset}, "
|
||||
f"prev_run_id={reuse_from}"
|
||||
)
|
||||
|
||||
# 5. Per-unit: synthesize MdxSection → mapper → assets → zone data
|
||||
# mapper FitError 는 catch — 자동 파이프라인은 다른 zone 계속 진행. abort X.
|
||||
positions = LAYOUT_PRESETS[layout_preset]["positions"]
|
||||
@@ -7211,6 +7920,28 @@ if __name__ == "__main__":
|
||||
"settings.ai_fallback_auto_cache=True for this run."
|
||||
),
|
||||
)
|
||||
# IMP-43 (#72) u1 — incremental rerun reuse pointer. Reuse target
|
||||
# = Step 0/1/2/5/6 deterministic artifacts from a prior run; Step 7
|
||||
# onward re-executes against the new frame overrides. Only frame
|
||||
# overrides preserve the reusable subset (Stage 2 boundary lock);
|
||||
# layout/geometry/section/image overrides invalidate it and are
|
||||
# rejected by the post-merge guard below. Signature threading +
|
||||
# snapshot copy/restore land in u5 and u4 respectively; this unit
|
||||
# only adds the CLI surface + fail-closed precondition guard.
|
||||
parser.add_argument(
|
||||
"--reuse-from",
|
||||
dest="reuse_from",
|
||||
default=None,
|
||||
metavar="PREV_RUN_ID",
|
||||
help=(
|
||||
"Reuse Step 0/1/2/5/6 artifacts from a previous run id "
|
||||
"(directory under data/runs/<PREV_RUN_ID>/phase_z2) and resume "
|
||||
"execution at Step 7. Only --override-frame is preserved; "
|
||||
"--override-layout / --override-zone-geometry / "
|
||||
"--override-section-assignment / --override-image invalidate "
|
||||
"the reusable boundary and will be rejected."
|
||||
),
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.auto_cache:
|
||||
@@ -7436,6 +8167,37 @@ if __name__ == "__main__":
|
||||
continue
|
||||
overrides_images = _accepted_img
|
||||
|
||||
# IMP-43 (#72) u1 — fail-closed reuse_from precondition guard.
|
||||
# Placed AFTER the user_overrides.json merge so persisted overrides
|
||||
# are evaluated against the same reuse boundary as CLI overrides
|
||||
# (Stage 2 lock: "fail-closed guard after user_overrides.json merge
|
||||
# and before dispatch"). Reuse target = Step 0/1/2/5/6 deterministic
|
||||
# artifacts; only frame overrides preserve that subset. layout /
|
||||
# zone_geometry / zone_section / image overrides each invalidate at
|
||||
# least one of Step 0/1/2/5/6 and must reject. Frame-only is allowed
|
||||
# (no rejected axes → falls through to dispatch). Error stderr names
|
||||
# every rejected axis so the user can either drop the rejected axes
|
||||
# or rerun without --reuse-from.
|
||||
if args.reuse_from is not None:
|
||||
_rejected_axes: list[str] = []
|
||||
if _final_override_layout is not None:
|
||||
_rejected_axes.append("layout")
|
||||
if overrides_geoms:
|
||||
_rejected_axes.append("zone_geometry")
|
||||
if overrides_section_assignments:
|
||||
_rejected_axes.append("zone_section")
|
||||
if overrides_images:
|
||||
_rejected_axes.append("image")
|
||||
if _rejected_axes:
|
||||
print(
|
||||
f"[error] --reuse-from incompatible with override axes: "
|
||||
f"{', '.join(_rejected_axes)}. Only --override-frame is "
|
||||
f"preserved across Step 0/1/2/5/6 reuse; drop the rejected "
|
||||
f"overrides or rerun without --reuse-from.",
|
||||
file=sys.stderr,
|
||||
)
|
||||
sys.exit(2)
|
||||
|
||||
run_phase_z2_mvp1(
|
||||
args.mdx_path,
|
||||
args.run_id,
|
||||
@@ -7444,4 +8206,5 @@ if __name__ == "__main__":
|
||||
override_zone_geometries=overrides_geoms or None,
|
||||
override_section_assignments=overrides_section_assignments or None,
|
||||
override_image_overrides=overrides_images or None,
|
||||
reuse_from=args.reuse_from,
|
||||
)
|
||||
|
||||
301
src/phase_z2_reuse_snapshot.py
Normal file
301
src/phase_z2_reuse_snapshot.py
Normal file
@@ -0,0 +1,301 @@
|
||||
"""IMP-43 (#72) u2 — Step 6 reuse snapshot schema (JSON-only).
|
||||
|
||||
Stage 2 plan (locked) — ``--reuse-from PREV_RUN_ID`` reuses the
|
||||
Step 0 / 1 / 2 / 5 / 6 deterministic artifact subset plus the
|
||||
in-memory state that downstream steps need but that the existing
|
||||
``step02_normalized.json`` / ``step05_v4_evidence.json`` /
|
||||
``step06_composition_plan.json`` artifacts do not capture in a
|
||||
deserialize-ready form (e.g. ``CompositionUnit`` instances,
|
||||
``comp_debug``, ``v4_fallback_traces`` raw map, pre-override
|
||||
``layout_preset``). This module owns the schema for the additional
|
||||
``_reuse_snapshot.json`` sidecar written next to ``step06_composition_plan.json``.
|
||||
|
||||
Scope (u2 only, Stage 2 unit split):
|
||||
* Pure schema + serializers + validator. No file I/O.
|
||||
* JSON-only — pickle is forbidden per Stage 2 guardrails.
|
||||
* Provenance per top-level field: ``{value, source_path, upstream_step}``.
|
||||
* ``mdx_sha256`` integrity key — ``--reuse-from`` must fail closed when
|
||||
the prev run's MDX bytes don't match the current MDX bytes.
|
||||
* ``schema_version`` — bumped on any non-additive shape change.
|
||||
|
||||
Out of scope (deferred to later units):
|
||||
* Writing the snapshot into the run_dir (u3).
|
||||
* Copy / restore on ``--reuse-from`` (u4).
|
||||
* Fail-closed snapshot/path errors at restore time (u4b).
|
||||
* Threading ``reuse_from`` through ``run_phase_z2_mvp1`` (u5).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from typing import Any, Optional
|
||||
|
||||
|
||||
SNAPSHOT_VERSION = 1
|
||||
SNAPSHOT_FILENAME = "_reuse_snapshot.json"
|
||||
|
||||
|
||||
# Required top-level keys. Bare scalars (no provenance wrapper):
|
||||
# - schema_version (contract key)
|
||||
# - mdx_sha256 (integrity key)
|
||||
# All other keys are wrapped {value, source_path, upstream_step}.
|
||||
REQUIRED_TOP_LEVEL_KEYS: tuple[str, ...] = (
|
||||
"schema_version",
|
||||
"mdx_sha256",
|
||||
"slide_title",
|
||||
"slide_footer",
|
||||
"sections",
|
||||
"stage0_adapter_diagnostics",
|
||||
"stage0_normalized_assets",
|
||||
"v4_evidence",
|
||||
"layout_preset_pre_override",
|
||||
"units",
|
||||
"comp_debug",
|
||||
"v4_fallback_traces",
|
||||
"ai_preflight",
|
||||
)
|
||||
|
||||
_BARE_KEYS: frozenset[str] = frozenset({"schema_version", "mdx_sha256"})
|
||||
|
||||
|
||||
def _wrap(value: Any, *, source_path: str, upstream_step: str) -> dict[str, Any]:
|
||||
return {
|
||||
"value": value,
|
||||
"source_path": source_path,
|
||||
"upstream_step": upstream_step,
|
||||
}
|
||||
|
||||
|
||||
def serialize_section(section: Any) -> dict[str, Any]:
    """Flatten an ``MdxSection``-like object into a plain dict.

    The argument is duck-typed: any object exposing the attributes read
    below is accepted. ``section_id``, ``section_num``, ``title`` and
    ``raw_content`` are mandatory (a missing one raises
    ``AttributeError``). ``heading_number`` falls back to ``None``;
    ``v4_alias_keys`` and ``sub_sections`` fall back to an empty list and
    are always copied into fresh lists.

    Returns:
        A dict with exactly seven keys in the order ``section_id``,
        ``section_num``, ``title``, ``raw_content``, ``heading_number``,
        ``v4_alias_keys``, ``sub_sections``.
    """
    serialized: dict[str, Any] = {
        required: getattr(section, required)
        for required in ("section_id", "section_num", "title", "raw_content")
    }
    serialized["heading_number"] = getattr(section, "heading_number", None)
    # ``or []`` also normalizes an explicit ``None`` attribute to an empty list.
    for list_attr in ("v4_alias_keys", "sub_sections"):
        serialized[list_attr] = list(getattr(section, list_attr, []) or [])
    return serialized
|
||||
|
||||
|
||||
def serialize_unit(unit: Any) -> dict[str, Any]:
    """Flatten a ``CompositionUnit``-like object into a plain dict.

    The unit is duck-typed. All attributes are read directly except
    ``provisional``, which defaults to ``False`` when absent.
    ``confidence`` and ``score`` are coerced with ``float``;
    ``auto_selectable`` and ``provisional`` with ``bool``; ``rationale``,
    ``filter_reasons``, ``notes`` and ``source_section_ids`` are copied
    into fresh containers (``None`` becomes an empty container for the
    first three).

    Each entry of ``v4_candidates`` is reduced to six named fields
    (``template_id``, ``frame_id``, ``frame_number``, ``confidence``,
    ``label``, ``v4_rank``) so the snapshot does not depend on the
    candidate class layout. A candidate without a ``v4_rank`` attribute
    is stored with ``v4_rank: None``. Per the original design note,
    persisting ``v4_rank`` is meant to keep the reuse path's Step 9 /
    Step 13 output equivalent to a full rerun.
    """

    def _candidate_payload(candidate: Any) -> dict[str, Any]:
        # Six-field projection of one V4Match-shaped candidate.
        return {
            "template_id": candidate.template_id,
            "frame_id": candidate.frame_id,
            "frame_number": candidate.frame_number,
            "confidence": float(candidate.confidence),
            "label": candidate.label,
            "v4_rank": getattr(candidate, "v4_rank", None),
        }

    payload: dict[str, Any] = {
        "source_section_ids": list(unit.source_section_ids),
    }
    for passthrough in ("merge_type", "frame_template_id", "frame_id", "frame_number"):
        payload[passthrough] = getattr(unit, passthrough)
    payload["confidence"] = float(unit.confidence)
    for passthrough in (
        "label",
        "phase_z_status",
        "raw_content",
        "title",
        "v4_rank",
        "selection_path",
        "fallback_reason",
    ):
        payload[passthrough] = getattr(unit, passthrough)
    payload["score"] = float(unit.score)
    payload["rationale"] = dict(unit.rationale or {})
    payload["auto_selectable"] = bool(unit.auto_selectable)
    payload["filter_reasons"] = list(unit.filter_reasons or [])
    payload["notes"] = list(unit.notes or [])
    payload["v4_candidates"] = [
        _candidate_payload(candidate) for candidate in (unit.v4_candidates or [])
    ]
    payload["provisional"] = bool(getattr(unit, "provisional", False))
    return payload
|
||||
|
||||
|
||||
def build_snapshot(
    *,
    mdx_sha256: str,
    slide_title: Optional[str],
    slide_footer: Optional[str],
    sections: list,
    stage0_adapter_diagnostics: Optional[dict],
    stage0_normalized_assets: Optional[dict],
    v4_evidence: list,
    layout_preset_pre_override: Optional[str],
    units: list,
    comp_debug: Optional[dict],
    v4_fallback_traces: Optional[dict],
    ai_preflight: Optional[dict],
) -> dict[str, Any]:
    """Assemble the Step 6 reuse snapshot as a JSON-serializable dict.

    ``schema_version`` (always ``SNAPSHOT_VERSION``) and ``mdx_sha256``
    are stored bare. Every other field is stored through ``_wrap`` as
    ``{value, source_path, upstream_step}``. ``sections`` and ``units``
    are converted with ``serialize_section`` / ``serialize_unit``; dict
    and list arguments are shallow-copied, with ``None`` becoming an
    empty container.

    Raises:
        TypeError: from the trailing ``json.dumps`` probe when any value
            is not JSON-serializable (e.g. a set, ``Path`` or dataclass
            instance), so the failure surfaces at build time rather than
            at restore time.
    """
    snapshot: dict[str, Any] = {
        "schema_version": SNAPSHOT_VERSION,
        "mdx_sha256": mdx_sha256,
    }

    # (key, value, source_path, upstream_step), in snapshot key order.
    provenance_rows = (
        (
            "slide_title",
            slide_title,
            "steps/step02_normalized.json#/slide_title",
            "step02",
        ),
        (
            "slide_footer",
            slide_footer,
            "steps/step02_normalized.json#/slide_footer",
            "step02",
        ),
        (
            "sections",
            [serialize_section(s) for s in sections],
            "steps/step02_normalized.json#/sections",
            "step02",
        ),
        (
            "stage0_adapter_diagnostics",
            dict(stage0_adapter_diagnostics or {}),
            "steps/step02_normalized.json#/stage0_adapter_diagnostics",
            "step02",
        ),
        (
            "stage0_normalized_assets",
            dict(stage0_normalized_assets or {}),
            "steps/step02_normalized.json#/stage0_normalized_assets",
            "step02",
        ),
        (
            "v4_evidence",
            list(v4_evidence or []),
            "steps/step05_v4_evidence.json#/evidence_per_section",
            "step05",
        ),
        (
            "layout_preset_pre_override",
            layout_preset_pre_override,
            "steps/step06_composition_plan.json#/layout_preset_decided",
            "step06",
        ),
        (
            "units",
            [serialize_unit(u) for u in units],
            "steps/step06_composition_plan.json#/selected_units",
            "step06",
        ),
        (
            "comp_debug",
            dict(comp_debug or {}),
            "steps/step06_composition_plan.json#/*",
            "step06",
        ),
        (
            "v4_fallback_traces",
            dict(v4_fallback_traces or {}),
            # v4_fallback_traces is assembled inside run_phase_z2_mvp1
            # (see phase_z2_pipeline.py around the Step 5/6 boundary) and
            # only partially surfaces in step06_composition_plan.json via
            # the v4_fallback_summary / imp48_resplit fields. The
            # canonical untruncated source is the in-memory dict at the
            # end of Step 6, which is what the reuse path needs.
            "phase_z2_pipeline.run_phase_z2_mvp1::v4_fallback_traces",
            "step06",
        ),
        (
            "ai_preflight",
            dict(ai_preflight or {}),
            "steps/step00_preconditions.json#/ai_preflight",
            "step00",
        ),
    )
    for key, value, origin, step in provenance_rows:
        snapshot[key] = _wrap(value, source_path=origin, upstream_step=step)

    # Serialization probe only; the encoded text is discarded.
    json.dumps(snapshot)
    return snapshot
|
||||
|
||||
|
||||
class SnapshotValidationError(ValueError):
    """Signals that a reuse snapshot failed validation.

    ``validate_snapshot`` raises it for a structurally unusable snapshot
    and for an ``mdx_sha256`` integrity mismatch.

    It derives from ``ValueError`` so that callers already using
    ``except ValueError`` keep catching it; callers that want a narrower
    handler can catch ``SnapshotValidationError`` directly.
    """
|
||||
|
||||
|
||||
def validate_snapshot(
    snapshot: Any,
    *,
    expected_mdx_sha256: str,
) -> None:
    """Validate a loaded snapshot dict (fail-closed).

    Checks run in this order and the first failure raises:

    1. ``snapshot`` must be a ``dict``.
    2. ``schema_version`` must have the same type as ``SNAPSHOT_VERSION``
       and be equal to it.
    3. ``mdx_sha256`` must be a non-empty ``str`` equal to
       ``expected_mdx_sha256``.
    4. Every key in ``REQUIRED_TOP_LEVEL_KEYS`` must be present.
    5. Every top-level entry other than the bare keys (``_BARE_KEYS``)
       must be a dict exposing ``value``, ``source_path`` and
       ``upstream_step``. This applies to all keys present, not only the
       required ones.

    Args:
        snapshot: The object decoded from the snapshot file.
        expected_mdx_sha256: Digest of the current MDX input that the
            snapshot must match.

    Returns:
        ``None`` on success.

    Raises:
        SnapshotValidationError: on any failed check. Per the module's
            design notes, callers are expected to turn this into an
            exit-code-2 abort.
    """
    if not isinstance(snapshot, dict):
        raise SnapshotValidationError(
            f"snapshot is not a dict (got {type(snapshot).__name__})"
        )

    version = snapshot.get("schema_version")
    # Equality alone is too lenient: ``True == 1`` and ``1.0 == 1`` in
    # Python, so a snapshot carrying JSON ``true`` (or ``1.0``) would be
    # accepted as version 1. Require the exact type as well as the value.
    if type(version) is not type(SNAPSHOT_VERSION) or version != SNAPSHOT_VERSION:
        raise SnapshotValidationError(
            f"schema_version mismatch: expected {SNAPSHOT_VERSION!r}, got {version!r}"
        )

    actual_sha = snapshot.get("mdx_sha256")
    if not isinstance(actual_sha, str) or not actual_sha:
        raise SnapshotValidationError(
            f"mdx_sha256 missing or non-string: got {actual_sha!r}"
        )
    if actual_sha != expected_mdx_sha256:
        raise SnapshotValidationError(
            f"mdx_sha256 mismatch: snapshot={actual_sha!r} "
            f"expected={expected_mdx_sha256!r}"
        )

    missing = [k for k in REQUIRED_TOP_LEVEL_KEYS if k not in snapshot]
    if missing:
        raise SnapshotValidationError(
            f"missing required keys: {missing!r}"
        )

    for key, entry in snapshot.items():
        if key in _BARE_KEYS:
            continue
        if not isinstance(entry, dict):
            raise SnapshotValidationError(
                f"key {key!r}: expected wrapper dict, got {type(entry).__name__}"
            )
        for field_name in ("value", "source_path", "upstream_step"):
            if field_name not in entry:
                raise SnapshotValidationError(
                    f"key {key!r}: wrapper missing {field_name!r}"
                )
|
||||
383
tests/test_phase_z2_cli_reuse_from.py
Normal file
383
tests/test_phase_z2_cli_reuse_from.py
Normal file
@@ -0,0 +1,383 @@
|
||||
"""IMP-43 (#72) u1 + u5 — focused tests for the ``--reuse-from`` CLI surface.
|
||||
|
||||
u1 scope (per the Stage 2 Exit Report):
|
||||
|
||||
- argparse flag ``--reuse-from PREV_RUN_ID`` parses without error.
|
||||
- Fail-closed precondition guard runs AFTER the ``user_overrides.json``
|
||||
merge and BEFORE dispatch. With ``--reuse-from`` set, the guard
|
||||
must:
|
||||
* accept frame-only overrides (or no overrides at all);
|
||||
* reject layout / zone-geometry / zone-section / image overrides
|
||||
with ``sys.exit(2)`` whose stderr names every rejected axis.
|
||||
|
||||
u5 scope (added 2026-05-24):
|
||||
|
||||
- ``reuse_from`` is keyword-only on ``run_phase_z2_mvp1`` and defaults
|
||||
to ``None`` so the absent-flag path preserves pre-u5 behaviour.
|
||||
- The CLI dispatch forwards ``args.reuse_from`` verbatim — both
|
||||
``None`` (flag absent) and ``"PREV_RUN_ID"`` (flag present) reach
|
||||
the kwarg unchanged.
|
||||
- The fake ``run_phase_z2_mvp1`` stub below mirrors the production
|
||||
signature so the forwarding lock would fail loudly on any
|
||||
forwarding regression.
|
||||
|
||||
The harness mirrors ``tests/test_phase_z2_cli_overrides.py`` — the
|
||||
``if __name__ == "__main__"`` block of ``src.phase_z2_pipeline`` is
|
||||
exec'd inside the module's namespace after monkeypatching
|
||||
``run_phase_z2_mvp1`` with a recording stub. The persistence fallback
|
||||
is silenced by redirecting ``src.user_overrides_io.DEFAULT_OVERRIDES_ROOT``
|
||||
to a clean tmp directory so persisted state from prior runs cannot bleed
|
||||
into the parser-only assertions here.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import ast
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import pytest
|
||||
|
||||
import src.phase_z2_pipeline as _pz2
|
||||
import src.user_overrides_io as _io
|
||||
|
||||
|
||||
# -- harness ---------------------------------------------------------------
|
||||
|
||||
|
||||
def _exec_main_block(
    captured: dict[str, Any], argv: list[str], monkeypatch
) -> None:
    """Execute the ``__main__`` body of ``phase_z2_pipeline.py`` under test.

    ``run_phase_z2_mvp1`` is replaced by a stub that records its
    arguments into ``captured`` and sets ``captured["called"] = True``,
    so a test can tell a dispatch apart from an early exit. ``sys.argv``
    is replaced by ``argv``. The module source is then parsed, the first
    top-level ``if`` whose test is a comparison with ``__name__`` on the
    left is located, and its body is compiled and exec'd inside the
    module's own namespace.

    Raises:
        AssertionError: when no such ``if`` block exists in the module.
    """

    def _fake_run(
        mdx_path,
        run_id,
        *,
        override_layout=None,
        override_frames=None,
        override_zone_geometries=None,
        override_section_assignments=None,
        override_image_overrides=None,
        reuse_from=None,
    ):
        captured.update(
            called=True,
            mdx_path=mdx_path,
            run_id=run_id,
            override_layout=override_layout,
            override_frames=override_frames,
            override_zone_geometries=override_zone_geometries,
            override_section_assignments=override_section_assignments,
            override_image_overrides=override_image_overrides,
            reuse_from=reuse_from,
        )

    def _is_name_guard(stmt) -> bool:
        # Matches ``if __name__ <op> ...:``; only the left operand is inspected.
        if not isinstance(stmt, ast.If):
            return False
        condition = stmt.test
        return (
            isinstance(condition, ast.Compare)
            and isinstance(condition.left, ast.Name)
            and condition.left.id == "__name__"
        )

    monkeypatch.setattr(_pz2, "run_phase_z2_mvp1", _fake_run)
    monkeypatch.setattr(sys, "argv", argv)

    module_path = Path(_pz2.__file__)
    module_ast = ast.parse(module_path.read_text(encoding="utf-8"))

    guard = next((stmt for stmt in module_ast.body if _is_name_guard(stmt)), None)
    if guard is None:
        raise AssertionError("no `if __name__ == '__main__'` block found")

    main_body = ast.Module(body=guard.body, type_ignores=[])
    exec(compile(main_body, str(module_path), "exec"), _pz2.__dict__)
|
||||
|
||||
|
||||
def _redirect_overrides_root(tmp_path: Path, monkeypatch) -> None:
    """Point ``user_overrides_io.DEFAULT_OVERRIDES_ROOT`` at ``tmp_path``.

    Keeps persisted override files from earlier runs out of the test.
    """
    monkeypatch.setattr(_io, "DEFAULT_OVERRIDES_ROOT", tmp_path, raising=True)
|
||||
|
||||
|
||||
# -- success paths --------------------------------------------------------
|
||||
|
||||
|
||||
def test_reuse_from_alone_parses_and_dispatches(tmp_path, monkeypatch):
    """``--reuse-from`` with no overrides parses and reaches dispatch.

    Also checks (u5) that the CLI hands the flag value to the
    ``reuse_from`` kwarg unchanged.
    """
    _redirect_overrides_root(tmp_path, monkeypatch)
    prev_run_id = "03__DX_20260508025134"
    argv = ["src.phase_z2_pipeline", "03.mdx", "--reuse-from", prev_run_id]
    recorded: dict[str, Any] = {}

    _exec_main_block(recorded, argv, monkeypatch)

    assert recorded.get("called") is True
    # u5: verbatim threading.
    assert recorded["reuse_from"] == "03__DX_20260508025134"
|
||||
|
||||
|
||||
def test_reuse_from_with_frame_override_dispatches(tmp_path, monkeypatch):
    """``--reuse-from`` combined with ``--override-frame`` reaches dispatch.

    Frame overrides are the one axis the guard lets through, so both the
    parsed frame mapping and ``reuse_from`` must arrive in the same call.
    """
    _redirect_overrides_root(tmp_path, monkeypatch)
    argv = [
        "src.phase_z2_pipeline",
        "03.mdx",
        "--reuse-from",
        "03__DX_20260508025134",
        "--override-frame",
        "03-1=frame_foo",
    ]
    recorded: dict[str, Any] = {}

    _exec_main_block(recorded, argv, monkeypatch)

    assert recorded.get("called") is True
    assert recorded["override_frames"] == {"03-1": "frame_foo"}
    # u5: the frame override and reuse_from reach the kwargs together.
    assert recorded["reuse_from"] == "03__DX_20260508025134"
|
||||
|
||||
|
||||
# -- u5 — flag-absent default + signature surface ------------------------
|
||||
|
||||
|
||||
def test_no_reuse_from_threads_none_kwarg(tmp_path, monkeypatch):
    """u5: without ``--reuse-from`` the stub receives ``reuse_from=None``.

    The value must be exactly ``None``, not an empty string. This locks
    the "default None preserves current behavior" requirement.
    """
    _redirect_overrides_root(tmp_path, monkeypatch)
    argv = ["src.phase_z2_pipeline", "03.mdx"]
    recorded: dict[str, Any] = {}

    _exec_main_block(recorded, argv, monkeypatch)

    assert recorded.get("called") is True
    assert recorded["reuse_from"] is None
|
||||
|
||||
|
||||
def test_run_phase_z2_mvp1_signature_includes_reuse_from():
    """Lock the production signature: ``reuse_from`` is keyword-only, default ``None``.

    Kept in the CLI-surface test file so a drift between the production
    signature and the dispatch contract fails here.
    """
    import inspect

    parameters = inspect.signature(_pz2.run_phase_z2_mvp1).parameters
    assert "reuse_from" in parameters, list(parameters)

    reuse_param = parameters["reuse_from"]
    assert reuse_param.kind is inspect.Parameter.KEYWORD_ONLY, reuse_param.kind
    assert reuse_param.default is None, reuse_param.default
|
||||
|
||||
|
||||
# -- fail-closed (single-axis rejection) ----------------------------------
|
||||
|
||||
|
||||
def test_reuse_from_with_layout_override_exits(tmp_path, monkeypatch, capsys):
    """``--reuse-from`` + ``--override-layout`` aborts before dispatch.

    Expects exit code 2, the guard's error text on stderr naming the
    ``layout`` axis, and no call to the stub.
    """
    _redirect_overrides_root(tmp_path, monkeypatch)
    argv = [
        "src.phase_z2_pipeline",
        "03.mdx",
        "--reuse-from",
        "03__DX_20260508025134",
        "--override-layout",
        "horizontal-2",
    ]
    recorded: dict[str, Any] = {}

    with pytest.raises(SystemExit) as exit_info:
        _exec_main_block(recorded, argv, monkeypatch)

    assert exit_info.value.code == 2
    stderr_text = capsys.readouterr().err
    assert "--reuse-from incompatible with override axes" in stderr_text
    assert "layout" in stderr_text
    assert recorded.get("called") is not True
|
||||
|
||||
|
||||
def test_reuse_from_with_zone_geometry_override_exits(
    tmp_path, monkeypatch, capsys
):
    """``--reuse-from`` + ``--override-zone-geometry`` aborts before dispatch.

    Expects exit code 2, the guard's error text on stderr naming the
    ``zone_geometry`` axis, and no call to the stub.
    """
    _redirect_overrides_root(tmp_path, monkeypatch)
    argv = [
        "src.phase_z2_pipeline",
        "03.mdx",
        "--reuse-from",
        "03__DX_20260508025134",
        "--override-zone-geometry",
        "top=0,0,1,0.3",
    ]
    recorded: dict[str, Any] = {}

    with pytest.raises(SystemExit) as exit_info:
        _exec_main_block(recorded, argv, monkeypatch)

    assert exit_info.value.code == 2
    stderr_text = capsys.readouterr().err
    assert "--reuse-from incompatible with override axes" in stderr_text
    assert "zone_geometry" in stderr_text
    assert recorded.get("called") is not True
|
||||
|
||||
|
||||
def test_reuse_from_with_zone_section_override_exits(
    tmp_path, monkeypatch, capsys
):
    """``--reuse-from`` + ``--override-section-assignment`` aborts before dispatch.

    Expects exit code 2, the guard's error text on stderr naming the
    ``zone_section`` axis, and no call to the stub.
    """
    _redirect_overrides_root(tmp_path, monkeypatch)
    argv = [
        "src.phase_z2_pipeline",
        "03.mdx",
        "--reuse-from",
        "03__DX_20260508025134",
        "--override-section-assignment",
        "top=03-1",
    ]
    recorded: dict[str, Any] = {}

    with pytest.raises(SystemExit) as exit_info:
        _exec_main_block(recorded, argv, monkeypatch)

    assert exit_info.value.code == 2
    stderr_text = capsys.readouterr().err
    assert "--reuse-from incompatible with override axes" in stderr_text
    assert "zone_section" in stderr_text
    assert recorded.get("called") is not True
|
||||
|
||||
|
||||
def test_reuse_from_with_image_override_exits(tmp_path, monkeypatch, capsys):
    """``--reuse-from`` + ``--override-image`` aborts before dispatch.

    Expects exit code 2, the guard's error text on stderr naming the
    ``image`` axis, and no call to the stub.
    """
    _redirect_overrides_root(tmp_path, monkeypatch)
    argv = [
        "src.phase_z2_pipeline",
        "03.mdx",
        "--reuse-from",
        "03__DX_20260508025134",
        "--override-image",
        "img-abc=10,15,30,25",
    ]
    recorded: dict[str, Any] = {}

    with pytest.raises(SystemExit) as exit_info:
        _exec_main_block(recorded, argv, monkeypatch)

    assert exit_info.value.code == 2
    stderr_text = capsys.readouterr().err
    assert "--reuse-from incompatible with override axes" in stderr_text
    assert "image" in stderr_text
    assert recorded.get("called") is not True
|
||||
|
||||
|
||||
# -- fail-closed (multi-axis aggregation) ---------------------------------
|
||||
|
||||
|
||||
def test_reuse_from_with_multiple_rejected_axes_lists_all(
    tmp_path, monkeypatch, capsys
):
    """With several rejected overrides, stderr names every axis.

    Layout, zone-geometry and image overrides are passed together; the
    guard must report all three instead of stopping at the first.
    """
    _redirect_overrides_root(tmp_path, monkeypatch)
    argv = [
        "src.phase_z2_pipeline",
        "03.mdx",
        "--reuse-from",
        "03__DX_20260508025134",
        "--override-layout",
        "horizontal-2",
        "--override-zone-geometry",
        "top=0,0,1,0.3",
        "--override-image",
        "img-abc=10,15,30,25",
    ]
    recorded: dict[str, Any] = {}

    with pytest.raises(SystemExit) as exit_info:
        _exec_main_block(recorded, argv, monkeypatch)

    assert exit_info.value.code == 2
    stderr_text = capsys.readouterr().err
    assert "layout" in stderr_text
    assert "zone_geometry" in stderr_text
    assert "image" in stderr_text
    assert recorded.get("called") is not True
|
||||
|
||||
|
||||
# -- guard inactive when --reuse-from absent ------------------------------
|
||||
|
||||
|
||||
def test_no_reuse_from_layout_override_still_dispatches(
    tmp_path, monkeypatch
):
    """The guard stays inactive when ``--reuse-from`` is not given.

    A layout override on its own must still dispatch and arrive
    unchanged in ``override_layout``.
    """
    _redirect_overrides_root(tmp_path, monkeypatch)
    argv = [
        "src.phase_z2_pipeline",
        "03.mdx",
        "--override-layout",
        "horizontal-2",
    ]
    recorded: dict[str, Any] = {}

    _exec_main_block(recorded, argv, monkeypatch)

    assert recorded.get("called") is True
    assert recorded["override_layout"] == "horizontal-2"
|
||||
|
||||
|
||||
# -- fail-closed honours persisted overrides ------------------------------
|
||||
|
||||
|
||||
def test_reuse_from_with_persisted_layout_override_exits(
    tmp_path, monkeypatch, capsys
):
    """A layout persisted on disk is rejected just like a CLI layout.

    The guard runs after the ``user_overrides.json`` merge, so with
    ``--reuse-from`` set and no override flags on the command line, a
    persisted ``layout`` must still produce exit code 2 and name the
    ``layout`` axis. This locks the Stage 2 placement rule.
    """
    _redirect_overrides_root(tmp_path, monkeypatch)
    # Persist a layout override keyed by the MDX stem ``03``.
    tmp_path.mkdir(parents=True, exist_ok=True)
    persisted_file = tmp_path / "03.json"
    persisted_file.write_text('{"layout": "vertical-2"}', encoding="utf-8")

    argv = [
        "src.phase_z2_pipeline",
        "03.mdx",
        "--reuse-from",
        "03__DX_20260508025134",
    ]
    recorded: dict[str, Any] = {}

    with pytest.raises(SystemExit) as exit_info:
        _exec_main_block(recorded, argv, monkeypatch)

    assert exit_info.value.code == 2
    stderr_text = capsys.readouterr().err
    assert "--reuse-from incompatible with override axes" in stderr_text
    assert "layout" in stderr_text
    assert recorded.get("called") is not True
|
||||
555
tests/test_phase_z2_reuse_from_entry.py
Normal file
555
tests/test_phase_z2_reuse_from_entry.py
Normal file
@@ -0,0 +1,555 @@
|
||||
"""IMP-43 (#72) u4 — focused tests for the --reuse-from entry helpers.
|
||||
|
||||
u4 scope (per the Stage 2 Exit Report):
|
||||
|
||||
- Pure path resolution, file copy, snapshot load+validate, MdxSection +
|
||||
CompositionUnit rehydration, and reuse-marker writing.
|
||||
- Helpers RAISE on missing artifacts / corrupt snapshot / mdx_sha256
|
||||
mismatch — u4b adds the stderr + sys.exit(2) translation and the
|
||||
prev_run_dir == new_run_dir accidental-write guard around them.
|
||||
- The kwarg threading + the in-``run_phase_z2_mvp1`` branch that
|
||||
invokes these helpers land in u5.
|
||||
|
||||
Tested helpers (``src/phase_z2_pipeline.py``):
|
||||
* ``_resolve_reuse_from_prev_run_dir``
|
||||
* ``_copy_reuse_artifacts_from_prev_run``
|
||||
* ``_load_and_validate_reuse_snapshot``
|
||||
* ``_rehydrate_mdx_sections_from_snapshot``
|
||||
* ``_rehydrate_composition_units_from_snapshot``
|
||||
* ``_write_reuse_marker``
|
||||
* ``_RehydratedV4Candidate`` (V4Match-shape duck type)
|
||||
* ``_REUSE_STEP_ARTIFACTS`` / ``REUSE_MARKER_FILENAME`` /
|
||||
``REUSE_MARKER_SCHEMA_VERSION``
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Any, Optional
|
||||
|
||||
import pytest
|
||||
|
||||
import src.phase_z2_pipeline as _pz2
|
||||
from src.phase_z2_composition import CompositionUnit
|
||||
from src.phase_z2_reuse_snapshot import (
|
||||
SNAPSHOT_FILENAME,
|
||||
SNAPSHOT_VERSION,
|
||||
SnapshotValidationError,
|
||||
build_snapshot,
|
||||
)
|
||||
|
||||
|
||||
# -- synthetic duck-typed inputs (mirror u3 test fixture) -----------------
|
||||
|
||||
|
||||
@dataclass
|
||||
class _Section:
|
||||
section_id: str
|
||||
section_num: int
|
||||
title: str
|
||||
raw_content: str
|
||||
heading_number: Optional[str] = None
|
||||
v4_alias_keys: list = field(default_factory=list)
|
||||
sub_sections: list = field(default_factory=list)
|
||||
|
||||
|
||||
@dataclass
|
||||
class _V4Candidate:
|
||||
template_id: str
|
||||
frame_id: str
|
||||
frame_number: int
|
||||
confidence: float
|
||||
label: str
|
||||
|
||||
|
||||
@dataclass
|
||||
class _Unit:
|
||||
source_section_ids: list
|
||||
merge_type: str
|
||||
frame_template_id: str
|
||||
frame_id: str
|
||||
frame_number: int
|
||||
confidence: float
|
||||
label: str
|
||||
phase_z_status: str
|
||||
raw_content: str
|
||||
title: str
|
||||
score: float
|
||||
v4_rank: Optional[int] = 1
|
||||
selection_path: str = "rank_1"
|
||||
fallback_reason: Optional[str] = None
|
||||
rationale: dict = field(default_factory=dict)
|
||||
auto_selectable: bool = True
|
||||
filter_reasons: list = field(default_factory=list)
|
||||
notes: list = field(default_factory=list)
|
||||
v4_candidates: list = field(default_factory=list)
|
||||
provisional: bool = False
|
||||
|
||||
|
||||
def _mdx_text() -> str:
|
||||
return "# Slide\n\n## 03-1 DX status\n\n- bullet one\n- bullet two\n"
|
||||
|
||||
|
||||
def _build_canonical_snapshot(
    *,
    mdx_source_text: Optional[str] = None,
    layout_preset: str = "single",
) -> dict:
    """Build a one-section, one-unit snapshot through ``build_snapshot``.

    Args:
        mdx_source_text: Text whose UTF-8 SHA-256 becomes ``mdx_sha256``;
            ``None`` selects ``_mdx_text()``.
        layout_preset: Value stored as ``layout_preset_pre_override``.

    Returns:
        The dict produced by ``build_snapshot``.
    """
    if mdx_source_text is None:
        digest_input = _mdx_text()
    else:
        digest_input = mdx_source_text
    mdx_digest = hashlib.sha256(digest_input.encode("utf-8")).hexdigest()

    # The same candidate values feed the candidate object and the evidence row.
    candidate_values = {
        "template_id": "tpl_a",
        "frame_id": "fid_a",
        "frame_number": 13,
        "confidence": 0.91,
        "label": "use_as_is",
    }
    bullets = "- bullet one\n- bullet two"

    cand = _V4Candidate(**candidate_values)
    section = _Section(
        section_id="03-1",
        section_num=1,
        title="DX status",
        raw_content=bullets,
        heading_number="3.1",
        v4_alias_keys=["03-1.1"],
        sub_sections=[],
    )
    unit = _Unit(
        source_section_ids=["03-1"],
        merge_type="single",
        frame_template_id="tpl_a",
        frame_id="fid_a",
        frame_number=13,
        confidence=0.91,
        label="use_as_is",
        phase_z_status="auto_renderable",
        raw_content=bullets,
        title="DX status",
        score=0.91,
        v4_candidates=[cand],
        provisional=False,
        auto_selectable=True,
        filter_reasons=[],
        notes=["a note"],
        rationale={"weight": 1.0},
    )
    evidence_row = {
        "section_id": "03-1",
        "v4_candidates": [dict(candidate_values)],
        "candidate_status": "ok",
    }

    return build_snapshot(
        mdx_sha256=mdx_digest,
        slide_title="Slide",
        slide_footer=None,
        sections=[section],
        stage0_adapter_diagnostics={"used": True, "fallback_reason": None},
        stage0_normalized_assets={"popups": [], "images": [], "tables": []},
        v4_evidence=[evidence_row],
        layout_preset_pre_override=layout_preset,
        units=[unit],
        comp_debug={"v4_fallback_summary": {"fallback_used_count": 0}},
        v4_fallback_traces={"03-1": {"selection_path": "rank_1"}},
        ai_preflight={"enabled": False, "skipped": True},
    )
|
||||
|
||||
|
||||
def _seed_prev_run_dir(prev_run_dir: Path, *, snapshot: dict) -> None:
    """Create a minimal previous-run directory for the u4 helpers.

    Writes one placeholder file per ``_pz2._REUSE_STEP_ARTIFACTS`` entry
    under ``prev_run_dir / "steps"``, then writes ``snapshot`` as JSON to
    ``prev_run_dir / SNAPSHOT_FILENAME``.
    """
    steps_dir = prev_run_dir / "steps"
    steps_dir.mkdir(parents=True, exist_ok=True)

    for fname in _pz2._REUSE_STEP_ARTIFACTS:
        # Placeholder content only: a small JSON object for ``.json``
        # names, plain text for everything else.
        if fname.endswith(".json"):
            placeholder = f'{{"name": "{fname}"}}'
        else:
            placeholder = "raw mdx body bytes"
        (steps_dir / fname).write_text(placeholder, encoding="utf-8")

    snapshot_text = json.dumps(snapshot, ensure_ascii=False, indent=2)
    (prev_run_dir / SNAPSHOT_FILENAME).write_text(snapshot_text, encoding="utf-8")
|
||||
|
||||
|
||||
# -- _REUSE_STEP_ARTIFACTS constant ---------------------------------------
|
||||
|
||||
|
||||
def test_reuse_step_artifacts_locks_stage2_boundary():
    """Pin the Stage 2 reuse boundary to the Step 0/1/2/5/6 artifacts.

    Step 3/4 are deliberately absent. Per the original author's note,
    step03 / step04 ARE written after Step 6 (around
    src/phase_z2_pipeline.py:5931 / 5964) and before the Step 7 artifact
    (~6294), but both carry step_status='trace-only' /
    pipeline_path_connected=False: they are diagnostic projections of the
    Step 6 debug_zones rather than inputs Step 7+ rehydrate from.
    """
    expected_boundary = (
        "step00_preconditions.json",
        "step01_mdx_upload.json",
        "step01_mdx_source.md",
        "step02_normalized.json",
        "step05_v4_evidence.json",
        "step06_composition_plan.json",
    )
    assert _pz2._REUSE_STEP_ARTIFACTS == expected_boundary
|
||||
|
||||
|
||||
def test_reuse_marker_filename_is_dotfile_at_run_dir_root():
    """Lock the reuse marker filename constant.

    NOTE(review): the test name says "dotfile", but the pinned value starts
    with an underscore, not a dot.
    """
    pinned_name = "_reuse_marker.json"
    assert _pz2.REUSE_MARKER_FILENAME == pinned_name
|
||||
|
||||
|
||||
# -- _resolve_reuse_from_prev_run_dir -------------------------------------
|
||||
|
||||
|
||||
def test_resolve_prev_run_dir_returns_runs_dir_phase_z2_path():
    """The resolver maps a run id to ``RUNS_DIR / <run_id> / "phase_z2"``."""
    prev_run_id = "20260524_120000_phase_z2"
    resolved = _pz2._resolve_reuse_from_prev_run_dir(prev_run_id)
    assert resolved == _pz2.RUNS_DIR / prev_run_id / "phase_z2"
|
||||
|
||||
|
||||
def test_resolve_prev_run_dir_does_not_check_existence(tmp_path: Path):
    """The resolver is pure path computation and must not require the run
    to exist (the missing-prev-run case is handled by u4b).

    The call must return a ``Path`` cleanly even for a run id that has no
    directory on disk.
    """
    resolved = _pz2._resolve_reuse_from_prev_run_dir("never_existed_run_id")
    assert isinstance(resolved, Path)
    # Nothing was created on disk, yet the helper still returned normally.
    assert not resolved.exists()
|
||||
|
||||
|
||||
# -- _copy_reuse_artifacts_from_prev_run ----------------------------------
|
||||
|
||||
|
||||
def test_copy_reuse_artifacts_copies_all_step_files(tmp_path: Path):
    """Every boundary step artifact is copied under ``steps/`` of the new
    run dir and reported in the returned mapping as ``steps/<name>``."""
    src_run = tmp_path / "prev" / "phase_z2"
    dst_run = tmp_path / "new" / "phase_z2"
    _seed_prev_run_dir(src_run, snapshot=_build_canonical_snapshot())

    result = _pz2._copy_reuse_artifacts_from_prev_run(src_run, dst_run)

    for artifact in _pz2._REUSE_STEP_ARTIFACTS:
        target = dst_run / "steps" / artifact
        assert target.exists(), f"missing copy: {artifact}"
        assert result[artifact] == f"steps/{artifact}"
|
||||
|
||||
|
||||
def test_copy_reuse_artifacts_copies_snapshot_to_run_dir_root(tmp_path: Path):
    """The reuse snapshot is copied to the run dir root, not ``steps/``."""
    src_run = tmp_path / "prev" / "phase_z2"
    dst_run = tmp_path / "new" / "phase_z2"
    _seed_prev_run_dir(src_run, snapshot=_build_canonical_snapshot())

    result = _pz2._copy_reuse_artifacts_from_prev_run(src_run, dst_run)

    # Per the u3 contract the snapshot sits directly in the run dir.
    snapshot_copy = dst_run / SNAPSHOT_FILENAME
    assert snapshot_copy.exists()
    assert result[SNAPSHOT_FILENAME] == SNAPSHOT_FILENAME
|
||||
|
||||
|
||||
def test_copy_reuse_artifacts_creates_steps_subdir_if_absent(tmp_path: Path):
    """The copy helper creates ``<new_run_dir>/steps`` when it is missing."""
    src_run = tmp_path / "prev" / "phase_z2"
    dst_run = tmp_path / "new" / "phase_z2"
    _seed_prev_run_dir(src_run, snapshot=_build_canonical_snapshot())

    dst_steps = dst_run / "steps"
    # Precondition: the destination steps directory has not been created yet.
    assert not dst_steps.exists()
    _pz2._copy_reuse_artifacts_from_prev_run(src_run, dst_run)
    assert dst_steps.is_dir()
|
||||
|
||||
|
||||
def test_copy_reuse_artifacts_missing_step_raises_filenotfound(
    tmp_path: Path,
):
    """A missing required step artifact raises ``FileNotFoundError`` whose
    message names the artifact and mentions ``prev_run_dir``."""
    src_run = tmp_path / "prev" / "phase_z2"
    dst_run = tmp_path / "new" / "phase_z2"
    _seed_prev_run_dir(src_run, snapshot=_build_canonical_snapshot())
    # Remove one required artifact from the seeded previous run.
    (src_run / "steps" / "step05_v4_evidence.json").unlink()

    with pytest.raises(FileNotFoundError) as excinfo:
        _pz2._copy_reuse_artifacts_from_prev_run(src_run, dst_run)

    detail = str(excinfo.value)
    assert "step05_v4_evidence.json" in detail
    assert "prev_run_dir" in detail
|
||||
|
||||
|
||||
def test_copy_reuse_artifacts_missing_snapshot_raises_filenotfound(
    tmp_path: Path,
):
    """A missing reuse snapshot raises ``FileNotFoundError`` naming the
    snapshot file."""
    src_run = tmp_path / "prev" / "phase_z2"
    dst_run = tmp_path / "new" / "phase_z2"
    _seed_prev_run_dir(src_run, snapshot=_build_canonical_snapshot())
    (src_run / SNAPSHOT_FILENAME).unlink()

    with pytest.raises(FileNotFoundError) as excinfo:
        _pz2._copy_reuse_artifacts_from_prev_run(src_run, dst_run)

    assert SNAPSHOT_FILENAME in str(excinfo.value)
|
||||
|
||||
|
||||
def test_copy_reuse_artifacts_byte_identical_copy(tmp_path: Path):
    """Copied files must be byte-for-byte equal to their sources: the
    helper copies, it does not transform."""
    src_run = tmp_path / "prev" / "phase_z2"
    dst_run = tmp_path / "new" / "phase_z2"
    _seed_prev_run_dir(src_run, snapshot=_build_canonical_snapshot())

    _pz2._copy_reuse_artifacts_from_prev_run(src_run, dst_run)

    for artifact in _pz2._REUSE_STEP_ARTIFACTS:
        source_bytes = (src_run / "steps" / artifact).read_bytes()
        copied_bytes = (dst_run / "steps" / artifact).read_bytes()
        assert source_bytes == copied_bytes

    snapshot_source = (src_run / SNAPSHOT_FILENAME).read_bytes()
    snapshot_copied = (dst_run / SNAPSHOT_FILENAME).read_bytes()
    assert snapshot_source == snapshot_copied
|
||||
|
||||
|
||||
# -- _load_and_validate_reuse_snapshot ------------------------------------
|
||||
|
||||
|
||||
def test_load_and_validate_returns_snapshot_dict(tmp_path: Path):
    """A valid snapshot matching the mdx text loads and exposes its
    ``schema_version`` and the ``slide_title`` axis value."""
    mdx_text = _mdx_text()
    snapshot = _build_canonical_snapshot(mdx_source_text=mdx_text)
    snapshot_path = tmp_path / SNAPSHOT_FILENAME
    snapshot_path.write_text(
        json.dumps(snapshot, ensure_ascii=False, indent=2), encoding="utf-8"
    )

    result = _pz2._load_and_validate_reuse_snapshot(
        tmp_path, mdx_source_text=mdx_text
    )

    assert result["schema_version"] == SNAPSHOT_VERSION
    assert result["slide_title"]["value"] == "Slide"
|
||||
|
||||
|
||||
def test_load_and_validate_mdx_sha256_mismatch_raises(tmp_path: Path):
    """A snapshot built for one mdx text must be rejected for another.

    The snapshot is built from ``built_for`` but validated against
    ``supplied``; the loader is expected to raise
    ``SnapshotValidationError`` mentioning the mdx_sha256 mismatch. Per the
    original note, u4b translates that into exit code 2 — only the raise is
    asserted here.
    """
    built_for = "# Slide A\n"
    supplied = "# Slide B (different bytes)\n"
    snapshot = _build_canonical_snapshot(mdx_source_text=built_for)
    (tmp_path / SNAPSHOT_FILENAME).write_text(
        json.dumps(snapshot, ensure_ascii=False, indent=2), encoding="utf-8"
    )

    with pytest.raises(SnapshotValidationError) as excinfo:
        _pz2._load_and_validate_reuse_snapshot(
            tmp_path, mdx_source_text=supplied
        )

    assert "mdx_sha256 mismatch" in str(excinfo.value)
|
||||
|
||||
|
||||
def test_load_and_validate_corrupt_json_raises(tmp_path: Path):
    """A snapshot file with malformed JSON raises ``json.JSONDecodeError``."""
    corrupt_path = tmp_path / SNAPSHOT_FILENAME
    corrupt_path.write_text("{ not valid json", encoding="utf-8")

    with pytest.raises(json.JSONDecodeError):
        _pz2._load_and_validate_reuse_snapshot(
            tmp_path, mdx_source_text=_mdx_text()
        )
|
||||
|
||||
|
||||
def test_load_and_validate_missing_snapshot_file_raises(tmp_path: Path):
    """With no snapshot file present the loader raises
    ``FileNotFoundError``.

    Per the original note, u4b turns this into exit code 2 with a
    provenance message; that translation is not asserted here.
    """
    mdx_text = _mdx_text()
    with pytest.raises(FileNotFoundError):
        _pz2._load_and_validate_reuse_snapshot(
            tmp_path, mdx_source_text=mdx_text
        )
|
||||
|
||||
|
||||
def test_load_and_validate_schema_version_mismatch_raises(tmp_path: Path):
    """A snapshot whose ``schema_version`` differs from ``SNAPSHOT_VERSION``
    is rejected with ``SnapshotValidationError`` naming the field."""
    mdx_text = _mdx_text()
    snapshot = _build_canonical_snapshot(mdx_source_text=mdx_text)
    # Bump the version by one so it can never equal the supported version.
    snapshot["schema_version"] = SNAPSHOT_VERSION + 1
    (tmp_path / SNAPSHOT_FILENAME).write_text(
        json.dumps(snapshot, ensure_ascii=False, indent=2), encoding="utf-8"
    )

    with pytest.raises(SnapshotValidationError) as excinfo:
        _pz2._load_and_validate_reuse_snapshot(
            tmp_path, mdx_source_text=mdx_text
        )

    assert "schema_version" in str(excinfo.value)
|
||||
|
||||
|
||||
# -- _rehydrate_mdx_sections_from_snapshot --------------------------------
|
||||
|
||||
|
||||
def test_rehydrate_sections_returns_mdxsection_instances():
    """Section rehydration yields ``MdxSection`` objects carrying the
    canonical snapshot's id, title and raw content."""
    restored = _pz2._rehydrate_mdx_sections_from_snapshot(
        _build_canonical_snapshot()
    )

    assert len(restored) == 1
    first = restored[0]
    assert isinstance(first, _pz2.MdxSection)
    assert first.section_id == "03-1"
    assert first.title == "DX status"
    assert first.raw_content == "- bullet one\n- bullet two"
|
||||
|
||||
|
||||
def test_rehydrate_sections_preserves_heading_number_and_aliases():
    """Heading number, V4 alias keys and (empty) sub-sections survive the
    snapshot round trip."""
    restored = _pz2._rehydrate_mdx_sections_from_snapshot(
        _build_canonical_snapshot()
    )
    first = restored[0]
    assert first.heading_number == "3.1"
    assert first.v4_alias_keys == ["03-1.1"]
    assert first.sub_sections == []
|
||||
|
||||
|
||||
# -- _rehydrate_composition_units_from_snapshot ---------------------------
|
||||
|
||||
|
||||
def test_rehydrate_units_returns_composition_unit_instances():
    """Unit rehydration yields exactly one ``CompositionUnit`` for the
    canonical snapshot."""
    restored = _pz2._rehydrate_composition_units_from_snapshot(
        _build_canonical_snapshot()
    )
    assert len(restored) == 1
    assert isinstance(restored[0], CompositionUnit)
|
||||
|
||||
|
||||
def test_rehydrate_units_preserves_core_fields():
    """Core identity, frame and scoring fields of the canonical unit are
    restored unchanged."""
    restored = _pz2._rehydrate_composition_units_from_snapshot(
        _build_canonical_snapshot()
    )
    unit = restored[0]

    # Identity / merge shape.
    assert unit.source_section_ids == ["03-1"]
    assert unit.merge_type == "single"
    # Selected frame.
    assert unit.frame_template_id == "tpl_a"
    assert unit.frame_id == "fid_a"
    assert unit.frame_number == 13
    # Scoring and status.
    assert unit.confidence == pytest.approx(0.91)
    assert unit.label == "use_as_is"
    assert unit.phase_z_status == "auto_renderable"
    assert unit.title == "DX status"
    assert unit.score == pytest.approx(0.91)
|
||||
|
||||
|
||||
def test_rehydrate_units_preserves_provisional_and_auto_selectable():
    """Flag and annotation fields (provisional, auto_selectable,
    filter_reasons, notes, rationale) are restored with their seeded
    values."""
    restored = _pz2._rehydrate_composition_units_from_snapshot(
        _build_canonical_snapshot()
    )
    unit = restored[0]
    assert unit.provisional is False
    assert unit.auto_selectable is True
    assert unit.filter_reasons == []
    assert unit.notes == ["a note"]
    assert unit.rationale == {"weight": 1.0}
|
||||
|
||||
|
||||
def test_rehydrate_units_v4_candidates_expose_attribute_access():
    """Restored V4 candidates must support attribute access.

    Per the original note, ``_apply_frame_override_to_unit`` reads
    ``cand.template_id`` / ``cand.frame_id`` / etc. from
    ``unit.v4_candidates``, so rehydrated entries have to be objects with
    attributes rather than raw dicts.
    """
    restored = _pz2._rehydrate_composition_units_from_snapshot(
        _build_canonical_snapshot()
    )
    candidates = restored[0].v4_candidates
    assert len(candidates) == 1

    candidate = candidates[0]
    assert isinstance(candidate, _pz2._RehydratedV4Candidate)
    assert candidate.template_id == "tpl_a"
    assert candidate.frame_id == "fid_a"
    assert candidate.frame_number == 13
    assert candidate.confidence == pytest.approx(0.91)
    assert candidate.label == "use_as_is"
|
||||
|
||||
|
||||
def test_rehydrate_units_empty_v4_candidates_yields_empty_list():
    """An empty ``v4_candidates`` list in the snapshot rehydrates to an
    empty list on the unit."""
    snapshot = _build_canonical_snapshot()
    first_unit_payload = snapshot["units"]["value"][0]
    first_unit_payload["v4_candidates"] = []

    restored = _pz2._rehydrate_composition_units_from_snapshot(snapshot)
    assert restored[0].v4_candidates == []
|
||||
|
||||
|
||||
# -- _write_reuse_marker --------------------------------------------------
|
||||
|
||||
|
||||
def test_write_reuse_marker_writes_json_with_prev_run_id(tmp_path: Path):
    """The marker is written at the run dir root, its path is returned, and
    it records schema version, previous run id and snapshot filename."""
    prev_run_id = "20260524_010101_phase_z2"
    copied_map = {
        "step00_preconditions.json": "steps/step00_preconditions.json",
        SNAPSHOT_FILENAME: SNAPSHOT_FILENAME,
    }

    marker_path = _pz2._write_reuse_marker(
        tmp_path,
        prev_run_id=prev_run_id,
        copied_artifacts=copied_map,
    )

    assert marker_path == tmp_path / _pz2.REUSE_MARKER_FILENAME
    payload = json.loads(marker_path.read_text(encoding="utf-8"))
    assert payload["schema_version"] == _pz2.REUSE_MARKER_SCHEMA_VERSION
    assert payload["reuse_from_prev_run_id"] == "20260524_010101_phase_z2"
    assert payload["snapshot_filename"] == SNAPSHOT_FILENAME
|
||||
|
||||
|
||||
def test_write_reuse_marker_records_copied_artifacts_and_boundary(
    tmp_path: Path,
):
    """The marker echoes the copied-artifact mapping, lists the boundary
    step artifacts, and records Step 7 as the resume point."""
    copied_map = {}
    for artifact in _pz2._REUSE_STEP_ARTIFACTS:
        copied_map[artifact] = f"steps/{artifact}"
    copied_map[SNAPSHOT_FILENAME] = SNAPSHOT_FILENAME

    _pz2._write_reuse_marker(
        tmp_path,
        prev_run_id="20260524_010101_phase_z2",
        copied_artifacts=copied_map,
    )

    marker_file = tmp_path / _pz2.REUSE_MARKER_FILENAME
    payload = json.loads(marker_file.read_text(encoding="utf-8"))
    assert payload["copied_artifacts"] == copied_map
    assert payload["boundary_steps"] == list(_pz2._REUSE_STEP_ARTIFACTS)
    assert payload["resume_at_step"] == 7
|
||||
|
||||
|
||||
# -- module surface anchors -----------------------------------------------
|
||||
|
||||
|
||||
def test_pipeline_exposes_all_u4_helpers():
    """Every u4 helper and constant must stay a module-level attribute of
    ``phase_z2_pipeline`` (per the original note, u5 wires them into
    ``run_phase_z2_mvp1``)."""
    required_surface = (
        "_resolve_reuse_from_prev_run_dir",
        "_copy_reuse_artifacts_from_prev_run",
        "_load_and_validate_reuse_snapshot",
        "_rehydrate_mdx_sections_from_snapshot",
        "_rehydrate_composition_units_from_snapshot",
        "_write_reuse_marker",
        "_RehydratedV4Candidate",
        "_REUSE_STEP_ARTIFACTS",
        "REUSE_MARKER_FILENAME",
        "REUSE_MARKER_SCHEMA_VERSION",
    )
    for attr_name in required_surface:
        assert hasattr(_pz2, attr_name), f"u4 surface missing: {attr_name}"
|
||||
|
||||
|
||||
def test_pipeline_run_signature_reuse_from_is_kw_only_optional_none():
    """Forward-direction lock on the ``reuse_from`` kwarg (u5).

    ``run_phase_z2_mvp1`` must accept ``reuse_from`` as a keyword-only
    parameter (after the ``*`` barrier) defaulting to ``None``, so that an
    absent flag keeps the pre-u5 behaviour. Per the original note, the
    earlier ``until_u5`` regression has flipped; this assertion stays so
    future signature drift (e.g. a positional promotion or a default
    change) trips loudly.
    """
    import inspect

    parameters = inspect.signature(_pz2.run_phase_z2_mvp1).parameters
    assert "reuse_from" in parameters, (
        "u5 must thread reuse_from into run_phase_z2_mvp1 — kwarg missing. "
        f"current params: {list(parameters)}"
    )

    reuse_param = parameters["reuse_from"]
    assert reuse_param.kind is inspect.Parameter.KEYWORD_ONLY, (
        f"reuse_from must be keyword-only (after the ``*`` barrier); "
        f"got kind={reuse_param.kind}"
    )
    assert reuse_param.default is None, (
        f"reuse_from must default to None to preserve pre-u5 behaviour; "
        f"got default={reuse_param.default!r}"
    )
|
||||
261
tests/test_phase_z2_reuse_from_equivalence_sweep.py
Normal file
261
tests/test_phase_z2_reuse_from_equivalence_sweep.py
Normal file
@@ -0,0 +1,261 @@
|
||||
"""IMP-43 (#72) u7b — Opt-in sweep equivalence test for full rerun vs
|
||||
``--reuse-from`` across 3 layouts × 3 mdx samples × per-baseline frame pins.
|
||||
|
||||
u7b scope (per the Stage 2 Exit Report):
|
||||
|
||||
* Three mdx samples — ``01.mdx``, ``02.mdx``, ``03.mdx`` (the baseline
|
||||
full run for each must exit 0 to give step13 equivalence something
|
||||
to compare; ``04.mdx`` / ``05.mdx`` are deliberately excluded per
|
||||
the u7a docstring — adapter_needed / EMPTY_SHELL_NO_CONTENT).
|
||||
* Three ``--override-layout`` axes — ``None`` (auto), ``horizontal-2``,
|
||||
``vertical-2``. ``None`` exercises the natural layout for that mdx;
|
||||
the explicit pins exercise the layout-locked branch (Step 7-B
|
||||
``select_layout_preset`` honors ``--override-layout`` per
|
||||
``src/phase_z2_pipeline.py:5210``). The reuse path (C) inherits the
|
||||
locked layout via the Step 6 snapshot ``layout_preset_pre_override``
|
||||
(u2) — it MUST NOT pass ``--override-layout`` itself (u1 fail-closed
|
||||
guard at ``src/phase_z2_pipeline.py:8181-8199`` rejects layout
|
||||
overrides combined with ``--reuse-from``).
|
||||
* "All 32 frames" coverage axis — each test case discovers ALL pinnable
|
||||
``(unit_id, frame_template_id)`` pairs from its baseline ``step06_
|
||||
composition_plan.json`` and uses every pin in (B) and (C). Union of
|
||||
pins across the 9 (mdx, layout) cases approximates the V4 catalog
|
||||
coverage; pure Cartesian 3×3×32 = 288 parametrize combos × 3
|
||||
subprocess runs = 864 pipeline runs is impractical even opt-in.
|
||||
|
||||
Three subprocess pipeline runs per case (same shape as u7a):
|
||||
(A) baseline full run — no frame overrides — reuse seed.
|
||||
(B) full rerun with the discovered frame overrides — independent
|
||||
control path that does NOT touch ``--reuse-from``.
|
||||
(C) ``--reuse-from <seed_id>`` with the same frame overrides — the
|
||||
reuse path.
|
||||
|
||||
Assert: ``step13_render.json`` from (B) and (C) is byte-equal modulo the
|
||||
Stage 2 whitelist (only ``run_id`` substring inside
|
||||
``data.final_html_path`` is normalized — see u7a docstring for the full
|
||||
whitelist rationale).
|
||||
|
||||
Opt-in:
|
||||
* ``@pytest.mark.sweep`` — marker registered in ``pyproject.toml``.
|
||||
Default CI must run ``pytest -m 'not sweep'``; explicit opt-in is
|
||||
``pytest -m sweep tests/test_phase_z2_reuse_from_equivalence_sweep.py``.
|
||||
* If an mdx / layout combo's baseline (A) returns non-zero (e.g., a
|
||||
layout pin incompatible with the mdx's natural unit_count produces
|
||||
a pipeline error), the case is skipped — u7b is a reuse-equivalence
|
||||
test, not a baseline-correctness test (those live elsewhere).
|
||||
|
||||
Persisted ``data/user_overrides/<stem>.json`` isolation:
|
||||
IMP-52 (#80) u2 introduced an MDX-keyed persistence fallback at
|
||||
``src/phase_z2_pipeline.py:8075-8168`` that merges the on-disk file
|
||||
into the subprocess overrides regardless of CLI flags. For mdx stems
|
||||
whose persistence file carries non-frame axes (e.g.,
|
||||
``data/user_overrides/03.json`` holds ``layout`` + ``zone_geometries``),
|
||||
two orthogonality problems break u7b:
|
||||
|
||||
1. (A) and (B) absorb the persisted ``layout`` / ``zone_geometries``
|
||||
independent of the ``layout_pin`` parameter, collapsing the test
|
||||
matrix — the parametrized layout axis stops being a real axis.
|
||||
2. (C) on the reuse path receives the persisted non-frame axes via
|
||||
the same merge, which the u1 fail-closed guard at
|
||||
``src/phase_z2_pipeline.py:8181-8199`` rejects with exit code 2
|
||||
before step13 equivalence can be measured.
|
||||
|
||||
The ``_isolated_persisted_overrides`` context manager renames the
|
||||
persistence file out of the way for the duration of each parametrized
|
||||
case (try/finally restore; crash-resistant via a startup recovery
|
||||
branch). The hidden backup filename starts with ``.`` so
|
||||
``user_overrides_io.validate_key`` (``src/user_overrides_io.py:72``)
|
||||
cannot accidentally re-load it mid-run. The pipeline subprocess does
|
||||
not write the persistence file (writes are gated to the Vite
|
||||
``/api/user-overrides`` endpoint), so the rename is safe across the
|
||||
three subprocess spawns. The real-world reuse-from × persistence
|
||||
interaction (where ``--reuse-from`` should arguably suppress
|
||||
non-frame persistence injection rather than fail closed) is a
|
||||
follow-up issue candidate, surfaced in this unit's unit_executed
|
||||
Gitea comment.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import contextlib
|
||||
import json
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
import uuid
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from tests.test_phase_z2_reuse_from_equivalence_unit import (
|
||||
_assert_run_ok,
|
||||
_frame_override_args,
|
||||
_normalize_step13,
|
||||
_read_step_artifact,
|
||||
_spawn_pipeline,
|
||||
)
|
||||
|
||||
REPO_ROOT = Path(__file__).resolve().parents[1]
|
||||
SAMPLES_DIR = REPO_ROOT / "samples" / "mdx_batch"
|
||||
RUNS_DIR = REPO_ROOT / "data" / "runs"
|
||||
OVERRIDES_DIR = REPO_ROOT / "data" / "user_overrides"
|
||||
|
||||
MDX_FILES = ("01.mdx", "02.mdx", "03.mdx")
|
||||
LAYOUT_PINS = (None, "horizontal-2", "vertical-2")
|
||||
|
||||
|
||||
def _unique(prefix: str) -> str:
|
||||
return f"{prefix}_imp43_u7b_{uuid.uuid4().hex[:8]}"
|
||||
|
||||
|
||||
@contextlib.contextmanager
|
||||
def _isolated_persisted_overrides(mdx_name: str):
|
||||
"""Temporarily rename ``data/user_overrides/<stem>.json`` so the
|
||||
three subprocess runs see a clean persistence state.
|
||||
|
||||
Rationale: see module docstring "Persisted ... isolation" section.
|
||||
The pipeline reads the file at
|
||||
``src/phase_z2_pipeline.py:8098`` via ``load(key)`` which resolves
|
||||
to ``DEFAULT_OVERRIDES_ROOT`` (``src/user_overrides_io.py:54``);
|
||||
moving the file out of the way reduces ``load(key) -> {}`` and
|
||||
prevents the merge from injecting persisted axes.
|
||||
|
||||
Crash recovery: a prior run that crashed between rename and
|
||||
restore would leave ``.<stem>.imp43_u7b_isolation.bak`` next to
|
||||
the missing ``<stem>.json``. The recovery branch at startup
|
||||
restores the backup before proceeding so we never lose the
|
||||
original on a second invocation.
|
||||
"""
|
||||
stem = Path(mdx_name).stem
|
||||
src = OVERRIDES_DIR / f"{stem}.json"
|
||||
backup = OVERRIDES_DIR / f".{stem}.imp43_u7b_isolation.bak"
|
||||
if backup.is_file() and not src.is_file():
|
||||
os.replace(backup, src)
|
||||
moved = False
|
||||
if src.is_file():
|
||||
OVERRIDES_DIR.mkdir(parents=True, exist_ok=True)
|
||||
os.replace(src, backup)
|
||||
moved = True
|
||||
try:
|
||||
yield
|
||||
finally:
|
||||
if moved and backup.is_file():
|
||||
os.replace(backup, src)
|
||||
|
||||
|
||||
def _discover_all_frame_pins(seed_run_id: str) -> list[tuple[str, str]]:
|
||||
"""Discover ALL ``(unit_id, frame_template_id)`` pins from baseline plan.
|
||||
|
||||
Unlike u7a (capped at 2 for fast CI), u7b uses every pin so the sweep
|
||||
naturally exercises the union of frame templates produced across the
|
||||
9 (mdx, layout) cases — the practical realization of the Stage 2
|
||||
plan's "all 32 frames" axis (full Cartesian 3×3×32 would be 288×3 =
|
||||
864 pipeline runs; impractical even opt-in).
|
||||
|
||||
Schema source: ``src/phase_z2_pipeline.py:5530-5560`` — step06 artifact
|
||||
emits ``data.selected_units[*].{source_section_ids, frame_template_id}``;
|
||||
``unit_id = "+".join(source_section_ids)`` per the ``--override-frame``
|
||||
contract documented at ``src/phase_z2_pipeline.py:7827-7832``.
|
||||
"""
|
||||
step06 = _read_step_artifact(seed_run_id, "step06_composition_plan.json")
|
||||
selected_units = step06.get("data", {}).get("selected_units") or []
|
||||
pins: list[tuple[str, str]] = []
|
||||
for u in selected_units:
|
||||
sids = u.get("source_section_ids") or []
|
||||
tpl_id = u.get("frame_template_id")
|
||||
if not isinstance(sids, list) or not sids:
|
||||
continue
|
||||
if not isinstance(tpl_id, str) or not tpl_id:
|
||||
continue
|
||||
unit_id = "+".join(str(s) for s in sids)
|
||||
if unit_id:
|
||||
pins.append((unit_id, tpl_id))
|
||||
return pins
|
||||
|
||||
|
||||
@pytest.mark.sweep
@pytest.mark.parametrize("layout_pin", LAYOUT_PINS)
@pytest.mark.parametrize("mdx_name", MDX_FILES)
def test_full_rerun_vs_reuse_from_step13_equivalence_sweep(
    mdx_name: str, layout_pin: str | None
) -> None:
    """Stage 2 §u7b binding contract for one (mdx, layout) sweep case.

    Runs the pipeline three times — (A) baseline seed, (B) full rerun with
    the frame pins discovered from (A), (C) ``--reuse-from`` the seed with
    the same pins — and requires the normalized ``step13_render.json`` of
    (B) and (C) to be equal.

    Skip semantics: the case is skipped when the sample file is missing,
    when baseline (A) exits non-zero (e.g. a layout pin incompatible with
    the mdx), or when (A) exposes no pinnable units. Baseline correctness is
    not the axis under test here.
    """
    sample_path = SAMPLES_DIR / mdx_name
    if not sample_path.is_file():
        pytest.skip(f"sample missing: {sample_path}")

    layout_args: list[str] = []
    if layout_pin is not None:
        layout_args = ["--override-layout", layout_pin]

    # Hide any persisted data/user_overrides/<stem>.json for this mdx while
    # the three subprocesses run; see the module docstring's
    # "Persisted ... isolation" section for why.
    with _isolated_persisted_overrides(mdx_name):
        # (A) Baseline full run without frame overrides; acts as reuse seed.
        seed_id = _unique("seed")
        baseline = _spawn_pipeline([str(sample_path), seed_id] + layout_args)
        if baseline.returncode != 0:
            pytest.skip(
                f"baseline (A) non-zero for mdx={mdx_name} layout={layout_pin} "
                f"(returncode={baseline.returncode}); not a reuse-equivalence axis. "
                f"stderr tail: {baseline.stderr[-400:]}"
            )

        discovered = _discover_all_frame_pins(seed_id)
        if not discovered:
            pytest.skip(
                f"no pinnable (unit_id, frame_template_id) pairs in baseline "
                f"step06 for mdx={mdx_name} layout={layout_pin}; nothing to "
                f"exercise on the override-frame surface"
            )
        frame_args = _frame_override_args(discovered)

        # (B) Independent control: full rerun with the discovered pins.
        full_id = _unique("full")
        control = _spawn_pipeline(
            [str(sample_path), full_id] + layout_args + frame_args
        )
        _assert_run_ok(
            f"full rerun (B) mdx={mdx_name} layout={layout_pin} pins={len(discovered)}",
            control,
        )

        # (C) Reuse path. --override-layout is intentionally NOT passed: the
        # u1 fail-closed guard rejects layout overrides together with
        # --reuse-from, and the layout comes from the Step 6 snapshot
        # (u2 layout_preset_pre_override) instead.
        reuse_id = _unique("reuse")
        reuse_cmd = [str(sample_path), reuse_id, "--reuse-from", seed_id]
        reuse_cmd.extend(frame_args)
        reused = _spawn_pipeline(reuse_cmd)
        _assert_run_ok(
            f"reuse rerun (C) mdx={mdx_name} layout={layout_pin} pins={len(discovered)}",
            reused,
        )

        # Step 13 equivalence: normalize both payloads, then compare.
        full_step13 = _read_step_artifact(full_id, "step13_render.json")
        reuse_step13 = _read_step_artifact(reuse_id, "step13_render.json")
        full_norm = _normalize_step13(full_step13, full_id)
        reuse_norm = _normalize_step13(reuse_step13, reuse_id)

        assert full_norm == reuse_norm, (
            f"step13_render.json equivalence violated for IMP-43 #72 u7b "
            f"(mdx={mdx_name}, layout={layout_pin}, full={full_id}, "
            f"reuse={reuse_id}, seed={seed_id}, pins={discovered}):\n"
            f"--- full (normalized) ---\n"
            f"{json.dumps(full_norm, ensure_ascii=False, indent=2)}\n"
            f"--- reuse (normalized) ---\n"
            f"{json.dumps(reuse_norm, ensure_ascii=False, indent=2)}"
        )
|
||||
204
tests/test_phase_z2_reuse_from_equivalence_unit.py
Normal file
204
tests/test_phase_z2_reuse_from_equivalence_unit.py
Normal file
@@ -0,0 +1,204 @@
|
||||
"""IMP-43 (#72) u7a — Fast CI equivalence test for full rerun vs ``--reuse-from``.
|
||||
|
||||
u7a scope (per the Stage 2 Exit Report):
|
||||
|
||||
* One mdx (``samples/mdx_batch/02.mdx``), one layout (auto), two
|
||||
``--override-frame`` pins self-discovered from the baseline's
|
||||
``step06_composition_plan.json`` (each pin re-states the unit's
|
||||
own ``frame_template_id`` — semantically a no-op, but it
|
||||
exercises the full ``--override-frame`` CLI surface through both
|
||||
paths, satisfying the "two frames" axis of the Stage 2 plan).
|
||||
* Three subprocess pipeline runs:
|
||||
(A) baseline full run — no overrides — reuse seed
|
||||
(B) full rerun with the two ``--override-frame`` pins — the
|
||||
independent control path that does NOT touch ``--reuse-from``
|
||||
(C) ``--reuse-from <seed_id>`` with the same two
|
||||
``--override-frame`` pins — the reuse path
|
||||
* Assert: ``step13_render.json`` from (B) and (C) is byte-equal modulo
|
||||
the Stage 2 whitelist — only ``run_id`` (as a substring of
|
||||
``data.final_html_path``), ``timestamps``, and ``prev_run_id`` may
|
||||
legitimately differ. ``step13_render.json`` has no timestamps and
|
||||
no ``prev_run_id`` field (the latter surfaces via the separate
|
||||
``_reuse_marker.json`` sidecar instead — out of scope for this
|
||||
step13 equivalence axis), so the only effective normalization
|
||||
target is the ``run_id`` substring inside ``data.final_html_path``.
|
||||
|
||||
Per Stage 2 plan: the sweep equivalence coverage (3 layouts × 3 mdx ×
|
||||
all 32 frames) lives in u7b under ``pytest.mark.sweep`` — u7a stays
|
||||
fast (3 pipeline runs on a single small mdx) so it can run in default
|
||||
CI without an opt-in marker.
|
||||
|
||||
Why mdx02:
|
||||
* ``test_pipeline_smoke_imp85.py::test_non_vp_smoke_runs_clean`` already
|
||||
pins mdx02 as a non-VP exit-0 path (the baseline (A) run must
|
||||
exit 0 for the equivalence axis to even have something to
|
||||
compare against).
|
||||
* mdx04 / mdx05 are deliberately excluded — mdx04 routes zones to
|
||||
``adapter_needed`` per IMP-#85 u1 and mdx05 exits 1 with
|
||||
``EMPTY_SHELL_NO_CONTENT`` per IMP-#87 u3, neither of which gives
|
||||
a stable step13 equivalence surface for a fast CI lock.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import subprocess
|
||||
import sys
|
||||
import uuid
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
REPO_ROOT = Path(__file__).resolve().parents[1]
|
||||
SAMPLES_DIR = REPO_ROOT / "samples" / "mdx_batch"
|
||||
RUNS_DIR = REPO_ROOT / "data" / "runs"
|
||||
MDX_FILENAME = "02.mdx"
|
||||
|
||||
|
||||
def _unique_run_id(prefix: str) -> str:
|
||||
return f"{prefix}_imp43_u7a_{uuid.uuid4().hex[:8]}"
|
||||
|
||||
|
||||
def _spawn_pipeline(extra_args: list[str], timeout: int = 600) -> subprocess.CompletedProcess:
    """Run ``python -m src.phase_z2_pipeline <extra_args>`` in a subprocess.

    The child uses the current interpreter, runs with ``REPO_ROOT`` as its
    working directory, and has stdout/stderr captured as text.

    Args:
        extra_args: CLI arguments appended after the module name.
        timeout: Seconds passed to ``subprocess.run`` as its timeout.

    Returns:
        The ``CompletedProcess``; the return code is not checked here.
    """
    command = [sys.executable, "-m", "src.phase_z2_pipeline"]
    command.extend(extra_args)
    return subprocess.run(
        command,
        capture_output=True,
        text=True,
        timeout=timeout,
        cwd=str(REPO_ROOT),
    )
|
||||
|
||||
|
||||
def _assert_run_ok(label: str, cp: subprocess.CompletedProcess) -> None:
|
||||
assert cp.returncode == 0, (
|
||||
f"{label} pipeline returncode={cp.returncode}\n"
|
||||
f"--- stderr tail ---\n{cp.stderr[-2000:]}\n"
|
||||
f"--- stdout tail ---\n{cp.stdout[-2000:]}"
|
||||
)
|
||||
|
||||
|
||||
def _read_step_artifact(run_id: str, fname: str) -> dict:
|
||||
p = RUNS_DIR / run_id / "phase_z2" / "steps" / fname
|
||||
assert p.is_file(), f"missing artifact: {p}"
|
||||
return json.loads(p.read_text(encoding="utf-8"))
|
||||
|
||||
|
||||
def _discover_two_frame_pins(seed_run_id: str) -> list[tuple[str, str]]:
    """Pick two ``(unit_id, frame_template_id)`` pins from the seed run.

    Reads the seed run's ``step06_composition_plan.json`` and walks
    ``data.selected_units``. For each entry with a non-empty list of
    ``source_section_ids`` and a non-empty string ``frame_template_id``,
    the ``unit_id`` is the section ids joined with ``"+"``. Collection
    stops as soon as two pins are found.

    The pins are meant to be fed back through ``--override-frame`` so both
    the full rerun and the reuse rerun exercise that CLI surface with each
    unit's own template (intended to be semantically a no-op, per the
    u7a test plan).

    Raises:
        AssertionError: if the artifact is missing or fewer than two
            pinnable units are available.
    """
    plan = _read_step_artifact(seed_run_id, "step06_composition_plan.json")
    units = plan.get("data", {}).get("selected_units") or []

    pins: list[tuple[str, str]] = []
    for unit in units:
        section_ids = unit.get("source_section_ids") or []
        template_id = unit.get("frame_template_id")
        has_sections = isinstance(section_ids, list) and bool(section_ids)
        has_template = isinstance(template_id, str) and bool(template_id)
        if not (has_sections and has_template):
            continue
        unit_id = "+".join(map(str, section_ids))
        if unit_id:
            pins.append((unit_id, template_id))
            if len(pins) >= 2:
                break

    assert len(pins) >= 2, (
        f"baseline {seed_run_id} step06_composition_plan.json must expose "
        f">= 2 (unit_id, frame_template_id) pairs for the u7a two-frames "
        f"axis; got {pins}"
    )
    return pins
|
||||
|
||||
|
||||
def _frame_override_args(pins: list[tuple[str, str]]) -> list[str]:
|
||||
out: list[str] = []
|
||||
for unit_id, tpl_id in pins:
|
||||
out.extend(["--override-frame", f"{unit_id}={tpl_id}"])
|
||||
return out
|
||||
|
||||
|
||||
def _normalize_step13(payload: dict, run_id: str) -> dict:
|
||||
"""Apply the Stage 2 equivalence whitelist to step13_render.json.
|
||||
|
||||
Whitelist axes (Stage 2 plan §u7a):
|
||||
* ``run_id`` — appears only as a substring of
|
||||
``data.final_html_path`` in the step13 schema
|
||||
(``src/phase_z2_pipeline.py:7174-7192``).
|
||||
* ``timestamps`` — ``_write_step_artifact``
|
||||
(``src/phase_z2_pipeline.py:3826``) does not
|
||||
stamp a timestamp on the payload, so no
|
||||
normalization is needed for this axis.
|
||||
* ``prev_run_id`` — surfaces via ``_reuse_marker.json`` (separate
|
||||
sidecar), NOT via step13_render.json. No
|
||||
normalization needed on the step13 surface.
|
||||
|
||||
Returns a deep copy of ``payload`` with the ``run_id`` substring of
|
||||
``data.final_html_path`` replaced by the sentinel ``<RUN_ID>`` so
|
||||
the (B) and (C) step13 payloads can be compared byte-for-byte.
|
||||
"""
|
||||
normalized = json.loads(json.dumps(payload, ensure_ascii=False))
|
||||
data = normalized.get("data")
|
||||
if isinstance(data, dict):
|
||||
fhp = data.get("final_html_path")
|
||||
if isinstance(fhp, str) and run_id in fhp:
|
||||
data["final_html_path"] = fhp.replace(run_id, "<RUN_ID>")
|
||||
return normalized
|
||||
|
||||
|
||||
def test_full_rerun_vs_reuse_from_step13_equivalence_one_mdx_two_frames() -> None:
    """u7a contract: full rerun and ``--reuse-from`` rerun render the same.

    Three pipeline runs are spawned against one MDX sample:

    * (A) a baseline run with no overrides, used as the reuse seed;
    * (B) a full rerun with two ``--override-frame`` pins;
    * (C) a ``--reuse-from <seed>`` rerun with the same two pins.

    The ``step13_render.json`` payloads of (B) and (C) must be equal once
    the run id has been masked by ``_normalize_step13``.
    """
    sample = SAMPLES_DIR / MDX_FILENAME
    assert sample.is_file(), f"sample missing: {sample}"
    sample_arg = str(sample)

    # (A) baseline full run, no overrides: this is the reuse seed.
    seed_id = _unique_run_id("seed")
    _assert_run_ok("baseline (A)", _spawn_pipeline([sample_arg, seed_id]))

    # Pins are discovered from the seed's own composition plan.
    pins = _discover_two_frame_pins(seed_id)
    override_args = _frame_override_args(pins)

    # (B) independent control: full rerun with the two frame overrides.
    full_id = _unique_run_id("full")
    _assert_run_ok(
        "full rerun (B)",
        _spawn_pipeline([sample_arg, full_id] + override_args),
    )

    # (C) reuse path: same overrides, resumed from the seed run.
    reuse_id = _unique_run_id("reuse")
    reuse_cmd = [sample_arg, reuse_id, "--reuse-from", seed_id]
    reuse_cmd += override_args
    _assert_run_ok("reuse rerun (C)", _spawn_pipeline(reuse_cmd))

    # Read both artifacts first, then apply the whitelist normalization.
    rendered = {
        rid: _read_step_artifact(rid, "step13_render.json")
        for rid in (full_id, reuse_id)
    }
    full_norm = _normalize_step13(rendered[full_id], full_id)
    reuse_norm = _normalize_step13(rendered[reuse_id], reuse_id)

    assert full_norm == reuse_norm, (
        "step13_render.json equivalence violated for IMP-43 #72 u7a "
        f"(full={full_id}, reuse={reuse_id}, seed={seed_id}, pins={pins}):\n"
        f"--- full (normalized) ---\n"
        f"{json.dumps(full_norm, ensure_ascii=False, indent=2)}\n"
        f"--- reuse (normalized) ---\n"
        f"{json.dumps(reuse_norm, ensure_ascii=False, indent=2)}"
    )
|
||||
748
tests/test_phase_z2_reuse_from_fail_closed.py
Normal file
748
tests/test_phase_z2_reuse_from_fail_closed.py
Normal file
@@ -0,0 +1,748 @@
|
||||
"""IMP-43 (#72) u4b — fail-closed wrapper tests for ``--reuse-from``.
|
||||
|
||||
u4b scope (per the Stage 2 Exit Report):
|
||||
|
||||
- Translate the u4 raise surface (``FileNotFoundError`` /
|
||||
``SnapshotValidationError`` / ``json.JSONDecodeError`` / ``OSError``)
|
||||
into the CLI fail-closed contract: stderr message + ``sys.exit(2)``.
|
||||
- Add the ``prev_run_dir == new_run_dir`` accidental-write guard BEFORE
|
||||
any copy attempt (prev_run_dir must stay read-only).
|
||||
- Add the missing-prev-run-dir surface (clean axis, not raw stack).
|
||||
- Surface ``mdx_sha256 mismatch`` as its OWN axis (distinct from
|
||||
generic snapshot validation failures).
|
||||
|
||||
The signature threading + the in-``run_phase_z2_mvp1`` branch that
|
||||
invokes the wrapper land in u5. u4b adds the wrapper function only.
|
||||
|
||||
Tested surface (``src/phase_z2_pipeline.py``):
|
||||
* ``execute_reuse_from_or_fail_closed``
|
||||
* ``_abort_reuse_from``
|
||||
* ``_paths_equivalent``
|
||||
* ``REUSE_FAIL_CLOSED_AXES`` (closed enum)
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
import pytest
|
||||
|
||||
import src.phase_z2_pipeline as _pz2
|
||||
from src.phase_z2_reuse_snapshot import (
|
||||
SNAPSHOT_FILENAME,
|
||||
SNAPSHOT_VERSION,
|
||||
build_snapshot,
|
||||
)
|
||||
|
||||
|
||||
# -- synthetic snapshot inputs (mirror u4 test fixture) ------------------
|
||||
|
||||
|
||||
@dataclass
class _Section:
    """Synthetic, duck-typed stand-in for a section passed to ``build_snapshot``.

    Only used to build test snapshots; it carries plain data and no behaviour.
    """

    section_id: str
    section_num: int
    title: str
    raw_content: str
    heading_number: Optional[str] = None
    # Mutable defaults go through default_factory so instances never share a list.
    v4_alias_keys: list = field(default_factory=list)
    sub_sections: list = field(default_factory=list)
|
||||
|
||||
|
||||
@dataclass
class _V4Candidate:
    """Synthetic candidate record placed in a test unit's ``v4_candidates`` list."""

    template_id: str
    frame_id: str
    frame_number: int
    confidence: float
    label: str
|
||||
|
||||
|
||||
@dataclass
class _Unit:
    """Synthetic, duck-typed stand-in for a composition unit passed to ``build_snapshot``.

    The first eleven fields are required; the rest have defaults so a test
    only needs to spell out the values it cares about.
    """

    source_section_ids: list
    merge_type: str
    frame_template_id: str
    frame_id: str
    frame_number: int
    confidence: float
    label: str
    phase_z_status: str
    raw_content: str
    title: str
    score: float
    v4_rank: Optional[int] = 1
    selection_path: str = "rank_1"
    fallback_reason: Optional[str] = None
    # Mutable defaults go through default_factory so instances never share state.
    rationale: dict = field(default_factory=dict)
    auto_selectable: bool = True
    filter_reasons: list = field(default_factory=list)
    notes: list = field(default_factory=list)
    v4_candidates: list = field(default_factory=list)
    provisional: bool = False
|
||||
|
||||
|
||||
def _mdx_text() -> str:
|
||||
return "# Slide\n\n## 03-1 DX status\n\n- bullet one\n- bullet two\n"
|
||||
|
||||
|
||||
def _build_canonical_snapshot(*, mdx_source_text: Optional[str] = None) -> dict:
    """Build a minimal snapshot via ``build_snapshot``.

    The snapshot holds one section, one unit and one candidate, all with
    fixed values. Its ``mdx_sha256`` is the SHA-256 hex digest of the UTF-8
    encoded source text.

    Args:
        mdx_source_text: Text to hash; ``None`` falls back to ``_mdx_text()``.
    """
    source_text = _mdx_text() if mdx_source_text is None else mdx_source_text
    bullet_body = "- bullet one\n- bullet two"

    candidate = _V4Candidate(
        template_id="tpl_a",
        frame_id="fid_a",
        frame_number=13,
        confidence=0.91,
        label="use_as_is",
    )
    only_section = _Section(
        section_id="03-1",
        section_num=1,
        title="DX status",
        raw_content=bullet_body,
        heading_number="3.1",
        v4_alias_keys=["03-1.1"],
    )
    only_unit = _Unit(
        source_section_ids=["03-1"],
        merge_type="single",
        frame_template_id="tpl_a",
        frame_id="fid_a",
        frame_number=13,
        confidence=0.91,
        label="use_as_is",
        phase_z_status="auto_renderable",
        raw_content=bullet_body,
        title="DX status",
        score=0.91,
        v4_candidates=[candidate],
    )

    digest = hashlib.sha256(source_text.encode("utf-8")).hexdigest()
    return build_snapshot(
        mdx_sha256=digest,
        slide_title="Slide",
        slide_footer=None,
        sections=[only_section],
        stage0_adapter_diagnostics={"used": True, "fallback_reason": None},
        stage0_normalized_assets={"popups": [], "images": [], "tables": []},
        v4_evidence=[],
        layout_preset_pre_override="single",
        units=[only_unit],
        comp_debug={},
        v4_fallback_traces={},
        ai_preflight={"enabled": False, "skipped": True},
    )
|
||||
|
||||
|
||||
def _seed_prev_run_dir(prev_run_dir: Path, *, snapshot: dict) -> None:
    """Populate ``prev_run_dir`` with placeholder artifacts and the snapshot.

    One file per name in ``_pz2._REUSE_STEP_ARTIFACTS`` is written under
    ``steps/``: ``.json`` names get a tiny ``{"name": "<fname>"}`` body,
    anything else gets a fixed raw-text body. The snapshot is then written
    next to ``steps/`` as indented UTF-8 JSON under ``SNAPSHOT_FILENAME``.
    """
    steps_dir = prev_run_dir / "steps"
    steps_dir.mkdir(parents=True, exist_ok=True)

    for artifact_name in _pz2._REUSE_STEP_ARTIFACTS:
        if artifact_name.endswith(".json"):
            body = f'{{"name": "{artifact_name}"}}'
        else:
            body = "raw mdx body bytes"
        (steps_dir / artifact_name).write_text(body, encoding="utf-8")

    snapshot_json = json.dumps(snapshot, ensure_ascii=False, indent=2)
    (prev_run_dir / SNAPSHOT_FILENAME).write_text(snapshot_json, encoding="utf-8")
|
||||
|
||||
|
||||
# -- REUSE_FAIL_CLOSED_AXES vocab lock ------------------------------------
|
||||
|
||||
|
||||
def test_fail_closed_axes_is_closed_enum():
    """Lock the fail-closed vocabulary to exactly these nine axes.

    Adding or removing an axis in the pipeline without updating this set
    (and the per-axis coverage in this file) breaks the lock. The two
    ``*_os_error`` axes cover ``OSError`` cases other than
    ``FileNotFoundError``.
    """
    expected_axes = frozenset(
        (
            "prev_run_dir_missing",
            "prev_run_dir_equals_new_run_dir",
            "reuse_artifact_missing",
            "reuse_copy_os_error",
            "snapshot_missing_after_copy",
            "snapshot_corrupt_json",
            "snapshot_read_os_error",
            "mdx_sha256_mismatch",
            "snapshot_validation_failed",
        )
    )
    assert _pz2.REUSE_FAIL_CLOSED_AXES == expected_axes
|
||||
|
||||
|
||||
# -- _abort_reuse_from -----------------------------------------------------
|
||||
|
||||
|
||||
def test_abort_reuse_from_exits_with_code_two(capsys):
    """``_abort_reuse_from`` must terminate via ``SystemExit`` with code 2."""
    abort_kwargs = {
        "axis": "prev_run_dir_missing",
        "value": "never_existed",
        "path": "D:/nope",
        "upstream": "--reuse-from CLI argument",
    }
    with pytest.raises(SystemExit) as exit_info:
        _pz2._abort_reuse_from(**abort_kwargs)
    assert exit_info.value.code == 2
|
||||
|
||||
|
||||
def test_abort_reuse_from_stderr_contains_value_path_upstream(capsys):
    """The abort message on stderr names the axis, the three field labels
    and the concrete value / path / upstream that were passed in."""
    abort_kwargs = {
        "axis": "prev_run_dir_missing",
        "value": "never_existed",
        "path": "D:/nope",
        "upstream": "--reuse-from CLI argument",
    }
    with pytest.raises(SystemExit):
        _pz2._abort_reuse_from(**abort_kwargs)

    err = capsys.readouterr().err
    expected_fragments = (
        "prev_run_dir_missing",
        "value:",
        "path:",
        "upstream:",
        "never_existed",
        "D:/nope",
        "--reuse-from CLI argument",
    )
    for fragment in expected_fragments:
        assert fragment in err
|
||||
|
||||
|
||||
def test_abort_reuse_from_includes_reason_when_exc_passed(capsys):
    """Passing ``exc`` adds a ``reason:`` line with the exception's type
    name and message, so operators can tell failure causes apart."""
    try:
        raise ValueError("schema_version mismatch: expected 1, got 99")
    except ValueError as underlying:
        # Call while the exception is still being handled, as a real caller would.
        with pytest.raises(SystemExit):
            _pz2._abort_reuse_from(
                axis="snapshot_validation_failed",
                value=str(underlying),
                path="D:/some/path",
                upstream="validate_snapshot",
                exc=underlying,
            )

    err = capsys.readouterr().err
    for fragment in ("reason:", "ValueError", "schema_version mismatch"):
        assert fragment in err
|
||||
|
||||
|
||||
def test_abort_reuse_from_rejects_unknown_axis():
    """An axis outside the closed vocabulary is a programmer error and must
    raise ``AssertionError`` rather than emit a malformed stderr line."""
    bogus_call = {
        "axis": "totally_made_up_axis",
        "value": "x",
        "path": "y",
        "upstream": "z",
    }
    with pytest.raises(AssertionError):
        _pz2._abort_reuse_from(**bogus_call)
|
||||
|
||||
|
||||
# -- _paths_equivalent -----------------------------------------------------
|
||||
|
||||
|
||||
def test_paths_equivalent_same_path_returns_true(tmp_path: Path):
    """An existing directory is equivalent to itself."""
    target = tmp_path.joinpath("x", "y")
    target.mkdir(parents=True)
    outcome = _pz2._paths_equivalent(target, target)
    assert outcome is True
|
||||
|
||||
|
||||
def test_paths_equivalent_different_paths_returns_false(tmp_path: Path):
    """Two distinct existing sibling directories are not equivalent."""
    first, second = tmp_path / "alpha", tmp_path / "beta"
    for directory in (first, second):
        directory.mkdir()
    outcome = _pz2._paths_equivalent(first, second)
    assert outcome is False
|
||||
|
||||
|
||||
def test_paths_equivalent_handles_nonexistent_paths(tmp_path: Path):
    """Equivalence must hold for paths that do not exist yet.

    ``new_run_dir`` is compared before it is created, so the helper cannot
    rely on the filesystem entry being present. Two spellings are checked:

    * the identical, not-yet-created path written twice;
    * the same location reached through a ``..`` detour, which is the
      normalization this test's contract is about
      (``Path.resolve(strict=False)`` collapses ``..`` even when the leaf
      is missing).
    """
    plain = tmp_path / "new_run" / "phase_z2"
    same_spelling = tmp_path / "new_run" / "phase_z2"
    assert _pz2._paths_equivalent(plain, same_spelling) is True

    # pathlib does not collapse ".." on construction, so this is a genuinely
    # different spelling of the same (non-existent) location.
    detour = tmp_path / "new_run" / "scratch" / ".." / "phase_z2"
    assert _pz2._paths_equivalent(plain, detour) is True
|
||||
|
||||
|
||||
# -- execute_reuse_from_or_fail_closed: happy path -----------------------
|
||||
|
||||
|
||||
def test_happy_path_returns_prev_run_dir_copied_snapshot(tmp_path: Path, monkeypatch):
    """With a well-formed seeded previous run and matching MDX text, the
    wrapper returns ``(prev_run_dir, copied, snapshot)`` without aborting."""
    mdx_body = _mdx_text()
    runs_root = tmp_path / "runs"
    prev_id = "prev_run_id_001"
    source_dir = runs_root / prev_id / "phase_z2"

    _seed_prev_run_dir(
        source_dir,
        snapshot=_build_canonical_snapshot(mdx_source_text=mdx_body),
    )
    # Point the pipeline at the temporary runs root for the lookup.
    monkeypatch.setattr(_pz2, "RUNS_DIR", runs_root)

    returned_dir, copied, loaded = _pz2.execute_reuse_from_or_fail_closed(
        reuse_from=prev_id,
        new_run_dir=tmp_path / "new" / "phase_z2",
        mdx_source_text=mdx_body,
    )

    assert returned_dir == source_dir
    assert SNAPSHOT_FILENAME in copied
    assert loaded["schema_version"] == SNAPSHOT_VERSION
    # The provenance wrapper (value / source_path / upstream_step) survives loading.
    assert loaded["slide_title"]["value"] == "Slide"
|
||||
|
||||
|
||||
# -- prev_run_dir_missing axis --------------------------------------------
|
||||
|
||||
|
||||
def test_prev_run_dir_missing_aborts(tmp_path: Path, monkeypatch, capsys):
    """A ``reuse_from`` id with no directory under the runs root aborts with
    exit code 2 on the ``prev_run_dir_missing`` axis and echoes the id."""
    empty_root = tmp_path / "runs"
    empty_root.mkdir()
    monkeypatch.setattr(_pz2, "RUNS_DIR", empty_root)

    call_kwargs = {
        "reuse_from": "does_not_exist_anywhere",
        "new_run_dir": tmp_path / "new" / "phase_z2",
        "mdx_source_text": _mdx_text(),
    }
    with pytest.raises(SystemExit) as exit_info:
        _pz2.execute_reuse_from_or_fail_closed(**call_kwargs)

    assert exit_info.value.code == 2
    err = capsys.readouterr().err
    assert "prev_run_dir_missing" in err
    assert "does_not_exist_anywhere" in err
|
||||
|
||||
|
||||
# -- prev_run_dir_equals_new_run_dir axis ---------------------------------
|
||||
|
||||
|
||||
def test_prev_run_dir_equals_new_run_dir_aborts(tmp_path: Path, monkeypatch, capsys):
    """If the new run dir is the previous run's own ``phase_z2`` dir, the
    wrapper must abort (exit code 2, ``prev_run_dir_equals_new_run_dir``)
    instead of copying a directory onto itself."""
    runs_root = tmp_path / "runs"
    shared_id = "shared_run_id"
    shared_dir = runs_root / shared_id / "phase_z2"
    _seed_prev_run_dir(shared_dir, snapshot=_build_canonical_snapshot())
    monkeypatch.setattr(_pz2, "RUNS_DIR", runs_root)

    with pytest.raises(SystemExit) as exit_info:
        _pz2.execute_reuse_from_or_fail_closed(
            reuse_from=shared_id,
            # Deliberate collision: destination is the source directory.
            new_run_dir=shared_dir,
            mdx_source_text=_mdx_text(),
        )

    assert exit_info.value.code == 2
    err = capsys.readouterr().err
    assert "prev_run_dir_equals_new_run_dir" in err
|
||||
|
||||
|
||||
def test_prev_run_dir_equals_new_run_dir_does_not_mutate_prev(tmp_path: Path, monkeypatch):
    """Read-only guarantee: when source and destination collide, the abort
    leaves the seeded previous-run artifact bytes unchanged."""
    runs_root = tmp_path / "runs"
    shared_id = "shared_run_id"
    shared_dir = runs_root / shared_id / "phase_z2"
    _seed_prev_run_dir(shared_dir, snapshot=_build_canonical_snapshot())
    monkeypatch.setattr(_pz2, "RUNS_DIR", runs_root)

    probe = shared_dir / "steps" / "step02_normalized.json"
    seeded_bytes = '{"name": "step02_normalized.json"}'
    # Precondition: the probe file holds exactly what the seeding helper wrote.
    assert probe.read_text(encoding="utf-8") == seeded_bytes

    with pytest.raises(SystemExit):
        _pz2.execute_reuse_from_or_fail_closed(
            reuse_from=shared_id,
            new_run_dir=shared_dir,
            mdx_source_text=_mdx_text(),
        )

    # Postcondition: same content after the aborted call.
    assert probe.read_text(encoding="utf-8") == seeded_bytes
|
||||
|
||||
|
||||
# -- reuse_artifact_missing axis ------------------------------------------
|
||||
|
||||
|
||||
def test_reuse_artifact_missing_aborts(tmp_path: Path, monkeypatch, capsys):
    """Deleting one required step artifact from the previous run aborts with
    exit code 2 on ``reuse_artifact_missing``; stderr names the file and
    carries a ``reason:`` line mentioning ``FileNotFoundError``."""
    runs_root = tmp_path / "runs"
    prev_id = "prev_run_001"
    source_dir = runs_root / prev_id / "phase_z2"
    _seed_prev_run_dir(source_dir, snapshot=_build_canonical_snapshot())

    # Drop one required step file so the copy step cannot find it.
    source_dir.joinpath("steps", "step05_v4_evidence.json").unlink()
    monkeypatch.setattr(_pz2, "RUNS_DIR", runs_root)

    with pytest.raises(SystemExit) as exit_info:
        _pz2.execute_reuse_from_or_fail_closed(
            reuse_from=prev_id,
            new_run_dir=tmp_path / "new" / "phase_z2",
            mdx_source_text=_mdx_text(),
        )

    assert exit_info.value.code == 2
    err = capsys.readouterr().err
    for fragment in (
        "reuse_artifact_missing",
        "step05_v4_evidence.json",
        "reason:",
        "FileNotFoundError",
    ):
        assert fragment in err
|
||||
|
||||
|
||||
def test_reuse_artifact_missing_snapshot_sidecar(tmp_path: Path, monkeypatch, capsys):
    """A previous run without its snapshot sidecar aborts with exit code 2
    on ``reuse_artifact_missing`` and names the snapshot file on stderr."""
    runs_root = tmp_path / "runs"
    prev_id = "prev_run_002"
    source_dir = runs_root / prev_id / "phase_z2"
    _seed_prev_run_dir(source_dir, snapshot=_build_canonical_snapshot())

    # Step artifacts stay in place; only the snapshot sidecar is removed.
    source_dir.joinpath(SNAPSHOT_FILENAME).unlink()
    monkeypatch.setattr(_pz2, "RUNS_DIR", runs_root)

    with pytest.raises(SystemExit) as exit_info:
        _pz2.execute_reuse_from_or_fail_closed(
            reuse_from=prev_id,
            new_run_dir=tmp_path / "new" / "phase_z2",
            mdx_source_text=_mdx_text(),
        )

    assert exit_info.value.code == 2
    err = capsys.readouterr().err
    assert "reuse_artifact_missing" in err
    assert SNAPSHOT_FILENAME in err
|
||||
|
||||
|
||||
# -- snapshot_corrupt_json axis -------------------------------------------
|
||||
|
||||
|
||||
def test_snapshot_corrupt_json_aborts(tmp_path: Path, monkeypatch, capsys):
    """A snapshot file that is not valid JSON aborts with exit code 2 on
    ``snapshot_corrupt_json``; stderr names the file and ``JSONDecodeError``."""
    runs_root = tmp_path / "runs"
    prev_id = "prev_run_corrupt"
    source_dir = runs_root / prev_id / "phase_z2"
    _seed_prev_run_dir(source_dir, snapshot=_build_canonical_snapshot())

    # Replace the snapshot with malformed JSON. The file still exists, so the
    # copy is expected to succeed and the failure to come from parsing it.
    broken_snapshot = source_dir / SNAPSHOT_FILENAME
    broken_snapshot.write_text("{ not valid json", encoding="utf-8")
    monkeypatch.setattr(_pz2, "RUNS_DIR", runs_root)

    with pytest.raises(SystemExit) as exit_info:
        _pz2.execute_reuse_from_or_fail_closed(
            reuse_from=prev_id,
            new_run_dir=tmp_path / "new" / "phase_z2",
            mdx_source_text=_mdx_text(),
        )

    assert exit_info.value.code == 2
    err = capsys.readouterr().err
    assert "snapshot_corrupt_json" in err
    assert SNAPSHOT_FILENAME in err
    assert "JSONDecodeError" in err
|
||||
|
||||
|
||||
# -- mdx_sha256_mismatch axis (own surface) -------------------------------
|
||||
|
||||
|
||||
def test_mdx_sha256_mismatch_aborts_with_own_axis(tmp_path: Path, monkeypatch, capsys):
    """MDX text that differs from what the snapshot was built from aborts on
    the dedicated ``mdx_sha256_mismatch`` axis.

    The operator must be able to tell "wrong MDX for this previous run"
    apart from "snapshot file is broken", so the generic
    ``snapshot_validation_failed`` axis must not appear.
    """
    runs_root = tmp_path / "runs"
    prev_id = "prev_run_diff_mdx"
    source_dir = runs_root / prev_id / "phase_z2"

    seeded_text = "# Slide A\n"
    supplied_text = "# Slide B (different bytes)\n"
    _seed_prev_run_dir(
        source_dir,
        snapshot=_build_canonical_snapshot(mdx_source_text=seeded_text),
    )
    monkeypatch.setattr(_pz2, "RUNS_DIR", runs_root)

    with pytest.raises(SystemExit) as exit_info:
        _pz2.execute_reuse_from_or_fail_closed(
            reuse_from=prev_id,
            new_run_dir=tmp_path / "new" / "phase_z2",
            mdx_source_text=supplied_text,
        )

    assert exit_info.value.code == 2
    err = capsys.readouterr().err
    assert "mdx_sha256_mismatch" in err
    # The sha case has its own axis and must not be reported as the generic one.
    assert "snapshot_validation_failed" not in err
    # NOTE(review): "mdx_sha256" is a substring of the axis name asserted
    # above, so this check cannot fail on its own; tighten it once the exact
    # stderr wording is confirmed.
    assert "mdx_source_text" in err or "mdx_sha256" in err
|
||||
|
||||
|
||||
# -- snapshot_validation_failed axis --------------------------------------
|
||||
|
||||
|
||||
def test_snapshot_validation_failed_schema_version_aborts(tmp_path: Path, monkeypatch, capsys):
    """A snapshot with an unexpected ``schema_version`` aborts with exit
    code 2 on ``snapshot_validation_failed`` (not on the mdx-sha axis)."""
    runs_root = tmp_path / "runs"
    prev_id = "prev_run_schema_mismatch"
    source_dir = runs_root / prev_id / "phase_z2"
    mdx_body = _mdx_text()

    future_snapshot = _build_canonical_snapshot(mdx_source_text=mdx_body)
    # Bump the version past the supported one; the MDX hash still matches.
    future_snapshot["schema_version"] = SNAPSHOT_VERSION + 1
    _seed_prev_run_dir(source_dir, snapshot=future_snapshot)
    monkeypatch.setattr(_pz2, "RUNS_DIR", runs_root)

    with pytest.raises(SystemExit) as exit_info:
        _pz2.execute_reuse_from_or_fail_closed(
            reuse_from=prev_id,
            new_run_dir=tmp_path / "new" / "phase_z2",
            mdx_source_text=mdx_body,
        )

    assert exit_info.value.code == 2
    err = capsys.readouterr().err
    assert "snapshot_validation_failed" in err
    assert "schema_version" in err
    # Separate fingerprint: this is not an MDX hash problem.
    assert "mdx_sha256_mismatch" not in err
|
||||
|
||||
|
||||
def test_snapshot_validation_failed_missing_required_key_aborts(tmp_path: Path, monkeypatch, capsys):
    """A snapshot lacking the ``units`` key aborts with exit code 2 on
    ``snapshot_validation_failed`` and mentions the missing key."""
    runs_root = tmp_path / "runs"
    prev_id = "prev_run_missing_key"
    source_dir = runs_root / prev_id / "phase_z2"
    mdx_body = _mdx_text()

    incomplete_snapshot = _build_canonical_snapshot(mdx_source_text=mdx_body)
    del incomplete_snapshot["units"]
    _seed_prev_run_dir(source_dir, snapshot=incomplete_snapshot)
    monkeypatch.setattr(_pz2, "RUNS_DIR", runs_root)

    with pytest.raises(SystemExit) as exit_info:
        _pz2.execute_reuse_from_or_fail_closed(
            reuse_from=prev_id,
            new_run_dir=tmp_path / "new" / "phase_z2",
            mdx_source_text=mdx_body,
        )

    assert exit_info.value.code == 2
    err = capsys.readouterr().err
    assert "snapshot_validation_failed" in err
    assert "units" in err
|
||||
|
||||
|
||||
# -- reuse_copy_os_error axis (OSError != FileNotFoundError) -------------
|
||||
|
||||
|
||||
def test_copy_os_error_aborts_with_own_axis(tmp_path: Path, monkeypatch, capsys):
    """An ``OSError`` other than ``FileNotFoundError`` during the artifact
    copy must fail closed on the ``reuse_copy_os_error`` axis.

    ``shutil.copyfile`` (as seen through the pipeline module) is replaced
    with a stub raising ``PermissionError``. The wrapper must convert that
    into stderr diagnostics plus ``SystemExit(2)`` rather than letting the
    exception escape, and must not report it as a missing artifact.
    ``FileNotFoundError`` is itself an ``OSError`` subclass, so the two
    cases are only distinguishable if it is handled first.
    """
    runs_root = tmp_path / "runs"
    prev_id = "prev_run_perm_denied"
    source_dir = runs_root / prev_id / "phase_z2"
    _seed_prev_run_dir(source_dir, snapshot=_build_canonical_snapshot())
    monkeypatch.setattr(_pz2, "RUNS_DIR", runs_root)

    def _deny_copy(src, dst, *args, **kwargs):
        raise PermissionError(f"simulated permission denied: {dst}")

    monkeypatch.setattr(_pz2.shutil, "copyfile", _deny_copy)

    with pytest.raises(SystemExit) as exit_info:
        _pz2.execute_reuse_from_or_fail_closed(
            reuse_from=prev_id,
            new_run_dir=tmp_path / "new" / "phase_z2",
            mdx_source_text=_mdx_text(),
        )

    assert exit_info.value.code == 2
    err = capsys.readouterr().err
    for fragment in (
        "reuse_copy_os_error",
        "value:",
        "path:",
        "upstream:",
        "reason:",
        "PermissionError",
        "simulated permission denied",
    ):
        assert fragment in err
    # Must not be bucketed as the missing-artifact case.
    assert "reuse_artifact_missing" not in err
|
||||
|
||||
|
||||
def test_copy_filenotfounderror_still_uses_artifact_missing_axis(
    tmp_path: Path, monkeypatch, capsys
):
    """Handler-ordering guard for the copy step.

    ``FileNotFoundError`` is an ``OSError`` subclass. If the generic
    ``OSError`` handler were ever consulted first, a missing artifact would
    be reported as ``reuse_copy_os_error``. This test pins the dispatch to
    ``reuse_artifact_missing`` and, like the other axis tests in this file,
    also checks the fail-closed exit code.
    """
    runs_root = tmp_path / "runs"
    prev_run_id = "prev_run_fnf_ordering"
    prev_run_dir = runs_root / prev_run_id / "phase_z2"
    new_run_dir = tmp_path / "new" / "phase_z2"
    snap = _build_canonical_snapshot()
    _seed_prev_run_dir(prev_run_dir, snapshot=snap)
    # Remove one required step artifact so the copy hits a missing file.
    (prev_run_dir / "steps" / "step05_v4_evidence.json").unlink()
    monkeypatch.setattr(_pz2, "RUNS_DIR", runs_root)

    with pytest.raises(SystemExit) as ei:
        _pz2.execute_reuse_from_or_fail_closed(
            reuse_from=prev_run_id,
            new_run_dir=new_run_dir,
            mdx_source_text=_mdx_text(),
        )
    # Every fail-closed axis exits with code 2; check it here too.
    assert ei.value.code == 2
    err = capsys.readouterr().err
    assert "reuse_artifact_missing" in err
    assert "reuse_copy_os_error" not in err
|
||||
|
||||
|
||||
# -- snapshot_read_os_error axis (OSError != FileNotFoundError) ----------
|
||||
|
||||
|
||||
def test_snapshot_read_os_error_aborts_with_own_axis(tmp_path: Path, monkeypatch, capsys):
    """An ``OSError`` other than ``FileNotFoundError`` while loading the
    snapshot must fail closed on the ``snapshot_read_os_error`` axis.

    The pipeline's ``_load_and_validate_reuse_snapshot`` is replaced with a
    stub raising ``PermissionError``. The wrapper must convert that into
    stderr diagnostics plus ``SystemExit(2)``, and must not report it as a
    missing or corrupt snapshot.
    """
    runs_root = tmp_path / "runs"
    prev_id = "prev_run_snapshot_perm"
    source_dir = runs_root / prev_id / "phase_z2"
    _seed_prev_run_dir(source_dir, snapshot=_build_canonical_snapshot())
    monkeypatch.setattr(_pz2, "RUNS_DIR", runs_root)

    def _deny_read(*args, **kwargs):
        raise PermissionError("simulated read denied on snapshot")

    monkeypatch.setattr(_pz2, "_load_and_validate_reuse_snapshot", _deny_read)

    with pytest.raises(SystemExit) as exit_info:
        _pz2.execute_reuse_from_or_fail_closed(
            reuse_from=prev_id,
            new_run_dir=tmp_path / "new" / "phase_z2",
            mdx_source_text=_mdx_text(),
        )

    assert exit_info.value.code == 2
    err = capsys.readouterr().err
    for fragment in (
        "snapshot_read_os_error",
        "value:",
        "path:",
        "upstream:",
        "reason:",
        "PermissionError",
        "simulated read denied on snapshot",
    ):
        assert fragment in err
    # Must not be bucketed as missing-after-copy or corrupt-json.
    for other_axis in ("snapshot_missing_after_copy", "snapshot_corrupt_json"):
        assert other_axis not in err
|
||||
|
||||
|
||||
def test_snapshot_filenotfounderror_still_uses_missing_after_copy_axis(
    tmp_path: Path, monkeypatch, capsys
):
    """Handler-ordering guard for the snapshot load step.

    A ``FileNotFoundError`` from the loader must keep its own
    ``snapshot_missing_after_copy`` axis and must not fall through to the
    generic ``snapshot_read_os_error`` axis, even though
    ``FileNotFoundError`` is an ``OSError`` subclass. Like the other axis
    tests in this file, the fail-closed exit code is checked as well.
    """
    runs_root = tmp_path / "runs"
    prev_run_id = "prev_run_load_fnf_ordering"
    prev_run_dir = runs_root / prev_run_id / "phase_z2"
    new_run_dir = tmp_path / "new" / "phase_z2"
    snap = _build_canonical_snapshot()
    _seed_prev_run_dir(prev_run_dir, snapshot=snap)
    monkeypatch.setattr(_pz2, "RUNS_DIR", runs_root)

    def _raise_fnf(*args, **kwargs):
        raise FileNotFoundError("simulated FNF on snapshot read")

    # Replace the loader so the failure is raised regardless of disk state.
    monkeypatch.setattr(
        _pz2, "_load_and_validate_reuse_snapshot", _raise_fnf
    )

    with pytest.raises(SystemExit) as ei:
        _pz2.execute_reuse_from_or_fail_closed(
            reuse_from=prev_run_id,
            new_run_dir=new_run_dir,
            mdx_source_text=_mdx_text(),
        )
    # Every fail-closed axis exits with code 2; check it here too.
    assert ei.value.code == 2
    err = capsys.readouterr().err
    assert "snapshot_missing_after_copy" in err
    assert "snapshot_read_os_error" not in err
|
||||
|
||||
|
||||
# -- module surface anchor ------------------------------------------------
|
||||
|
||||
|
||||
def test_pipeline_exposes_u4b_surface():
    """The fail-closed entry point, its two helpers and the closed axis
    vocabulary must all remain module-level attributes of the pipeline."""
    required_names = (
        "execute_reuse_from_or_fail_closed",
        "_abort_reuse_from",
        "_paths_equivalent",
        "REUSE_FAIL_CLOSED_AXES",
    )
    for name in required_names:
        assert hasattr(_pz2, name), f"u4b surface missing: {name}"
|
||||
|
||||
|
||||
def test_pipeline_run_signature_reuse_from_threaded_after_u5():
    """Lock the shape of the ``reuse_from`` parameter on ``run_phase_z2_mvp1``.

    The parameter must exist, be keyword-only and default to ``None``.
    This guards against silent drift such as promotion to a positional
    parameter or a changed default. The check is duplicated here so this
    fail-closed suite stays self-contained.
    """
    import inspect

    parameters = inspect.signature(_pz2.run_phase_z2_mvp1).parameters
    assert "reuse_from" in parameters, (
        "u5 must thread reuse_from into run_phase_z2_mvp1 — kwarg missing. "
        f"current params: {list(parameters)}"
    )

    reuse_param = parameters["reuse_from"]
    assert reuse_param.kind is inspect.Parameter.KEYWORD_ONLY, (
        f"reuse_from must be keyword-only (after the ``*`` barrier); "
        f"got kind={reuse_param.kind}"
    )
    assert reuse_param.default is None, (
        f"reuse_from must default to None to preserve pre-u5 behaviour; "
        f"got default={reuse_param.default!r}"
    )
|
||||
493
tests/test_phase_z2_reuse_snapshot.py
Normal file
493
tests/test_phase_z2_reuse_snapshot.py
Normal file
@@ -0,0 +1,493 @@
|
||||
"""IMP-43 (#72) u2 — unit tests for ``src.phase_z2_reuse_snapshot``.
|
||||
|
||||
Scope mirror of the production module (Stage 2 u2):
|
||||
|
||||
* ``build_snapshot`` shape, provenance, JSON round-trip, required keys.
|
||||
* ``serialize_section`` / ``serialize_unit`` field preservation, including
|
||||
the duck-typed ``v4_candidates`` shape (template_id / frame_id /
|
||||
frame_number / confidence / label).
|
||||
* ``validate_snapshot`` fail-closed paths: non-dict input, schema
|
||||
version mismatch, missing/empty/non-string ``mdx_sha256``, sha
|
||||
mismatch, missing required keys, unwrapped wrapper, wrapper missing
|
||||
a provenance field.
|
||||
* Module-level constants exposed for u3 / u4 / u4b consumers.
|
||||
|
||||
The tests use synthetic duck-typed dataclasses so the snapshot module's
|
||||
external surface is exercised without coupling to the production
|
||||
``MdxSection`` / ``CompositionUnit`` / ``V4Match`` dataclass layouts.
|
||||
That mirrors the production module's intentional duck-typing (no
|
||||
imports from ``phase_z2_pipeline`` / ``phase_z2_composition``).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Optional
|
||||
|
||||
import pytest
|
||||
|
||||
from src.phase_z2_reuse_snapshot import (
|
||||
REQUIRED_TOP_LEVEL_KEYS,
|
||||
SNAPSHOT_FILENAME,
|
||||
SNAPSHOT_VERSION,
|
||||
SnapshotValidationError,
|
||||
build_snapshot,
|
||||
serialize_section,
|
||||
serialize_unit,
|
||||
validate_snapshot,
|
||||
)
|
||||
|
||||
|
||||
# -- synthetic duck-typed inputs ------------------------------------------
|
||||
|
||||
|
||||
@dataclass
class _Section:
    """Synthetic, duck-typed section used as serializer / snapshot input.

    Deliberately a local test double rather than the production section
    dataclass, so these tests exercise only the attribute names the
    snapshot module reads.
    """

    # Required identity and content fields (no defaults).
    section_id: str
    section_num: int
    title: str
    raw_content: str

    # Optional fields; the list defaults are built per instance.
    heading_number: Optional[str] = None
    v4_alias_keys: list = field(default_factory=list)
    sub_sections: list = field(default_factory=list)
|
||||
|
||||
|
||||
@dataclass
class _V4Candidate:
    """Synthetic, duck-typed V4 frame candidate attached to a ``_Unit``.

    ``v4_rank`` is optional and defaults to ``None`` so a candidate can be
    built either without a rank or with an explicit one.
    """

    # Required candidate fields.
    template_id: str
    frame_id: str
    frame_number: int
    confidence: float
    label: str

    # Optional rank; None when the candidate carries no rank.
    v4_rank: Optional[int] = None
|
||||
|
||||
|
||||
@dataclass
class _Unit:
    """Synthetic, duck-typed composition unit used as serializer input.

    The first eleven fields are required; the remainder carry the defaults
    of a plain rank-1, auto-selectable, non-provisional unit.
    """

    # Required fields (positional order is part of the constructor contract).
    source_section_ids: list
    merge_type: str
    frame_template_id: str
    frame_id: str
    frame_number: int
    confidence: float
    label: str
    phase_z_status: str
    raw_content: str
    title: str
    score: float

    # Selection metadata defaults.
    v4_rank: Optional[int] = 1
    selection_path: str = "rank_1"
    fallback_reason: Optional[str] = None
    rationale: dict = field(default_factory=dict)
    auto_selectable: bool = True

    # Per-instance containers (fresh list for every unit).
    filter_reasons: list = field(default_factory=list)
    notes: list = field(default_factory=list)
    v4_candidates: list = field(default_factory=list)

    provisional: bool = False
|
||||
|
||||
|
||||
def _make_section(**overrides: Any) -> _Section:
    """Return a ``_Section`` built from canonical defaults.

    Any keyword passed in ``overrides`` replaces (or extends) the default
    constructor arguments before the dataclass is instantiated.
    """
    defaults = {
        "section_id": "03-1",
        "section_num": 1,
        "title": "DX status",
        "raw_content": "- bullet one\n- bullet two",
    }
    return _Section(**{**defaults, **overrides})
|
||||
|
||||
|
||||
def _make_unit(**overrides: Any) -> _Unit:
    """Return a ``_Unit`` built from canonical defaults.

    The default unit carries a single ``_V4Candidate`` mirroring its own
    frame fields; ``overrides`` replace any constructor argument,
    including ``v4_candidates``.
    """
    default_candidate = _V4Candidate(
        template_id="tpl_a",
        frame_id="fid_a",
        frame_number=13,
        confidence=0.91,
        label="use_as_is",
    )
    fields: dict[str, Any] = {
        "source_section_ids": ["03-1"],
        "merge_type": "single",
        "frame_template_id": "tpl_a",
        "frame_id": "fid_a",
        "frame_number": 13,
        "confidence": 0.91,
        "label": "use_as_is",
        "phase_z_status": "auto_renderable",
        "raw_content": "- bullet one\n- bullet two",
        "title": "DX status",
        "score": 0.91,
        "v4_candidates": [default_candidate],
    }
    fields.update(overrides)
    return _Unit(**fields)
|
||||
|
||||
|
||||
def _make_build_kwargs(**overrides: Any) -> dict[str, Any]:
    """Return a complete keyword set for ``build_snapshot``.

    Defaults describe one section and one unit with a 64-character
    ``mdx_sha256`` of ``"a"``; ``overrides`` replace individual entries.
    """
    defaults: dict[str, Any] = {
        "mdx_sha256": "a" * 64,
        "slide_title": "Title",
        "slide_footer": "Footer",
        "sections": [_make_section()],
        "stage0_adapter_diagnostics": {"used": True, "fallback_reason": None},
        "stage0_normalized_assets": {"popups": [], "images": [], "tables": []},
        "v4_evidence": [{"section_id": "03-1", "v4_candidates": []}],
        "layout_preset_pre_override": "horizontal-2",
        "units": [_make_unit()],
        "comp_debug": {"v4_fallback_summary": {"fallback_used_count": 0}},
        "v4_fallback_traces": {"03-1": {"selection_path": "rank_1"}},
        "ai_preflight": {"enabled": False, "skipped": True},
    }
    return {**defaults, **overrides}
|
||||
|
||||
|
||||
# -- module constants -----------------------------------------------------
|
||||
|
||||
|
||||
def test_snapshot_filename_constant():
    """Pin the exact sidecar filename exported by the snapshot module."""
    expected_name = "_reuse_snapshot.json"
    assert SNAPSHOT_FILENAME == expected_name
|
||||
|
||||
|
||||
def test_snapshot_version_is_positive_int():
    """The schema version constant must be an ``int`` no smaller than 1."""
    assert isinstance(SNAPSHOT_VERSION, int)
    assert 1 <= SNAPSHOT_VERSION
|
||||
|
||||
|
||||
def test_required_keys_include_contract_and_payload():
    """``REQUIRED_TOP_LEVEL_KEYS`` must list both contract and payload keys."""
    # Bare contract / integrity keys.
    for contract_key in ("schema_version", "mdx_sha256"):
        assert contract_key in REQUIRED_TOP_LEVEL_KEYS

    # Payload axes per Stage 2 plan.
    payload_axes = (
        "slide_title",
        "slide_footer",
        "sections",
        "stage0_adapter_diagnostics",
        "stage0_normalized_assets",
        "v4_evidence",
        "layout_preset_pre_override",
        "units",
        "comp_debug",
        "v4_fallback_traces",
        "ai_preflight",
    )
    for k in payload_axes:
        assert k in REQUIRED_TOP_LEVEL_KEYS, f"missing from REQUIRED_TOP_LEVEL_KEYS: {k}"
|
||||
|
||||
|
||||
# -- build_snapshot -------------------------------------------------------
|
||||
|
||||
|
||||
def test_build_snapshot_round_trips_through_json():
    """A built snapshot survives ``json.dumps`` / ``json.loads``.

    Only the two bare contract keys are compared after the round trip.
    """
    encoded = json.dumps(build_snapshot(**_make_build_kwargs()))
    decoded = json.loads(encoded)
    assert decoded["schema_version"] == SNAPSHOT_VERSION
    assert decoded["mdx_sha256"] == "a" * 64
|
||||
|
||||
|
||||
def test_build_snapshot_has_all_required_keys():
    """Every key in ``REQUIRED_TOP_LEVEL_KEYS`` appears in a built snapshot."""
    built = build_snapshot(**_make_build_kwargs())
    for key in REQUIRED_TOP_LEVEL_KEYS:
        assert key in built, f"build_snapshot missing required key: {key}"
|
||||
|
||||
|
||||
def test_build_snapshot_bare_keys_are_unwrapped_scalars():
    """``schema_version`` and ``mdx_sha256`` are stored as plain values."""
    built = build_snapshot(**_make_build_kwargs())
    expected_bare = (
        ("schema_version", SNAPSHOT_VERSION),
        ("mdx_sha256", "a" * 64),
    )
    for key, expected in expected_bare:
        assert built[key] == expected
    # bare keys MUST NOT be wrapped — u4b mdx_sha256 check reads directly.
    for key, _ in expected_bare:
        assert not isinstance(built[key], dict)
|
||||
|
||||
|
||||
def test_build_snapshot_provenance_wrapper_shape():
    """Every non-bare key is a ``{value, source_path, upstream_step}`` dict.

    ``source_path`` must be a non-empty string and ``upstream_step`` a
    string beginning with ``"step"``.
    """
    built = build_snapshot(**_make_build_kwargs())
    unwrapped_keys = {"schema_version", "mdx_sha256"}
    wrapped_items = [(k, v) for k, v in built.items() if k not in unwrapped_keys]
    for key, entry in wrapped_items:
        assert isinstance(entry, dict), f"{key} is not wrapped"
        assert set(entry.keys()) == {"value", "source_path", "upstream_step"}, key
        assert isinstance(entry["source_path"], str) and entry["source_path"]
        assert isinstance(entry["upstream_step"], str)
        assert entry["upstream_step"].startswith("step"), entry["upstream_step"]
|
||||
|
||||
|
||||
def test_build_snapshot_upstream_steps_stay_inside_reuse_boundary():
    """Provenance may only name steps inside the Step 0/2/5/6 reuse window.

    A drift to a later step (e.g. ``step09``) would silently invite work
    outside the reuse boundary (Stage 1 root_cause), so it must fail loudly.

    Step 01 contributes only the bare ``mdx_sha256`` integrity key, which
    has no provenance wrapper, so ``step01`` is not expected in payload
    provenance.
    """
    built = build_snapshot(**_make_build_kwargs())
    reuse_window = {"step00", "step02", "step05", "step06"}
    bare_keys = {"schema_version", "mdx_sha256"}
    for key in built:
        if key in bare_keys:
            continue
        entry = built[key]
        assert entry["upstream_step"] in reuse_window, (
            f"key {key!r}: upstream_step {entry['upstream_step']!r} outside reuse boundary"
        )
|
||||
|
||||
|
||||
def test_build_snapshot_units_carry_v4_candidates():
    """The serialized unit keeps its V4 candidate's identifying fields."""
    unit_payload = build_snapshot(**_make_build_kwargs())["units"]["value"]
    assert len(unit_payload) == 1
    assert unit_payload[0]["v4_candidates"][0]["template_id"] == "tpl_a"
    assert unit_payload[0]["v4_candidates"][0]["frame_number"] == 13
    assert unit_payload[0]["v4_candidates"][0]["confidence"] == pytest.approx(0.91)
|
||||
|
||||
|
||||
def test_build_snapshot_sections_preserve_alias_keys_and_subsections():
    """Alias keys, sub-sections and heading number survive ``build_snapshot``."""
    custom_section = _make_section(
        section_id="04-2",
        v4_alias_keys=["04-2.1"],
        sub_sections=[{"id": "04-2-sub-1"}],
        heading_number="2.1",
    )
    built = build_snapshot(**_make_build_kwargs(sections=[custom_section]))
    serialized = built["sections"]["value"]
    assert serialized[0]["section_id"] == "04-2"
    assert serialized[0]["v4_alias_keys"] == ["04-2.1"]
    assert serialized[0]["sub_sections"] == [{"id": "04-2-sub-1"}]
    assert serialized[0]["heading_number"] == "2.1"
|
||||
|
||||
|
||||
def test_build_snapshot_units_provenance_points_at_step06():
    """``units`` provenance names the Step 6 composition-plan artifact."""
    units_entry = build_snapshot(**_make_build_kwargs())["units"]
    assert "step06_composition_plan.json" in units_entry["source_path"]
    assert units_entry["upstream_step"] == "step06"
|
||||
|
||||
|
||||
def test_build_snapshot_v4_evidence_provenance_points_at_step05():
    """``v4_evidence`` provenance names the Step 5 evidence artifact."""
    evidence_entry = build_snapshot(**_make_build_kwargs())["v4_evidence"]
    assert "step05_v4_evidence.json" in evidence_entry["source_path"]
    assert evidence_entry["upstream_step"] == "step05"
|
||||
|
||||
|
||||
def test_build_snapshot_ai_preflight_provenance_points_at_step00():
    """``ai_preflight`` provenance names the Step 0 preconditions artifact."""
    preflight_entry = build_snapshot(**_make_build_kwargs())["ai_preflight"]
    assert "step00_preconditions.json" in preflight_entry["source_path"]
    assert preflight_entry["upstream_step"] == "step00"
|
||||
|
||||
|
||||
def test_build_snapshot_rejects_unjsonable_input():
    """A unit carrying a non-JSON-safe note makes ``build_snapshot`` raise."""
    # A bare ``object()`` has no JSON representation.
    poisoned_unit = _make_unit(notes=[object()])
    with pytest.raises(TypeError):
        build_snapshot(**_make_build_kwargs(units=[poisoned_unit]))
|
||||
|
||||
|
||||
def test_build_snapshot_handles_none_optional_fields():
    """``None`` for the optional axes is accepted and normalised.

    The two slide strings stay ``None``; the mapping-shaped axes come back
    as empty dicts.
    """
    scalar_axes = ("slide_title", "slide_footer")
    mapping_axes = (
        "stage0_adapter_diagnostics",
        "stage0_normalized_assets",
        "comp_debug",
        "v4_fallback_traces",
        "ai_preflight",
    )
    none_overrides = dict.fromkeys(scalar_axes + mapping_axes)
    built = build_snapshot(**_make_build_kwargs(**none_overrides))

    # None inputs land as None / {} consistently — never raise.
    for axis in scalar_axes:
        assert built[axis]["value"] is None
    for axis in mapping_axes:
        assert built[axis]["value"] == {}
|
||||
|
||||
|
||||
# -- serializer helpers ---------------------------------------------------
|
||||
|
||||
|
||||
def test_serialize_section_preserves_all_documented_fields():
    """``serialize_section`` carries every section field into its output."""
    serialized = serialize_section(
        _make_section(
            heading_number="1.1",
            v4_alias_keys=["03-1.x"],
            sub_sections=[{"id": "s"}],
        )
    )
    assert serialized["section_id"] == "03-1"
    assert serialized["section_num"] == 1
    assert serialized["title"] == "DX status"
    assert serialized["raw_content"].startswith("- bullet")
    assert serialized["heading_number"] == "1.1"
    assert serialized["v4_alias_keys"] == ["03-1.x"]
    assert serialized["sub_sections"] == [{"id": "s"}]
|
||||
|
||||
|
||||
def test_serialize_section_works_with_missing_optional_attrs():
    """An object lacking the optional attributes serializes with defaults."""

    class _RequiredOnly:
        # Only the four required attributes; no optional ones.
        section_id = "x"
        section_num = 0
        title = "t"
        raw_content = "r"

    serialized = serialize_section(_RequiredOnly())
    assert serialized["heading_number"] is None
    assert serialized["v4_alias_keys"] == []
    assert serialized["sub_sections"] == []
|
||||
|
||||
|
||||
def test_serialize_unit_v4_candidates_unwrap_to_named_attrs():
    """A serialized candidate is a plain dict of its named attributes."""
    first_candidate = serialize_unit(_make_unit())["v4_candidates"][0]
    expected = {
        "template_id": "tpl_a",
        "frame_id": "fid_a",
        "frame_number": 13,
        "confidence": pytest.approx(0.91),
        "label": "use_as_is",
        # u4 follow-up — the Step 9 application-plan payload reads
        # ``c.v4_rank`` off each rehydrated candidate. Per that follow-up
        # the serializer persists it via ``getattr(c, 'v4_rank', None)``,
        # so a candidate without a rank serializes as None while ranked
        # V4Match instances keep their rank (1/2/3/...).
        "v4_rank": None,
    }
    assert first_candidate == expected
|
||||
|
||||
|
||||
def test_serialize_unit_v4_candidates_persist_v4_rank_when_present():
    """A candidate built with ``v4_rank=2`` keeps that rank when serialized."""
    second_ranked = _V4Candidate(
        template_id="tpl_b",
        frame_id="fid_b",
        frame_number=14,
        confidence=0.82,
        label="light_edit",
        v4_rank=2,
    )
    serialized = serialize_unit(_make_unit(v4_candidates=[second_ranked]))
    assert serialized["v4_candidates"][0]["v4_rank"] == 2
|
||||
|
||||
|
||||
def test_serialize_unit_handles_empty_v4_candidates():
    """A unit without candidates serializes to an empty candidate list."""
    serialized = serialize_unit(_make_unit(v4_candidates=[]))
    assert serialized["v4_candidates"] == []
|
||||
|
||||
|
||||
def test_serialize_unit_provisional_default_false():
    """The default unit serializes ``provisional`` as exactly ``False``."""
    serialized = serialize_unit(_make_unit())
    assert serialized["provisional"] is False
|
||||
|
||||
|
||||
def test_serialize_unit_provisional_true_preserved():
    """``provisional=True`` on the unit is carried into the serialized dict."""
    serialized = serialize_unit(_make_unit(provisional=True))
    assert serialized["provisional"] is True
|
||||
|
||||
|
||||
def test_serialize_unit_round_trips_through_json():
    """A serialized unit is JSON-encodable and decodes with key fields intact."""
    encoded = json.dumps(serialize_unit(_make_unit()))
    decoded = json.loads(encoded)
    assert decoded["source_section_ids"] == ["03-1"]
    assert decoded["frame_template_id"] == "tpl_a"
|
||||
|
||||
|
||||
# -- validate_snapshot ----------------------------------------------------
|
||||
|
||||
|
||||
def test_validate_snapshot_accepts_well_formed():
    """A freshly built snapshot validates against its own sha without raising."""
    well_formed = build_snapshot(**_make_build_kwargs())
    validate_snapshot(well_formed, expected_mdx_sha256="a" * 64)
|
||||
|
||||
|
||||
def test_validate_snapshot_rejects_non_dict_input():
    """Passing a string where a snapshot dict is expected is rejected."""
    not_a_snapshot = "not a dict"
    with pytest.raises(SnapshotValidationError):
        validate_snapshot(not_a_snapshot, expected_mdx_sha256="a" * 64)
|
||||
|
||||
|
||||
def test_validate_snapshot_rejects_version_mismatch():
    """A snapshot with a foreign ``schema_version`` is rejected by name."""
    tampered = build_snapshot(**_make_build_kwargs())
    tampered["schema_version"] = SNAPSHOT_VERSION + 999
    # ``match`` searches the stringified exception for the key name.
    with pytest.raises(SnapshotValidationError, match="schema_version"):
        validate_snapshot(tampered, expected_mdx_sha256="a" * 64)
|
||||
|
||||
|
||||
def test_validate_snapshot_rejects_missing_sha():
    """Removing ``mdx_sha256`` makes validation fail and name the key."""
    tampered = build_snapshot(**_make_build_kwargs())
    tampered.pop("mdx_sha256")
    with pytest.raises(SnapshotValidationError, match="mdx_sha256"):
        validate_snapshot(tampered, expected_mdx_sha256="a" * 64)
|
||||
|
||||
|
||||
def test_validate_snapshot_rejects_empty_sha():
    """An empty-string ``mdx_sha256`` is rejected and named in the error."""
    tampered = build_snapshot(**_make_build_kwargs())
    tampered["mdx_sha256"] = ""
    with pytest.raises(SnapshotValidationError, match="mdx_sha256"):
        validate_snapshot(tampered, expected_mdx_sha256="a" * 64)
|
||||
|
||||
|
||||
def test_validate_snapshot_rejects_non_string_sha():
    """An integer ``mdx_sha256`` is rejected and named in the error."""
    tampered = build_snapshot(**_make_build_kwargs())
    tampered["mdx_sha256"] = 12345
    with pytest.raises(SnapshotValidationError, match="mdx_sha256"):
        validate_snapshot(tampered, expected_mdx_sha256="a" * 64)
|
||||
|
||||
|
||||
def test_validate_snapshot_rejects_sha_mismatch():
    """A snapshot keyed to one sha fails validation against a different sha."""
    built = build_snapshot(**_make_build_kwargs())
    different_sha = "b" * 64
    with pytest.raises(SnapshotValidationError, match="mdx_sha256 mismatch"):
        validate_snapshot(built, expected_mdx_sha256=different_sha)
|
||||
|
||||
|
||||
def test_validate_snapshot_rejects_missing_required_key():
    """Dropping the ``units`` key fails validation and names the key."""
    tampered = build_snapshot(**_make_build_kwargs())
    tampered.pop("units")
    with pytest.raises(SnapshotValidationError, match="units"):
        validate_snapshot(tampered, expected_mdx_sha256="a" * 64)
|
||||
|
||||
|
||||
def test_validate_snapshot_rejects_unwrapped_payload_key():
    """A payload key holding a bare string instead of a wrapper is rejected."""
    tampered = build_snapshot(**_make_build_kwargs())
    tampered["units"] = "not a dict"
    with pytest.raises(SnapshotValidationError, match="units"):
        validate_snapshot(tampered, expected_mdx_sha256="a" * 64)
|
||||
|
||||
|
||||
def test_validate_snapshot_rejects_wrapper_missing_value():
    """A provenance wrapper without ``value`` is rejected, naming the field."""
    tampered = build_snapshot(**_make_build_kwargs())
    tampered["units"] = {"source_path": "x", "upstream_step": "step06"}
    with pytest.raises(SnapshotValidationError, match="value"):
        validate_snapshot(tampered, expected_mdx_sha256="a" * 64)
|
||||
|
||||
|
||||
def test_validate_snapshot_rejects_wrapper_missing_source_path():
    """A provenance wrapper without ``source_path`` is rejected by name."""
    tampered = build_snapshot(**_make_build_kwargs())
    tampered["units"] = {"value": [], "upstream_step": "step06"}
    with pytest.raises(SnapshotValidationError, match="source_path"):
        validate_snapshot(tampered, expected_mdx_sha256="a" * 64)
|
||||
|
||||
|
||||
def test_validate_snapshot_rejects_wrapper_missing_upstream_step():
    """A provenance wrapper without ``upstream_step`` is rejected by name."""
    tampered = build_snapshot(**_make_build_kwargs())
    tampered["units"] = {"value": [], "source_path": "x"}
    with pytest.raises(SnapshotValidationError, match="upstream_step"):
        validate_snapshot(tampered, expected_mdx_sha256="a" * 64)
|
||||
|
||||
|
||||
def test_validate_snapshot_error_subclasses_value_error():
    """``SnapshotValidationError`` must remain a ``ValueError`` subclass.

    u4b will pre-catch ``SnapshotValidationError``, but the broader
    ``except ValueError`` net must still pick the error up. The class
    relationship is asserted directly: ``pytest.raises(ValueError)`` on its
    own would also pass if ``validate_snapshot`` raised a bare
    ``ValueError``, which is not what this test is named for.
    """
    assert issubclass(SnapshotValidationError, ValueError)

    snap = build_snapshot(**_make_build_kwargs())
    # Offset from the live constant (as the version-mismatch test does) so
    # the value can never coincide with the real schema version.
    snap["schema_version"] = SNAPSHOT_VERSION + 999
    with pytest.raises(ValueError) as exc:
        validate_snapshot(snap, expected_mdx_sha256="a" * 64)
    # The object caught through the ValueError net is the dedicated type.
    assert isinstance(exc.value, SnapshotValidationError)
|
||||
282
tests/test_phase_z2_reuse_snapshot_write.py
Normal file
282
tests/test_phase_z2_reuse_snapshot_write.py
Normal file
@@ -0,0 +1,282 @@
|
||||
"""IMP-43 (#72) u3 — focused tests for the Step 6 reuse snapshot writer.
|
||||
|
||||
u3 scope (per the Stage 2 Exit Report):
|
||||
|
||||
- ``_write_reuse_snapshot`` writes ``run_dir/_reuse_snapshot.json`` *after*
|
||||
the Step 6 artifact lands; failure WARNS and CONTINUES (the helper does
|
||||
NOT raise out of the main pipeline run).
|
||||
- The Step 6 artifact data dict records the run_dir-relative sidecar path
|
||||
as ``data.reuse_snapshot_path`` (additive informational field, always
|
||||
set to ``SNAPSHOT_FILENAME`` regardless of write success — u4 will
|
||||
fail-closed on missing / invalid sidecar via u2's ``validate_snapshot``).
|
||||
|
||||
The helper is tested in isolation (no full pipeline run) — pipeline call
|
||||
site presence is asserted structurally so we exercise behaviour without
|
||||
re-running Step 0~6 inside the test process. End-to-end equivalence under
|
||||
``--reuse-from`` is u7a / u7b scope.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Any, Optional
|
||||
|
||||
import pytest
|
||||
|
||||
import src.phase_z2_pipeline as _pz2
|
||||
from src.phase_z2_reuse_snapshot import (
|
||||
SNAPSHOT_FILENAME,
|
||||
SNAPSHOT_VERSION,
|
||||
SnapshotValidationError,
|
||||
validate_snapshot,
|
||||
)
|
||||
|
||||
|
||||
# -- synthetic duck-typed inputs ------------------------------------------
|
||||
|
||||
|
||||
@dataclass
class _Section:
    """Synthetic, duck-typed section fed to the snapshot writer under test.

    A local test double, so the writer tests do not depend on the
    production section dataclass.
    """

    # Required identity and content fields (no defaults).
    section_id: str
    section_num: int
    title: str
    raw_content: str

    # Optional fields; the list defaults are built per instance.
    heading_number: Optional[str] = None
    v4_alias_keys: list = field(default_factory=list)
    sub_sections: list = field(default_factory=list)
|
||||
|
||||
|
||||
@dataclass
class _V4Candidate:
    """Synthetic, duck-typed V4 frame candidate attached to a ``_Unit``.

    This variant has no ``v4_rank`` attribute at all; all five fields are
    required.
    """

    template_id: str
    frame_id: str
    frame_number: int
    confidence: float
    label: str
|
||||
|
||||
|
||||
@dataclass
class _Unit:
    """Synthetic, duck-typed composition unit fed to the snapshot writer.

    The first eleven fields are required; the remainder carry the defaults
    of a plain rank-1, auto-selectable, non-provisional unit.
    """

    # Required fields (positional order is part of the constructor contract).
    source_section_ids: list
    merge_type: str
    frame_template_id: str
    frame_id: str
    frame_number: int
    confidence: float
    label: str
    phase_z_status: str
    raw_content: str
    title: str
    score: float

    # Selection metadata defaults.
    v4_rank: Optional[int] = 1
    selection_path: str = "rank_1"
    fallback_reason: Optional[str] = None
    rationale: dict = field(default_factory=dict)
    auto_selectable: bool = True

    # Per-instance containers (fresh list for every unit).
    filter_reasons: list = field(default_factory=list)
    notes: list = field(default_factory=list)
    v4_candidates: list = field(default_factory=list)

    provisional: bool = False
|
||||
|
||||
|
||||
def _make_kwargs(**overrides: Any) -> dict[str, Any]:
    """Return a complete keyword set for ``_pz2._write_reuse_snapshot``.

    Defaults describe a one-section, one-unit run whose single V4 candidate
    also appears in ``v4_evidence``. ``overrides`` replace individual
    entries (e.g. ``mdx_source_text`` or ``layout_preset_pre_override``).
    """
    default_section = _Section(
        section_id="03-1",
        section_num=1,
        title="DX status",
        raw_content="- bullet one\n- bullet two",
    )
    default_candidate = _V4Candidate(
        template_id="tpl_a",
        frame_id="fid_a",
        frame_number=13,
        confidence=0.91,
        label="use_as_is",
    )
    default_unit = _Unit(
        source_section_ids=["03-1"],
        merge_type="single",
        frame_template_id="tpl_a",
        frame_id="fid_a",
        frame_number=13,
        confidence=0.91,
        label="use_as_is",
        phase_z_status="auto_renderable",
        raw_content="- bullet one\n- bullet two",
        title="DX status",
        score=0.91,
        v4_candidates=[default_candidate],
    )
    evidence_row = {
        "section_id": "03-1",
        "v4_candidates": [
            {
                "template_id": "tpl_a",
                "frame_id": "fid_a",
                "frame_number": 13,
                "confidence": 0.91,
                "label": "use_as_is",
            }
        ],
        "candidate_status": "ok",
    }
    defaults: dict[str, Any] = {
        "mdx_source_text": "# Slide\n\n## 03-1 DX status\n\n- bullet one\n- bullet two\n",
        "slide_title": "Slide",
        "slide_footer": None,
        "sections": [default_section],
        "stage0_adapter_diagnostics": {"used": True, "fallback_reason": None},
        "stage0_normalized_assets": {"popups": [], "images": [], "tables": []},
        "v4_evidence": [evidence_row],
        "layout_preset_pre_override": "single",
        "units": [default_unit],
        "comp_debug": {"v4_fallback_summary": {"fallback_used_count": 0}},
        "v4_fallback_traces": {"03-1": {"selection_path": "rank_1"}},
        "ai_preflight": {"enabled": False, "skipped": True},
    }
    return {**defaults, **overrides}
|
||||
|
||||
|
||||
# -- success path ---------------------------------------------------------
|
||||
|
||||
|
||||
def test_writes_snapshot_file_at_run_dir_root(tmp_path: Path):
    """On success the helper returns the filename and the file exists."""
    returned = _pz2._write_reuse_snapshot(tmp_path, **_make_kwargs())
    assert returned == SNAPSHOT_FILENAME

    fpath = tmp_path.joinpath(SNAPSHOT_FILENAME)
    assert fpath.exists(), f"snapshot not written at {fpath}"
|
||||
|
||||
|
||||
def test_written_snapshot_validates(tmp_path: Path):
    """The file written by the helper passes ``validate_snapshot``.

    The expected sha is recomputed here as the SHA-256 of the UTF-8 bytes
    of the ``mdx_source_text`` that was passed to the helper.
    """
    from hashlib import sha256

    writer_kwargs = _make_kwargs()
    assert _pz2._write_reuse_snapshot(tmp_path, **writer_kwargs) == SNAPSHOT_FILENAME

    on_disk = (tmp_path / SNAPSHOT_FILENAME).read_text(encoding="utf-8")
    snapshot = json.loads(on_disk)

    source_bytes = writer_kwargs["mdx_source_text"].encode("utf-8")
    validate_snapshot(snapshot, expected_mdx_sha256=sha256(source_bytes).hexdigest())
|
||||
|
||||
|
||||
def test_snapshot_has_correct_schema_version(tmp_path: Path):
    """The written file records the module's ``SNAPSHOT_VERSION``."""
    _pz2._write_reuse_snapshot(tmp_path, **_make_kwargs())
    on_disk = (tmp_path / SNAPSHOT_FILENAME).read_text(encoding="utf-8")
    assert json.loads(on_disk)["schema_version"] == SNAPSHOT_VERSION
|
||||
|
||||
|
||||
def test_snapshot_records_layout_preset_pre_override(tmp_path: Path):
    """The layout preset passed to the helper is stored under its own axis."""
    writer_kwargs = _make_kwargs(layout_preset_pre_override="horizontal-2")
    _pz2._write_reuse_snapshot(tmp_path, **writer_kwargs)

    on_disk = (tmp_path / SNAPSHOT_FILENAME).read_text(encoding="utf-8")
    recorded = json.loads(on_disk)["layout_preset_pre_override"]
    assert recorded["value"] == "horizontal-2"
|
||||
|
||||
|
||||
def test_snapshot_is_utf8_encoded_with_non_ascii_content(tmp_path: Path):
    """Non-ASCII (Korean) text survives the write / read round trip.

    ``ensure_ascii=False`` is intentional so Korean text stays readable on
    disk. If a future refactor drops it the bytes change but the JSON still
    parses, so this test only requires that the file decodes as UTF-8 and
    that the value comes back unchanged.
    """
    korean_kwargs = _make_kwargs(
        slide_title="설계 방식의 왜곡",
        mdx_source_text="# 설계 방식\n\n- 한글 bullet\n",
    )
    _pz2._write_reuse_snapshot(tmp_path, **korean_kwargs)

    decoded_text = (tmp_path / SNAPSHOT_FILENAME).read_text(encoding="utf-8")
    assert json.loads(decoded_text)["slide_title"]["value"] == "설계 방식의 왜곡"
|
||||
|
||||
|
||||
# -- failure path ---------------------------------------------------------
|
||||
|
||||
|
||||
def test_failure_warns_and_returns_none(tmp_path: Path, monkeypatch, capsys):
    """A failing ``build_snapshot`` must not escape the writer helper.

    With ``build_snapshot`` replaced by a stub that raises, the helper is
    expected to WARN on stderr (naming the exception type), return ``None``
    so the main pipeline run continues, and leave no file on disk.
    """

    def _boom(*_args, **_kwargs):
        # Accept any call shape: if the helper ever passes arguments
        # positionally, the stub must still raise the synthetic
        # RuntimeError rather than a TypeError of its own, which would
        # fail the "RuntimeError" assertion below for the wrong reason.
        raise RuntimeError("synthetic build failure")

    monkeypatch.setattr(_pz2, "build_snapshot", _boom)

    rv = _pz2._write_reuse_snapshot(tmp_path, **_make_kwargs())

    assert rv is None
    captured = capsys.readouterr()
    assert "reuse-snapshot" in captured.err
    assert "WARN" in captured.err
    assert "RuntimeError" in captured.err
    # File MUST NOT exist on failure (no partial JSON on disk).
    assert not (tmp_path / SNAPSHOT_FILENAME).exists()
|
||||
|
||||
|
||||
def test_failure_on_unwritable_run_dir_warns_and_returns_none(
    tmp_path: Path, monkeypatch, capsys
):
    """A missing run directory yields a warning and ``None``, never a raise.

    Simulates a disk write failure (Stage 2 guardrail: the sidecar is
    optional) by pointing the helper at a directory that was never created.
    """
    # Nothing under this path exists, so the write is expected to fail with
    # FileNotFoundError.
    missing_dir = tmp_path.joinpath("does", "not", "exist")

    result = _pz2._write_reuse_snapshot(missing_dir, **_make_kwargs())
    stderr_text = capsys.readouterr().err

    assert result is None
    assert "reuse-snapshot" in stderr_text
    assert "WARN" in stderr_text
    # The exception type must surface in the warning so debugging is not
    # blind.
    assert "FileNotFoundError" in stderr_text
|
||||
|
||||
|
||||
# -- pipeline integration anchors -----------------------------------------
|
||||
|
||||
|
||||
def test_pipeline_imports_helper_and_constant():
    """The pipeline module exposes the writer helper and the filename constant.

    The helper is needed for the post-Step-6 call site; the constant is
    checked on the pipeline module because the snapshot module is meant to
    be its single source of truth.
    """
    helper = getattr(_pz2, "_write_reuse_snapshot", None)
    assert hasattr(_pz2, "_write_reuse_snapshot")
    assert callable(helper)
    assert _pz2.SNAPSHOT_FILENAME == "_reuse_snapshot.json"
|
||||
|
||||
|
||||
def test_pipeline_call_site_follows_step06_artifact_write():
    """Structural guard: the helper is *called* after the Step 6 write.

    The sidecar must land next to ``steps/step06_composition_plan.json``
    (Stage 2 spec), so an invocation of ``_write_reuse_snapshot`` has to
    appear in the pipeline source after the Step 6 artifact write.

    The helper's own ``def _write_reuse_snapshot(`` line is excluded from
    the search: a plain substring search would accept the definition as if
    it were a call and let the guard pass with no call site at all.
    """
    import re

    source = Path(_pz2.__file__).read_text(encoding="utf-8")
    # Locate the step06 artifact write call site by its locked name arg.
    step06_marker = '6, "composition_plan"'
    idx_step06 = source.find(step06_marker)
    assert idx_step06 != -1, "step06 artifact write call site missing"
    # A real call must appear AFTER the step06 marker; the negative
    # lookbehind skips the function definition line.
    call_after_step06 = re.search(
        r"(?<!def )_write_reuse_snapshot\(", source[idx_step06:]
    )
    assert call_after_step06 is not None, "u3 helper call missing after step06 write"
|
||||
|
||||
|
||||
def test_pipeline_step06_artifact_data_records_snapshot_path():
    """Structural guard: Step 6 artifact data names the sidecar path.

    The pipeline source near the Step 6 artifact write must mention the
    ``reuse_snapshot_path`` key and ``SNAPSHOT_FILENAME`` so a future
    ``--reuse-from`` consumer can locate the sidecar from the canonical
    step artifact (Stage 2 spec — informational; a missing file is u4's
    fail-closed concern).
    """
    pipeline_source = Path(_pz2.__file__).read_text(encoding="utf-8")
    marker_at = pipeline_source.find('6, "composition_plan"')
    assert marker_at != -1

    # Search a generous window (8000 characters) after the marker.
    nearby = pipeline_source[marker_at : marker_at + 8000]
    assert '"reuse_snapshot_path"' in nearby
    assert "SNAPSHOT_FILENAME" in nearby
|
||||
Reference in New Issue
Block a user