feat(#72): IMP-43 u1~u8 --reuse-from incremental rerun (Step 0/1/2/5/6 reuse + Step 7+ re-execute)
Some checks failed
Multi-MDX Regression (IMP-91) / multi-mdx-regression (push) Failing after 25s

u1 argparse --reuse-from PREV_RUN_ID + post-merge fail-closed guard (rejects
layout/zone_geometry/zone_section/image override axes by name; only
--override-frame is preserved).
u2 src/phase_z2_reuse_snapshot.py — JSON-only Step 6 snapshot with mdx_sha256
integrity key and {value, source_path, upstream_step} provenance per axis
(pickle forbidden per Stage 2 guardrail).
u3 _write_reuse_snapshot at the Step 6 boundary; soft-fails to stderr without
aborting the seed run.
u4 prev_run_dir RO copy of step00/01/02/05/06 + _reuse_snapshot.json into
new run_dir, state rehydration, reuse marker, frame-override application on
restored units, Step 7+ resume.
u4b fail-closed for missing prev_run_dir / missing/corrupt/invalid snapshot /
mdx_sha256 mismatch / accidental new==prev write, with value+path+upstream
diagnostics per axis.
u5 reuse_from Optional[str] threaded through run_phase_z2_mvp1 signature and
CLI dispatch; default None preserves byte-identical pre-IMP-43 behavior.
u6 Front /api/run optional reuseFromRunId forwarding (vite.config.ts +
designAgentApi.ts + run_pipeline_reuse_from.test.ts).
u7a fast CI equivalence (1 mdx × 1 layout × 2 frames); step13 whitelist =
run_id/timestamps/prev_run_id only. u7b 3 layouts × 3 mdx × 32 frames
sweep gated by pytest.mark.sweep (registered in pyproject.toml; default CI
must use -m 'not sweep').
u8 scripts/measure_reuse_savings.py argv-driven A/B/C harness with frame
pin self-discovery + seed-time exclusion; status board §8 TBD anchor
(issue-body 50-70% / 10-20s→3-8s claim explicitly unverified, not mirrored).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-24 22:44:27 +09:00
parent 8648a468d9
commit b4be6c1cd0
15 changed files with 5128 additions and 656 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,301 @@
"""IMP-43 (#72) u2 — Step 6 reuse snapshot schema (JSON-only).
Stage 2 plan (locked) — ``--reuse-from PREV_RUN_ID`` reuses the
Step 0 / 1 / 2 / 5 / 6 deterministic artifact subset plus the
in-memory state that downstream steps need but that the existing
``step02_normalized.json`` / ``step05_v4_evidence.json`` /
``step06_composition_plan.json`` artifacts do not capture in a
deserialize-ready form (e.g. ``CompositionUnit`` instances,
``comp_debug``, ``v4_fallback_traces`` raw map, pre-override
``layout_preset``). This module owns the schema for the additional
``_reuse_snapshot.json`` sidecar written next to ``step06_composition_plan.json``.
Scope (u2 only, Stage 2 unit split):
* Pure schema + serializers + validator. No file I/O.
* JSON-only — pickle is forbidden per Stage 2 guardrails.
* Provenance per top-level field: ``{value, source_path, upstream_step}``.
* ``mdx_sha256`` integrity key — ``--reuse-from`` must fail closed when
the prev run's MDX bytes don't match the current MDX bytes.
* ``schema_version`` — bumped on any non-additive shape change.
Out of scope (deferred to later units):
* Writing the snapshot into the run_dir (u3).
* Copy / restore on ``--reuse-from`` (u4).
* Fail-closed snapshot/path errors at restore time (u4b).
* Threading ``reuse_from`` through ``run_phase_z2_mvp1`` (u5).
"""
from __future__ import annotations
import json
from typing import Any, Optional
# Schema contract version stamped into every snapshot and compared by
# ``validate_snapshot``.
SNAPSHOT_VERSION = 1

# File name of the reuse-snapshot sidecar.
SNAPSHOT_FILENAME = "_reuse_snapshot.json"

# Keys every snapshot must carry at its top level, in the order
# ``build_snapshot`` emits them. The first two are stored as bare scalars;
# every other key holds a ``{value, source_path, upstream_step}`` wrapper.
REQUIRED_TOP_LEVEL_KEYS: tuple[str, ...] = (
    # Bare scalars: contract key, then integrity key.
    "schema_version",
    "mdx_sha256",
    # Wrapped, upstream_step == "step02".
    "slide_title",
    "slide_footer",
    "sections",
    "stage0_adapter_diagnostics",
    "stage0_normalized_assets",
    # Wrapped, upstream_step == "step05".
    "v4_evidence",
    # Wrapped, upstream_step == "step06".
    "layout_preset_pre_override",
    "units",
    "comp_debug",
    "v4_fallback_traces",
    # Wrapped, upstream_step == "step00".
    "ai_preflight",
)

# Top-level keys exempt from the provenance-wrapper check in
# ``validate_snapshot``.
_BARE_KEYS: frozenset[str] = frozenset(("schema_version", "mdx_sha256"))
def _wrap(value: Any, *, source_path: str, upstream_step: str) -> dict[str, Any]:
    """Return ``value`` bundled with its provenance metadata.

    The result is a new three-key dict, in this order: ``value`` (the
    payload, stored by reference and not copied), ``source_path`` and
    ``upstream_step``.
    """
    return dict(value=value, source_path=source_path, upstream_step=upstream_step)
def serialize_section(section: Any) -> dict[str, Any]:
    """Flatten an ``MdxSection``-like object into a plain dict.

    The argument is duck-typed: any object exposing the attributes read
    below is accepted. ``section_id``, ``section_num``, ``title`` and
    ``raw_content`` are mandatory (a missing one raises ``AttributeError``).
    ``heading_number`` falls back to ``None``; ``v4_alias_keys`` and
    ``sub_sections`` fall back to an empty list when absent or falsy, and
    are always copied into a fresh ``list``.
    """
    record: dict[str, Any] = {}

    # Mandatory attributes, copied verbatim.
    for name in ("section_id", "section_num", "title", "raw_content"):
        record[name] = getattr(section, name)

    # Optional scalar.
    record["heading_number"] = getattr(section, "heading_number", None)

    # Optional sequences: normalise missing / None / empty to [].
    for name in ("v4_alias_keys", "sub_sections"):
        record[name] = list(getattr(section, name, []) or [])

    return record
def serialize_unit(unit: Any) -> dict[str, Any]:
    """Flatten a ``CompositionUnit``-like object into a plain dict.

    The argument is duck-typed. ``confidence`` and ``score`` are coerced
    with ``float``, ``auto_selectable`` and ``provisional`` with ``bool``;
    ``rationale`` / ``filter_reasons`` / ``notes`` / ``v4_candidates`` are
    copied into fresh containers, with falsy values becoming empty ones.
    ``provisional`` defaults to ``False`` when the attribute is absent.

    Each ``v4_candidates`` entry is reduced to six named attributes so the
    snapshot does not depend on the candidate class's own layout.
    ``v4_rank`` is persisted (``None`` when the candidate lacks it) so that
    a rehydrated candidate exposes the same rank a full rerun would stamp,
    keeping the Step 9 application-plan payload and the Step 13 equivalence
    comparison free of ``None`` drift on the reuse path.
    """

    def _candidate_record(candidate: Any) -> dict[str, Any]:
        # Only v4_rank is optional on a candidate.
        return {
            "template_id": candidate.template_id,
            "frame_id": candidate.frame_id,
            "frame_number": candidate.frame_number,
            "confidence": float(candidate.confidence),
            "label": candidate.label,
            "v4_rank": getattr(candidate, "v4_rank", None),
        }

    record: dict[str, Any] = {
        "source_section_ids": list(unit.source_section_ids),
    }
    for name in ("merge_type", "frame_template_id", "frame_id", "frame_number"):
        record[name] = getattr(unit, name)
    record["confidence"] = float(unit.confidence)
    for name in (
        "label",
        "phase_z_status",
        "raw_content",
        "title",
        "v4_rank",
        "selection_path",
        "fallback_reason",
    ):
        record[name] = getattr(unit, name)
    record["score"] = float(unit.score)
    record["rationale"] = dict(unit.rationale or {})
    record["auto_selectable"] = bool(unit.auto_selectable)
    record["filter_reasons"] = list(unit.filter_reasons or [])
    record["notes"] = list(unit.notes or [])
    record["v4_candidates"] = [
        _candidate_record(candidate) for candidate in (unit.v4_candidates or [])
    ]
    record["provisional"] = bool(getattr(unit, "provisional", False))
    return record
def build_snapshot(
    *,
    mdx_sha256: str,
    slide_title: Optional[str],
    slide_footer: Optional[str],
    sections: list,
    stage0_adapter_diagnostics: Optional[dict],
    stage0_normalized_assets: Optional[dict],
    v4_evidence: list,
    layout_preset_pre_override: Optional[str],
    units: list,
    comp_debug: Optional[dict],
    v4_fallback_traces: Optional[dict],
    ai_preflight: Optional[dict],
) -> dict[str, Any]:
    """Assemble the Step 6 reuse snapshot as a plain, JSON-ready dict.

    ``schema_version`` and ``mdx_sha256`` are stored as bare scalars. Every
    other argument is stored under its own name as a
    ``{value, source_path, upstream_step}`` wrapper. Dict / list arguments
    are shallow-copied, with ``None`` (or any falsy value) becoming an empty
    container; ``sections`` and ``units`` are passed element by element
    through ``serialize_section`` / ``serialize_unit``.

    Before returning, the snapshot is run through ``json.dumps`` purely as a
    serializability probe (the encoded text is discarded), so a value the
    JSON encoder cannot handle (set, Path, dataclass instance, ...) raises
    ``TypeError`` here, at build time, rather than later at restore.
    """
    # (key, value, source_path, upstream_step) rows, in output order.
    wrapped_fields = (
        (
            "slide_title",
            slide_title,
            "steps/step02_normalized.json#/slide_title",
            "step02",
        ),
        (
            "slide_footer",
            slide_footer,
            "steps/step02_normalized.json#/slide_footer",
            "step02",
        ),
        (
            "sections",
            [serialize_section(s) for s in sections],
            "steps/step02_normalized.json#/sections",
            "step02",
        ),
        (
            "stage0_adapter_diagnostics",
            dict(stage0_adapter_diagnostics or {}),
            "steps/step02_normalized.json#/stage0_adapter_diagnostics",
            "step02",
        ),
        (
            "stage0_normalized_assets",
            dict(stage0_normalized_assets or {}),
            "steps/step02_normalized.json#/stage0_normalized_assets",
            "step02",
        ),
        (
            "v4_evidence",
            list(v4_evidence or []),
            "steps/step05_v4_evidence.json#/evidence_per_section",
            "step05",
        ),
        (
            "layout_preset_pre_override",
            layout_preset_pre_override,
            "steps/step06_composition_plan.json#/layout_preset_decided",
            "step06",
        ),
        (
            "units",
            [serialize_unit(u) for u in units],
            "steps/step06_composition_plan.json#/selected_units",
            "step06",
        ),
        (
            "comp_debug",
            dict(comp_debug or {}),
            "steps/step06_composition_plan.json#/*",
            "step06",
        ),
        (
            "v4_fallback_traces",
            dict(v4_fallback_traces or {}),
            # v4_fallback_traces is assembled inside run_phase_z2_mvp1
            # (phase_z2_pipeline.py, around the Step 5/6 boundary) and only
            # partially surfaces in step06_composition_plan.json via the
            # v4_fallback_summary / imp48_resplit fields. The canonical
            # untruncated source is the in-memory dict at the end of Step 6,
            # so provenance points at that symbol rather than a file.
            "phase_z2_pipeline.run_phase_z2_mvp1::v4_fallback_traces",
            "step06",
        ),
        (
            "ai_preflight",
            dict(ai_preflight or {}),
            "steps/step00_preconditions.json#/ai_preflight",
            "step00",
        ),
    )

    # Bare contract / integrity keys first, then the wrapped entries.
    snapshot: dict[str, Any] = {
        "schema_version": SNAPSHOT_VERSION,
        "mdx_sha256": mdx_sha256,
    }
    for key, value, source_path, upstream_step in wrapped_fields:
        snapshot[key] = _wrap(
            value, source_path=source_path, upstream_step=upstream_step
        )

    # Serializability probe only; the encoded string is intentionally dropped.
    json.dumps(snapshot)
    return snapshot
class SnapshotValidationError(ValueError):
    """Signal that a reuse snapshot cannot be trusted.

    ``validate_snapshot`` raises this both for structural problems and for
    an ``mdx_sha256`` integrity mismatch.

    It derives from ``ValueError`` on purpose: callers that already handle
    ``ValueError`` keep catching it, so it does not escape to the outer CLI.
    Unit u4b is planned to add a narrower
    ``except SnapshotValidationError`` clause.
    """
def validate_snapshot(
    snapshot: Any,
    *,
    expected_mdx_sha256: str,
) -> None:
    """Validate a loaded snapshot dict, failing closed on the first problem.

    Checks run in this order:

    1. ``snapshot`` is a ``dict``.
    2. ``schema_version`` equals ``SNAPSHOT_VERSION`` and has the same type.
    3. ``mdx_sha256`` is a non-empty string equal to ``expected_mdx_sha256``.
    4. Every key in ``REQUIRED_TOP_LEVEL_KEYS`` is present.
    5. Every top-level entry outside ``_BARE_KEYS`` is a dict exposing
       ``value``, ``source_path`` and ``upstream_step``.

    Args:
        snapshot: The object decoded from the snapshot file.
        expected_mdx_sha256: Digest of the current MDX bytes; the snapshot
            must have been produced from identical bytes.

    Returns:
        ``None`` on success.

    Raises:
        SnapshotValidationError: On the first failing check. Callers (u4b)
            translate this into an exit-code-2 abort with the failing axis
            surfaced as value + path + upstream.
    """
    if not isinstance(snapshot, dict):
        raise SnapshotValidationError(
            f"snapshot is not a dict (got {type(snapshot).__name__})"
        )

    version = snapshot.get("schema_version")
    # In Python ``True == 1`` and ``1.0 == 1``, so a bare ``!=`` would let a
    # corrupt ``"schema_version": true`` (or ``1.0``) pass as version 1.
    # Requiring the exact type keeps this gate fail-closed.
    if type(version) is not type(SNAPSHOT_VERSION) or version != SNAPSHOT_VERSION:
        raise SnapshotValidationError(
            f"schema_version mismatch: expected {SNAPSHOT_VERSION!r}, got {version!r}"
        )

    actual_sha = snapshot.get("mdx_sha256")
    if not isinstance(actual_sha, str) or not actual_sha:
        raise SnapshotValidationError(
            f"mdx_sha256 missing or non-string: got {actual_sha!r}"
        )
    if actual_sha != expected_mdx_sha256:
        raise SnapshotValidationError(
            f"mdx_sha256 mismatch: snapshot={actual_sha!r} "
            f"expected={expected_mdx_sha256!r}"
        )

    missing = [k for k in REQUIRED_TOP_LEVEL_KEYS if k not in snapshot]
    if missing:
        raise SnapshotValidationError(
            f"missing required keys: {missing!r}"
        )

    # Every non-bare entry must be a wrapper, including keys that are not
    # listed in REQUIRED_TOP_LEVEL_KEYS.
    for key, entry in snapshot.items():
        if key in _BARE_KEYS:
            continue
        if not isinstance(entry, dict):
            raise SnapshotValidationError(
                f"key {key!r}: expected wrapper dict, got {type(entry).__name__}"
            )
        for field_name in ("value", "source_path", "upstream_step"):
            if field_name not in entry:
                raise SnapshotValidationError(
                    f"key {key!r}: wrapper missing {field_name!r}"
                )