"""IMP-52 (#80) u1 — user_overrides.json persistence layer (backend IO).

Persists the CLI-wired override axes per MDX so a subsequent render
auto-restores user choices without re-clicking. Source of truth = MDX-keyed
file (stem of the MDX path), NOT ``data/runs/<run_id>/`` which mints a fresh
run_id per ``/api/run`` invocation.

Schema (9 axes; stable order; IMP-51 #79 u1 added ``image_overrides``;
IMP-45 #74 u1 added ``slide_css``; IMP-55 #93 u1 added
``manual_section_assignment`` as a bool intent marker so the backend can
distinguish a user drag-drop from frontend auto-carry zone_sections;
IMP-56 #90 u1 added ``text_overrides`` as a Step-22 text-edit persist axis
keyed by ``{zone_id: {text_path: value}}`` where ``text_path`` is the
``{slot_key}.{line_index}`` stamp emitted by u8; IMP-56 #90 u2 added
``structure_overrides`` as a Step-22 structure-edit persist axis keyed by
``{zone_id: {"slot_order": [<slot_key>, ...], "hidden_slots": [<slot_key>, ...]}}``
— scope is intentionally LOCKED to slot reorder + hide; frame swap stays on
the existing ``frames`` axis to prevent the Phase Z regression of AI-driven
HTML structure mutation):

    {
      "layout": <layout>,
      "zone_geometries": {<zone_id>: {"x": float, "y": float, "w": float, "h": float}},
      "zone_sections": {<zone_id>: [<section_id>, ...]},
      "frames": {<unit_id>: <frame>},
      "image_overrides": {<image_id>: {"x": float, "y": float, "w": float, "h": float}},
      "slide_css": <css>,
      "manual_section_assignment": <bool>,
      "text_overrides": {<zone_id>: {<text_path>: <value>}},
      "structure_overrides": {<zone_id>: {"slot_order": [<slot_key>, ...],
                                          "hidden_slots": [<slot_key>, ...]}}
    }

NOTE(review): the ``<...>`` placeholders above were reconstructed from the
surrounding prose after they had been stripped from this docstring;
``<layout>``, ``<section_id>``, ``<frame>`` and ``<css>`` in particular are
best guesses — confirm against the axis owners.

``image_id`` is the stable identifier emitted by the user-content image
stamper (IMP-51 u4) and matched via the selector
``.slide img[data-image-role="user-content"]``. Coordinates are
percent-of-slide (zone-agnostic, slide-absolute) to match the SlideCanvas
edit-mode handle conventions in IMP-51 u8~u11.

``unit_id`` is the convention already used by ``--override-frame`` :
``"+".join(source_section_ids)`` (e.g., ``"03-1"`` or ``"03-1+03-2"``).

Behavior :
  - ``load(key)`` — file missing or corrupt → ``{}`` (warning to stderr on
    corrupt).
  - ``save(key, partial)`` — merges only the supplied axes onto the existing
    file, preserving (a) unknown top-level keys (foreign-key preserve) and
    (b) axes not present in the partial payload. Atomic write via tmp+rename.
  - ``override_path(key, root=None)`` — resolves the persistence path under
    ``data/user_overrides/<key>.json``.

Guardrails (refs : ``user_overrides_io`` Stage 2 lock) :
  - Deterministic code, no AI fallback.
  - ``key`` validation rejects path traversal / separators / dot-prefix.
  - ``save`` is a deep-shallow merge — per-axis dict mutation does not delete
    prior keys unless caller passes ``None`` for that axis (explicit clear).
"""

from __future__ import annotations

import json
import os
import re
import sys
import tempfile
from pathlib import Path
from typing import Any, Optional

# Persistence root — MDX-keyed, decoupled from data/runs/<run_id>/.
# Resolved at call time so tests can monkeypatch via ``root=`` parameter.
_PKG_ROOT = Path(__file__).resolve().parent.parent
DEFAULT_OVERRIDES_ROOT = _PKG_ROOT / "data" / "user_overrides"

# The nine in-scope axes (IMP-51 #79 u1 added ``image_overrides``; IMP-45
# #74 u1 added ``slide_css``; IMP-55 #93 u1 added
# ``manual_section_assignment`` — bool intent marker that gates whether
# persisted ``zone_sections`` are consumed by the backend pipeline; IMP-56
# #90 u1 added ``text_overrides`` — Step-22 text-edit persist axis keyed by
# ``{zone_id: {text_path: value}}`` where ``text_path`` is the
# ``{slot_key}.{line_index}`` stamp emitted by u8 / consumed by u4+u5;
# IMP-56 #90 u2 added ``structure_overrides`` — Step-22 structure-edit
# persist axis keyed by ``{zone_id: {"slot_order": [...], "hidden_slots":
# [...]}}``, scope LOCKED to slot reorder + hide so the resolver (u6) /
# Step-12 apply (u7) cannot mutate frame identity — frame swap stays on
# the existing ``frames`` axis to keep Phase Z's no-AI-HTML-structure
# invariant intact).
# Any other top-level key in the file is preserved but
# ignored by callers — keeps the file forward-compatible with future axes
# (e.g., zone_sizes) without a schema bump here.
KNOWN_AXES: tuple[str, ...] = (
    "layout",
    "zone_geometries",
    "zone_sections",
    "frames",
    "image_overrides",
    "slide_css",
    "manual_section_assignment",
    "text_overrides",
    "structure_overrides",
)

# Key validation — MDX stem must be safe for filesystem use. Allow
# alphanumerics, underscore, hyphen, and dot in the middle (sample stems
# are e.g. ``01``, ``03``, ``03__DX...``). Reject leading dot, path
# separators, and traversal.
_KEY_RE = re.compile(r"^[A-Za-z0-9_][A-Za-z0-9_.\-]*$")


class InvalidOverrideKey(ValueError):
    """Raised when ``key`` is not a safe MDX stem."""


def validate_key(key: str) -> str:
    """Validate that ``key`` is a safe MDX stem; return it unchanged.

    Rejects non-strings, empty strings, path separators (``/`` ``\\``),
    traversal (``..``), a leading dot, and any character outside
    alphanumerics / ``_`` / ``-`` / ``.`` — including a trailing newline.
    Callers should pass ``Path(mdx_path).stem``.

    Raises:
        InvalidOverrideKey: if ``key`` fails any of the checks above.
    """
    if not isinstance(key, str) or not key:
        raise InvalidOverrideKey(f"key must be a non-empty string, got: {key!r}")
    # ``fullmatch`` rather than ``match``: the pattern's ``$`` anchor also
    # matches just before a trailing "\n", so ``match`` would accept
    # "name\n" and the newline would end up inside the file name.
    if _KEY_RE.fullmatch(key) is None:
        raise InvalidOverrideKey(
            f"key must match {_KEY_RE.pattern!r} (alphanumerics, '_', '-', '.'; "
            f"no leading dot, no separators); got: {key!r}"
        )
    # The pattern allows dots in the middle, so "a..b" needs its own check.
    if ".." in key:
        raise InvalidOverrideKey(f"key must not contain '..'; got: {key!r}")
    return key


def override_path(key: str, root: Optional[Path] = None) -> Path:
    """Resolve the on-disk path for ``key``'s override file.

    Returns ``<root>/<key>.json``; ``root`` falls back to
    ``DEFAULT_OVERRIDES_ROOT`` when it is ``None``. Nothing is created on
    disk.

    Raises:
        InvalidOverrideKey: if ``key`` does not pass ``validate_key``.
    """
    validate_key(key)
    base = Path(root) if root is not None else DEFAULT_OVERRIDES_ROOT
    return base / f"{key}.json"


def load(key: str, root: Optional[Path] = None) -> dict[str, Any]:
    """Load persisted overrides for ``key``.

    Missing file → ``{}``. Unreadable file, invalid UTF-8, invalid JSON, or a
    top-level JSON value that is not an object → warning to stderr + ``{}``.
    Returns the raw mapping (including any foreign keys); callers should
    pick the KNOWN_AXES they care about.

    Raises:
        InvalidOverrideKey: if ``key`` does not pass ``validate_key``.
    """
    path = override_path(key, root=root)
    try:
        with path.open("r", encoding="utf-8") as f:
            data = json.load(f)
    except FileNotFoundError:
        # No overrides persisted yet — the normal first-run case, so no warning.
        return {}
    except (OSError, ValueError) as exc:
        # ValueError covers both json.JSONDecodeError (malformed JSON) and
        # UnicodeDecodeError (bytes that are not valid UTF-8).
        print(
            f"[user_overrides_io] warning: failed to read {path} ({exc}); "
            f"treating as empty.",
            file=sys.stderr,
        )
        return {}
    if not isinstance(data, dict):
        print(
            f"[user_overrides_io] warning: {path} is not a JSON object "
            f"(got {type(data).__name__}); treating as empty.",
            file=sys.stderr,
        )
        return {}
    return data


def save(key: str, partial: dict[str, Any], root: Optional[Path] = None) -> Path:
    """Merge ``partial`` onto the persisted overrides for ``key`` and write atomically.

    Merge semantics :
      - Only keys present in ``partial`` are mutated. Other axes (including
        foreign keys outside KNOWN_AXES) are preserved verbatim.
      - For each axis present in ``partial``, the new value REPLACES the
        prior value (no per-zone deep-merge). Callers that want to add a
        single zone must read → mutate → save with the full updated axis
        dict.
      - Pass ``None`` for an axis to clear it (remove the key from the file).

    The existing file is read through ``load``, so a corrupt file is treated
    as empty and is overwritten by this call.

    Returns:
        The path of the written override file.

    Raises:
        TypeError: if ``partial`` is not a dict.
        InvalidOverrideKey: if ``key`` does not pass ``validate_key``.
    """
    if not isinstance(partial, dict):
        raise TypeError(
            f"partial must be a dict, got {type(partial).__name__}: {partial!r}"
        )
    path = override_path(key, root=root)
    path.parent.mkdir(parents=True, exist_ok=True)
    current = load(key, root=root)
    for axis_key, axis_value in partial.items():
        if axis_value is None:
            # Explicit clear: drop the axis; a no-op when it was never stored.
            current.pop(axis_key, None)
        else:
            current[axis_key] = axis_value
    _atomic_write_json(path, current)
    return path


def _atomic_write_json(path: Path, data: dict[str, Any]) -> None:
    """Write ``data`` to ``path`` atomically via tmp file + os.replace.

    The temporary file is created in ``path.parent`` and renamed over
    ``path`` only after the JSON has been written and the file closed. On
    any failure the temporary file is removed and the exception re-raised.
    """
    fd, tmp_name = tempfile.mkstemp(
        prefix=f".{path.name}.", suffix=".tmp", dir=str(path.parent)
    )
    try:
        with os.fdopen(fd, "w", encoding="utf-8") as f:
            json.dump(data, f, ensure_ascii=False, indent=2, sort_keys=True)
            f.write("\n")
        # Rename only after the handle is closed.
        os.replace(tmp_name, path)
    except BaseException:
        # Best-effort cleanup (also on KeyboardInterrupt); the original
        # error is what the caller needs to see, so unlink failures are
        # deliberately ignored.
        try:
            os.unlink(tmp_name)
        except OSError:
            pass
        raise