feat(IMP-13): A-3 — build-time frame preview generator (capture_slide_screenshot salvage)

scripts/generate_frame_previews.py iterates figma_to_html_agent/blocks/{frame_id}/index.html,
renders preview.png via Selenium headless (capture_slide_screenshot pattern reuse), and writes
_preview_manifest.json (schema v1) with idempotent stale-detect (mtime+sha256). Build-time only
— no runtime pipeline integration, no AI calls, no MDX/Jinja regen. Stage 2 baseline (commit
56619a0): total=33, renderable=20, missing_index_html=13, orphan=1 (1171281192).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-18 06:25:05 +09:00
parent 56619a0239
commit 7d5639ad72
2 changed files with 239 additions and 0 deletions

View File

@@ -0,0 +1,189 @@
"""IMP-13 build-time preview.png renderer for figma_to_html_agent/blocks/<frame_id> (u1-u6)."""
from __future__ import annotations
import argparse, hashlib, json, sys
from dataclasses import dataclass
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional
# Two levels above this file: the parent of the directory that holds this script.
REPO_ROOT = Path(__file__).resolve().parent.parent
# Default directory scanned for per-frame block folders (overridable with --blocks-dir).
DEFAULT_BLOCKS_DIR = REPO_ROOT / "figma_to_html_agent" / "blocks"
# Default manifest location, inside the default blocks directory (overridable with --manifest).
DEFAULT_MANIFEST = DEFAULT_BLOCKS_DIR / "_preview_manifest.json"
@dataclass(frozen=True)
class FrameRow:
    """Immutable record for one sub-directory of the blocks directory.

    The two ``has_*`` flags are snapshots taken when the row is built; they are
    not refreshed if files appear or disappear afterwards.
    """

    # Directory name; used as the frame identifier and as the manifest key.
    frame_id: str
    # The frame's own directory.
    block_dir: Path
    # Where the frame's index.html is expected (may not exist; see has_index).
    index_html_path: Path
    # Where the frame's preview.png is read from / written to (may not exist; see has_preview).
    preview_png_path: Path
    # True when index.html was a regular file at discovery time.
    has_index: bool
    # True when preview.png was a regular file at discovery time.
    has_preview: bool
def discover(blocks_dir: Path) -> List[FrameRow]:
    """Return one FrameRow per sub-directory of *blocks_dir*, sorted by path.

    Plain files directly inside *blocks_dir* are ignored. A missing or
    non-directory *blocks_dir* yields an empty list rather than an error.
    """
    if not blocks_dir.is_dir():
        return []

    found: List[FrameRow] = []
    for child in sorted(blocks_dir.iterdir()):
        if child.is_dir():
            index_html = child / "index.html"
            preview_png = child / "preview.png"
            found.append(
                FrameRow(
                    frame_id=child.name,
                    block_dir=child,
                    index_html_path=index_html,
                    preview_png_path=preview_png,
                    has_index=index_html.is_file(),
                    has_preview=preview_png.is_file(),
                )
            )
    return found
def _build_driver() -> Any:
    """Create a headless Chrome WebDriver.

    A chromedriver binary sitting in the repository root is tried first
    (``chromedriver``, then ``chromedriver.exe``); if none exists or none
    starts, Selenium's default driver resolution is used. The window size is
    not configured here; the caller sets it per frame.

    Raises:
        RuntimeError: when the default start-up also fails. The message shows
            the last bundled-binary failure if there was one, otherwise the
            default start-up failure.
    """
    from selenium import webdriver
    from selenium.webdriver.chrome.options import Options
    from selenium.webdriver.chrome.service import Service

    chrome_opts = Options()
    for flag in ("--headless=new", "--no-sandbox", "--disable-dev-shm-usage"):
        chrome_opts.add_argument(flag)

    bundled_failure: Exception | None = None
    for binary_name in ("chromedriver", "chromedriver.exe"):
        binary = REPO_ROOT / binary_name
        if not binary.is_file():
            continue
        try:
            return webdriver.Chrome(service=Service(str(binary)), options=chrome_opts)
        except Exception as exc:  # noqa: BLE001 - remembered and reported if the fallback fails too
            bundled_failure = exc

    try:
        return webdriver.Chrome(options=chrome_opts)
    except Exception as exc:  # noqa: BLE001
        raise RuntimeError(f"selenium init failed: {bundled_failure or exc}") from exc
def render_one(driver: Any, row: FrameRow) -> tuple[int, int, Path]:
    """Render ``row.index_html_path`` to ``row.preview_png_path`` with *driver*.

    The page is loaded from its ``file://`` URI, web fonts are awaited, the
    ``.slide`` element is measured, the window is resized to that size and the
    element itself is screenshotted. No slide dimensions are hard-coded.

    Args:
        driver: A Selenium WebDriver; the caller owns its lifecycle.
        row: The frame to render.

    Returns:
        ``(width, height, preview_png_path)`` with the rounded ``.slide`` size.

    Raises:
        FileNotFoundError: ``row.has_index`` is false.
        RuntimeError: the page has no ``.slide`` element, or the element's
            bounding box has no area (nothing could be captured).
    """
    if not row.has_index:
        raise FileNotFoundError(f"missing index.html: {row.index_html_path}")

    driver.get(row.index_html_path.resolve().as_uri())
    # Give the font-loading promise up to 15 seconds before the async script times out.
    driver.set_script_timeout(15)
    driver.execute_async_script(
        "const cb=arguments[arguments.length-1];"
        "(document.fonts&&document.fonts.ready?document.fonts.ready:Promise.resolve()).then(()=>cb(true));"
    )
    rect = driver.execute_script(
        "const el=document.querySelector('.slide');"
        "if(!el)return null;"
        "const r=el.getBoundingClientRect();"
        "return [Math.round(r.width), Math.round(r.height)];"
    )
    if not rect:
        raise RuntimeError(f".slide not found in {row.index_html_path}")
    w, h = int(rect[0]), int(rect[1])
    # A [0, 0] result is a truthy list, so it needs its own check: resizing the
    # window to nothing and screenshotting an empty element only produces an
    # opaque WebDriver failure.
    if w <= 0 or h <= 0:
        raise RuntimeError(f".slide has an empty bounding box ({w}x{h}) in {row.index_html_path}")
    driver.set_window_size(w, h)

    # Imported here because only the element lookup needs it.
    from selenium.webdriver.common.by import By

    el = driver.find_element(By.CSS_SELECTOR, ".slide")
    # The PNG bytes are fully obtained before the file is opened for writing.
    row.preview_png_path.write_bytes(el.screenshot_as_png)
    return w, h, row.preview_png_path
def _sha256_file(path: Path) -> str:
    """Return the hex SHA-256 digest of the file at *path*, read in 64 KiB pieces."""
    digest = hashlib.sha256()
    with path.open("rb") as stream:
        while True:
            piece = stream.read(65536)
            if not piece:
                break
            digest.update(piece)
    return digest.hexdigest()
def is_unchanged(row: FrameRow, last_entry: Optional[Dict[str, Any]]) -> bool:
    """Decide whether a frame's existing preview can be kept without re-rendering.

    Returns True only when all of these hold:
      * *last_entry* is a dict carrying a non-empty ``index_sha256``,
      * the row had both index.html and preview.png at discovery time,
      * preview.png's mtime is not older than index.html's mtime,
      * index.html's current SHA-256 equals the recorded one.

    Anything else returns False, including a missing prior entry, an entry of
    the wrong type (for example from a hand-edited manifest), and files that
    cannot be stat'ed.
    """
    # A manifest is external input: treat a non-dict entry like "no entry"
    # instead of failing on .get() below.
    if not isinstance(last_entry, dict) or not row.has_index or not row.has_preview:
        return False
    try:
        idx_mtime = row.index_html_path.stat().st_mtime
        png_mtime = row.preview_png_path.stat().st_mtime
    except OSError:
        return False
    if png_mtime < idx_mtime:
        return False
    recorded = last_entry.get("index_sha256")
    if not recorded:
        return False
    # Hash last: it is the only step that reads the whole file.
    return _sha256_file(row.index_html_path) == recorded
def categorize(rows: List[FrameRow]) -> Dict[str, List[FrameRow]]:
    """Sort discovered rows into named buckets so every frame is accounted for.

    Buckets (input order is preserved inside each):
      * ``renderable``: rows that have an index.html.
      * ``missing_index_html``: rows without an index.html.
      * ``orphan``: rows without an index.html that still have a preview.png.

    The buckets deliberately overlap: every orphan is also listed under
    ``missing_index_html``.
    """
    buckets: Dict[str, List[FrameRow]] = {
        "renderable": [],
        "missing_index_html": [],
        "orphan": [],
    }
    for row in rows:
        if row.has_index:
            buckets["renderable"].append(row)
            continue
        buckets["missing_index_html"].append(row)
        if row.has_preview:
            buckets["orphan"].append(row)
    return buckets
def _load_manifest(path: Path) -> Dict[str, Any]:
    """Read a previous manifest, tolerating any failure.

    Returns the parsed JSON object, or an empty dict when the file cannot be
    read, is not valid JSON, or its top-level value is not an object.
    """
    try:
        raw = path.read_text(encoding="utf-8")
        parsed = json.loads(raw)
    except Exception:
        # Best effort: an unusable manifest only means every frame is re-evaluated.
        return {}
    if isinstance(parsed, dict):
        return parsed
    return {}
def _render_entry(row: FrameRow, w: int, h: int) -> Dict[str, Any]:
    """Build the manifest entry for a frame that was just rendered.

    Records the index.html digest and mtime, the preview.png mtime, and the
    viewport size used for the capture. Both files must exist.
    """
    entry: Dict[str, Any] = {"status": "rendered"}
    entry["index_sha256"] = _sha256_file(row.index_html_path)
    entry["index_mtime"] = row.index_html_path.stat().st_mtime
    entry["preview_mtime"] = row.preview_png_path.stat().st_mtime
    entry["viewport"] = {"w": w, "h": h}
    return entry
def main(argv: Iterable[str] | None = None) -> int:
    """Command-line entry point: render stale previews and write the manifest.

    Args:
        argv: Argument list without the program name; ``None`` lets argparse
            read ``sys.argv``.

    Returns:
        0 on success (and always for ``--dry-run``), 1 when any frame ended
        in the ``error`` state.

    With ``--dry-run`` only discovery counts are printed; no browser is
    started and nothing is written. Otherwise each renderable frame is either
    kept (``skipped_unchanged``), rendered, or recorded as ``error``; frames
    without index.html are recorded as ``missing_index_html`` or ``orphan``.
    The browser is started lazily, so a run in which nothing is stale never
    launches it.
    """
    p = argparse.ArgumentParser(prog="generate_frame_previews", description="IMP-13 build-time preview.png renderer.")
    p.add_argument("--blocks-dir", type=Path, default=DEFAULT_BLOCKS_DIR)
    p.add_argument("--manifest", type=Path, default=DEFAULT_MANIFEST)
    p.add_argument("--dry-run", action="store_true")
    args = p.parse_args(list(argv) if argv is not None else None)

    rows = discover(args.blocks_dir)
    if args.dry_run:
        wi = sum(1 for r in rows if r.has_index)
        wp = sum(1 for r in rows if r.has_preview)
        print(f"discovered: total={len(rows)} with_index_html={wi} with_preview_png={wp}")
        return 0

    # The previous manifest is external input; anything that is not the
    # expected mapping is treated as "no history".
    prev_frames = _load_manifest(args.manifest).get("frames")
    if not isinstance(prev_frames, dict):
        prev_frames = {}

    buckets = categorize(rows)
    frames: Dict[str, Dict[str, Any]] = {}
    counts = {"rendered": 0, "skipped_unchanged": 0, "error": 0}
    driver = None
    driver_error: Optional[str] = None  # set once if the browser cannot be started
    try:
        for r in buckets["renderable"]:
            last = prev_frames.get(r.frame_id)
            if not isinstance(last, dict):
                last = None
            if is_unchanged(r, last):
                frames[r.frame_id] = {**last, "status": "skipped_unchanged"}
                counts["skipped_unchanged"] += 1
                continue
            if driver is None and driver_error is None:
                try:
                    driver = _build_driver()
                except Exception as exc:  # noqa: BLE001
                    # Try only once; the failure is reported for every frame
                    # that needed rendering so the manifest is still written.
                    driver_error = str(exc) or type(exc).__name__
            if driver is None:
                frames[r.frame_id] = {"status": "error", "error": driver_error}
                counts["error"] += 1
                continue
            try:
                w, h, _ = render_one(driver, r)
                frames[r.frame_id] = _render_entry(r, w, h)
                counts["rendered"] += 1
            except Exception as exc:  # noqa: BLE001
                frames[r.frame_id] = {"status": "error", "error": str(exc)}
                counts["error"] += 1
    finally:
        if driver is not None:
            try:
                driver.quit()
            except Exception:  # noqa: BLE001
                # Best-effort shutdown; a failing quit must not mask the results.
                pass

    orphan_ids = {r.frame_id for r in buckets["orphan"]}
    for r in buckets["missing_index_html"]:
        frames[r.frame_id] = {"status": "orphan" if r.frame_id in orphan_ids else "missing_index_html", "has_preview": r.has_preview}

    summary = {"total": len(rows), "renderable": len(buckets["renderable"]), "missing_index_html": len(buckets["missing_index_html"]), "orphan": len(buckets["orphan"]), **counts}
    payload = {"schema": 1, "generated_at": datetime.now(timezone.utc).isoformat(), "blocks_dir": str(args.blocks_dir), "summary": summary, "frames": frames}
    # Make sure the target directory exists so finished work is not lost to a
    # missing parent folder.
    args.manifest.parent.mkdir(parents=True, exist_ok=True)
    args.manifest.write_text(json.dumps(payload, indent=2, sort_keys=True), encoding="utf-8")
    print(f"coverage: total={summary['total']} renderable={summary['renderable']} rendered={counts['rendered']} skipped_unchanged={counts['skipped_unchanged']} missing_index_html={summary['missing_index_html']} orphan={summary['orphan']} error={counts['error']}")
    return 1 if counts["error"] else 0
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())

View File

@@ -0,0 +1,50 @@
"""IMP-13 u7 smoke — discovery, source invariants, dry-run, idempotency, manifest schema."""
from __future__ import annotations
import json, os, re, sys
from pathlib import Path
# Locate the script under test relative to this file and put its directory on
# sys.path so it can be imported as a plain module.
REPO_ROOT = Path(__file__).resolve().parent.parent
SCRIPT_PATH = REPO_ROOT / "scripts" / "generate_frame_previews.py"
sys.path.insert(0, str(SCRIPT_PATH.parent))
import generate_frame_previews as gfp # noqa: E402
def _fixture(root: Path) -> Path:
    """Create a three-frame blocks tree under *root* and return its path.

    FRAME_A has both index.html and preview.png, FRAME_B is empty, and ORPHAN
    has only a preview.png.
    """
    blocks = root / "blocks"

    frame_a = blocks / "FRAME_A"
    frame_a.mkdir(parents=True)
    (frame_a / "index.html").write_text("<html><body class=slide></body></html>", encoding="utf-8")
    (frame_a / "preview.png").write_bytes(b"\x89PNG\r\n\x1a\n")

    (blocks / "FRAME_B").mkdir()

    orphan = blocks / "ORPHAN"
    orphan.mkdir()
    (orphan / "preview.png").write_bytes(b"x")

    return blocks
def test_discover_counts(tmp_path: Path) -> None:
    """discover() lists the fixture frames in sorted order with correct presence flags."""
    found = gfp.discover(_fixture(tmp_path))
    names = [row.frame_id for row in found]
    assert names == ["FRAME_A", "FRAME_B", "ORPHAN"]
    index_total = sum(row.has_index for row in found)
    assert index_total == 1
    preview_total = sum(row.has_preview for row in found)
    assert preview_total == 2
def test_source_invariants() -> None:
    """The script source must not mention banned integrations or hard-coded slide sizes."""
    text = SCRIPT_PATH.read_text(encoding="utf-8")

    banned_tokens = ("anthropic", "openai", "jinja", "phase_z2", "slide_measurer")
    for token in banned_tokens:
        assert token not in text, token

    # Match each number only as a standalone literal, not as part of a longer digit run.
    banned_literals = ("1280", "720", "1400", "900")
    for literal in banned_literals:
        pattern = rf"(?<!\d){literal}(?!\d)"
        assert not re.search(pattern, text), literal
def test_dry_run_prints_counts(tmp_path: Path, capsys) -> None:
    """--dry-run exits 0 and prints the discovery counts for the fixture tree."""
    blocks = _fixture(tmp_path)
    manifest = tmp_path / "m.json"
    exit_code = gfp.main(["--blocks-dir", str(blocks), "--manifest", str(manifest), "--dry-run"])
    assert exit_code == 0
    printed = capsys.readouterr().out
    assert "discovered: total=3 with_index_html=1 with_preview_png=2" in printed
def test_idempotency_unchanged(tmp_path: Path) -> None:
    """is_unchanged() is True only for a newer preview plus a matching recorded digest."""
    frame = gfp.discover(_fixture(tmp_path))[0]

    # Make the preview one second newer than index.html so the mtime check passes.
    newer = frame.index_html_path.stat().st_mtime + 1
    os.utime(frame.preview_png_path, (newer, newer))

    digest = gfp._sha256_file(frame.index_html_path)
    expectations = [
        ({"index_sha256": digest}, True),
        ({"index_sha256": "x"}, False),
        (None, False),
    ]
    for prior_entry, expected in expectations:
        assert gfp.is_unchanged(frame, prior_entry) is expected
def test_manifest_schema(tmp_path: Path) -> None:
    """A run over a single orphan frame writes a schema-1 manifest with all expected keys."""
    blocks = tmp_path / "blocks"
    frame_dir = blocks / "F"
    frame_dir.mkdir(parents=True)
    (frame_dir / "preview.png").write_bytes(b"x")
    mf = tmp_path / "m.json"

    exit_code = gfp.main(["--blocks-dir", str(blocks), "--manifest", str(mf)])
    assert exit_code == 0

    data = json.loads(mf.read_text(encoding="utf-8"))
    required_top = {"schema", "generated_at", "blocks_dir", "summary", "frames"}
    assert set(data) >= required_top
    assert data["schema"] == 1

    summary = data["summary"]
    required_summary = {"total", "renderable", "missing_index_html", "orphan", "rendered", "skipped_unchanged", "error"}
    assert set(summary) >= required_summary
    assert summary["orphan"] == 1
    assert data["frames"]["F"]["status"] == "orphan"