feat(IMP-16): Step 14 table_self_overflow detection

Add table self-overflow detection with element-identity wrapper dedup,
mirroring the image_aspect_mismatch axis pattern (#45).

JS layer: TABLE_SCROLL_TOL_PX=5 module constant; clippedWrapperMap
built as Map<Element,int> keyed by DOM node reference (NOT className)
so two wrappers with identical class strings remain distinguishable;
table_events collected via querySelectorAll('table').forEach with
closest()-ancestor walk resolving wrapper_clipped_index = int|null.

Py layer: aggregate result['table_events'] and append fail_reason
'table_self_overflow' only when (excess_x>TOL OR excess_y>TOL)
AND wrapper_clipped_index is None; wrapper-clipped path continues
to fail via existing clipped_inner reporting.

Tests (Selenium, chromedriver guard mirrored from image_check):
- Fixture D: standalone <table> overflow → table_self_overflow fail
- Fixture E: <table> in clipped wrapper → dedup suppresses table fail
- Fixture F (F1 acceptance): two wrappers with identical className
  f13b-cell, W1 clipped by non-table child, W2 hosts self-overflow
  <table> with W2 itself NOT clipped → element-identity ensures W2's
  table is not suppressed by W1's class; both fails emitted.

Out of scope: image_events behavior (intact from #45), classifier
pass/fail consumer (→실행-3), debug.json surfacing (→실행-4).

Refs: #46

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-18 21:06:01 +09:00
parent f3bff898fb
commit 2827622858
2 changed files with 421 additions and 1 deletions

View File

@@ -133,6 +133,11 @@ DEFAULT_ZONE_MIN_HEIGHT_PX = 100
# Spec doc row (PHASE-Z-FIT-CLASSIFIER-ROUTER-SPEC) update deferred to IMP-15 실행-4.
IMAGE_ASPECT_DELTA_TOL = 0.05
# Step 14 table_self_overflow tolerance — (scrollW − clientW) or (scrollH − clientH) > TOL ⇒ fail.
# Local anchor : IMP-15 실행-2 (Gitea issue #46) — table axis acceptance criteria.
# Mirrors existing inline 5px tolerance used by slide/zone/clipped scans in run_overflow_check.
TABLE_SCROLL_TOL_PX = 5
# content_weight 계산 가중치
CONTENT_WEIGHT_COEFFS = {
"text_per_chars": 800, # text_len / 800 = score
@@ -2131,6 +2136,14 @@ def run_overflow_check(html_path: Path) -> dict:
const zones = [];
const zone_geometries_px = [];
// IMP-15 실행-2 (issue #46) — element-identity dedup map for table_events.
// Map<Element, integer> keyed by DOM node reference (NOT class string) so that
// two wrappers sharing identical className resolve to distinct map entries.
// Populated alongside the existing per-zone clipped_inner scan below.
const clippedWrapperMap = new Map();
let clippedIdxCounter = 0;
slide.querySelectorAll('.zone').forEach((z) => {
const pos = z.getAttribute('data-zone-position') || 'unknown';
const tid = z.getAttribute('data-template-id') || '?';
@@ -2178,6 +2191,13 @@ def run_overflow_check(html_path: Path) -> dict:
scrollWidth: el.scrollWidth,
scrollHeight: el.scrollHeight,
});
// IMP-15 실행-2 (issue #46) — element-identity registration.
// Key by DOM node `el`, NOT className: two wrappers with identical
// class string still hash to distinct Map entries.
if (!clippedWrapperMap.has(el)) {
clippedWrapperMap.set(el, clippedIdxCounter);
clippedIdxCounter++;
}
}
});
m.clipped_inner = clipped;
@@ -2259,7 +2279,52 @@ def run_overflow_check(html_path: Path) -> dict:
});
});
return { slide: slideM, slide_body: bodyM, zones, frame_slot_metrics, zone_geometries_px, image_events };
// IMP-15 실행-2 (issue #46) — table_events[] for table_self_overflow detection.
// One entry per <table> under .slide. wrapper_clipped_index is the integer index
// (from clippedWrapperMap) of the nearest ancestor that is itself in the clipped
// wrapper set, or null. Element-identity walk (NOT className) so that two same-class
// wrappers (W1 clipped, W2 not) resolve independently for any contained <table>.
const table_events = [];
slide.querySelectorAll('table').forEach((tbl) => {
const parentZone = tbl.closest('.zone');
const zonePos = parentZone
? (parentZone.getAttribute('data-zone-position') || 'unknown')
: 'unknown';
const zoneTid = parentZone
? (parentZone.getAttribute('data-template-id') || '?')
: '?';
let wrapper_clipped_index = null;
let node = tbl.parentElement;
while (node && node !== slide) {
if (clippedWrapperMap.has(node)) {
wrapper_clipped_index = clippedWrapperMap.get(node);
break;
}
node = node.parentElement;
}
const tblRect = tbl.getBoundingClientRect();
const dx = tbl.scrollWidth - tbl.clientWidth;
const dy = tbl.scrollHeight - tbl.clientHeight;
table_events.push({
zone_position: zonePos,
zone_template_id: zoneTid,
clientWidth: tbl.clientWidth,
clientHeight: tbl.clientHeight,
scrollWidth: tbl.scrollWidth,
scrollHeight: tbl.scrollHeight,
excess_x: Math.max(0, dx),
excess_y: Math.max(0, dy),
wrapper_clipped_index: wrapper_clipped_index,
bbox: {
x: Math.round(tblRect.left - slideRect.left),
y: Math.round(tblRect.top - slideRect.top),
w: Math.round(tblRect.width),
h: Math.round(tblRect.height),
},
});
});
return { slide: slideM, slide_body: bodyM, zones, frame_slot_metrics, zone_geometries_px, image_events, table_events };
""")
screenshot_path = html_path.parent / "preview.png"
@@ -2318,6 +2383,27 @@ def run_overflow_check(html_path: Path) -> dict:
f"(template={tid}, tol={IMAGE_ASPECT_DELTA_TOL}, src={src})"
)
# IMP-15 실행-2 (issue #46) — table_self_overflow aggregation.
# Emit fail_reason only when (excess_x>TOL OR excess_y>TOL) AND wrapper_clipped_index is None.
# The clipped-wrapper case is already accounted for by the clipped_inner fail_reason above;
# element-identity dedup (clippedWrapperMap keyed by DOM node ref, NOT className) prevents
# double-counting and—critically—prevents two same-class wrappers from masking each other.
for ev in result.get("table_events", []):
if ev.get("wrapper_clipped_index") is not None:
continue
excess_x = ev.get("excess_x", 0) or 0
excess_y = ev.get("excess_y", 0) or 0
if excess_x > TABLE_SCROLL_TOL_PX or excess_y > TABLE_SCROLL_TOL_PX:
pos = ev.get("zone_position", "unknown")
tid = ev.get("zone_template_id", "?")
fail_reasons.append(
f"table self-overflow in zone--{pos}: "
f"excess {excess_y}px vert / {excess_x}px horiz "
f"(content {ev.get('scrollWidth')}x{ev.get('scrollHeight')} vs "
f"container {ev.get('clientWidth')}x{ev.get('clientHeight')}, "
f"template={tid}, tol={TABLE_SCROLL_TOL_PX})"
)
result["passed"] = len(fail_reasons) == 0
result["fail_reasons"] = fail_reasons
return result

View File

@@ -0,0 +1,334 @@
"""IMP-15 실행-2 (Gitea issue #46) — Step 14 table_self_overflow detection.
Tests Selenium-driven ``<table>`` self-overflow measurement and element-identity
wrapper dedup added to ``run_overflow_check``:
* Fixture D — standalone ``<table>`` self-overflow, no clipped wrapper ancestor →
``table_events`` entry reports ``wrapper_clipped_index = None`` and an
``excess_*`` exceeding ``TABLE_SCROLL_TOL_PX``; Python aggregation then emits
a ``table self-overflow`` fail_reason and flips ``result["passed"] = False``.
* Fixture E — ``<table>`` inside a clipped ``f13b`` wrapper. The wrapper itself
self-overflows (registers in ``clippedWrapperMap``) and the inner table also
self-overflows. Asserts dedup is honored: the table's ``wrapper_clipped_index``
resolves to the wrapper's map index (non-null) so the Python aggregation MUST
NOT emit a ``table self-overflow`` fail_reason — only the wrapper's pre-existing
``inner clipped`` fail line remains.
* Fixture F — two wrappers W1 / W2 share identical className ``f13b-cell``. W1
contains an overflowing inline-block child (no ``<table>``) → W1 self-overflows
and registers in ``clippedWrapperMap`` (emits ``inner clipped``). W2 contains
only a self-overflowing ``<table>``; W2's own scrollWidth equals its clientWidth
(the table's ``overflow:hidden`` keeps W2 itself unclipped). The element-identity
ancestor walk MUST resolve the W2 table's ``wrapper_clipped_index`` to ``None``
(W2 ≠ W1 by DOM reference, despite identical class string). A class-string
lookup would have falsely resolved the W2 table → W1 and suppressed the fail —
the test thereby proves ``Map<Element, int>`` distinguishes by node identity.
Chromedriver resolution mirrors the pipeline order
(``PROJECT_ROOT/chromedriver{,.exe}`` → PATH → Selenium Manager). When no driver
is resolvable the suite skips by default; under ``PHASE_Z_REQUIRE_SELENIUM=1``
the tests are marked ``xfail(strict=True)`` so CI cannot silently lose coverage.
"""
from __future__ import annotations
import os
import shutil
from pathlib import Path
import pytest
from src.phase_z2_pipeline import (
PROJECT_ROOT,
TABLE_SCROLL_TOL_PX,
run_overflow_check,
)
# ─── chromedriver skip / xfail guard ─────────────────────────────────
def _selenium_manager_resolvable() -> bool:
    """Report whether a bare ``webdriver.Chrome(options=...)`` can be started.

    This is the last tier of the driver lookup used by this test module: when
    no explicit chromedriver binary is found, Selenium is asked to construct a
    Chrome driver on its own. Without this probe, ``PHASE_Z_REQUIRE_SELENIUM=1``
    could mark the suite ``xfail(strict=True)`` on machines where the tests
    would in fact pass, producing strict-XPASS failures.

    Returns:
        ``False`` if Selenium cannot be imported or the driver cannot be
        constructed; ``True`` once a driver instance has been created
        (errors raised while shutting it down are ignored).
    """
    try:
        from selenium import webdriver
        from selenium.webdriver.chrome.options import Options as _Opts
    except Exception:
        # Selenium missing or broken: nothing can be resolved.
        return False

    chrome_options = _Opts()
    for flag in ("--headless=new", "--no-sandbox", "--disable-dev-shm-usage"):
        chrome_options.add_argument(flag)

    try:
        probe = webdriver.Chrome(options=chrome_options)
    except Exception:
        return False

    # Best-effort cleanup: the probe already succeeded, so a failing quit()
    # must not change the answer.
    try:
        probe.quit()
    except Exception:
        pass
    return True
def _chromedriver_resolvable() -> bool:
    """Check the three driver sources in order and stop at the first hit.

    Order: ``PROJECT_ROOT/chromedriver`` or ``PROJECT_ROOT/chromedriver.exe``
    as a regular file, then either name on ``PATH``, then the Selenium probe
    in ``_selenium_manager_resolvable``.

    Returns:
        ``True`` as soon as any tier resolves a driver, otherwise the result
        of the Selenium probe.
    """
    binary_names = ("chromedriver", "chromedriver.exe")

    # Tier 1: a driver binary checked in next to the project.
    if any((PROJECT_ROOT / binary).is_file() for binary in binary_names):
        return True

    # Tier 2: a driver binary reachable through PATH.
    if any(shutil.which(binary) for binary in binary_names):
        return True

    # Tier 3: let Selenium try to start Chrome by itself.
    return _selenium_manager_resolvable()
# Opt-in switch: only the exact value "1" enables the strict mode.
_REQUIRE_SELENIUM = os.environ.get("PHASE_Z_REQUIRE_SELENIUM") == "1"
# Resolved once at import time; the last lookup tier may start a headless Chrome.
_DRIVER_AVAILABLE = _chromedriver_resolvable()

# A module-wide marker is only defined when no driver could be resolved.
# NOTE(review): with xfail(strict=True) a failing test is reported as XFAIL and
# only an unexpected pass fails the run — confirm this matches the "hard failure"
# wording used in the skip reason below.
if not _DRIVER_AVAILABLE and _REQUIRE_SELENIUM:
    pytestmark = pytest.mark.xfail(
        strict=True,
        reason="PHASE_Z_REQUIRE_SELENIUM=1 but chromedriver is unresolvable",
    )
elif not _DRIVER_AVAILABLE:
    pytestmark = pytest.mark.skip(
        reason=(
            "chromedriver unresolvable (PROJECT_ROOT/chromedriver{,.exe} + PATH + Selenium Manager); "
            "set PHASE_Z_REQUIRE_SELENIUM=1 to make this a hard failure"
        ),
    )
# ─── HTML fixture helpers ────────────────────────────────────────────
_SLIDE_CSS = """
html, body { margin: 0; padding: 0; }
.slide { width: 1280px; height: 720px; position: relative; box-sizing: border-box; }
.zone { display: block; }
"""
def _write_slide_html(tmp_path: Path, body_inner: str, name: str = "slide.html") -> Path:
html = (
"<!doctype html><html><head><meta charset='utf-8'>"
f"<style>{_SLIDE_CSS}</style></head><body>"
'<div class="slide" data-page="1">'
f"{body_inner}"
"</div></body></html>"
)
path = tmp_path / name
path.write_text(html, encoding="utf-8")
return path
# ─── tests ───────────────────────────────────────────────────────────
def test_fixture_d_standalone_table_overflow(tmp_path: Path) -> None:
    """Fixture D: a lone ``<table>`` that overflows itself, no clipped wrapper.

    The fixture styles the table as ``display:block`` with ``width:100px`` and
    ``overflow:hidden`` while its only cell requests 600px with
    ``white-space:nowrap``. No ancestor in the fixture carries an
    ``f13b/f29b/f16b`` class. The test expects:

    * a single ``table_events`` entry carrying the zone position / template id,
    * ``wrapper_clipped_index`` equal to ``None``,
    * ``excess_x`` greater than ``TABLE_SCROLL_TOL_PX``,
    * exactly one ``table self-overflow`` fail reason that names
      ``zone--primary`` and the tolerance, with ``passed`` set to ``False``.
    """
    fixture_markup = "".join([
        '<div class="zone" data-zone-position="primary" data-template-id="t_table">',
        '<table style="display:block; width:100px; height:30px; overflow:hidden; ',
        'box-sizing:border-box; table-layout:fixed;">',
        '<tr><td style="width:600px; white-space:nowrap;">',
        'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA',
        '</td></tr>',
        '</table>',
        '</div>',
    ])
    page = _write_slide_html(tmp_path, fixture_markup, name="fixture_d.html")
    report = run_overflow_check(page)

    assert "error" not in report, report
    assert "table_events" in report, "run_overflow_check must expose table_events"

    events = report["table_events"]
    assert len(events) == 1, f"expected one table_events entry, got: {events}"
    event = events[0]

    assert event["zone_position"] == "primary", event
    assert event["zone_template_id"] == "t_table", event
    assert event["wrapper_clipped_index"] is None, (
        f"standalone table must have null wrapper_clipped_index; got {event['wrapper_clipped_index']}"
    )
    assert event["excess_x"] > TABLE_SCROLL_TOL_PX, (
        f"expected excess_x > {TABLE_SCROLL_TOL_PX}; got {event['excess_x']} "
        f"(clientWidth={event['clientWidth']}, scrollWidth={event['scrollWidth']})"
    )

    # Python-side aggregation: one fail reason emitted and the verdict flipped.
    reasons = report.get("fail_reasons", [])
    overflow_reasons = [reason for reason in reasons if "table self-overflow" in reason]
    assert len(overflow_reasons) == 1, (
        f"expected exactly one 'table self-overflow' fail_reason; got fail_reasons={reasons}"
    )
    assert "zone--primary" in overflow_reasons[0], overflow_reasons[0]
    assert f"tol={TABLE_SCROLL_TOL_PX}" in overflow_reasons[0], overflow_reasons[0]
    assert report["passed"] is False, (
        f"table self-overflow must flip passed=False; got result={report}"
    )
def test_fixture_e_table_in_clipped_wrapper_dedup(tmp_path: Path) -> None:
    """Fixture E: a ``<table>`` inside a clipped ``f13b`` wrapper (dedup honored).

    The wrapper is 300px wide with ``overflow:hidden`` and holds a
    ``display:block`` table forced to 500px, so the wrapper is expected to
    satisfy ``scrollWidth − clientWidth > 5px`` and be reported as
    ``inner clipped``. The table itself is expected to overflow as well
    (500px box, 900px ``nowrap`` cell).

    The test expects the table's ``wrapper_clipped_index`` to be an integer
    (the wrapper's index), no ``table self-overflow`` fail reason (the clipped
    wrapper already accounts for it), at least one ``inner clipped`` reason
    mentioning ``f13b``, and ``passed`` set to ``False``.
    """
    fixture_markup = "".join([
        '<div class="zone" data-zone-position="primary" data-template-id="t_table_wrap">',
        '<div class="f13b-cell" style="width:300px; height:60px; overflow:hidden; ',
        'box-sizing:border-box; position:relative;">',
        '<table style="display:block; width:500px; height:40px; overflow:hidden; ',
        'box-sizing:border-box; table-layout:fixed;">',
        '<tr><td style="width:900px; white-space:nowrap;">',
        'BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB',
        '</td></tr>',
        '</table>',
        '</div>',
        '</div>',
    ])
    page = _write_slide_html(tmp_path, fixture_markup, name="fixture_e.html")
    report = run_overflow_check(page)

    assert "error" not in report, report

    events = report.get("table_events", [])
    assert len(events) == 1, f"expected one table_events entry, got: {events}"
    event = events[0]

    # Dedup signal: the ancestor walk has to land on the f13b wrapper.
    assert event["wrapper_clipped_index"] is not None, (
        f"table inside clipped wrapper must inherit wrapper index; got ev={event}"
    )
    assert isinstance(event["wrapper_clipped_index"], int), event

    # The table really overflows, so only the dedup can be suppressing its
    # fail reason (not an absence of overflow).
    assert event["excess_x"] > TABLE_SCROLL_TOL_PX, (
        f"inner table must be self-overflowing for this test to be meaningful; ev={event}"
    )

    reasons = report.get("fail_reasons", [])
    overflow_reasons = [reason for reason in reasons if "table self-overflow" in reason]
    assert overflow_reasons == [], (
        f"dedup must suppress table self-overflow fail when wrapper is clipped; "
        f"got table_fails={overflow_reasons} fail_reasons={reasons}"
    )

    # The wrapper's own clipped_inner line must survive.
    wrapper_reasons = [
        reason for reason in reasons if "inner clipped" in reason and "f13b" in reason
    ]
    assert len(wrapper_reasons) >= 1, (
        f"wrapper clipped_inner fail must remain; got fail_reasons={reasons}"
    )
    assert report["passed"] is False, report
def test_fixture_f_two_same_class_wrappers_element_identity(tmp_path: Path) -> None:
    """Fixture F (F1 acceptance): two same-class wrappers, element-identity dedup.

    W1 and W2 both use the class ``f13b-cell``.

    * W1 is 300px wide with ``overflow:hidden`` and holds a 600px inline-block
      ``<div>``, so ``W1.scrollWidth − clientWidth`` is expected to be about
      300 (> 5) and W1 should be reported as ``inner clipped``.
    * W2 is 600px wide with ``overflow:hidden`` and holds a 500px block-display
      ``<table>`` of the same shape as in Fixture E (900px ``nowrap`` cell), so
      the table is expected to overflow itself while W2, being wider than the
      table box, is expected not to be clipped.

    Because the wrapper lookup is keyed by DOM node rather than class string,
    the W2 table must report ``wrapper_clipped_index = None``. A lookup keyed
    on a class substring (the F1 regression scenario from issue #46) would
    have matched W1's entry and suppressed the table's fail reason.

    Asserts:
    * exactly one ``table_events`` entry with ``wrapper_clipped_index`` of
      ``None`` and ``excess_x > TABLE_SCROLL_TOL_PX``;
    * exactly one ``inner clipped`` fail line mentioning ``f13b`` (W1);
    * exactly one ``table self-overflow`` fail line (W2's table) naming
      ``zone--primary`` and the tolerance;
    * ``passed`` is ``False``.
    """
    fixture_markup = "".join([
        '<div class="zone" data-zone-position="primary" ',
        'data-template-id="t_table_same_class">',
        # W1: shared class name, overflowing child that is not a table.
        '<div class="f13b-cell" id="w1" style="width:300px; height:60px; ',
        'overflow:hidden; box-sizing:border-box; position:relative; ',
        'margin-bottom:8px;">',
        '<div style="display:inline-block; width:600px; white-space:nowrap;">',
        'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX',
        '</div>',
        '</div>',
        # W2: shared class name, wider (600px) than the table box (500px), while
        # the table's own cell asks for 900px.
        '<div class="f13b-cell" id="w2" style="width:600px; height:60px; ',
        'overflow:hidden; box-sizing:border-box; position:relative;">',
        '<table style="display:block; width:500px; height:40px; ',
        'overflow:hidden; box-sizing:border-box; table-layout:fixed;">',
        '<tr><td style="width:900px; white-space:nowrap;">',
        'YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY',
        '</td></tr>',
        '</table>',
        '</div>',
        '</div>',
    ])
    page = _write_slide_html(tmp_path, fixture_markup, name="fixture_f.html")
    report = run_overflow_check(page)

    assert "error" not in report, report

    # Only W2 contains a <table>, so a single event is expected.
    events = report.get("table_events", [])
    assert len(events) == 1, f"expected one table_events entry, got: {events}"
    event = events[0]

    # Element-identity contract: sharing W1's class string must not make the
    # W2 table inherit W1's wrapper index.
    assert event["wrapper_clipped_index"] is None, (
        f"W2 (not itself clipped) must NOT inherit W1's index via class string; "
        f"got wrapper_clipped_index={event['wrapper_clipped_index']}. "
        "This is the F1 regression — a class-substring map would have failed here."
    )
    assert event["excess_x"] > TABLE_SCROLL_TOL_PX, (
        f"W2's inner table must self-overflow for this test to be meaningful; ev={event}"
    )

    reasons = report.get("fail_reasons", [])

    # W1 side: its overflowing inner div yields one "inner clipped" line.
    wrapper_reasons = [
        reason for reason in reasons if "inner clipped" in reason and "f13b" in reason
    ]
    assert len(wrapper_reasons) == 1, (
        f"expected exactly one W1 'inner clipped' fail; got fail_reasons={reasons}"
    )

    # W2 side: the table's fail line is present because its wrapper index is None.
    overflow_reasons = [reason for reason in reasons if "table self-overflow" in reason]
    assert len(overflow_reasons) == 1, (
        f"expected exactly one W2 'table self-overflow' fail (element-identity dedup); "
        f"got fail_reasons={reasons}"
    )
    assert "zone--primary" in overflow_reasons[0], overflow_reasons[0]
    assert f"tol={TABLE_SCROLL_TOL_PX}" in overflow_reasons[0], overflow_reasons[0]
    assert report["passed"] is False, report