diff --git a/src/phase_z2_pipeline.py b/src/phase_z2_pipeline.py index ab3c2a1..35098bb 100644 --- a/src/phase_z2_pipeline.py +++ b/src/phase_z2_pipeline.py @@ -133,6 +133,11 @@ DEFAULT_ZONE_MIN_HEIGHT_PX = 100 # Spec doc row (PHASE-Z-FIT-CLASSIFIER-ROUTER-SPEC) update deferred to IMP-15 실행-4. IMAGE_ASPECT_DELTA_TOL = 0.05 +# Step 14 table_self_overflow tolerance — scrollW−clientW or scrollH−clientH > TOL ⇒ fail. +# Local anchor : IMP-15 실행-2 (Gitea issue #46) — table axis acceptance criteria. +# Mirrors existing inline 5px tolerance used by slide/zone/clipped scans in run_overflow_check. +TABLE_SCROLL_TOL_PX = 5 + # content_weight 계산 가중치 CONTENT_WEIGHT_COEFFS = { "text_per_chars": 800, # text_len / 800 = score @@ -2131,6 +2136,14 @@ def run_overflow_check(html_path: Path) -> dict: const zones = []; const zone_geometries_px = []; + + // IMP-15 실행-2 (issue #46) — element-identity dedup map for table_events. + // Map keyed by DOM node reference (NOT class string) so that + // two wrappers sharing identical className resolve to distinct map entries. + // Populated alongside the existing per-zone clipped_inner scan below. + const clippedWrapperMap = new Map(); + let clippedIdxCounter = 0; + slide.querySelectorAll('.zone').forEach((z) => { const pos = z.getAttribute('data-zone-position') || 'unknown'; const tid = z.getAttribute('data-template-id') || '?'; @@ -2178,6 +2191,13 @@ def run_overflow_check(html_path: Path) -> dict: scrollWidth: el.scrollWidth, scrollHeight: el.scrollHeight, }); + // IMP-15 실행-2 (issue #46) — element-identity registration. + // Key by DOM node `el`, NOT className: two wrappers with identical + // class string still hash to distinct Map entries. 
+ if (!clippedWrapperMap.has(el)) { + clippedWrapperMap.set(el, clippedIdxCounter); + clippedIdxCounter++; + } } }); m.clipped_inner = clipped; @@ -2259,7 +2279,52 @@ def run_overflow_check(html_path: Path) -> dict: }); }); - return { slide: slideM, slide_body: bodyM, zones, frame_slot_metrics, zone_geometries_px, image_events }; + // IMP-15 실행-2 (issue #46) — table_events[] for table_self_overflow detection. + // One entry per under .slide. wrapper_clipped_index is the integer index + // (from clippedWrapperMap) of the nearest ancestor that is itself in the clipped + // wrapper set, or null. Element-identity walk (NOT className) so that two same-class + // wrappers (W1 clipped, W2 not) resolve independently for any contained
. + const table_events = []; + slide.querySelectorAll('table').forEach((tbl) => { + const parentZone = tbl.closest('.zone'); + const zonePos = parentZone + ? (parentZone.getAttribute('data-zone-position') || 'unknown') + : 'unknown'; + const zoneTid = parentZone + ? (parentZone.getAttribute('data-template-id') || '?') + : '?'; + let wrapper_clipped_index = null; + let node = tbl.parentElement; + while (node && node !== slide) { + if (clippedWrapperMap.has(node)) { + wrapper_clipped_index = clippedWrapperMap.get(node); + break; + } + node = node.parentElement; + } + const tblRect = tbl.getBoundingClientRect(); + const dx = tbl.scrollWidth - tbl.clientWidth; + const dy = tbl.scrollHeight - tbl.clientHeight; + table_events.push({ + zone_position: zonePos, + zone_template_id: zoneTid, + clientWidth: tbl.clientWidth, + clientHeight: tbl.clientHeight, + scrollWidth: tbl.scrollWidth, + scrollHeight: tbl.scrollHeight, + excess_x: Math.max(0, dx), + excess_y: Math.max(0, dy), + wrapper_clipped_index: wrapper_clipped_index, + bbox: { + x: Math.round(tblRect.left - slideRect.left), + y: Math.round(tblRect.top - slideRect.top), + w: Math.round(tblRect.width), + h: Math.round(tblRect.height), + }, + }); + }); + + return { slide: slideM, slide_body: bodyM, zones, frame_slot_metrics, zone_geometries_px, image_events, table_events }; """) screenshot_path = html_path.parent / "preview.png" @@ -2318,6 +2383,27 @@ def run_overflow_check(html_path: Path) -> dict: f"(template={tid}, tol={IMAGE_ASPECT_DELTA_TOL}, src={src})" ) + # IMP-15 실행-2 (issue #46) — table_self_overflow aggregation. + # Emit fail_reason only when (excess_x>TOL OR excess_y>TOL) AND wrapper_clipped_index is None. + # The clipped-wrapper case is already accounted for by the clipped_inner fail_reason above; + # element-identity dedup (clippedWrapperMap keyed by DOM node ref, NOT className) prevents + # double-counting and—critically—prevents two same-class wrappers from masking each other. 
+ for ev in result.get("table_events", []): + if ev.get("wrapper_clipped_index") is not None: + continue + excess_x = ev.get("excess_x", 0) or 0 + excess_y = ev.get("excess_y", 0) or 0 + if excess_x > TABLE_SCROLL_TOL_PX or excess_y > TABLE_SCROLL_TOL_PX: + pos = ev.get("zone_position", "unknown") + tid = ev.get("zone_template_id", "?") + fail_reasons.append( + f"table self-overflow in zone--{pos}: " + f"excess {excess_y}px vert / {excess_x}px horiz " + f"(content {ev.get('scrollWidth')}x{ev.get('scrollHeight')} vs " + f"container {ev.get('clientWidth')}x{ev.get('clientHeight')}, " + f"template={tid}, tol={TABLE_SCROLL_TOL_PX})" + ) + result["passed"] = len(fail_reasons) == 0 result["fail_reasons"] = fail_reasons return result diff --git a/tests/phase_z2/test_phase_z2_step14_table_check.py b/tests/phase_z2/test_phase_z2_step14_table_check.py new file mode 100644 index 0000000..1e1ef54 --- /dev/null +++ b/tests/phase_z2/test_phase_z2_step14_table_check.py @@ -0,0 +1,334 @@ +"""IMP-15 실행-2 (Gitea issue #46) — Step 14 table_self_overflow detection. + +Tests Selenium-driven ``
<table>`` self-overflow measurement and element-identity +wrapper dedup added to ``run_overflow_check``: + +* Fixture D — standalone ``
<table>`` self-overflow, no clipped wrapper ancestor → + ``table_events`` entry reports ``wrapper_clipped_index = None`` and an + ``excess_*`` exceeding ``TABLE_SCROLL_TOL_PX``; Python aggregation then emits + a ``table self-overflow`` fail_reason and flips ``result["passed"] = False``. +* Fixture E — ``
<table>`` inside a clipped ``f13b`` wrapper. The wrapper itself + self-overflows (registers in ``clippedWrapperMap``) and the inner table also + self-overflows. Asserts dedup is honored: the table's ``wrapper_clipped_index`` + resolves to the wrapper's map index (non-null) so the Python aggregation MUST + NOT emit a ``table self-overflow`` fail_reason — only the wrapper's pre-existing + ``inner clipped`` fail line remains. + +* Fixture F — two wrappers W1 / W2 share identical className ``f13b-cell``. W1 + contains an overflowing inline-block child (no ``
<table>``) → W1 self-overflows + and registers in ``clippedWrapperMap`` (emits ``inner clipped``). W2 contains + only a self-overflowing ``
<table>``; W2's own scrollWidth equals its clientWidth + (the table's ``overflow:hidden`` keeps W2 itself unclipped). The element-identity + ancestor walk MUST resolve the W2 table's ``wrapper_clipped_index`` to ``None`` + (W2 ≠ W1 by DOM reference, despite identical class string). A class-string + lookup would have falsely resolved the W2 table → W1 and suppressed the fail — + the test thereby proves ``Map`` distinguishes by node identity. + +Chromedriver resolution mirrors the pipeline order +(``PROJECT_ROOT/chromedriver{,.exe}`` → PATH → Selenium Manager). When no driver +is resolvable the suite skips by default; under ``PHASE_Z_REQUIRE_SELENIUM=1`` +the tests are marked ``xfail(strict=True)`` so CI cannot silently lose coverage. +""" +from __future__ import annotations + +import os +import shutil +from pathlib import Path + +import pytest + +from src.phase_z2_pipeline import ( + PROJECT_ROOT, + TABLE_SCROLL_TOL_PX, + run_overflow_check, +) + + +# ─── chromedriver skip / xfail guard ───────────────────────────────── + +def _selenium_manager_resolvable() -> bool: + """Probe ``webdriver.Chrome(options=...)`` — pipeline's third tier. + + ``src/phase_z2_pipeline.py`` (run_overflow_check) tries + ``PROJECT_ROOT/chromedriver{,.exe}`` first, then falls back to + ``webdriver.Chrome(options=options)`` which delegates to Selenium Manager + for driver auto-resolution. The test resolver must mirror that fallback + or ``PHASE_Z_REQUIRE_SELENIUM=1`` produces spurious strict-XPASS failures + on machines where Selenium Manager can satisfy the pipeline at runtime.
+ """ + try: + from selenium import webdriver + from selenium.webdriver.chrome.options import Options as _Opts + except Exception: + return False + opts = _Opts() + opts.add_argument("--headless=new") + opts.add_argument("--no-sandbox") + opts.add_argument("--disable-dev-shm-usage") + try: + drv = webdriver.Chrome(options=opts) + except Exception: + return False + try: + drv.quit() + except Exception: + pass + return True + + +def _chromedriver_resolvable() -> bool: + """Mirror pipeline order: PROJECT_ROOT/chromedriver{,.exe} → PATH → Selenium Manager.""" + for candidate in (PROJECT_ROOT / "chromedriver", PROJECT_ROOT / "chromedriver.exe"): + if candidate.is_file(): + return True + if shutil.which("chromedriver") or shutil.which("chromedriver.exe"): + return True + return _selenium_manager_resolvable() + + +_REQUIRE_SELENIUM = os.environ.get("PHASE_Z_REQUIRE_SELENIUM") == "1" +_DRIVER_AVAILABLE = _chromedriver_resolvable() + +if not _DRIVER_AVAILABLE: + if _REQUIRE_SELENIUM: + pytestmark = pytest.mark.xfail( + strict=True, + reason="PHASE_Z_REQUIRE_SELENIUM=1 but chromedriver is unresolvable", + ) + else: + pytestmark = pytest.mark.skip( + reason=( + "chromedriver unresolvable (PROJECT_ROOT/chromedriver{,.exe} + PATH + Selenium Manager); " + "set PHASE_Z_REQUIRE_SELENIUM=1 to make this a hard failure" + ), + ) + + +# ─── HTML fixture helpers ──────────────────────────────────────────── + +_SLIDE_CSS = """ +html, body { margin: 0; padding: 0; } +.slide { width: 1280px; height: 720px; position: relative; box-sizing: border-box; } +.zone { display: block; } +""" + + +def _write_slide_html(tmp_path: Path, body_inner: str, name: str = "slide.html") -> Path: + html = ( + "" + f"" + '
' + f"{body_inner}" + "
" + ) + path = tmp_path / name + path.write_text(html, encoding="utf-8") + return path + + +# ─── tests ─────────────────────────────────────────────────────────── + +def test_fixture_d_standalone_table_overflow(tmp_path: Path) -> None: + """Fixture D — standalone ``
`` self-overflow, no clipped wrapper. + + The table is forced into block layout with a fixed clientWidth (100px) and + ``overflow: hidden``; the inner cell is 600px wide with ``white-space:nowrap``, + so the table's scrollWidth exceeds clientWidth by well over ``TABLE_SCROLL_TOL_PX``. + No ancestor carries an ``f13b/f29b/f16b`` class, so the element-identity walk + must report ``wrapper_clipped_index = None``. Python aggregation then emits a + ``table self-overflow`` fail_reason and flips ``result["passed"]`` to ``False``. + """ + body = ( + '
' + '
' + '' + '
' + 'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA' + '
' + '' + ) + html_path = _write_slide_html(tmp_path, body, name="fixture_d.html") + result = run_overflow_check(html_path) + + assert "error" not in result, result + assert "table_events" in result, "run_overflow_check must expose table_events" + table_events = result["table_events"] + assert len(table_events) == 1, f"expected one table_events entry, got: {table_events}" + + ev = table_events[0] + assert ev["zone_position"] == "primary", ev + assert ev["zone_template_id"] == "t_table", ev + assert ev["wrapper_clipped_index"] is None, ( + f"standalone table must have null wrapper_clipped_index; got {ev['wrapper_clipped_index']}" + ) + assert ev["excess_x"] > TABLE_SCROLL_TOL_PX, ( + f"expected excess_x > {TABLE_SCROLL_TOL_PX}; got {ev['excess_x']} " + f"(clientWidth={ev['clientWidth']}, scrollWidth={ev['scrollWidth']})" + ) + + # Python aggregation: emitted fail_reason + passed flipped to False. + fail_reasons = result.get("fail_reasons", []) + table_fails = [r for r in fail_reasons if "table self-overflow" in r] + assert len(table_fails) == 1, ( + f"expected exactly one 'table self-overflow' fail_reason; got fail_reasons={fail_reasons}" + ) + assert "zone--primary" in table_fails[0], table_fails[0] + assert f"tol={TABLE_SCROLL_TOL_PX}" in table_fails[0], table_fails[0] + assert result["passed"] is False, ( + f"table self-overflow must flip passed=False; got result={result}" + ) + + +def test_fixture_e_table_in_clipped_wrapper_dedup(tmp_path: Path) -> None: + """Fixture E — ```` inside a clipped ``f13b`` wrapper (dedup honored). + + The wrapper (clientWidth=300, ``overflow:hidden``) contains a ``display:block`` + table forced to width=500px → wrapper.scrollWidth (≈500) − clientWidth (300) > 5px, + so the wrapper is registered in ``clippedWrapperMap`` (emits ``inner clipped`` fail). + The inner table is itself self-overflowing (clientWidth=500, content nowrap-cell + width=900 → scrollWidth ≈ 900). 
The element-identity ancestor walk MUST resolve + the table's ``wrapper_clipped_index`` to the wrapper's integer map index, and the + Python aggregation MUST then SKIP emitting a ``table self-overflow`` fail_reason + (the clipped wrapper already accounts for this). + """ + body = ( + '
' + '
' + '
' + '' + '
' + 'BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB' + '
' + '' + '' + ) + html_path = _write_slide_html(tmp_path, body, name="fixture_e.html") + result = run_overflow_check(html_path) + + assert "error" not in result, result + table_events = result.get("table_events", []) + assert len(table_events) == 1, f"expected one table_events entry, got: {table_events}" + + ev = table_events[0] + # Dedup signal: ancestor walk must hit the f13b wrapper via Map.has(node). + assert ev["wrapper_clipped_index"] is not None, ( + f"table inside clipped wrapper must inherit wrapper index; got ev={ev}" + ) + assert isinstance(ev["wrapper_clipped_index"], int), ev + # The inner table is itself overflowing — proves the dedup is the only thing + # suppressing the table_self_overflow fail (not absence of overflow). + assert ev["excess_x"] > TABLE_SCROLL_TOL_PX, ( + f"inner table must be self-overflowing for this test to be meaningful; ev={ev}" + ) + + fail_reasons = result.get("fail_reasons", []) + table_fails = [r for r in fail_reasons if "table self-overflow" in r] + assert table_fails == [], ( + f"dedup must suppress table self-overflow fail when wrapper is clipped; " + f"got table_fails={table_fails} fail_reasons={fail_reasons}" + ) + # Wrapper's clipped_inner fail line must still be present. + clipped_fails = [r for r in fail_reasons if "inner clipped" in r and "f13b" in r] + assert len(clipped_fails) >= 1, ( + f"wrapper clipped_inner fail must remain; got fail_reasons={fail_reasons}" + ) + assert result["passed"] is False, result + + +def test_fixture_f_two_same_class_wrappers_element_identity(tmp_path: Path) -> None: + """Fixture F (F1 acceptance) — two same-class wrappers, element-identity dedup. + + W1 and W2 share the identical className ``f13b-cell``. W1 (clientWidth=300, + ``overflow:hidden``) contains an inline-block ``
<div>`` of width 600px → + W1.scrollWidth − clientWidth ≈ 300 > 5; W1 is registered in + ``clippedWrapperMap`` and emits an ``inner clipped`` fail line. W2 + (clientWidth=600, ``overflow:hidden``) contains a 500px-wide block-display + ``<table>`` (matching the Fixture E table shape so the table is itself + self-overflowing with excess_x > 5). W2's clientWidth (600) is larger than + the table's outer width (500), so W2's own scrollWidth equals its clientWidth + (excess 0) and W2 is NOT registered in ``clippedWrapperMap``. + + The element-identity ancestor walk in the pipeline (L2298–L2304) walks from + the W2 table upward via ``parentElement`` and queries + ``clippedWrapperMap.has(node)`` — keyed by DOM node, NOT className. W2 is + a different ``Element`` reference from W1 despite identical class string, + so the lookup returns false at W2 and the walk terminates at ``.slide`` with + ``wrapper_clipped_index = null``. A class-substring keyed map (the F1 + regression scenario described in issue #46) would have resolved any + ``[class*="f13b"]`` ancestor of the W2 table → W1's index and falsely + suppressed the W2 table_self_overflow fail. + + Asserts: + * Exactly ONE ``inner clipped`` fail line (for W1) — proves W1 is in the map. + * Exactly ONE ``table self-overflow`` fail line (for W2's table) — proves + the W2 table is NOT suppressed by W1's identical class string. + * W2 table's ``table_events`` entry reports ``wrapper_clipped_index = None`` + (element-identity contract) and ``excess_x > TABLE_SCROLL_TOL_PX``. + """ + body = ( + '
' + # W1 — same className, overflowing non-table child. + '
' + '
' + 'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX' + '
' + '
' + # W2 — same className, NOT clipped (W2.clientWidth=600 > table.outer=500), + # but the inner table itself self-overflows (table width=500, td width=900). + '
' + '
' + '' + '
' + 'YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY' + '
' + '
' + '' + ) + html_path = _write_slide_html(tmp_path, body, name="fixture_f.html") + result = run_overflow_check(html_path) + + assert "error" not in result, result + + # Exactly one table_events entry (the W2 table — W1 has no ). + table_events = result.get("table_events", []) + assert len(table_events) == 1, f"expected one table_events entry, got: {table_events}" + + ev = table_events[0] + # Element-identity contract: W2 ≠ W1, so the ancestor walk MUST NOT inherit + # W1's wrapper index merely because W2 shares W1's class string. + assert ev["wrapper_clipped_index"] is None, ( + f"W2 (not itself clipped) must NOT inherit W1's index via class string; " + f"got wrapper_clipped_index={ev['wrapper_clipped_index']}. " + "This is the F1 regression — a class-substring map would have failed here." + ) + assert ev["excess_x"] > TABLE_SCROLL_TOL_PX, ( + f"W2's inner table must self-overflow for this test to be meaningful; ev={ev}" + ) + + fail_reasons = result.get("fail_reasons", []) + + # W1: inner clipped fail emitted (W1 is in clippedWrapperMap, has overflowing inner div). + w1_clipped_fails = [r for r in fail_reasons if "inner clipped" in r and "f13b" in r] + assert len(w1_clipped_fails) == 1, ( + f"expected exactly one W1 'inner clipped' fail; got fail_reasons={fail_reasons}" + ) + + # W2: table self-overflow fail emitted because element-identity dedup correctly + # reports wrapper_clipped_index=None for the W2 table (W2 ≠ W1 by DOM ref). + table_fails = [r for r in fail_reasons if "table self-overflow" in r] + assert len(table_fails) == 1, ( + f"expected exactly one W2 'table self-overflow' fail (element-identity dedup); " + f"got fail_reasons={fail_reasons}" + ) + assert "zone--primary" in table_fails[0], table_fails[0] + assert f"tol={TABLE_SCROLL_TOL_PX}" in table_fails[0], table_fails[0] + + assert result["passed"] is False, result