feat(IMP-16): Step 14 table_self_overflow detection

Add table self-overflow detection with element-identity wrapper dedup,
mirroring the image_aspect_mismatch axis pattern (#45).

JS layer: TABLE_SCROLL_TOL_PX=5 module constant; clippedWrapperMap
built as Map<Element,int> keyed by DOM node reference (NOT className)
so two wrappers with identical class strings remain distinguishable;
table_events collected via querySelectorAll('table').forEach with
closest()-ancestor walk resolving wrapper_clipped_index = int|null.

Py layer: aggregate result['table_events'] and append fail_reason
'table_self_overflow' only when (excess_x>TOL OR excess_y>TOL)
AND wrapper_clipped_index is None; wrapper-clipped path continues
to fail via existing clipped_inner reporting.

Tests (Selenium, chromedriver guard mirrored from image_check):
- Fixture D: standalone <table> overflow → table_self_overflow fail
- Fixture E: <table> in clipped wrapper → dedup suppresses table fail
- Fixture F (F1 acceptance): two wrappers share the identical className
  f13b-cell. W1 is clipped by a non-table child; W2 is itself NOT clipped
  but hosts a self-overflowing <table>. Element-identity keying ensures
  W2's table is not suppressed by W1's class; both fails are emitted.

Out of scope: image_events behavior (intact from #45), classifier
pass/fail consumer (→실행-3), debug.json surfacing (→실행-4).

Refs: #46

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-18 21:06:01 +09:00
parent f3bff898fb
commit 2827622858
2 changed files with 421 additions and 1 deletions

View File

@@ -133,6 +133,11 @@ DEFAULT_ZONE_MIN_HEIGHT_PX = 100
# Spec doc row (PHASE-Z-FIT-CLASSIFIER-ROUTER-SPEC) update deferred to IMP-15 실행-4.
IMAGE_ASPECT_DELTA_TOL = 0.05
# Step 14 table_self_overflow tolerance — (scrollWidth - clientWidth) or (scrollHeight - clientHeight) > TOL ⇒ fail.
# Local anchor : IMP-15 실행-2 (Gitea issue #46) — table axis acceptance criteria.
# Mirrors existing inline 5px tolerance used by slide/zone/clipped scans in run_overflow_check.
TABLE_SCROLL_TOL_PX = 5
# content_weight 계산 가중치
CONTENT_WEIGHT_COEFFS = {
"text_per_chars": 800, # text_len / 800 = score
@@ -2131,6 +2136,14 @@ def run_overflow_check(html_path: Path) -> dict:
const zones = [];
const zone_geometries_px = [];
// IMP-15 실행-2 (issue #46) — element-identity dedup map for table_events.
// Map<Element, integer> keyed by DOM node reference (NOT class string) so that
// two wrappers sharing identical className resolve to distinct map entries.
// Populated alongside the existing per-zone clipped_inner scan below.
const clippedWrapperMap = new Map();
let clippedIdxCounter = 0;
slide.querySelectorAll('.zone').forEach((z) => {
const pos = z.getAttribute('data-zone-position') || 'unknown';
const tid = z.getAttribute('data-template-id') || '?';
@@ -2178,6 +2191,13 @@ def run_overflow_check(html_path: Path) -> dict:
scrollWidth: el.scrollWidth,
scrollHeight: el.scrollHeight,
});
// IMP-15 실행-2 (issue #46) — element-identity registration.
// Key by DOM node `el`, NOT className: two wrappers with identical
// class string still hash to distinct Map entries.
if (!clippedWrapperMap.has(el)) {
clippedWrapperMap.set(el, clippedIdxCounter);
clippedIdxCounter++;
}
}
});
m.clipped_inner = clipped;
@@ -2259,7 +2279,52 @@ def run_overflow_check(html_path: Path) -> dict:
});
});
return { slide: slideM, slide_body: bodyM, zones, frame_slot_metrics, zone_geometries_px, image_events };
// IMP-15 실행-2 (issue #46) — table_events[] for table_self_overflow detection.
// One entry per <table> under .slide. wrapper_clipped_index is the integer index
// (from clippedWrapperMap) of the nearest ancestor that is itself in the clipped
// wrapper set, or null. Element-identity walk (NOT className) so that two same-class
// wrappers (W1 clipped, W2 not) resolve independently for any contained <table>.
const table_events = [];
slide.querySelectorAll('table').forEach((tbl) => {
const parentZone = tbl.closest('.zone');
const zonePos = parentZone
? (parentZone.getAttribute('data-zone-position') || 'unknown')
: 'unknown';
const zoneTid = parentZone
? (parentZone.getAttribute('data-template-id') || '?')
: '?';
let wrapper_clipped_index = null;
let node = tbl.parentElement;
while (node && node !== slide) {
if (clippedWrapperMap.has(node)) {
wrapper_clipped_index = clippedWrapperMap.get(node);
break;
}
node = node.parentElement;
}
const tblRect = tbl.getBoundingClientRect();
const dx = tbl.scrollWidth - tbl.clientWidth;
const dy = tbl.scrollHeight - tbl.clientHeight;
table_events.push({
zone_position: zonePos,
zone_template_id: zoneTid,
clientWidth: tbl.clientWidth,
clientHeight: tbl.clientHeight,
scrollWidth: tbl.scrollWidth,
scrollHeight: tbl.scrollHeight,
excess_x: Math.max(0, dx),
excess_y: Math.max(0, dy),
wrapper_clipped_index: wrapper_clipped_index,
bbox: {
x: Math.round(tblRect.left - slideRect.left),
y: Math.round(tblRect.top - slideRect.top),
w: Math.round(tblRect.width),
h: Math.round(tblRect.height),
},
});
});
return { slide: slideM, slide_body: bodyM, zones, frame_slot_metrics, zone_geometries_px, image_events, table_events };
""")
screenshot_path = html_path.parent / "preview.png"
@@ -2318,6 +2383,27 @@ def run_overflow_check(html_path: Path) -> dict:
f"(template={tid}, tol={IMAGE_ASPECT_DELTA_TOL}, src={src})"
)
# IMP-15 실행-2 (issue #46) — table_self_overflow aggregation.
# Emit fail_reason only when (excess_x>TOL OR excess_y>TOL) AND wrapper_clipped_index is None.
# The clipped-wrapper case is already accounted for by the clipped_inner fail_reason above;
# element-identity dedup (clippedWrapperMap keyed by DOM node ref, NOT className) prevents
# double-counting and—critically—prevents two same-class wrappers from masking each other.
for ev in result.get("table_events", []):
if ev.get("wrapper_clipped_index") is not None:
continue
excess_x = ev.get("excess_x", 0) or 0
excess_y = ev.get("excess_y", 0) or 0
if excess_x > TABLE_SCROLL_TOL_PX or excess_y > TABLE_SCROLL_TOL_PX:
pos = ev.get("zone_position", "unknown")
tid = ev.get("zone_template_id", "?")
fail_reasons.append(
f"table self-overflow in zone--{pos}: "
f"excess {excess_y}px vert / {excess_x}px horiz "
f"(content {ev.get('scrollWidth')}x{ev.get('scrollHeight')} vs "
f"container {ev.get('clientWidth')}x{ev.get('clientHeight')}, "
f"template={tid}, tol={TABLE_SCROLL_TOL_PX})"
)
result["passed"] = len(fail_reasons) == 0
result["fail_reasons"] = fail_reasons
return result