"""u2 — pure HTML text extraction surface (IMP-16-U1). Locks the deterministic visible-text extraction contract: - " "" "

visible

" ) out = extract_text_from_html(html) assert "visible" in out joined = " ".join(out) assert "color: red" not in joined assert "keep_out" not in joined def test_extract_drops_whitespace_only_chunks_and_strips_survivors(): from src.phase_z2_verification_utils import extract_text_from_html html = "
\n\n
hello
world\t" out = extract_text_from_html(html) assert out == ["hello", "world"] def test_extract_preserves_korean_and_inline_markup_text(): from src.phase_z2_verification_utils import extract_text_from_html html = "

설계 방식의 왜곡

" out = extract_text_from_html(html) assert out == ["설계", "방식", "의 왜곡"] def test_extract_empty_input_returns_empty_list(): from src.phase_z2_verification_utils import extract_text_from_html assert extract_text_from_html("") == []