Files
C.E.L_Slide_test2/tests/phase_z2/test_pz2_vu_preservation.py
kyeongmin 23ba8b68cd feat(IMP-16): U1 H3 verification utility port + U2 wiring design
U1 (runtime, u1-u10): new Phase Z-owned deterministic verification module
src/phase_z2_verification_utils.py (335 LOC, stdlib only) porting H3 utility
surface — VerificationResult, extract_text_from_html, normalize_for_comparison,
extract_keywords, strip_meta_lines, split_into_sentences, verify_text_preservation,
detect_invented_text. 10 unit tests under tests/phase_z2/test_pz2_vu_*.py (56 tests).

u11 (design-only): docs/architecture/IMP-16-U2-WIRING-DESIGN.md fixes the Step
1/2/14/21/22 reverse-path contract, redesigned frame-contract pattern
reservation (IMP-20), and IMP-07 hard-gate criteria. No runtime wiring lands
in this commit — U2 stays blocked until IMP-07 reverse path is implemented +
verified + runtime-hit.

Guardrails: no src.content_verifier import; no FORBIDDEN_KEI_MEMOS /
generate_with_retry / REQUIRED_PATTERNS / verify_structure / verify_area /
verify_all_areas usage; no AI / Kei / httpx / SSE path; AI-isolation contract
upheld (utility is deterministic).

Tests: 56 targeted PASS (0.19s), 15 regression baseline PASS (7.59s).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-19 04:42:35 +09:00

120 lines
5.3 KiB
Python

"""Tests for IMP-16-U1 unit u8: ``verify_text_preservation``.
Locks the Phase Z port of the deterministic text-preservation check
(Phase Q reference: ``src/content_verifier.py:206-273``). The function
is pure and composes u2 (extract_text_from_html), u3
(normalize_for_comparison), u6 (split_into_sentences), and u7
(_sentence_matches_html). No Phase Q import is exercised.
"""
from __future__ import annotations
from src.phase_z2_verification_utils import (
VerificationResult,
_MISSING_SENTENCE_REPORT_LIMIT,
_MISSING_SENTENCE_TRUNCATE_LEN,
_TEXT_PRESERVATION_DEFAULT_THRESHOLD,
verify_text_preservation,
)
def test_verify_text_preservation_defaults_locked():
    # Pin the module-level defaults that mirror the Phase Q caller
    # convention, in order: pass threshold (0.70), maximum number of
    # missing sentences reported (5), per-sentence truncation length (60).
    observed_defaults = (
        _TEXT_PRESERVATION_DEFAULT_THRESHOLD,
        _MISSING_SENTENCE_REPORT_LIMIT,
        _MISSING_SENTENCE_TRUNCATE_LEN,
    )
    assert observed_defaults == (0.70, 5, 60)
def test_verify_text_preservation_empty_sentences_returns_passed():
    # When the MDX collapses to zero sentences (here: a lone header),
    # the check is expected to pass trivially: score 1.0 with nothing
    # reported as error or warning. Per the Phase Q reference this is
    # an early return taken before the HTML is looked at.
    outcome = verify_text_preservation(
        "# header only",
        "<p>anything</p>",
        "core",
    )
    assert isinstance(outcome, VerificationResult)
    assert outcome.area_name == "core"
    assert outcome.passed is True
    assert outcome.score == 1.0
    assert outcome.checks == {"text_preservation": True}
    assert outcome.warnings == []
    assert outcome.errors == []
def test_verify_text_preservation_full_match_passes():
    # Every MDX sentence is present in the HTML, so the result must be
    # a clean pass: score 1.0, no errors, and no warnings (the 보존율
    # warning is only expected when the score drops below 1.0).
    sentences = [
        "데이터 분석은 핵심 과정입니다.",
        "시각화로 의사 결정을 지원합니다.",
    ]
    source_mdx = " ".join(sentences)
    # One <p> element per sentence, concatenated without separators.
    rendered_html = "".join(f"<p>{sentence}</p>" for sentence in sentences)

    outcome = verify_text_preservation(source_mdx, rendered_html, "body")

    assert outcome.passed is True
    assert outcome.score == 1.0
    assert outcome.errors == []
    assert outcome.warnings == []
def test_verify_text_preservation_below_threshold_reports_errors():
    # Two MDX sentences, only the first rendered in the HTML: the score
    # is 0.5, which is under the default 0.70 threshold, so the check
    # fails. The error list starts with the "누락 문장 (1/2):" header and
    # then quotes the missing sentence (short enough not to be truncated).
    preserved = "데이터 분석은 핵심 과정입니다."
    dropped = "전혀 다른 문맥의 두 번째 문장입니다."
    source_mdx = "\n".join([preserved, dropped])
    rendered_html = f"<p>{preserved}</p>"

    outcome = verify_text_preservation(source_mdx, rendered_html, "core")

    assert outcome.passed is False
    assert outcome.score == 0.5
    assert outcome.checks == {"text_preservation": False}
    assert outcome.errors[0] == "누락 문장 (1/2):"
    detail_lines = outcome.errors[1:]
    assert any("두 번째 문장" in entry for entry in detail_lines)
    assert outcome.warnings == ["보존율: 50% (1/2 문장)"]
def test_verify_text_preservation_truncates_long_missing_sentence():
    # A missing sentence longer than _MISSING_SENTENCE_TRUNCATE_LEN (60)
    # characters must be rendered with the `..."` tail. Phase Z lifts the
    # 60 constant to a named module value so the gate is auditable.
    #
    # The padding must be a real character: repeating an empty string
    # contributes nothing and leaves the fixture far below the limit, so
    # truncation would never be exercised. The filler run alone exceeds
    # the limit, so the last reported sentence is over-long whether or
    # not the splitter breaks at the period inside the fixture.
    # The filler must not be "." or '"', because the assertions below
    # split on '"' and strip trailing dots.
    filler = "가" * (_MISSING_SENTENCE_TRUNCATE_LEN + 10)
    long_sentence = "엄청나게 긴 문장이 들어가서 절단 동작을 검증합니다." + filler
    mdx = long_sentence + "."
    html = "<p>관련 없는 문구</p>"
    result = verify_text_preservation(mdx, html, "footer", threshold=0.99)
    assert result.passed is False
    # Header + at least one missing-line entry; the entry must end with `..."`.
    assert len(result.errors) >= 2
    assert result.errors[-1].endswith("...\"")
    # Text between the first pair of quotes, minus the ellipsis, is the
    # truncated sentence body and must be exactly the configured length.
    truncated_body = result.errors[-1].split('"', 2)[1].rstrip(".")
    assert len(truncated_body) == _MISSING_SENTENCE_TRUNCATE_LEN
def test_verify_text_preservation_caps_missing_report_at_limit():
    # Seven MDX sentences, none of them covered by the HTML. The check
    # fails; the header line still reports the true 7/7 missing count,
    # while the itemised entries after it are capped at
    # _MISSING_SENTENCE_REPORT_LIMIT (5).
    source_mdx = "\n".join(
        f"전혀 다른 문맥의 문장 번호 {index} 입니다." for index in range(7)
    )
    unrelated_html = "<p>관련 없는 문구</p>"

    outcome = verify_text_preservation(source_mdx, unrelated_html, "core")

    assert outcome.passed is False
    header, *listed_entries = outcome.errors
    assert header == "누락 문장 (7/7):"
    assert len(listed_entries) == _MISSING_SENTENCE_REPORT_LIMIT
def test_verify_text_preservation_custom_threshold_passes_at_50_percent():
    # With the threshold lowered to 0.50, preserving one of two
    # sentences is enough to pass.
    preserved = "데이터 분석은 핵심 과정입니다."
    dropped = "전혀 다른 문맥의 두 번째 문장입니다."
    source_mdx = "\n".join([preserved, dropped])
    rendered_html = f"<p>{preserved}</p>"

    outcome = verify_text_preservation(
        source_mdx,
        rendered_html,
        "core",
        threshold=0.50,
    )

    assert outcome.passed is True
    assert outcome.score == 0.5
    # The score is still below 1.0, so the 보존율 warning stays attached
    # for the trace surface even though the check passed.
    assert outcome.warnings == ["보존율: 50% (1/2 문장)"]