Files
C.E.L_Slide_test2/tests/phase_z2/test_pz2_vu_integration.py
kyeongmin 23ba8b68cd feat(IMP-16): U1 H3 verification utility port + U2 wiring design
U1 (runtime, u1-u10): new Phase Z-owned deterministic verification module
src/phase_z2_verification_utils.py (335 LOC, stdlib only) porting H3 utility
surface — VerificationResult, extract_text_from_html, normalize_for_comparison,
extract_keywords, strip_meta_lines, split_into_sentences, verify_text_preservation,
detect_invented_text. 10 unit-test modules under tests/phase_z2/test_pz2_vu_*.py (56 tests).

u11 (design-only): docs/architecture/IMP-16-U2-WIRING-DESIGN.md fixes the Step
1/2/14/21/22 reverse-path contract, redesigned frame-contract pattern
reservation (IMP-20), and IMP-07 hard-gate criteria. No runtime wiring lands
in this commit — U2 stays blocked until IMP-07 reverse path is implemented +
verified + runtime-hit.

Guardrails: no src.content_verifier import; no FORBIDDEN_KEI_MEMOS /
generate_with_retry / REQUIRED_PATTERNS / verify_structure / verify_area /
verify_all_areas usage; no AI / Kei / httpx / SSE path; AI-isolation contract
upheld (utility is deterministic).

Tests: 56 targeted PASS (0.19s), 15 regression baseline PASS (7.59s).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-19 04:42:35 +09:00

107 lines
4.3 KiB
Python

"""Tests for IMP-16-U1 unit u10: sample-backed smoke without pipeline import.
End-to-end smoke of the deterministic chain (extract_text_from_html ∘
normalize_for_comparison ∘ split_into_sentences ∘ _sentence_matches_html
→ verify_text_preservation / detect_invented_text) on a real
``samples/mdx_batch`` MDX file. Per Stage 2 rationale: smoke coverage
uses the sample but does NOT hardcode a sample-specific pass.
Also locks the AI-isolation contract for the verification axis: this
test and the production module MUST NOT import orchestrator /
phase_z2_pipeline / Phase Q content_verifier / Kei client.
"""
from __future__ import annotations
import ast
from pathlib import Path
from src.phase_z2_verification_utils import (
VerificationResult,
detect_invented_text,
verify_text_preservation,
)
# Repository root. This file lives at tests/phase_z2/<name>.py, so the third
# ancestor of its resolved path (parents[2]) is the repository directory.
_REPO_ROOT = Path(__file__).resolve().parents[2]
# Sample MDX document that the smoke tests in this module read as input.
_SAMPLE_MDX_PATH = _REPO_ROOT / "samples" / "mdx_batch" / "02.mdx"
# Dotted module names that must not be imported by the files scanned in
# test_integration_no_forbidden_imports. Each entry is matched either exactly
# or as a dotted prefix (entry + ".").
_FORBIDDEN_IMPORT_ROOTS = (
"orchestrator",
"src.phase_z2_pipeline",
"src.content_verifier",
"src.kei_client",
)
def _module_imports(path: Path) -> set[str]:
    """Return the dotted names imported anywhere in the source file at *path*.

    The file is parsed with ``ast`` and never imported, so only import
    statements physically present in the on-disk source are reported.

    Recorded names:

    * ``import a.b``        -> ``a.b``
    * ``from a import b``   -> ``a`` and ``a.b``

    The qualified ``a.b`` entry matters because ``b`` may itself be a
    submodule: without it, ``from src import content_verifier`` would be
    reported only as ``src`` and would slip past a dotted-prefix check
    against ``src.content_verifier``.

    Relative imports with no module part (``from . import x``) are skipped,
    because they cannot be resolved to an absolute name from the path alone.

    Args:
        path: Location of a UTF-8 encoded Python source file.

    Returns:
        The set of imported dotted names described above.

    Raises:
        OSError: If the file cannot be read.
        SyntaxError: If the file is not valid Python source.
    """
    tree = ast.parse(path.read_text(encoding="utf-8"))
    names: set[str] = set()
    for node in ast.walk(tree):
        if isinstance(node, ast.Import):
            names.update(alias.name for alias in node.names)
        elif isinstance(node, ast.ImportFrom) and node.module:
            names.add(node.module)
            # Also expose "<module>.<name>" so submodule imports are visible
            # to dotted-prefix matching. A star import names nothing specific.
            names.update(
                f"{node.module}.{alias.name}"
                for alias in node.names
                if alias.name != "*"
            )
    return names
def test_integration_sample_mdx_exists():
    # Fixture-availability gate. If the sample is absent, fail here with the
    # offending path in the message, so the problem reads as a missing
    # fixture and not as an unrelated assertion failure in a later test.
    sample_present = _SAMPLE_MDX_PATH.exists()
    assert sample_present, f"sample missing: {_SAMPLE_MDX_PATH}"
def test_integration_full_chain_runs_on_real_sample():
    # API-contract smoke over the whole chain, fed with a real MDX file:
    #   * verify_text_preservation yields a VerificationResult,
    #   * the area name given by the caller is echoed back unchanged,
    #   * the score lies in the closed interval [0.0, 1.0],
    #   * detect_invented_text yields a list.
    # No particular score is asserted, so the sample never becomes a
    # hardcoded pass rule for the pipeline (Stage 2 u10 rationale).
    source_text = _SAMPLE_MDX_PATH.read_text(encoding="utf-8")
    wrapped_html = f"<div>{source_text}</div>"
    area = "smoke"

    outcome = verify_text_preservation(source_text, wrapped_html, area)
    assert isinstance(outcome, VerificationResult)
    assert outcome.area_name == area
    score = outcome.score
    assert 0.0 <= score <= 1.0

    invented = detect_invented_text(source_text, wrapped_html)
    assert isinstance(invented, list)
def test_integration_mirrored_html_passes_default_threshold():
    # Integration-level guarantee for the B-2 reverse path: HTML that
    # carries the MDX text verbatim must verify. No threshold argument is
    # passed, so the check runs against the function's default (noted as
    # the Phase Q default, 0.70).
    source_text = _SAMPLE_MDX_PATH.read_text(encoding="utf-8")
    mirrored_html = f"<div>{source_text}</div>"
    outcome = verify_text_preservation(source_text, mirrored_html, "smoke")
    passed = outcome.passed
    assert passed is True
def test_integration_fabricated_html_flags_invented_text():
    # End-to-end lock on the hallucination guard: HTML text with no keyword
    # anchor in the source MDX has to be reported. The synthetic Korean
    # sentence below was picked so that its keywords (the words for
    # "completely", "made up", "original", "appear", ...) are expected to
    # be absent from samples/mdx_batch/02.mdx.
    source_text = _SAMPLE_MDX_PATH.read_text(encoding="utf-8")
    fabricated_html = "<p>완전히 새로 만들어낸 문장으로 원본에는 전혀 등장하지 않는 내용입니다.</p>"

    flagged = detect_invented_text(source_text, fabricated_html)
    assert isinstance(flagged, list)
    assert len(flagged) > 0
def test_integration_no_forbidden_imports():
    # AI-isolation and Phase Z scope-lock guard. Neither the production
    # module nor this test file may import orchestrator, phase_z2_pipeline,
    # the Phase Q content_verifier, or the Kei client. The scan works on the
    # on-disk source text (via AST), not on the imported module object, so a
    # re-export cannot hide a leak.
    scanned_files = (
        _REPO_ROOT / "src" / "phase_z2_verification_utils.py",
        Path(__file__).resolve(),
    )
    for source_file in scanned_files:
        for imported in _module_imports(source_file):
            for root in _FORBIDDEN_IMPORT_ROOTS:
                # A hit is either the forbidden name itself or anything
                # nested beneath it as a dotted submodule.
                leaked = imported == root or imported.startswith(root + ".")
                assert not leaked, (
                    f"{source_file.name} imports forbidden module: {imported}"
                )