"""Tests for IMP-16-U1 unit u10: sample-backed smoke without pipeline import. End-to-end smoke of the deterministic chain (extract_text_from_html ∘ normalize_for_comparison ∘ split_into_sentences ∘ _sentence_matches_html → verify_text_preservation / detect_invented_text) on a real ``samples/mdx_batch`` MDX file. Per Stage 2 rationale: smoke coverage uses the sample but does NOT hardcode a sample-specific pass. Also locks the AI-isolation contract for the verification axis: this test and the production module MUST NOT import orchestrator / phase_z2_pipeline / Phase Q content_verifier / Kei client. """ from __future__ import annotations import ast from pathlib import Path from src.phase_z2_verification_utils import ( VerificationResult, detect_invented_text, verify_text_preservation, ) _REPO_ROOT = Path(__file__).resolve().parents[2] _SAMPLE_MDX_PATH = _REPO_ROOT / "samples" / "mdx_batch" / "02.mdx" _FORBIDDEN_IMPORT_ROOTS = ( "orchestrator", "src.phase_z2_pipeline", "src.content_verifier", "src.kei_client", ) def _module_imports(path: Path) -> set[str]: tree = ast.parse(path.read_text(encoding="utf-8")) names: set[str] = set() for node in ast.walk(tree): if isinstance(node, ast.Import): for alias in node.names: names.add(alias.name) elif isinstance(node, ast.ImportFrom) and node.module: names.add(node.module) return names def test_integration_sample_mdx_exists(): # Smoke fixture availability gate; explicit so a missing sample # surfaces as a fixture problem, not a downstream assertion failure. assert _SAMPLE_MDX_PATH.exists(), f"sample missing: {_SAMPLE_MDX_PATH}" def test_integration_full_chain_runs_on_real_sample(): # Locks API contract over the full chain on a real MDX: returns a # VerificationResult, area_name passthrough works, score within # [0.0, 1.0], and detect_invented_text returns a list. No assertion # is made about a specific score so the sample is not hardcoded as # the pipeline's pass rule (Stage 2 u10 rationale). mdx = _SAMPLE_MDX_PATH.read_text(encoding="utf-8") html = f"
완전히 새로 만들어낸 문장으로 원본에는 전혀 등장하지 않는 내용입니다.
" ) invented = detect_invented_text(mdx, fabricated_html) assert isinstance(invented, list) assert len(invented) >= 1 def test_integration_no_forbidden_imports(): # AI-isolation + Phase Z scope-lock guard. Production module and # this test file must not import orchestrator / phase_z2_pipeline / # Phase Q content_verifier / Kei client. AST scan of the on-disk # source (not the imported module) so re-exports cannot mask a leak. for path in ( _REPO_ROOT / "src" / "phase_z2_verification_utils.py", Path(__file__).resolve(), ): modules = _module_imports(path) for module in modules: for forbidden in _FORBIDDEN_IMPORT_ROOTS: assert not (module == forbidden or module.startswith(forbidden + ".")), ( f"{path.name} imports forbidden module: {module}" )