Files
C.E.L_Slide_test2/tests/test_phase_z2_subsection_schema.py
kyeongmin 5191acad85 feat(IMP-08): U2 — aligner canonical sub-id + N-R5 decimal alias guard
align_sections_to_v4_granularity now emits canonical sub-section ids
of the form ${section_id}-sub-${ordinal} (e.g., "04-2-sub-1"), matching
the frontend drag/drop schema. Each drilled sub-section populates
heading_number (decimal "2.1" / integer "1" / None for undecorated)
and v4_alias_keys for legacy V4 keys.

N-R5 decimal-only alias guard: v4_alias_keys is populated only when
heading_number matches re.fullmatch(r"\d+\.\d+", ...). Integer-only
H3 headings (e.g., MDX 05's "### 1", "### 2") and bare H3 headings
produce no alias to avoid sibling-parent V4 collisions (RULE 0
generalization — applies to all 32-frame MDX, not MDX 05-specific).

The drill regex is broadened from r"^###\s+(\d+\.\d+)\s+..." to
r"^###\s+(?:(\d+(?:\.\d+)?)\s+)?(.+?)$" so integer-only and bare H3
headings are now recognised as sub-sections; they previously failed
the regex and were silently kept under the parent section.

Tests: 7 new cases (MdxSection default 4-positional callers, V4 exact
passthrough, decimal drill with alias, integer-only no-alias guard,
bare H3 no-alias, no-H3 passthrough, end-to-end aligner -> resolver
round-trip with legacy V4 alias). 15/15 in test_phase_z2_subsection_schema
+ 14 override + 8 fallback baseline = 37/37 PASS.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-15 22:33:49 +09:00

181 lines
6.9 KiB
Python

"""IMP-08 B-3 sub-section drag/drop — schema + V4 alias resolver tests.
Fully synthetic per Codex #7 generalization guardrail:
NO real catalog template_id / frame_id, NO ``v4_full32_result.yaml`` dependency,
NO MDX-specific section ids beyond canonical id format.
Locked scope (Stage 3 R8) :
A. ``derive_parent_id`` canonical ordinal recognition + legacy decimal fallback.
B. ``_resolve_v4_section_key`` exact > alias > None (no parent/sibling promotion).
C. ``align_sections_to_v4_granularity`` canonical ordinal id emit + N-R5
decimal-only alias guard + MdxSection default-construction stability.
"""
from __future__ import annotations
from src.phase_z2_composition import derive_parent_id
from src.phase_z2_pipeline import (
MdxSection,
_resolve_v4_section_key,
align_sections_to_v4_granularity,
)
# ─── A. derive_parent_id ────────────────────────────────────────────────────
def test_derive_parent_id_ordinal_sub():
    """Canonical ``<parent>-sub-<ordinal>`` ids map back to their parent id."""
    expected_parent_by_sub_id = {
        "03-1-sub-2": "03-1",
        "04-2-sub-1": "04-2",
    }
    for sub_id, parent_id in expected_parent_by_sub_id.items():
        assert derive_parent_id(sub_id) == parent_id
def test_derive_parent_id_decimal_legacy_alias():
    """A legacy V4 decimal id keeps resolving to its parent (alias path)."""
    legacy_decimal_id = "04-2.1"
    parent_id = derive_parent_id(legacy_decimal_id)
    assert parent_id == "04-2"
def test_derive_parent_id_top_level_none():
    """Ids that are not sub-section ids yield ``None`` as their parent."""
    for candidate in ("04-1", "04", "nonsense"):
        assert derive_parent_id(candidate) is None
# ─── B. _resolve_v4_section_key ─────────────────────────────────────────────
def _fake_v4(*keys):
return {"mdx_sections": {k: {"judgments_full32": []} for k in keys}}
def test_alias_resolver_exact_match_wins():
    """An exact key hit is returned even when an alias would also match."""
    canonical_id = "04-2-sub-1"
    v4 = _fake_v4(canonical_id, "04-2.1")

    resolved_plain = _resolve_v4_section_key(v4, canonical_id)
    assert resolved_plain == canonical_id

    # Both the exact key and the alias exist in V4; the exact key must win.
    resolved_with_alias = _resolve_v4_section_key(
        v4, canonical_id, alias_keys=["04-2.1"]
    )
    assert resolved_with_alias == canonical_id
def test_alias_resolver_decimal_alias_when_metadata_present():
    """With only the legacy key in V4, the supplied alias is what resolves."""
    v4 = _fake_v4("04-2.1")
    resolved = _resolve_v4_section_key(v4, "04-2-sub-1", alias_keys=["04-2.1"])
    assert resolved == "04-2.1"
def test_alias_resolver_no_parent_promotion():
    """A parent V4 entry is never picked up implicitly for a sub-section id."""
    v4 = _fake_v4("04-2")

    # Parent V4 entry must not be promoted into a sub-section lookup.
    implicit = _resolve_v4_section_key(v4, "04-2-sub-1")
    assert implicit is None

    # Alias is opt-in: it only resolves when the caller explicitly provides it.
    explicit = _resolve_v4_section_key(v4, "04-2-sub-1", alias_keys=["04-2"])
    assert explicit == "04-2"
def test_alias_resolver_no_sibling_promotion():
    """A sibling sub-section entry is not auto-promoted without an alias."""
    v4 = _fake_v4("04-2-sub-2")
    resolved = _resolve_v4_section_key(v4, "04-2-sub-1")
    assert resolved is None
def test_alias_resolver_miss_returns_none():
    """When neither the id nor any alias exists in V4, the result is ``None``."""
    v4 = _fake_v4("99-1")

    without_alias = _resolve_v4_section_key(v4, "04-2-sub-1")
    assert without_alias is None

    with_missing_alias = _resolve_v4_section_key(
        v4, "04-2-sub-1", alias_keys=["04-2.1"]
    )
    assert with_missing_alias is None
# ─── C. align_sections_to_v4_granularity ────────────────────────────────────
def _section(section_id, num, title, raw_content):
    """Create an MdxSection from its four core fields only.

    Only ``section_id``, ``section_num``, ``title`` and ``raw_content`` are
    passed; any remaining MdxSection fields keep their default values.
    """
    core_fields = {
        "section_id": section_id,
        "section_num": num,
        "title": title,
        "raw_content": raw_content,
    }
    return MdxSection(**core_fields)
def test_mdx_section_default_construction_preserves_4_positional_callers():
    """MdxSection still accepts the legacy 4-positional call shape (IMP-08 B-3).

    The sub-section schema fields (heading_number / v4_alias_keys /
    sub_sections) must fall back to their defaults.
    """
    legacy_args = ("04-1", 1, "1. Top", "body")
    section = MdxSection(*legacy_args)

    assert section.heading_number is None
    # Both list-valued fields are expected to default to an empty list.
    for list_field in ("v4_alias_keys", "sub_sections"):
        assert getattr(section, list_field) == []
def test_align_passthrough_when_v4_key_exact_match():
    """A section whose id is already a V4 key is kept untouched by the aligner."""
    top_level = _section("04-1", 1, "1. Top", "body")
    v4 = {"mdx_sections": {"04-1": {"judgments_full32": []}}}

    aligned = align_sections_to_v4_granularity([top_level], v4)

    assert len(aligned) == 1
    only_section = aligned[0]
    assert only_section.section_id == "04-1"
def test_align_drill_emits_canonical_ordinal_id_with_decimal_alias():
    """Decimal H3 headings drill into canonical ordinal ids plus a legacy alias."""
    raw = "### 2.1 First\nbody1\n### 2.2 Second\nbody2\n"
    parent = _section("04-2", 2, "2. Parent", raw)
    # No exact key for the parent in V4, which forces the drill path.
    v4 = {"mdx_sections": {}}

    drilled = align_sections_to_v4_granularity([parent], v4)

    section_ids = [sub.section_id for sub in drilled]
    assert section_ids == ["04-2-sub-1", "04-2-sub-2"]

    heading_numbers = [sub.heading_number for sub in drilled]
    assert heading_numbers == ["2.1", "2.2"]

    # N-R5: decimal headings -> legacy V4 alias emitted for each sub-section.
    first, second = drilled[0], drilled[1]
    assert first.v4_alias_keys == ["04-2.1"]
    assert second.v4_alias_keys == ["04-2.2"]
def test_align_drill_integer_only_h3_emits_no_alias_n_r5_guard():
    """Integer-only H3 headings drill into sub-sections but produce no alias.

    N-R5: an alias built from an integer-only heading (e.g. "### 1 Title")
    would collide with sibling parent V4 entries (`{mdx_id}-1`).
    """
    raw = "### 1 Alpha\nbody1\n### 2 Beta\nbody2\n"
    parent = _section("05-2", 2, "2. Parent", raw)
    v4 = {"mdx_sections": {}}

    drilled = align_sections_to_v4_granularity([parent], v4)

    section_ids = [sub.section_id for sub in drilled]
    assert section_ids == ["05-2-sub-1", "05-2-sub-2"]

    heading_numbers = [sub.heading_number for sub in drilled]
    assert heading_numbers == ["1", "2"]

    # Checked by index, in order, exactly as many entries as ids asserted above.
    for position in (0, 1):
        assert drilled[position].v4_alias_keys == []
def test_align_drill_undecorated_h3_emits_no_alias():
    """Plain ``### Title`` headings give heading_number=None and no alias."""
    raw = "### Alpha\nbody1\n### Beta\nbody2\n"
    parent = _section("03-3", 3, "3. Parent", raw)
    v4 = {"mdx_sections": {}}

    drilled = align_sections_to_v4_granularity([parent], v4)

    section_ids = [sub.section_id for sub in drilled]
    assert section_ids == ["03-3-sub-1", "03-3-sub-2"]

    heading_numbers = [sub.heading_number for sub in drilled]
    assert heading_numbers == [None, None]

    for sub in drilled:
        assert sub.v4_alias_keys == []
def test_align_no_h3_passes_section_through_unchanged():
    """Without H3 sub-headings in raw_content the aligner keeps the section."""
    prose_only = _section("04-1", 1, "1. Top", "no subheadings here\njust prose")
    v4 = {"mdx_sections": {}}

    aligned = align_sections_to_v4_granularity([prose_only], v4)

    assert len(aligned) == 1
    kept = aligned[0]
    assert kept.section_id == "04-1"
def test_align_resolver_round_trip_with_legacy_v4_alias():
    """End-to-end: aligner output feeds the resolver and hits the legacy key.

    The aligner emits the canonical id together with its alias keys; the
    resolver then finds the legacy decimal key in V4 through the alias path
    (no parent promotion involved).
    """
    raw = "### 2.1 First\nbody1\n"
    parent = _section("04-2", 2, "2. Parent", raw)
    # V4 only knows the legacy decimal key, not the canonical ordinal id.
    v4 = {"mdx_sections": {"04-2.1": {"judgments_full32": []}}}

    aligned = align_sections_to_v4_granularity([parent], v4)
    first_sub = aligned[0]
    assert first_sub.section_id == "04-2-sub-1"

    canonical_id = first_sub.section_id
    aliases = first_sub.v4_alias_keys
    resolved_key = _resolve_v4_section_key(v4, canonical_id, alias_keys=aliases)
    assert resolved_key == "04-2.1"