"""IMP-08 B-3 sub-section drag/drop — schema + V4 alias resolver tests.

Fully synthetic per Codex #7 generalization guardrail: NO real catalog
template_id / frame_id, NO ``v4_full32_result.yaml`` dependency, NO
MDX-specific section ids beyond canonical id format.

Locked scope (Stage 3 R8) :
  A. ``derive_parent_id`` canonical ordinal recognition + legacy decimal
     fallback.
  B. ``_resolve_v4_section_key`` exact > alias > None (no parent/sibling
     promotion).
  C. ``align_sections_to_v4_granularity`` canonical ordinal id emit + N-R5
     decimal-only alias guard + MdxSection default-construction stability.
"""

from __future__ import annotations

from src.phase_z2_composition import derive_parent_id
from src.phase_z2_pipeline import (
    MdxSection,
    _resolve_v4_section_key,
    align_sections_to_v4_granularity,
)


# ─── A. derive_parent_id ────────────────────────────────────────────────────


def test_derive_parent_id_ordinal_sub():
    # Canonical "<parent>-sub-<n>" ids are expected to map back to <parent>.
    expected_parents = (
        ("03-1-sub-2", "03-1"),
        ("04-2-sub-1", "04-2"),
    )
    for sub_id, parent_id in expected_parents:
        assert derive_parent_id(sub_id) == parent_id


def test_derive_parent_id_decimal_legacy_alias():
    # The legacy V4 decimal id keeps its existing behaviour for the alias path.
    legacy_decimal_id = "04-2.1"
    assert derive_parent_id(legacy_decimal_id) == "04-2"


def test_derive_parent_id_top_level_none():
    # Ids without a sub-section suffix (and unparseable ids) have no parent.
    for parentless_id in ("04-1", "04", "nonsense"):
        assert derive_parent_id(parentless_id) is None


# ─── B.
# _resolve_v4_section_key ─────────────────────────────────────────────


def _fake_v4(*keys):
    """Build a minimal V4-shaped dict holding one empty entry per given key."""
    entries = {}
    for key in keys:
        # Each entry gets its own fresh (empty) judgments list.
        entries[key] = {"judgments_full32": []}
    return {"mdx_sections": entries}


def test_alias_resolver_exact_match_wins():
    # Both the canonical id and its decimal alias exist in V4; the exact
    # (canonical) key must be returned with or without alias keys supplied.
    canonical_id = "04-2-sub-1"
    decimal_alias = "04-2.1"
    v4_doc = _fake_v4(canonical_id, decimal_alias)

    assert _resolve_v4_section_key(v4_doc, canonical_id) == canonical_id

    resolved_with_alias = _resolve_v4_section_key(
        v4_doc, canonical_id, alias_keys=[decimal_alias]
    )
    assert resolved_with_alias == canonical_id


def test_alias_resolver_decimal_alias_when_metadata_present():
    # Only the legacy decimal key exists in V4; it is reached via alias_keys.
    decimal_alias = "04-2.1"
    v4_doc = _fake_v4(decimal_alias)

    resolved = _resolve_v4_section_key(
        v4_doc, "04-2-sub-1", alias_keys=[decimal_alias]
    )
    assert resolved == decimal_alias


def test_alias_resolver_no_parent_promotion():
    # A parent V4 entry must not be promoted into a sub-section lookup.
    parent_key = "04-2"
    v4_doc = _fake_v4(parent_key)

    assert _resolve_v4_section_key(v4_doc, "04-2-sub-1") is None

    # Alias is opt-in: the parent key only resolves when the caller
    # explicitly provides it.
    resolved_via_alias = _resolve_v4_section_key(
        v4_doc, "04-2-sub-1", alias_keys=[parent_key]
    )
    assert resolved_via_alias == parent_key


def test_alias_resolver_no_sibling_promotion():
    # A sibling sub-section entry must not be auto-promoted without an alias.
    v4_doc = _fake_v4("04-2-sub-2")
    resolved = _resolve_v4_section_key(v4_doc, "04-2-sub-1")
    assert resolved is None


def test_alias_resolver_miss_returns_none():
    # Neither the requested id nor the supplied alias exists in V4.
    v4_doc = _fake_v4("99-1")

    assert _resolve_v4_section_key(v4_doc, "04-2-sub-1") is None

    resolved_with_alias = _resolve_v4_section_key(
        v4_doc, "04-2-sub-1", alias_keys=["04-2.1"]
    )
    assert resolved_with_alias is None


# ─── C. align_sections_to_v4_granularity ────────────────────────────────────


def _section(section_id, num, title, raw_content):
    """Create an MdxSection, leaving the sub-section schema fields at defaults."""
    fields = {
        "section_id": section_id,
        "section_num": num,
        "title": title,
        "raw_content": raw_content,
    }
    return MdxSection(**fields)


def test_mdx_section_default_construction_preserves_4_positional_callers():
    # IMP-08 B-3 : MdxSection still accepts the legacy 4-positional shape
    # (defaults for heading_number / v4_alias_keys / sub_sections).
    legacy_positional_args = ("04-1", 1, "1. Top", "body")
    section = MdxSection(*legacy_positional_args)

    assert section.heading_number is None
    assert section.v4_alias_keys == []
    assert section.sub_sections == []


def test_align_passthrough_when_v4_key_exact_match():
    # The section is already aligned to a V4 key and no override targets it,
    # so the aligner keeps it untouched. Parent-level V4 evidence flows via
    # the exact-match lookup.
    parent = _section("04-1", 1, "1. Top", "body")
    v4_doc = {"mdx_sections": {"04-1": {"judgments_full32": []}}}

    aligned = align_sections_to_v4_granularity([parent], v4_doc)

    assert len(aligned) == 1
    assert aligned[0].section_id == "04-1"


def test_align_parent_v4_exact_keeps_section_when_no_override_targets_sub():
    # Backward-compat axis: when V4 carries the parent exact key and no
    # drag/drop override targets a sub-id of this section, the aligner
    # MUST keep the parent (preserves V4 evidence at parent granularity).
    h3_body = "### 2.1 First\nbody1\n### 2.2 Second\nbody2\n"
    parent = _section("03-2", 2, "2. Parent", h3_body)
    v4_doc = {"mdx_sections": {"03-2": {"judgments_full32": []}}}

    aligned = align_sections_to_v4_granularity([parent], v4_doc)

    emitted_ids = [section.section_id for section in aligned]
    assert emitted_ids == ["03-2"]


def test_align_force_drills_when_override_targets_sub_id_with_parent_in_v4():
    # Stage 5 R2 blocker-fix regression: when V4 has the parent exact key
    # AND an override targets a sub-id of that section, the aligner MUST
    # drill regardless of V4 parent presence. This makes drag/drop
    # addressing deterministic across all V4 yaml shapes.
    h3_body = "### 2.1 First\nbody1\n### 2.2 Second\nbody2\n"
    parent = _section("04-2", 2, "2. Parent", h3_body)
    v4_entries = {
        "04-2": {"judgments_full32": []},  # parent V4 entry present
        "04-2.1": {"judgments_full32": []},  # plus decimal sub entries
        "04-2.2": {"judgments_full32": []},
    }

    aligned = align_sections_to_v4_granularity(
        [parent],
        {"mdx_sections": v4_entries},
        override_target_section_ids=["04-2-sub-1"],
    )

    # Force-drill: the parent id MUST be replaced by canonical sub-ids.
    emitted_ids = [section.section_id for section in aligned]
    assert emitted_ids == ["04-2-sub-1", "04-2-sub-2"]
    # Decimal aliases preserved (N-R5: decimal heading_number).
    first_sub = aligned[0]
    assert first_sub.v4_alias_keys == ["04-2.1"]
    second_sub = aligned[1]
    assert second_sub.v4_alias_keys == ["04-2.2"]


def test_align_top_level_override_target_does_not_force_drill_other_sections():
    # Top-level override target ("primary=03-1") has no derive_parent_id,
    # so it MUST NOT force-drill any section. Only "X-sub-N" targets
    # trigger force-drill on parent X.
    top_level = _section("03-1", 1, "1. Top", "body")
    with_h3 = _section("03-2", 2, "2. Parent", "### 2.1 First\nbody1\n")
    v4_entries = {
        "03-1": {"judgments_full32": []},
        "03-2": {"judgments_full32": []},
    }

    aligned = align_sections_to_v4_granularity(
        [top_level, with_h3],
        {"mdx_sections": v4_entries},
        override_target_section_ids=["03-1"],
    )

    # No sub-id target -> both sections kept at parent granularity.
    emitted_ids = [section.section_id for section in aligned]
    assert emitted_ids == ["03-1", "03-2"]


def test_align_drill_emits_canonical_ordinal_id_with_decimal_alias():
    # Decimal H3 headings -> canonical ordinal id + decimal alias
    # (the legacy V4 key).
    h3_body = "### 2.1 First\nbody1\n### 2.2 Second\nbody2\n"
    parent = _section("04-2", 2, "2. Parent", h3_body)
    empty_v4 = {"mdx_sections": {}}  # forces drill (no exact key)

    aligned = align_sections_to_v4_granularity([parent], empty_v4)

    emitted_ids = [section.section_id for section in aligned]
    assert emitted_ids == ["04-2-sub-1", "04-2-sub-2"]
    heading_numbers = [section.heading_number for section in aligned]
    assert heading_numbers == ["2.1", "2.2"]
    # N-R5 : decimal headings -> alias emitted.
    first_sub = aligned[0]
    assert first_sub.v4_alias_keys == ["04-2.1"]
    second_sub = aligned[1]
    assert second_sub.v4_alias_keys == ["04-2.2"]


def test_align_drill_integer_only_h3_emits_no_alias_n_r5_guard():
    # N-R5 : integer-only H3 (e.g., "### 1 Title") must NOT generate an
    # alias, otherwise it would collide with sibling parent V4 entries
    # (`{mdx_id}-1`).
    h3_body = "### 1 Alpha\nbody1\n### 2 Beta\nbody2\n"
    parent = _section("05-2", 2, "2. Parent", h3_body)
    empty_v4 = {"mdx_sections": {}}

    aligned = align_sections_to_v4_granularity([parent], empty_v4)

    emitted_ids = [section.section_id for section in aligned]
    assert emitted_ids == ["05-2-sub-1", "05-2-sub-2"]
    heading_numbers = [section.heading_number for section in aligned]
    assert heading_numbers == ["1", "2"]
    first_sub = aligned[0]
    assert first_sub.v4_alias_keys == []
    second_sub = aligned[1]
    assert second_sub.v4_alias_keys == []


def test_align_drill_undecorated_h3_emits_no_alias():
    # Plain `### Title` without a numeric prefix -> heading_number=None,
    # no alias.
    h3_body = "### Alpha\nbody1\n### Beta\nbody2\n"
    parent = _section("03-3", 3, "3. Parent", h3_body)
    empty_v4 = {"mdx_sections": {}}

    aligned = align_sections_to_v4_granularity([parent], empty_v4)

    emitted_ids = [section.section_id for section in aligned]
    assert emitted_ids == ["03-3-sub-1", "03-3-sub-2"]
    heading_numbers = [section.heading_number for section in aligned]
    assert heading_numbers == [None, None]
    assert all(section.v4_alias_keys == [] for section in aligned)


def test_align_no_h3_passes_section_through_unchanged():
    # No H3 sub-headings in raw_content -> the aligner keeps the section.
    prose_only = _section("04-1", 1, "1. Top", "no subheadings here\njust prose")
    empty_v4 = {"mdx_sections": {}}

    aligned = align_sections_to_v4_granularity([prose_only], empty_v4)

    assert len(aligned) == 1
    assert aligned[0].section_id == "04-1"


def test_align_resolver_round_trip_with_legacy_v4_alias():
    # End-to-end : the aligner emits canonical id + alias keys; the resolver
    # finds the legacy decimal key in V4 via the alias path (no parent
    # promotion).
    parent = _section("04-2", 2, "2. Parent", "### 2.1 First\nbody1\n")
    v4_doc = {"mdx_sections": {"04-2.1": {"judgments_full32": []}}}

    aligned = align_sections_to_v4_granularity([parent], v4_doc)
    sub = aligned[0]
    assert sub.section_id == "04-2-sub-1"

    resolved_key = _resolve_v4_section_key(
        v4_doc, sub.section_id, alias_keys=sub.v4_alias_keys
    )
    assert resolved_key == "04-2.1"