Codex #1 (Stage 5) reproduced a smoke regression on the actual checkout : when V4 carries the parent exact key (e.g., `04-2`) AND the drag/drop override targets a sub-id (`primary=04-2-sub-1`), the aligner kept the parent at parent granularity and emitted `['04-1', '04-2']`, so the override flag failed with `unknown section_id(s) ['04-2-sub-1']`. Fix : `align_sections_to_v4_granularity` gains an optional `override_target_section_ids` keyword. From each canonical `${parent}-sub-N` target it derives the parent id and adds it to a `force_drill_parents` set. Sections in that set are drilled into sub-sections regardless of whether V4 carries the parent exact key. Top-level override targets (no derived parent) do not trigger force-drill, so backward-compat is preserved for parent-granularity overrides. The call site in `run_phase_z2_mvp1` collects sub-ids from `override_section_assignments` and forwards them to the aligner. Generalization (RULE 0) : - Trigger is the override schema (`X-sub-N`), not a specific MDX / section / frame id. Applies to all 32-frame MDX uniformly. - Decision is deterministic on the override target shape, independent of V4 yaml content. - Default (no override) path is unchanged byte-for-byte. Side fixes (forward-only RULE 1 cleanup, no history rewrite) : - `align_sections_to_v4_granularity` docstring rewritten in English (overwrites the Korean docstring committed in 5191aca). - Step 9 diagnostic comment quoted-string rewritten in English (overwrites `"V4 entry 없음"` committed in a422d72). Tests : 3 new cases in `test_phase_z2_subsection_schema.py` — `test_align_parent_v4_exact_keeps_section_when_no_override_targets_sub` (backward-compat axis), `test_align_force_drills_when_override_targets_sub_id_with_parent_in_v4` (blocker regression), `test_align_top_level_override_target_does_not_force_drill_other_sections` (force-drill scope guard). 
Pytest scope-qualified result : `test_phase_z2_subsection_schema.py` + `_section_assignment_override.py` + `_v4_fallback.py` = 40 / 40 PASS. Smoke (axis = sub-id override -> aligner -> assignment plan, both V4 yaml shapes) : - HEAD V4 yaml (`04-1`, `04-2.1`, `04-2.2` only) : `--override-section-assignment primary=04-2-sub-1` -> `aligned_section_ids=['04-1', '04-2-sub-1', '04-2-sub-2']`, `plan[0].assignment_source='cli_override'`, `plan[0].source_section_ids=['04-2-sub-1']`. - V4 yaml with `04-2` exact key (Codex's stress case) : identical aligned output and identical assignment plan. Downstream `composition_planner` abort (`phase_z_status_not_allowed:extract_matched_zone`) is IMP-05 territory, unchanged in both shapes. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
240 lines
9.4 KiB
Python
240 lines
9.4 KiB
Python
"""IMP-08 B-3 sub-section drag/drop — schema + V4 alias resolver tests.

Fully synthetic per Codex #7 generalization guardrail:
NO real catalog template_id / frame_id, NO ``v4_full32_result.yaml`` dependency,
NO MDX-specific section ids beyond canonical id format.

Locked scope (Stage 3 R8) :
  A. ``derive_parent_id`` canonical ordinal recognition + legacy decimal fallback.
  B. ``_resolve_v4_section_key`` exact > alias > None (no parent/sibling promotion).
  C. ``align_sections_to_v4_granularity`` canonical ordinal id emit + N-R5
     decimal-only alias guard + MdxSection default-construction stability.
"""
|
|
from __future__ import annotations
|
|
|
|
from src.phase_z2_composition import derive_parent_id
|
|
from src.phase_z2_pipeline import (
|
|
MdxSection,
|
|
_resolve_v4_section_key,
|
|
align_sections_to_v4_granularity,
|
|
)
|
|
|
|
|
|
# ─── A. derive_parent_id ────────────────────────────────────────────────────
|
|
|
|
|
|
def test_derive_parent_id_ordinal_sub():
    """Canonical ``<parent>-sub-N`` ids must resolve to their parent id."""
    expected_parent_by_sub_id = {
        "03-1-sub-2": "03-1",
        "04-2-sub-1": "04-2",
    }
    for sub_id, parent_id in expected_parent_by_sub_id.items():
        assert derive_parent_id(sub_id) == parent_id
|
|
|
|
|
|
def test_derive_parent_id_decimal_legacy_alias():
    """Legacy V4 decimal ids keep their existing parent derivation (alias path)."""
    legacy_decimal_id = "04-2.1"
    parent_id = derive_parent_id(legacy_decimal_id)
    assert parent_id == "04-2"
|
|
|
|
|
|
def test_derive_parent_id_top_level_none():
    """Ids that carry no sub-section component must yield ``None``."""
    for parentless_id in ("04-1", "04", "nonsense"):
        assert derive_parent_id(parentless_id) is None
|
|
|
|
|
|
# ─── B. _resolve_v4_section_key ─────────────────────────────────────────────
|
|
|
|
|
|
def _fake_v4(*keys):
|
|
return {"mdx_sections": {k: {"judgments_full32": []} for k in keys}}
|
|
|
|
|
|
def test_alias_resolver_exact_match_wins():
    """An exact V4 key is returned even when an alias key is also present."""
    canonical_id = "04-2-sub-1"
    v4 = _fake_v4(canonical_id, "04-2.1")

    resolved_without_alias = _resolve_v4_section_key(v4, canonical_id)
    assert resolved_without_alias == "04-2-sub-1"

    resolved_with_alias = _resolve_v4_section_key(
        v4, canonical_id, alias_keys=["04-2.1"]
    )
    assert resolved_with_alias == "04-2-sub-1"
|
|
|
|
|
|
def test_alias_resolver_decimal_alias_when_metadata_present():
    """With only the decimal key in V4, the supplied alias is what resolves."""
    v4 = _fake_v4("04-2.1")
    resolved = _resolve_v4_section_key(v4, "04-2-sub-1", alias_keys=["04-2.1"])
    assert resolved == "04-2.1"
|
|
|
|
|
|
def test_alias_resolver_no_parent_promotion():
    """A parent V4 entry must not be promoted into a sub-section lookup."""
    v4 = _fake_v4("04-2")
    sub_id = "04-2-sub-1"

    # Without an alias the parent entry is invisible to the sub-id lookup.
    assert _resolve_v4_section_key(v4, sub_id) is None

    # Alias is opt-in; it only resolves when the caller explicitly provides it.
    resolved_via_alias = _resolve_v4_section_key(v4, sub_id, alias_keys=["04-2"])
    assert resolved_via_alias == "04-2"
|
|
|
|
|
|
def test_alias_resolver_no_sibling_promotion():
    """A sibling sub-section entry must not be auto-promoted without an alias."""
    v4_with_sibling_only = _fake_v4("04-2-sub-2")
    resolved = _resolve_v4_section_key(v4_with_sibling_only, "04-2-sub-1")
    assert resolved is None
|
|
|
|
|
|
def test_alias_resolver_miss_returns_none():
    """Neither the exact id nor the alias exists in V4, so the lookup is ``None``."""
    v4 = _fake_v4("99-1")
    sub_id = "04-2-sub-1"

    assert _resolve_v4_section_key(v4, sub_id) is None

    resolved_with_alias = _resolve_v4_section_key(
        v4, sub_id, alias_keys=["04-2.1"]
    )
    assert resolved_with_alias is None
|
|
|
|
|
|
# ─── C. align_sections_to_v4_granularity ────────────────────────────────────
|
|
|
|
|
|
def _section(section_id, num, title, raw_content):
    """Create an ``MdxSection`` from its four core fields only.

    Every other field is left at the ``MdxSection`` default, so the
    sub-section schema fields are exercised in their default state.
    """
    core_fields = {
        "section_id": section_id,
        "section_num": num,
        "title": title,
        "raw_content": raw_content,
    }
    return MdxSection(**core_fields)
|
|
|
|
|
|
def test_mdx_section_default_construction_preserves_4_positional_callers():
    """IMP-08 B-3 : the legacy 4-positional ``MdxSection`` shape still works.

    The newer fields (heading_number / v4_alias_keys / sub_sections) must
    fall back to their defaults when only four positionals are given.
    """
    legacy_positional_args = ("04-1", 1, "1. Top", "body")
    section = MdxSection(*legacy_positional_args)

    assert section.heading_number is None
    assert section.v4_alias_keys == []
    assert section.sub_sections == []
|
|
|
|
|
|
def test_align_passthrough_when_v4_key_exact_match():
    """A section whose id is already a V4 key passes through untouched.

    No override target is supplied here; parent-level V4 evidence flows
    via the exact-match lookup.
    """
    top_level = _section("04-1", 1, "1. Top", "body")
    v4_entries = {"04-1": {"judgments_full32": []}}
    v4 = {"mdx_sections": v4_entries}

    aligned = align_sections_to_v4_granularity([top_level], v4)

    assert len(aligned) == 1
    assert aligned[0].section_id == "04-1"
|
|
|
|
|
|
def test_align_parent_v4_exact_keeps_section_when_no_override_targets_sub():
    """Backward-compat axis: the parent is kept when nothing targets its sub-ids.

    V4 carries the parent exact key and no drag/drop override addresses a
    sub-id of this section, so the aligner MUST keep the parent (this
    preserves V4 evidence at parent granularity).
    """
    body_with_two_h3 = "### 2.1 First\nbody1\n### 2.2 Second\nbody2\n"
    parent = _section("03-2", 2, "2. Parent", body_with_two_h3)
    v4 = {"mdx_sections": {"03-2": {"judgments_full32": []}}}

    aligned = align_sections_to_v4_granularity([parent], v4)

    aligned_ids = [section.section_id for section in aligned]
    assert aligned_ids == ["03-2"]
|
|
|
|
|
|
def test_align_force_drills_when_override_targets_sub_id_with_parent_in_v4():
    """Stage 5 R2 blocker-fix regression: an override on a sub-id forces a drill.

    V4 has the parent exact key AND an override targets a sub-id of that
    section, so the aligner MUST drill regardless of V4 parent presence.
    This makes drag/drop addressing deterministic across all V4 yaml shapes.
    """
    body_with_two_h3 = "### 2.1 First\nbody1\n### 2.2 Second\nbody2\n"
    parent = _section("04-2", 2, "2. Parent", body_with_two_h3)
    v4_entries = {
        "04-2": {"judgments_full32": []},  # parent V4 entry present
        "04-2.1": {"judgments_full32": []},  # plus decimal sub entries
        "04-2.2": {"judgments_full32": []},
    }
    v4 = {"mdx_sections": v4_entries}

    aligned = align_sections_to_v4_granularity(
        [parent],
        v4,
        override_target_section_ids=["04-2-sub-1"],
    )

    # Force-drill: parent id MUST be replaced by canonical sub-ids.
    aligned_ids = [section.section_id for section in aligned]
    assert aligned_ids == ["04-2-sub-1", "04-2-sub-2"]

    # Decimal aliases preserved (N-R5: decimal heading_number).
    first_sub, second_sub = aligned
    assert first_sub.v4_alias_keys == ["04-2.1"]
    assert second_sub.v4_alias_keys == ["04-2.2"]
|
|
|
|
|
|
def test_align_top_level_override_target_does_not_force_drill_other_sections():
    """Force-drill scope guard: a top-level override target drills nothing.

    A top-level target ("primary=03-1") has no derive_parent_id, so it MUST
    NOT force-drill any section. Only "X-sub-N" targets trigger force-drill
    on parent X.
    """
    body_with_one_h3 = "### 2.1 First\nbody1\n"
    top_level = _section("03-1", 1, "1. Top", "body")
    parent_with_h3 = _section("03-2", 2, "2. Parent", body_with_one_h3)
    v4_entries = {
        "03-1": {"judgments_full32": []},
        "03-2": {"judgments_full32": []},
    }
    v4 = {"mdx_sections": v4_entries}

    aligned = align_sections_to_v4_granularity(
        [top_level, parent_with_h3],
        v4,
        override_target_section_ids=["03-1"],
    )

    # No sub-id target -> both sections kept at parent granularity.
    aligned_ids = [section.section_id for section in aligned]
    assert aligned_ids == ["03-1", "03-2"]
|
|
|
|
|
|
def test_align_drill_emits_canonical_ordinal_id_with_decimal_alias():
    """Decimal H3 headings -> canonical ordinal id + decimal alias (legacy V4 key)."""
    body_with_two_h3 = "### 2.1 First\nbody1\n### 2.2 Second\nbody2\n"
    parent = _section("04-2", 2, "2. Parent", body_with_two_h3)
    v4_without_entries = {"mdx_sections": {}}  # forces drill (no exact key)

    aligned = align_sections_to_v4_granularity([parent], v4_without_entries)

    aligned_ids = [section.section_id for section in aligned]
    assert aligned_ids == ["04-2-sub-1", "04-2-sub-2"]

    heading_numbers = [section.heading_number for section in aligned]
    assert heading_numbers == ["2.1", "2.2"]

    # N-R5 : decimal headings -> alias emitted.
    first_sub, second_sub = aligned
    assert first_sub.v4_alias_keys == ["04-2.1"]
    assert second_sub.v4_alias_keys == ["04-2.2"]
|
|
|
|
|
|
def test_align_drill_integer_only_h3_emits_no_alias_n_r5_guard():
    """N-R5 : integer-only H3 (e.g., "### 1 Title") must NOT generate an alias.

    Otherwise the alias would collide with sibling parent V4 entries
    (`{mdx_id}-1`).
    """
    body_with_integer_h3 = "### 1 Alpha\nbody1\n### 2 Beta\nbody2\n"
    parent = _section("05-2", 2, "2. Parent", body_with_integer_h3)
    v4_without_entries = {"mdx_sections": {}}

    aligned = align_sections_to_v4_granularity([parent], v4_without_entries)

    aligned_ids = [section.section_id for section in aligned]
    assert aligned_ids == ["05-2-sub-1", "05-2-sub-2"]

    heading_numbers = [section.heading_number for section in aligned]
    assert heading_numbers == ["1", "2"]

    first_sub, second_sub = aligned
    assert first_sub.v4_alias_keys == []
    assert second_sub.v4_alias_keys == []
|
|
|
|
|
|
def test_align_drill_undecorated_h3_emits_no_alias():
    """Plain `### Title` without numeric prefix -> heading_number=None, no alias."""
    body_with_plain_h3 = "### Alpha\nbody1\n### Beta\nbody2\n"
    parent = _section("03-3", 3, "3. Parent", body_with_plain_h3)
    v4_without_entries = {"mdx_sections": {}}

    aligned = align_sections_to_v4_granularity([parent], v4_without_entries)

    aligned_ids = [section.section_id for section in aligned]
    assert aligned_ids == ["03-3-sub-1", "03-3-sub-2"]

    heading_numbers = [section.heading_number for section in aligned]
    assert heading_numbers == [None, None]

    alias_lists = [section.v4_alias_keys for section in aligned]
    assert all(alias_keys == [] for alias_keys in alias_lists)
|
|
|
|
|
|
def test_align_no_h3_passes_section_through_unchanged():
    """No H3 sub-headings in raw_content -> the aligner keeps the section."""
    prose_only_body = "no subheadings here\njust prose"
    top_level = _section("04-1", 1, "1. Top", prose_only_body)
    v4_without_entries = {"mdx_sections": {}}

    aligned = align_sections_to_v4_granularity([top_level], v4_without_entries)

    assert len(aligned) == 1
    assert aligned[0].section_id == "04-1"
|
|
|
|
|
|
def test_align_resolver_round_trip_with_legacy_v4_alias():
    """End-to-end : the aligner's alias keys let the resolver find a legacy key.

    The aligner emits a canonical id plus alias keys; the resolver then
    finds the legacy decimal key in V4 via the alias path (no parent
    promotion).
    """
    body_with_one_h3 = "### 2.1 First\nbody1\n"
    parent = _section("04-2", 2, "2. Parent", body_with_one_h3)
    v4 = {"mdx_sections": {"04-2.1": {"judgments_full32": []}}}

    aligned = align_sections_to_v4_granularity([parent], v4)
    first_sub = aligned[0]
    assert first_sub.section_id == "04-2-sub-1"

    resolved_key = _resolve_v4_section_key(
        v4,
        first_sub.section_id,
        alias_keys=first_sub.v4_alias_keys,
    )
    assert resolved_key == "04-2.1"
|