diff --git a/orchestrator.py b/orchestrator.py index cb10c25..60d0497 100644 --- a/orchestrator.py +++ b/orchestrator.py @@ -607,6 +607,32 @@ RULE 7: No hardcoding. RULE 8: AI finds 1px first. RULE 9: LLM classifies, code RULE 10: Don't uncritically accept. RULE 11: Checkpoint. RULE 12: Full paths. RULE 13: Anchor sync. PZ-1: AI=0 normal. PZ-2: 1turn=1step. PZ-3: No speculative. PZ-4: No silent shrink. +=== COMMENT FORMAT (P5b 2026-05-20 β€” STRICT, OVERRIDES ALL STAGE-SPECIFIC BODY RULES) === +The FIRST non-empty line of EVERY Gitea comment MUST start with one of: + [Claude #N] + [Codex #N] + +This rule applies to ALL stages (Stage 1 ~ Stage 6) and ALL issue types +(regular, execution-issue, audit-only). No prefix, no decoration, no banner, +no audit anchor before the agent header. Examples: + +CORRECT: + [Codex #3] Stage 2 simulation-plan review β€” IMP-24 + + πŸ“Œ Verification table + ... + +WRONG (orchestrator detect_agent will fail; stage cannot advance): + πŸ“Œ **[Claude #3] Stage 2 ...** + ## [Codex #3] Stage 2 ... + === IMPLEMENTATION_UNITS === (header missing entirely) + Audit anchor: ... (preface before header) + +This first-line-strict rule OVERRIDES any stage-specific "body MUST contain +ONLY" rule (e.g., COMPACT_PLAN_RULE). Those body rules apply AFTER the +mandatory first-line agent header. Decorations / banners / anchors go on +line 2 or later. + === CONSENSUS + REWIND (2026-05-16 lock) === Final line of every Codex review comment MUST be exactly one of: FINAL_CONSENSUS: YES @@ -901,10 +927,14 @@ def _check_dormant_triggers(): COMPACT_PLAN_RULE = """ COMPACT PLAN REQUIREMENTS (strict): +- The FIRST non-empty line of your comment MUST be the agent header + ([Claude #N] ... or [Codex #N] ...). This is enforced by RULES (P5b 2026-05-20) + and OVERRIDES the "body" constraints below. The Stage 2 compact body begins + AFTER the first-line agent header β€” NOT on line 1. - Total Stage 2 plan body MUST be ≀ 5,000 chars (4,000 chars target). - NO code snippets in this comment. Code goes in Stage 3 (code-edit), not Stage 2 plan. References to file:line locations are fine. Inline code blocks are forbidden. -- The Stage 2 plan body MUST contain ONLY: +- After the first-line agent header, the Stage 2 plan body MUST contain ONLY: a) === IMPLEMENTATION_UNITS === YAML block (units with id/summary/files/tests/estimate_lines) b) Brief per-unit rationale (≀ 3 lines per unit, no full code) c) Out-of-scope notes @@ -1328,20 +1358,40 @@ def run_stage(n, title, body, sid): last = comments[-1]["body"] is_codex = detect_agent(last) == "codex" if not is_codex: - log(" Codex 응닡 미감지 β€” continuing") - # P5 (2026-05-20) β€” audit-mode μ—μ„œ detect_agent None 의 ν”ν•œ 원인 = - # agent κ°€ audit anchor / preface λ₯Ό 첫 쀄에 λ°•μ•„μ„œ P0-1 strict κ°€ λͺ» 찾음. - # μžλ™ supplement 둜 format ꡐ정 μš”μ²­ β†’ λ¬΄ν•œ 루프 μžλ™ break. - if _audit_mode(title): + log(f" Codex 응닡 미감지 β€” first line: {last.lstrip().splitlines()[0][:80]!r}" if last and last.strip() else " Codex 응닡 미감지 β€” empty body") + # P5b (2026-05-20) β€” detect_agent None μ‹œ supplement κ°€λ“œ. + # λ²”μœ„ λ³€κ²½: audit-only μ œν•œ ν•΄μ œ β€” λͺ¨λ“  issue μ—μ„œ μž‘λ™ (#24 같은 일반 이슈 silent loop fix). + # Throttle: ν˜„μž¬ stage μ•ˆμ— 이미 N (=2) 회 supplement κ°€ λˆ„μ λ˜λ©΄ stop + user-action-required. + # 직전 N supplement κ°€ λ°•ν˜€λ„ LLM 이 또 μœ„λ°˜ν•˜λ©΄ 4 번째 round λΆ€ν„°λŠ” hard stop. + SUPP_MAX = 2 + SUPP_MARKER = "⚠️ **[Orchestrator]** Agent header missing" + stage_cmts = comments[start_cnt:] + supp_count = sum(1 for c in stage_cmts if (c.get("body") or "").lstrip().startswith(SUPP_MARKER)) + if supp_count >= SUPP_MAX: + log(f"β›” Agent header supplement {supp_count}/{SUPP_MAX} reached β€” STOP (user action required)") try: gitea(f"issues/{n}/comments", "POST", {"body": - "⚠️ **[Orchestrator]** Codex 응닡 미감지 β€” `detect_agent` κ°€ 첫 μ€„μ—μ„œ " - "`[Codex #N]` λ˜λŠ” `[Claude #N]` νŒ¨ν„΄μ„ μ°Ύμ§€ λͺ»ν•¨.\n\n" - "AUDIT-ONLY mode 의 ν”ν•œ 원인: `Audit anchor:` 같은 preface κ°€ 첫 쀄에 있음.\n\n" - "λ‹€μŒ round λΆ€ν„° λͺ¨λ“  comment 의 **FIRST non-empty line 은 λ°˜λ“œμ‹œ**:\n" - " `[Codex #N] ` λ˜λŠ” `[Claude #N] `\n" - "Audit anchor / banner / preface λŠ” line 2 이후 μ—λ§Œ. μ•ˆ 그러면 orchestrator κ°€ " - "stage μ§„ν–‰ λͺ» 함 (P0-1 first-line strict)."}) + f"β›” **[Orchestrator]** STOP β€” Stage `{sid}` cannot advance.\n\n" + f"`detect_agent` failed {supp_count}+ times in this stage. The LLM is not honoring " + f"the first-line agent header contract despite supplements.\n\n" + "**Action required (human)**: review last few comments, ensure FIRST non-empty line is " + "`[Claude #N]` or `[Codex #N]`, then restart `python -u .\\orchestrator.py --issue {n}`.\n\n" + "Orchestrator run is exiting this issue to prevent further token waste."}) except: pass + return False # exit run_stage β†’ run_issue treats as external close β†’ moves on + try: gitea(f"issues/{n}/comments", "POST", {"body": + f"{SUPP_MARKER} β€” orchestrator `detect_agent` could not find " + "`[Claude #N]` or `[Codex #N]` on the first non-empty line.\n\n" + "**Comment format contract (P5b 2026-05-20, see RULES)**:\n" + "The FIRST non-empty line of EVERY Gitea comment (both Claude and Codex, ALL stages) MUST be:\n" + " `[Claude #N] `\n" + " `[Codex #N] `\n\n" + "No prefix. No decoration. No banner. No audit anchor before the header.\n" + "Decorations (`πŸ“Œ`, `##`, `**`, audit anchor, etc.) go on line 2 or later.\n\n" + "This rule OVERRIDES any stage-specific 'body MUST contain ONLY' rule (e.g., COMPACT_PLAN_RULE) β€” " + "those body rules apply AFTER the mandatory first-line agent header.\n\n" + f"Supplement count for this stage: {supp_count + 1}/{SUPP_MAX}. " + f"At {SUPP_MAX}+ violations the orchestrator will hard-stop this issue."}) + except: pass continue status, target = parse_consensus(last) diff --git a/tests/orchestrator_unit/test_orchestrator_core.py b/tests/orchestrator_unit/test_orchestrator_core.py index af88d89..62b4063 100644 --- a/tests/orchestrator_unit/test_orchestrator_core.py +++ b/tests/orchestrator_unit/test_orchestrator_core.py @@ -128,6 +128,76 @@ Addressing [Codex #2] findings ... ) assert detect_agent(body_header_first) == "codex" + # P5b (2026-05-20) β€” Stage 2 compact-plan first-line conflict regression. + # #24 IMP-24 K6: Codex r1~r3 κ°€ 첫 쀄을 '=== IMPLEMENTATION_UNITS ===' 둜 μ‹œμž‘ β†’ + # detect_agent None β†’ orchestrator silent loop. fix path = comment format strict, + # NOT detect_agent μ™„ν™” (P0-1 κ°•ν™” κ·ΈλŒ€λ‘œ μœ μ§€). + + def test_implementation_units_first_line_breaks_detection(self): + """=== IMPLEMENTATION_UNITS === κ°€ 첫 쀄이면 detect_agent None (P0-1 strict 정상 λ™μž‘).""" + body = ( + "=== IMPLEMENTATION_UNITS ===\n" + "- id: u1\n" + " summary: ...\n" + " files:\n" + " - docs/architecture/PHASE-Q-AUDIT.md\n" + " tests:\n" + " - pytest -q tests\n" + " estimate_lines: 1\n" + "\n" + "FINAL_CONSENSUS: YES\n" + ) + assert detect_agent(body) is None, ( + "=== IMPLEMENTATION_UNITS === as first line MUST cause detect_agent None " + "(P0-1 strict). Fix path: enforce agent header first-line in prompt, not relax detect_agent." + ) + + def test_compact_plan_with_header_first_works(self): + """μ˜¬λ°”λ₯Έ Stage 2 compact format: [Codex #N] 첫 쀄 β†’ === IMPLEMENTATION_UNITS === λ‘˜μ§Έ 쀄+.""" + body = ( + "[Codex #4] Stage 2 simulation-plan review - IMP-24 K6\n" + "\n" + "=== IMPLEMENTATION_UNITS ===\n" + "- id: u1\n" + " summary: ...\n" + " tests:\n" + " - pytest -q tests\n" + "\n" + "FINAL_CONSENSUS: YES\n" + ) + assert detect_agent(body) == "codex" + + def test_markdown_prefix_breaks_detection(self): + """P5b β€” `## [Codex #N]` 같은 markdown header prefix 도 detect_agent None. + (#21 Stage 4 μ—μ„œ κ΄€μ°°λœ latent silent loop 원인.)""" + body_hash = "## [Codex #1] Stage 4 test-verify Round #1\n\nVerdict: PASS\n" + body_emoji = "πŸ“Œ **[Claude #1] Stage 2 plan**\n\nbody\n" + body_bold = "**[Codex #1] Stage 4**\n\nbody\n" + assert detect_agent(body_hash) is None + assert detect_agent(body_emoji) is None + assert detect_agent(body_bold) is None + + +class TestRulesAndCompactPlanFirstLineContract: + """P5b (2026-05-20) β€” RULES 와 COMPACT_PLAN_RULE λ‘˜ λ‹€ first-line agent header + rule 을 λͺ…μ‹œν•΄μ•Ό 함. wording 검증.""" + + def test_rules_has_first_line_strict(self): + from orchestrator import RULES + # RULES μ•ˆμ— first-line strict + λͺ¨λ“  stage 적용 λͺ…μ‹œ μžˆμ–΄μ•Ό 함. + assert "FIRST non-empty line" in RULES + assert "[Claude #N]" in RULES and "[Codex #N]" in RULES + # P5b OVERRIDES ν‚€μ›Œλ“œ β€” body rule 듀이 first-line rule 보닀 μš°μ„ ν•˜μ§€ μ•ŠμŒμ„ κ°•μ‘° + assert "OVERRIDES" in RULES or "overrides" in RULES.lower() + + def test_compact_plan_rule_carves_out_first_line(self): + from orchestrator import COMPACT_PLAN_RULE + # "body" λŠ” first-line agent header λ‹€μŒλΆ€ν„° μ‹œμž‘ν•œλ‹€κ³  λͺ…μ‹œ + assert "FIRST non-empty line" in COMPACT_PLAN_RULE or "first-line agent header" in COMPACT_PLAN_RULE + # "after the first-line" 같은 carve-out wording 검증 + body_lower = COMPACT_PLAN_RULE.lower() + assert "after the first" in body_lower or "after the agent header" in body_lower + # ───────────────────────────────────────────────────────────────── # parse_consensus β€” YES/NO + rewind_target