From 9389b8425b366aa217bb06f8a5675a45afcbea28 Mon Sep 17 00:00:00 2001
From: kyeongmin <b24009@hanmaceng.co.kr>
Date: Wed, 20 May 2026 07:03:12 +0900
Subject: [PATCH] fix(orchestrator): P5 audit-anchor-first-line regression
 guard
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Bug discovered during #56 INTEGRATION-AUDIT-02 execution (2026-05-20):
- Both Claude and Codex put "Audit anchor: ..." as the FIRST line of every
  Gitea comment per the #56 issue body instruction "cite anchor at start
  of every stage".
- detect_agent (P0-1 strict, first-line only) then returns None for these
  comments because the first line is "Audit anchor:..." not "[Codex #N]"
  or "[Claude #N]".
- Result: the orchestrator's "is_codex" check (line ~1288) evaluates to
  False → "Codex 응답 미감지 — continuing" → infinite Stage 4 loop. #56
  reached Round #14 (>300 comments, ~2 hours of wasted tokens).

Fix path (NOT relaxing detect_agent — that would revive the original #45
pre-P0-1 bug where [Claude #N] citations inside Codex bodies caused
mis-detection):

1. AUDIT_ONLY_NOTE updated to enforce comment format:
   - FIRST non-empty line MUST be `[Claude #N] <stage>` or `[Codex #N] <stage>`
   - Audit anchor / banners / prefaces MUST appear on line 2 or later
   - Concrete CORRECT example included
   - Explicit warning that violation breaks stage advance

2. The "not is_codex" guard now posts an automatic supplement:
   - When _audit_mode(title) AND detect_agent(last) is not "codex" (None
     in the anchor-first case), the orchestrator posts a Gitea supplement
     comment requesting the correct format
   - In the next round, Claude/Codex see the supplement and correct their
     comment format
   - Breaks the infinite loop without manual intervention once the agents
     comply (no manual Ctrl-C needed)

3. Regression tests in TestDetectAgent (test_orchestrator_core.py):
   - test_audit_anchor_preface_breaks_detection: confirms that P0-1 strict
     detection returns None when the anchor is the first line
   - test_audit_anchor_after_header_works: confirms that the correct format
     (agent header first, anchor on line 2+) is detected as "codex"

Total: 96/96 pytest pass (94 prior + 2 P5 regression).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 orchestrator.py                               | 30 +++++++++++++++++-
 .../test_orchestrator_core.py                 | 31 +++++++++++++++++++
 2 files changed, 60 insertions(+), 1 deletion(-)
diff --git a/orchestrator.py b/orchestrator.py
index 89c7a03..0e6b671 100644
--- a/orchestrator.py
+++ b/orchestrator.py
@@ -900,7 +900,22 @@ AUDIT-ONLY MODE (this issue is an integration audit / report-only):
   status_integrity / report_assembly / followup_proposal). Each unit's tests: field MUST list verification
   commands or report artifacts (NOT pytest tests:[] which the orchestrator rejects).
 - Stage 5 commit = only audit report files. pipeline run artifacts under data/runs/ or .orchestrator/
-  are evidence-only and must NOT be staged for commit."""
+  are evidence-only and must NOT be staged for commit.
+- COMMENT FORMAT (CRITICAL — orchestrator detect_agent is first-line strict, P0-1):
+    The FIRST non-empty line of every Gitea comment MUST be exactly one of:
+      [Claude #<N>] <stage description>
+      [Codex #<N>] <stage description>
+    Audit anchor citation, banners, prefaces of any kind MUST appear AFTER the first line
+    (line 2 or later). If you put `Audit anchor:` or any other preface BEFORE the [Claude #N] /
+    [Codex #N] header, the orchestrator will fail to detect the agent and the stage cannot
+    advance — your work will be discarded and re-attempted with token waste.
+    Correct example:
+      [Codex #14] Stage 4 test-verify — INTEGRATION-AUDIT-02
+
+      Audit anchor: This audit verifies pipeline contracts...
+      ...
+      FINAL_CONSENSUS: YES
+"""
 
 
 def build_context_pack(n, title, body, sid, agent, rnd, start_cnt, compact=None):
@@ -1273,6 +1288,19 @@ def run_stage(n, title, body, sid):
         is_codex = detect_agent(last) == "codex"
         if not is_codex:
             log("  Codex 응답 미감지 — continuing")
+            # P5 (2026-05-20) — audit-mode 에서 detect_agent None 의 흔한 원인 =
+            # agent 가 audit anchor / preface 를 첫 줄에 박아서 P0-1 strict 가 못 찾음.
+            # 자동 supplement 로 format 교정 요청 → 무한 루프 자동 break.
+            if _audit_mode(title):
+                try: gitea(f"issues/{n}/comments", "POST", {"body":
+                    "⚠️ **[Orchestrator]** Codex 응답 미감지 — `detect_agent` 가 첫 줄에서 "
+                    "`[Codex #N]` 또는 `[Claude #N]` 패턴을 찾지 못함.\n\n"
+                    "AUDIT-ONLY mode 의 흔한 원인: `Audit anchor:` 같은 preface 가 첫 줄에 있음.\n\n"
+                    "다음 round 부터 모든 comment 의 **FIRST non-empty line 은 반드시**:\n"
+                    "  `[Codex #N] <stage description>` 또는 `[Claude #N] <stage description>`\n"
+                    "Audit anchor / banner / preface 는 line 2 이후 에만. 안 그러면 orchestrator 가 "
+                    "stage 진행 못 함 (P0-1 first-line strict)."})
+                except: pass
             continue
 
         status, target = parse_consensus(last)
diff --git a/tests/orchestrator_unit/test_orchestrator_core.py b/tests/orchestrator_unit/test_orchestrator_core.py
index 1460071..af88d89 100644
--- a/tests/orchestrator_unit/test_orchestrator_core.py
+++ b/tests/orchestrator_unit/test_orchestrator_core.py
@@ -97,6 +97,37 @@ Addressing [Codex #2] findings ...
         assert detect_agent("[Codex#1] hello") == "codex"
         assert detect_agent("[Claude#5] hi") == "claude"
 
+    def test_audit_anchor_preface_breaks_detection(self):
+        """P5 (2026-05-20) — regression: AUDIT-ONLY mode 의 'Audit anchor:' preface 가
+        첫 줄에 박히면 detect_agent 는 None 반환 (P0-1 strict 의도된 동작).
+        이게 #56 (INTEGRATION-AUDIT-02) 의 Stage 4 Round #14 infinite loop 의 직접 원인.
+        해결책 = detect_agent 완화 X, AUDIT_ONLY_NOTE 가 agent header 를 first line 으로 강제."""
+        body_anchor_first = (
+            "Audit anchor: This audit verifies pipeline contracts...\n"
+            "It does not implement runtime code.\n"
+            "\n"
+            "[Codex #14] Stage 4 (test-verify) Round #14 - INTEGRATION-AUDIT-02\n"
+            "\n"
+            "Verdict: PASS. Stage 3 satisfies all criteria.\n"
+            "FINAL_CONSENSUS: YES\n"
+        )
+        assert detect_agent(body_anchor_first) is None, (
+            "audit anchor preface as first line MUST cause detect_agent None "
+            "(P0-1 strict). Fix path: comment format, not detect_agent."
+        )
+
+    def test_audit_anchor_after_header_works(self):
+        """P5 (2026-05-20) — 올바른 format: agent header first line, anchor line 2+."""
+        body_header_first = (
+            "[Codex #14] Stage 4 (test-verify) Round #14 - INTEGRATION-AUDIT-02\n"
+            "\n"
+            "Audit anchor: This audit verifies pipeline contracts...\n"
+            "\n"
+            "Verdict: PASS.\n"
+            "FINAL_CONSENSUS: YES\n"
+        )
+        assert detect_agent(body_header_first) == "codex"
+
 
 # ─────────────────────────────────────────────────────────────────
 # parse_consensus — YES/NO + rewind_target