feat(orchestrator): P3 wrapper input/encoding fix + P4 audit-only mode

P3 hotfix (2026-05-18 — verified during #46 retry attempt):
- _run_with_tree_kill: encode input only when Popen is in binary mode. Previously force-encoded str→bytes even with encoding= set, breaking text-mode stdin pipes with: write() argument must be str, not bytes.
- run_claude path was the only affected call site.
- 3 new C7 regression tests (input+encoding / bytes+binary / auto-encode).
- C3/C6 test fixtures hardened with DEVNULL stdio isolation.

P4 audit-only mode (2026-05-19, prep for #50 integration audit):
- _is_audit_issue: title-based detection for [INTEGRATION-AUDIT*], [AUDIT-ONLY], or "integration audit" phrase.
- _audit_mode + --audit-only CLI flag: manual override regardless of title.
- AUDIT_ONLY_NOTE injected into context pack across all stages/rounds.
- Stage 3 (code-edit) YES gate: deterministic git status check. Changes touching src/**, templates/**, tests/** auto-reject Stage 3 YES and post a supplement-request comment. LLM-independent enforcement.
- 26 new audit-mode tests (title detection, CLI override, forbidden prefix detection, allowed paths pass, Windows backslash normalization, quoted paths with spaces, git error fail-open, constants sanity).

Total: 75/75 pytest pass.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-19 10:18:28 +09:00
parent cbbc163860
commit 4289a500b6
3 changed files with 384 additions and 18 deletions
--- a/orchestrator.py
+++ b/orchestrator.py
@@ -186,9 +186,18 @@ def _run_with_tree_kill(cmd, *, input=None, timeout=None, **popen_kwargs):
    mon = threading.Thread(target=_monitor, daemon=True)
    mon.start()

-    encode = isinstance(input, str)
-    inp = input.encode("utf-8") if encode else input
-    text_mode = popen_kwargs.get("text", False) or popen_kwargs.get("encoding")
+    # P3 fix (2026-05-18) — Popen 이 encoding= 또는 text=True 를 받으면 자기가 알아서
+    # text 모드로 stdin/stdout/stderr 처리. wrapper 가 input 을 미리 encode/decode 하면
+    # 텍스트 모드 pipe 에 bytes 쓰려다 TypeError. Popen 의 mode 에 맞춰 input 타입만 정렬.
+    text_mode = bool(popen_kwargs.get("text") or popen_kwargs.get("encoding"))
+    empty_out = "" if text_mode else b""
+    inp = input
+    if input is not None:
+        if text_mode and isinstance(input, bytes):
+            try: inp = input.decode(popen_kwargs.get("encoding") or "utf-8", "replace")
+            except Exception: inp = input
+        elif (not text_mode) and isinstance(input, str):
+            inp = input.encode("utf-8")

    try:
        stdout, stderr = proc.communicate(input=inp, timeout=timeout)
@@ -199,7 +208,7 @@ def _run_with_tree_kill(cmd, *, input=None, timeout=None, **popen_kwargs):
        try:
            stdout, stderr = proc.communicate()
        except Exception:
-            stdout, stderr = b"", b""
+            stdout, stderr = empty_out, empty_out
        # TimeoutExpired 가 가진 partial output 보존을 위해 raise 직전 cleanup.
        stop_event.set(); mon.join(timeout=2.0)
        _kill_tracked(list(tracked))
@@ -220,20 +229,7 @@ def _run_with_tree_kill(cmd, *, input=None, timeout=None, **popen_kwargs):
            for s in tracked: _SPAWNED.discard(s)
            if root_sig: _SPAWNED.discard(root_sig)

-    # text/encoding 처리 — Popen 은 bytes 로만 받고, 호출부의 encoding= 옵션 흉내.
-    enc = popen_kwargs.get("encoding")
-    errors = popen_kwargs.get("errors", "strict")
-    if enc:
-        try: stdout = stdout.decode(enc, errors)
-        except Exception: pass
-        try: stderr = stderr.decode(enc, errors)
-        except Exception: pass
-    elif text_mode:
-        try: stdout = stdout.decode("utf-8", "replace")
-        except Exception: pass
-        try: stderr = stderr.decode("utf-8", "replace")
-        except Exception: pass
-
+    # Popen 이 이미 mode 에 맞는 타입으로 반환 — 별도 decode 불필요.
    return subprocess.CompletedProcess(args=cmd, returncode=rc, stdout=stdout, stderr=stderr)

 def _orchestrator_exit_cleanup():
@@ -740,6 +736,59 @@ def _is_execution_issue(title):
    if not title: return False
    return bool(re.search(r"\b실행[-\s]\d+\b", title)) or bool(re.search(r"\bexec[-\s]?\d+\b", title, re.IGNORECASE))

+# P4 (2026-05-19) — audit-only mode.
+# Title-based detection ([INTEGRATION-AUDIT-NN], [AUDIT-ONLY]) plus a forced override via the --audit-only CLI flag.
+# Purpose: a deterministic guard that keeps the LLM from modifying production code on integration-audit style issues.
+AUDIT_ONLY_OVERRIDE = False  # set to True by main() when --audit-only is passed
+
+def _is_audit_issue(title):
+    """Return True when the issue title carries an audit marker.
+
+    Recognized markers (case-insensitive): a bracketed tag beginning with
+    ``[INTEGRATION-AUDIT`` (optionally followed by ``-<digits>``) or
+    ``[AUDIT-ONLY``, or the phrase "integration audit" anywhere in the title.
+    An empty or missing title is never treated as an audit issue.
+    """
+    if title:
+        tagged = re.search(r"\[(INTEGRATION-AUDIT(?:-\d+)?|AUDIT-ONLY)\b", title, re.IGNORECASE)
+        return tagged is not None or "integration audit" in title.lower()
+    return False
+
+def _audit_mode(title):
+    """Tell whether audit-only mode applies: forced by the CLI override, otherwise decided from the title."""
+    if AUDIT_ONLY_OVERRIDE:
+        return AUDIT_ONLY_OVERRIDE
+    return _is_audit_issue(title)
+
+# src/, templates/, tests/ = the production code surface. An audit issue must never touch these.
+# Blacklist rather than whitelist — fewer false positives (data/runs, .orchestrator artifacts, etc. pass through naturally).
+AUDIT_ONLY_FORBIDDEN_PREFIXES = ("src/", "templates/", "tests/")
+
+def _porcelain_entry_paths(line):
+    """Return the path(s) named by one `git status --porcelain` line, destination first.
+
+    Ordinary entries ("XY path") yield one path. Rename/copy entries
+    ("XY old -> new", where X or Y is R or C) yield [new, old]. Wrapping double
+    quotes are dropped and backslashes are turned into forward slashes; C-style
+    escapes inside quoted names are not decoded. Lines too short to hold a path
+    yield an empty list.
+    """
+    if len(line) < 4:
+        return []
+    status, field = line[:2], line[3:].strip()
+    parts = [field]
+    if "R" in status or "C" in status:
+        # Only rename/copy entries carry two paths. A " -> " inside an ordinary
+        # (quoted) file name must not be mistaken for the rename separator.
+        m = re.match(r'^("(?:[^"\\]|\\.)*"|.+?) -> (.+)$', field)
+        if m:
+            parts = [m.group(2).strip(), m.group(1).strip()]
+    paths = []
+    for part in parts:
+        # Quoted path (special chars / spaces) — strip the wrapping quotes.
+        if len(part) >= 2 and part.startswith('"') and part.endswith('"'):
+            part = part[1:-1]
+        # Unify on forward slashes (also covers Windows-style backslashes).
+        paths.append(part.replace("\\", "/"))
+    return paths
+
+def _check_audit_only_violations():
+    """Inspect `git status --porcelain` for changes under AUDIT_ONLY_FORBIDDEN_PREFIXES.
+
+    Returns:
+        A list of violating paths (forward-slash normalized), at most one per
+        status line; an empty list means the check passed. For rename/copy
+        entries both sides are examined, so moving a file out of a forbidden
+        directory is flagged as well as moving one in; the destination is
+        reported when it violates, otherwise the source.
+
+    The guard fails open: if git cannot be run, times out, or exits non-zero,
+    an empty list is returned so that a broken guard never raises a false alarm.
+    """
+    try:
+        r = subprocess.run(
+            ["git", "status", "--porcelain"],
+            capture_output=True, text=True, encoding="utf-8", errors="replace",
+            cwd=PROJECT_DIR, timeout=30,
+        )
+        if r.returncode != 0:
+            # git error — fail open (a failure of the guard itself must not look like a violation).
+            return []
+    except Exception:
+        # Deliberate best-effort: any failure to obtain the status is treated as "no violations".
+        return []
+    forbidden = tuple(AUDIT_ONLY_FORBIDDEN_PREFIXES)
+    bad = []
+    for line in (r.stdout or "").splitlines():
+        for path in _porcelain_entry_paths(line):
+            if path.startswith(forbidden):
+                bad.append(path)
+                break  # one report per status line
+    return bad
+
 # P1-5 (2026-05-18) — Stage 2 compact rule (모든 issue 적용).
 # Stage 2 의 c-role 에 size budget + code snippet 금지 명시. 29 KB plan 차단.
 COMPACT_PLAN_RULE = """
@@ -768,6 +817,25 @@ EXECUTION-ISSUE MODE (this issue title contains '실행-N' or 'exec-N'):
  Do NOT enumerate parent's axes; focus on THIS issue's single axis.
 - Skip deep architectural analysis already done in the parent."""

+# P4 (2026-05-19) — audit-only mode prompt block.
+# Although Stage 3 is labelled "code edit", source modification is strictly forbidden on audit issues.
+# The orchestrator itself checks git status at the Stage 3 YES gate and rejects the YES when a violation is found.
+AUDIT_ONLY_NOTE = """
+
+AUDIT-ONLY MODE (this issue is an integration audit / report-only):
+- This issue does NOT modify production source code. Stage 3 = audit report writing, NOT code editing.
+- Allowed file changes:
+    docs/architecture/INTEGRATION-AUDIT-*.md
+    docs/architecture/PHASE-Z-IMPLEMENTATION-ISSUE-BACKLOG.md (only if explicitly in plan)
+- FORBIDDEN file changes (orchestrator will auto-reject Stage 3 YES if any of these touched):
+    src/**, templates/**, tests/**
+- If a blocker is found during audit, propose a FOLLOW-UP ISSUE in the report — do NOT modify code in this issue.
+- Stage 3 IMPLEMENTATION_UNITS should be audit subtasks (scope_myopia / pipeline_map / conflict_check /
+  status_integrity / report_assembly / followup_proposal). Each unit's tests: field MUST list verification
+  commands or report artifacts (NOT pytest tests:[] which the orchestrator rejects).
+- Stage 5 commit = only audit report files. pipeline run artifacts under data/runs/ or .orchestrator/
+  are evidence-only and must NOT be staged for commit."""
+

 def build_context_pack(n, title, body, sid, agent, rnd, start_cnt, compact=None):
    idx = STAGE_IDS.index(sid); si = STAGES[idx]
@@ -776,11 +844,14 @@ def build_context_pack(n, title, body, sid, agent, rnd, start_cnt, compact=None)
    prior = load_all_exit_reports(n, idx)

    # P1-4/P1-5 (2026-05-18) — execution-issue + Stage 2 compact rule
+    # P4 (2026-05-19) — audit-only mode injection (모든 stage 에 prompt 가드 + Stage 3 git diff 가드 별도)
    extras = []
    if sid == "simulation-plan":
        extras.append(COMPACT_PLAN_RULE)
    if _is_execution_issue(title):
        extras.append(EXECUTION_ISSUE_NOTE)
+    if _audit_mode(title):
+        extras.append(AUDIT_ONLY_NOTE)
    extras_text = "".join(extras)

    # 검증 실패 보고서 (rewind 시 이전 실패 맥락 전달).
@@ -1211,6 +1282,27 @@ def run_stage(n, title, body, sid):
                        except: pass
                        continue

+                # P4 (2026-05-19) — AUDIT-ONLY guard: Stage 3 (code-edit) YES 직전 git status 검사.
+                # src/templates/tests 변경 있으면 자동 reject + supplement 요청. LLM 양심 무관 deterministic.
+                if sid == "code-edit" and _audit_mode(title):
+                    bad = _check_audit_only_violations()
+                    if bad:
+                        log(f"⚠️ AUDIT-ONLY violation — Stage 3 YES rejected: {len(bad)} forbidden file change(s)")
+                        log(f"   violations (first 5): {bad[:5]}")
+                        try: gitea(f"issues/{n}/comments", "POST", {"body":
+                            "⚠️ **[Orchestrator]** AUDIT-ONLY mode violation: Stage 3 YES rejected.\n\n"
+                            "This issue is in audit-only mode. Production code changes are forbidden.\n\n"
+                            f"Forbidden file changes detected ({len(bad)} file(s)):\n" +
+                            "\n".join(f"  - `{v}`" for v in bad[:20]) +
+                            ("\n  - ... (truncated)" if len(bad) > 20 else "") + "\n\n"
+                            "Revert these changes and limit Stage 3 outputs to:\n"
+                            "- `docs/architecture/INTEGRATION-AUDIT-*.md`\n"
+                            "- `docs/architecture/PHASE-Z-IMPLEMENTATION-ISSUE-BACKLOG.md` (only if planned)\n\n"
+                            "If a blocker was found, propose a follow-up issue in the audit report — "
+                            "do NOT modify production code in this audit issue."})
+                        except: pass
+                        continue
+
                log(f"✅ {si['label']} — YES (evidence verified)")
                # stage 완료 = unit counter + remaining tracker 모두 reset
                update_issue_state(n, continue_same_count=0, last_remaining_units=None)
@@ -1469,7 +1561,14 @@ def main():
    p.add_argument("--issue", "-i", type=int); p.add_argument("--status", "-s", action="store_true")
    p.add_argument("--from", dest="sf", type=int); p.add_argument("--until", choices=STAGE_IDS)
    p.add_argument("--reset", type=int, metavar="N"); p.add_argument("--reset-all", action="store_true")
+    p.add_argument("--audit-only", action="store_true",
+                   help="P4: force audit-only mode (no src/templates/tests edits, Stage 3 guard active)")
    a = p.parse_args()
+    # P4 — CLI override 가 title 검사를 강제. title 에 marker 없어도 audit-only 로 잠금.
+    if a.audit_only:
+        global AUDIT_ONLY_OVERRIDE
+        AUDIT_ONLY_OVERRIDE = True
+        log("  --audit-only flag: audit mode forced (src/templates/tests changes will be blocked)")
    if a.reset: clear_state(a.reset); log(f"Cleared #{a.reset}")
    elif a.reset_all: clear_state(); log("All cleared")
    elif a.status: show_status(a.issue)