feat(orchestrator): P3 wrapper input/encoding fix + P4 audit-only mode

P3 hotfix (2026-05-18 — verified during #46 retry attempt):
- _run_with_tree_kill: encode input only when Popen is in binary mode.
  Previously force-encoded str→bytes even with encoding= set, breaking
  text-mode stdin pipes with: write() argument must be str, not bytes.
- run_claude path was the only affected call site.
- 3 new C7 regression tests (input+encoding / bytes+binary / auto-encode).
- C3/C6 test fixtures hardened with DEVNULL stdio isolation.

P4 audit-only mode (2026-05-19, prep for #50 integration audit):
- _is_audit_issue: title-based detection for [INTEGRATION-AUDIT*],
  [AUDIT-ONLY], or "integration audit" phrase.
- _audit_mode + --audit-only CLI flag: manual override regardless of title.
- AUDIT_ONLY_NOTE injected into context pack across all stages/rounds.
- Stage 3 (code-edit) YES gate: deterministic git status check.
  Changes touching src/**, templates/**, tests/** auto-reject Stage 3 YES
  and post a supplement-request comment. LLM-independent enforcement.
- 26 new audit-mode tests (title detection, CLI override, forbidden
  prefix detection, allowed paths pass, Windows backslash normalization,
  quoted paths with spaces, git error fail-open, constants sanity).

Total: 75/75 pytest pass.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-19 10:18:28 +09:00
parent cbbc163860
commit 4289a500b6
3 changed files with 384 additions and 18 deletions

View File

@@ -186,9 +186,18 @@ def _run_with_tree_kill(cmd, *, input=None, timeout=None, **popen_kwargs):
mon = threading.Thread(target=_monitor, daemon=True)
mon.start()
encode = isinstance(input, str)
inp = input.encode("utf-8") if encode else input
text_mode = popen_kwargs.get("text", False) or popen_kwargs.get("encoding")
# P3 fix (2026-05-18) — Popen 이 encoding= 또는 text=True 를 받으면 자기가 알아서
# text 모드로 stdin/stdout/stderr 처리. wrapper 가 input 을 미리 encode/decode 하면
# 텍스트 모드 pipe 에 bytes 쓰려다 TypeError. Popen 의 mode 에 맞춰 input 타입만 정렬.
text_mode = bool(popen_kwargs.get("text") or popen_kwargs.get("encoding"))
empty_out = "" if text_mode else b""
inp = input
if input is not None:
if text_mode and isinstance(input, bytes):
try: inp = input.decode(popen_kwargs.get("encoding") or "utf-8", "replace")
except Exception: inp = input
elif (not text_mode) and isinstance(input, str):
inp = input.encode("utf-8")
try:
stdout, stderr = proc.communicate(input=inp, timeout=timeout)
@@ -199,7 +208,7 @@ def _run_with_tree_kill(cmd, *, input=None, timeout=None, **popen_kwargs):
try:
stdout, stderr = proc.communicate()
except Exception:
stdout, stderr = b"", b""
stdout, stderr = empty_out, empty_out
# TimeoutExpired 가 가진 partial output 보존을 위해 raise 직전 cleanup.
stop_event.set(); mon.join(timeout=2.0)
_kill_tracked(list(tracked))
@@ -220,20 +229,7 @@ def _run_with_tree_kill(cmd, *, input=None, timeout=None, **popen_kwargs):
for s in tracked: _SPAWNED.discard(s)
if root_sig: _SPAWNED.discard(root_sig)
# text/encoding 처리 — Popen 은 bytes 로만 받고, 호출부의 encoding= 옵션 흉내.
enc = popen_kwargs.get("encoding")
errors = popen_kwargs.get("errors", "strict")
if enc:
try: stdout = stdout.decode(enc, errors)
except Exception: pass
try: stderr = stderr.decode(enc, errors)
except Exception: pass
elif text_mode:
try: stdout = stdout.decode("utf-8", "replace")
except Exception: pass
try: stderr = stderr.decode("utf-8", "replace")
except Exception: pass
# Popen 이 이미 mode 에 맞는 타입으로 반환 — 별도 decode 불필요.
return subprocess.CompletedProcess(args=cmd, returncode=rc, stdout=stdout, stderr=stderr)
def _orchestrator_exit_cleanup():
@@ -740,6 +736,59 @@ def _is_execution_issue(title):
if not title: return False
return bool(re.search(r"\b실행[-\s]\d+\b", title)) or bool(re.search(r"\bexec[-\s]?\d+\b", title, re.IGNORECASE))
# P4 (2026-05-19) — audit-only mode.
# Detection is title-based ([INTEGRATION-AUDIT-NN], [AUDIT-ONLY]) and can be forced with the --audit-only CLI flag.
# Purpose: a deterministic guard so the LLM cannot modify production code on integration-audit style issues.
AUDIT_ONLY_OVERRIDE = False # set to True by main() when --audit-only is passed on the CLI
def _is_audit_issue(title):
    """Return True when *title* carries an audit marker.

    A title qualifies when it contains a bracketed ``[INTEGRATION-AUDIT``
    (optionally followed by ``-<digits>``) or ``[AUDIT-ONLY`` tag, matched
    case-insensitively, or the plain phrase "integration audit" in any casing.
    Empty or ``None`` titles never qualify.
    """
    if title:
        bracket_marker = re.search(
            r"\[(INTEGRATION-AUDIT(?:-\d+)?|AUDIT-ONLY)\b", title, re.IGNORECASE
        )
        if bracket_marker is not None:
            return True
        # Fall back to the free-text phrase, ignoring case.
        return title.lower().find("integration audit") != -1
    return False
def _audit_mode(title):
    """Tell whether audit-only mode applies to the issue with this *title*.

    The module-level ``AUDIT_ONLY_OVERRIDE`` wins when it is truthy; otherwise
    the decision is delegated to the title-based check in ``_is_audit_issue``.
    """
    if AUDIT_ONLY_OVERRIDE:
        return AUDIT_ONLY_OVERRIDE
    return _is_audit_issue(title)
# src/, templates/ and tests/ form the production code surface; an audit issue must never touch them.
# A blacklist is used because it yields fewer false positives than a whitelist
# (data/runs, .orchestrator artifacts, etc. pass without having to be enumerated).
AUDIT_ONLY_FORBIDDEN_PREFIXES = ("src/", "templates/", "tests/")


def _split_porcelain_rename(field):
    """Split a porcelain rename/copy field ``old -> new`` into ``[old, new]``.

    A quoted source name is skipped as a unit, so a `` -> `` inside the quotes
    is not mistaken for the separator. Returns ``[field]`` when no separator
    is found.
    """
    arrow = " -> "
    if field.startswith('"'):
        # Walk to the closing quote, stepping over backslash escapes.
        pos = 1
        while pos < len(field) and field[pos] != '"':
            pos += 2 if field[pos] == "\\" else 1
        head, tail = field[:pos + 1], field[pos + 1:]
        if tail.startswith(arrow):
            return [head, tail[len(arrow):].strip()]
        return [field]
    head, found, tail = field.partition(arrow)
    return [head, tail.strip()] if found else [field]


def _normalize_porcelain_path(path):
    """Drop wrapping double quotes and convert backslashes to forward slashes.

    NOTE: C-style escapes inside a quoted name are not decoded; this is enough
    for matching the ASCII prefixes in AUDIT_ONLY_FORBIDDEN_PREFIXES.
    """
    if path.startswith('"') and path.endswith('"'):
        path = path[1:-1]
    return path.replace("\\", "/")


def _check_audit_only_violations(porcelain_output=None):
    """Return the changed paths that fall under AUDIT_ONLY_FORBIDDEN_PREFIXES.

    Args:
        porcelain_output: Optional pre-captured ``git status --porcelain``
            text. When ``None`` (the default) git is run in ``PROJECT_DIR``.

    Returns:
        A list of normalized (forward-slash) violating paths, at most one per
        status line. An empty list means the check passed. If git cannot be
        run or exits non-zero the guard fails open and returns ``[]`` so that
        a broken guard does not raise false alarms.

    Rename/copy entries (status containing ``R`` or ``C``) are checked on both
    sides: the destination is reported when it is forbidden, otherwise the
    source is reported when it is forbidden (a file moved out of a protected
    directory is still a production-code change). Only such entries are split
    on `` -> ``, so a quoted ordinary path that contains the arrow text is not
    truncated.
    """
    if porcelain_output is None:
        try:
            result = subprocess.run(
                ["git", "status", "--porcelain"],
                capture_output=True, text=True, encoding="utf-8", errors="replace",
                cwd=PROJECT_DIR, timeout=30,
            )
        except Exception:
            # Deliberate best-effort: failure of the guard itself must not block the stage.
            return []
        if result.returncode != 0:
            # git error — fail open.
            return []
        porcelain_output = result.stdout or ""

    forbidden = tuple(AUDIT_ONLY_FORBIDDEN_PREFIXES)
    violations = []
    for line in porcelain_output.splitlines():
        if len(line) < 4:
            continue
        # Porcelain v1 layout: two status characters, a space, then the path field.
        status, field = line[:2], line[3:].strip()
        if "R" in status or "C" in status:
            candidates = _split_porcelain_rename(field)
        else:
            candidates = [field]
        # Destination first, so the reported path matches the previous behaviour
        # whenever the destination itself is forbidden.
        for candidate in reversed(candidates):
            normalized = _normalize_porcelain_path(candidate)
            if normalized.startswith(forbidden):
                violations.append(normalized)
                break
    return violations
# P1-5 (2026-05-18) — Stage 2 compact rule (모든 issue 적용).
# Stage 2 의 c-role 에 size budget + code snippet 금지 명시. 29 KB plan 차단.
COMPACT_PLAN_RULE = """
@@ -768,6 +817,25 @@ EXECUTION-ISSUE MODE (this issue title contains '실행-N' or 'exec-N'):
Do NOT enumerate parent's axes; focus on THIS issue's single axis.
- Skip deep architectural analysis already done in the parent."""
# P4 (2026-05-19) — audit-only mode prompt block.
# Stage 3 is named "code edit", but on audit issues modifying source is strictly forbidden.
# The orchestrator inspects git status itself at the Stage 3 YES gate and rejects the YES automatically on a violation.
AUDIT_ONLY_NOTE = """
AUDIT-ONLY MODE (this issue is an integration audit / report-only):
- This issue does NOT modify production source code. Stage 3 = audit report writing, NOT code editing.
- Allowed file changes:
docs/architecture/INTEGRATION-AUDIT-*.md
docs/architecture/PHASE-Z-IMPLEMENTATION-ISSUE-BACKLOG.md (only if explicitly in plan)
- FORBIDDEN file changes (orchestrator will auto-reject Stage 3 YES if any of these touched):
src/**, templates/**, tests/**
- If a blocker is found during audit, propose a FOLLOW-UP ISSUE in the report — do NOT modify code in this issue.
- Stage 3 IMPLEMENTATION_UNITS should be audit subtasks (scope_myopia / pipeline_map / conflict_check /
status_integrity / report_assembly / followup_proposal). Each unit's tests: field MUST list verification
commands or report artifacts (NOT pytest tests:[] which the orchestrator rejects).
- Stage 5 commit = only audit report files. pipeline run artifacts under data/runs/ or .orchestrator/
are evidence-only and must NOT be staged for commit."""
def build_context_pack(n, title, body, sid, agent, rnd, start_cnt, compact=None):
idx = STAGE_IDS.index(sid); si = STAGES[idx]
@@ -776,11 +844,14 @@ def build_context_pack(n, title, body, sid, agent, rnd, start_cnt, compact=None)
prior = load_all_exit_reports(n, idx)
# P1-4/P1-5 (2026-05-18) — execution-issue + Stage 2 compact rule
# P4 (2026-05-19) — audit-only mode injection (모든 stage 에 prompt 가드 + Stage 3 git diff 가드 별도)
extras = []
if sid == "simulation-plan":
extras.append(COMPACT_PLAN_RULE)
if _is_execution_issue(title):
extras.append(EXECUTION_ISSUE_NOTE)
if _audit_mode(title):
extras.append(AUDIT_ONLY_NOTE)
extras_text = "".join(extras)
# 검증 실패 보고서 (rewind 시 이전 실패 맥락 전달).
@@ -1211,6 +1282,27 @@ def run_stage(n, title, body, sid):
except: pass
continue
# P4 (2026-05-19) — AUDIT-ONLY guard: Stage 3 (code-edit) YES 직전 git status 검사.
# src/templates/tests 변경 있으면 자동 reject + supplement 요청. LLM 양심 무관 deterministic.
if sid == "code-edit" and _audit_mode(title):
bad = _check_audit_only_violations()
if bad:
log(f"⚠️ AUDIT-ONLY violation — Stage 3 YES rejected: {len(bad)} forbidden file change(s)")
log(f" violations (first 5): {bad[:5]}")
try: gitea(f"issues/{n}/comments", "POST", {"body":
"⚠️ **[Orchestrator]** AUDIT-ONLY mode violation: Stage 3 YES rejected.\n\n"
"This issue is in audit-only mode. Production code changes are forbidden.\n\n"
f"Forbidden file changes detected ({len(bad)} file(s)):\n" +
"\n".join(f" - `{v}`" for v in bad[:20]) +
("\n - ... (truncated)" if len(bad) > 20 else "") + "\n\n"
"Revert these changes and limit Stage 3 outputs to:\n"
"- `docs/architecture/INTEGRATION-AUDIT-*.md`\n"
"- `docs/architecture/PHASE-Z-IMPLEMENTATION-ISSUE-BACKLOG.md` (only if planned)\n\n"
"If a blocker was found, propose a follow-up issue in the audit report — "
"do NOT modify production code in this audit issue."})
except: pass
continue
log(f"{si['label']} — YES (evidence verified)")
# stage 완료 = unit counter + remaining tracker 모두 reset
update_issue_state(n, continue_same_count=0, last_remaining_units=None)
@@ -1469,7 +1561,14 @@ def main():
p.add_argument("--issue", "-i", type=int); p.add_argument("--status", "-s", action="store_true")
p.add_argument("--from", dest="sf", type=int); p.add_argument("--until", choices=STAGE_IDS)
p.add_argument("--reset", type=int, metavar="N"); p.add_argument("--reset-all", action="store_true")
p.add_argument("--audit-only", action="store_true",
help="P4: force audit-only mode (no src/templates/tests edits, Stage 3 guard active)")
a = p.parse_args()
# P4 — CLI override 가 title 검사를 강제. title 에 marker 없어도 audit-only 로 잠금.
if a.audit_only:
global AUDIT_ONLY_OVERRIDE
AUDIT_ONLY_OVERRIDE = True
log(" --audit-only flag: audit mode forced (src/templates/tests changes will be blocked)")
if a.reset: clear_state(a.reset); log(f"Cleared #{a.reset}")
elif a.reset_all: clear_state(); log("All cleared")
elif a.status: show_status(a.issue)

View File

@@ -0,0 +1,221 @@
"""P4 (2026-05-19) — audit-only mode verification.
Covers:
- _is_audit_issue: title pattern detection (positive + negative)
- _audit_mode: title-based + CLI override (AUDIT_ONLY_OVERRIDE)
- _check_audit_only_violations: forbidden prefix detection via mocked git status
- AUDIT_ONLY_NOTE injection into context pack (via build_context_pack contract)
Run: pytest -q tests/orchestrator_unit/test_audit_mode.py
"""
import sys
import subprocess
from pathlib import Path
import pytest
ROOT = Path(__file__).parent.parent.parent
sys.path.insert(0, str(ROOT))
import orchestrator # noqa: E402
from orchestrator import ( # noqa: E402
_is_audit_issue,
_audit_mode,
_check_audit_only_violations,
AUDIT_ONLY_FORBIDDEN_PREFIXES,
AUDIT_ONLY_NOTE,
)
# ─────────────────────────────────────────────────────────────────
# _is_audit_issue — title detection
# ─────────────────────────────────────────────────────────────────
class TestIsAuditIssue:
    """Title-pattern detection of ``_is_audit_issue`` (positive and negative cases)."""

    def test_integration_audit_bracket(self):
        for title in (
            "[INTEGRATION-AUDIT-01] cumulative review",
            "[INTEGRATION-AUDIT-02] something",
            "[INTEGRATION-AUDIT] no number",
        ):
            assert _is_audit_issue(title) is True

    def test_audit_only_bracket(self):
        detected = _is_audit_issue("[AUDIT-ONLY] doc consistency check")
        assert detected is True

    def test_case_insensitive(self):
        for title in ("[integration-audit-03] foo", "[Audit-Only] bar"):
            assert _is_audit_issue(title) is True

    def test_plain_integration_audit_phrase(self):
        for title in (
            "Quarterly integration audit for closed issues",
            "Integration Audit Q2",
        ):
            assert _is_audit_issue(title) is True

    def test_execution_issue_not_audit(self):
        """An execution sub-issue must not be misdetected as an audit."""
        for title in (
            "[IMP-15 실행-1] image_aspect_mismatch",
            "[IMP-15 exec-2] table overflow",
        ):
            assert _is_audit_issue(title) is False

    def test_unrelated_issues(self):
        for title in (
            "IMP-19 I4 zone 비중 분배",
            "Fix overflow bug",
            "docs(IMP-06): Stage 4 fix",
        ):
            assert _is_audit_issue(title) is False

    def test_empty_or_none(self):
        for blank in ("", None):
            assert _is_audit_issue(blank) is False

    def test_audit_word_in_random_position_no_match(self):
        """A lone 'audit' must not match — only the phrase 'integration audit' does."""
        for title in ("audit some code", "security audit"):
            assert _is_audit_issue(title) is False
# ─────────────────────────────────────────────────────────────────
# _audit_mode — combination with CLI override
# ─────────────────────────────────────────────────────────────────
class TestAuditMode:
    """``_audit_mode`` combines the title check with the CLI override flag."""

    @staticmethod
    def _set_override(value):
        """Write the module-level override flag on the orchestrator module."""
        orchestrator.AUDIT_ONLY_OVERRIDE = value

    def setup_method(self):
        # Reset the override before each test.
        self._set_override(False)

    def teardown_method(self):
        self._set_override(False)

    def test_title_based_only(self):
        marked = _audit_mode("[INTEGRATION-AUDIT-01] foo")
        unmarked = _audit_mode("IMP-19 zone")
        assert marked is True
        assert unmarked is False

    def test_cli_override_forces_audit(self):
        """The CLI flag forces audit mode even when the title has no marker."""
        self._set_override(True)
        for title in ("IMP-19 zone", "any title", ""):
            assert _audit_mode(title) is True

    def test_override_off_falls_back_to_title(self):
        self._set_override(False)
        assert _audit_mode("IMP-19 zone") is False
        assert _audit_mode("[INTEGRATION-AUDIT-01]") is True
# ─────────────────────────────────────────────────────────────────
# _check_audit_only_violations — git status parsing
# ─────────────────────────────────────────────────────────────────
class _FakeCompleted:
    """Minimal stand-in for the object returned by ``subprocess.run``.

    Carries only the three attributes set here: ``stdout`` (as given),
    ``stderr`` (always an empty string) and ``returncode`` (default 0).
    """

    def __init__(self, stdout, returncode=0):
        self.returncode = returncode
        self.stdout, self.stderr = stdout, ""
class TestCheckAuditOnlyViolations:
    """Exercise various git status outputs by monkeypatching ``subprocess.run``."""

    @staticmethod
    def _stub_git(monkeypatch, stdout, returncode=0):
        """Make ``subprocess.run`` return a fake result with the given output."""
        def fake_run(*args, **kwargs):
            return _FakeCompleted(stdout=stdout, returncode=returncode)
        monkeypatch.setattr(subprocess, "run", fake_run)

    def test_clean_tree(self, monkeypatch):
        self._stub_git(monkeypatch, "")
        assert _check_audit_only_violations() == []

    def test_only_allowed_changes(self, monkeypatch):
        """Changes confined to docs/architecture produce no violations."""
        status_lines = [
            " M docs/architecture/INTEGRATION-AUDIT-01-REPORT.md\n",
            "?? docs/architecture/INTEGRATION-AUDIT-01-MATRIX.md\n",
            " M docs/architecture/PHASE-Z-IMPLEMENTATION-ISSUE-BACKLOG.md\n",
        ]
        self._stub_git(monkeypatch, "".join(status_lines))
        assert _check_audit_only_violations() == []

    def test_src_change_detected(self, monkeypatch):
        status_lines = [
            " M src/phase_z2_pipeline.py\n",
            " M docs/architecture/INTEGRATION-AUDIT-01-REPORT.md\n",
        ]
        self._stub_git(monkeypatch, "".join(status_lines))
        found = _check_audit_only_violations()
        assert found == ["src/phase_z2_pipeline.py"]

    def test_templates_change_detected(self, monkeypatch):
        self._stub_git(monkeypatch, " M templates/phase_z2/families/something.html\n")
        found = _check_audit_only_violations()
        assert found == ["templates/phase_z2/families/something.html"]

    def test_tests_change_detected(self, monkeypatch):
        self._stub_git(monkeypatch, " M tests/phase_z2/test_overflow.py\n")
        found = _check_audit_only_violations()
        assert found == ["tests/phase_z2/test_overflow.py"]

    def test_multiple_violations(self, monkeypatch):
        status_lines = [
            " M src/a.py\n",
            "?? src/b.py\n",
            " M templates/c.html\n",
            " M tests/d.py\n",
            " M docs/architecture/INTEGRATION-AUDIT-01-REPORT.md\n",  # allowed
            " M data/runs/run123.json\n",  # allowed (not under a forbidden prefix)
        ]
        self._stub_git(monkeypatch, "".join(status_lines))
        found = _check_audit_only_violations()
        assert set(found) == {"src/a.py", "src/b.py", "templates/c.html", "tests/d.py"}

    def test_renamed_file_destination_checked(self, monkeypatch):
        """For a rename entry the destination path is what gets reported."""
        self._stub_git(monkeypatch, "R docs/old.md -> src/new.py\n")
        found = _check_audit_only_violations()
        assert found == ["src/new.py"]

    def test_windows_backslash_path(self, monkeypatch):
        """A Windows backslash path is normalized to forward slashes and still matches."""
        self._stub_git(monkeypatch, " M src\\phase_z2_pipeline.py\n")
        found = _check_audit_only_violations()
        assert found == ["src/phase_z2_pipeline.py"]

    def test_quoted_path_with_spaces(self, monkeypatch):
        """Paths with spaces/special characters are quoted — quotes are stripped before matching."""
        self._stub_git(monkeypatch, ' M "src/some file.py"\n')
        found = _check_audit_only_violations()
        assert found == ["src/some file.py"]

    def test_git_error_fails_open(self, monkeypatch):
        """A failing git must not create a false positive — an empty list is returned."""
        self._stub_git(monkeypatch, "", returncode=128)
        assert _check_audit_only_violations() == []

    def test_subprocess_exception_fails_open(self, monkeypatch):
        """Even when ``subprocess.run`` itself raises, the guard reports nothing."""
        def boom(*a, **kw):
            raise RuntimeError("git missing")
        monkeypatch.setattr(subprocess, "run", boom)
        assert _check_audit_only_violations() == []
# ─────────────────────────────────────────────────────────────────
# AUDIT_ONLY_NOTE constants — sanity
# ─────────────────────────────────────────────────────────────────
class TestAuditOnlyConstants:
    """Sanity checks on ``AUDIT_ONLY_NOTE`` and ``AUDIT_ONLY_FORBIDDEN_PREFIXES``."""

    def test_note_mentions_forbidden_prefixes(self):
        note = AUDIT_ONLY_NOTE
        for p in AUDIT_ONLY_FORBIDDEN_PREFIXES:
            assert p in note, f"AUDIT_ONLY_NOTE missing prefix mention: {p}"

    def test_note_mentions_allowed_paths(self):
        for allowed in (
            "INTEGRATION-AUDIT-*.md",
            "PHASE-Z-IMPLEMENTATION-ISSUE-BACKLOG.md",
        ):
            assert allowed in AUDIT_ONLY_NOTE

    def test_note_states_no_code_edit(self):
        # The note must spell out "report" and "NOT code"-style wording (readability guard for the LLM).
        lowered = AUDIT_ONLY_NOTE.lower()
        assert any(phrase in lowered for phrase in ("audit report", "report writing"))
        assert any(phrase in lowered for phrase in ("not code", "no production"))

    def test_forbidden_prefixes_no_trailing_slash_issues(self):
        """The blacklist is matched with startswith, so a prefix without a slash
        ('src') would also match 'srcfoo.py' — a false positive.
        Every prefix must therefore end with '/'."""
        for p in AUDIT_ONLY_FORBIDDEN_PREFIXES:
            assert p.endswith("/"), f"prefix '{p}' must end with '/' to avoid false matches"

View File

@@ -252,6 +252,52 @@ class TestC6_OrphanGrandchildAfterNormalExit:
)
# ─────────────────────────────────────────────────────────────────
# C7: input + encoding path — run_claude 가 실제 사용하는 호출 모드.
# 2026-05-18 production bug: str input + encoding="utf-8" 일 때
# wrapper 가 input 을 강제로 bytes 인코딩 → Popen text mode pipe 에
# bytes 쓰려다 TypeError: write() argument must be str, not bytes.
# ─────────────────────────────────────────────────────────────────
class TestC7_InputEncodingPath:
    """C7 regression tests for the input/encoding handling of ``_run_with_tree_kill``.

    Every child process echoes stdin to stdout through the *binary* buffers,
    so the result depends only on how the wrapper encodes and decodes, never
    on the child's locale-dependent text encoding.
    """

    def test_str_input_with_encoding_utf8(self):
        """input=str together with encoding='utf-8' (per the original note, the mode run_claude uses)."""
        # Echo raw bytes: a text-mode echo in the child would decode stdin with the
        # child's locale encoding and could fail on non-UTF-8 locales for reasons
        # unrelated to the wrapper. Non-ASCII text is included to verify the encoding.
        r = _run_with_tree_kill(
            [_py(), "-c", "import sys; sys.stdout.buffer.write(sys.stdin.buffer.read())"],
            input="hello 안녕\n",
            encoding="utf-8",
            timeout=10,
        )
        assert r.returncode == 0
        # With encoding= set, stdout must come back as str.
        assert isinstance(r.stdout, str)
        assert "hello" in r.stdout
        assert "안녕" in r.stdout

    def test_bytes_input_without_encoding(self):
        """Without encoding the pipes are binary — bytes input passes through unchanged."""
        r = _run_with_tree_kill(
            [_py(), "-c", "import sys; sys.stdout.buffer.write(sys.stdin.buffer.read())"],
            input=b"raw bytes",
            timeout=10,
        )
        assert r.returncode == 0
        assert isinstance(r.stdout, bytes)
        assert r.stdout == b"raw bytes"

    def test_str_input_without_encoding_auto_encoded(self):
        """str input without encoding is UTF-8 encoded by the wrapper automatically."""
        r = _run_with_tree_kill(
            [_py(), "-c", "import sys; sys.stdout.buffer.write(sys.stdin.buffer.read())"],
            input="auto encode 한글",
            timeout=10,
        )
        assert r.returncode == 0
        assert isinstance(r.stdout, bytes)
        assert r.stdout.decode("utf-8") == "auto encode 한글"
# ─────────────────────────────────────────────────────────────────
# Bonus: _SPAWNED discipline — 다중 호출 후 누적 안 됨
# ─────────────────────────────────────────────────────────────────