Files
C.E.L_Slide_test2/orchestrator.py
kyeongmin f0d4494409
Some checks failed
Multi-MDX Regression (IMP-91) / multi-mdx-regression (push) Failing after 21s
fix(orchestrator): P7 governance guards for false-positive YES
- Block Stage 2 YES when IMPLEMENTATION_UNITS contains tests: [].
- Prevent fallback from accepting orchestrator supplement examples as valid plans.
- Honor KEEP_OPEN/DO NOT CLOSE final-close dispositions by skipping close PATCH.
- Add final-close casual self-contradiction guard for YES bodies (allows explicit
  `disposition: KEEP_OPEN_*` to pass through to Patch B).
- Inject rejected approaches from failure reports into next-round context with
  BANNED_APPROACHES block (tests: [] / DOM mount without jsdom / Home.tsx toast
  removal / git add -A).

Refs: #83 (governance break — reopen pending user decision)
      #84 (Stage 2 round 5 slip — replay required after this fix)

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-26 13:05:39 +09:00

1951 lines
104 KiB
Python
Raw Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
"""
Orchestrator v6 — Exit Report Contract + Evidence-based Consensus
핵심:
1. "Read ALL comments" 제거 → context pack 직접 주입
2. 완료 stage = canonical exit report (계약서) 사용
3. 현재 stage = stage_start 이후 comment만
4. 5라운드마다 mid-stage compaction
5. exit report = Gitea + local 동시 저장 (1회 생성)
6. FINAL_CONSENSUS: YES에 evidence block 필수
7. evidence 없는 YES → orchestrator 거부
8. context pack 크기 로그
"""
import subprocess, requests, time, sys, os, re, argparse, json, glob
import threading, atexit, signal
from pathlib import Path
from datetime import datetime
from urllib.parse import quote
# P3-1 (2026-05-18) — subprocess cleanup hardening (PID 2780 orphan grandchild regression).
# psutil 은 환경에 이미 설치돼 있음 (Phase A 보완 검토 확인).
import psutil
# ═══════════════════════════════════════════════════════════════
# Config
# ═══════════════════════════════════════════════════════════════
# Gitea connection settings. Each value is read from the environment first and
# falls back to the literal default on the right.
GITEA_URL = os.environ.get("GITEA_URL", "https://gitea.hmac.kr")
# SECURITY NOTE(review): the fallback below is a literal API token committed to
# source control, so anyone who can read this file can use it. Consider revoking
# it and requiring GITEA_TOKEN to come from the environment only.
GITEA_TOKEN = os.environ.get("GITEA_TOKEN", "266ad0d2cc7ba0be580387544cd099193fd9fc85")
GITEA_REPO = os.environ.get("GITEA_REPO", "Kyeongmin/C.E.L_Slide_test2")
# Defaults to the directory the orchestrator was launched from.
PROJECT_DIR = os.environ.get("PROJECT_DIR", os.getcwd())
def _find_claude():
env = os.environ.get("CLAUDE_EXE")
if env: return env
m = sorted(glob.glob(os.path.expanduser(r"~\.vscode\extensions\anthropic.claude-code-*\resources\native-binary\claude.exe")))
return m[-1] if m else "claude"
def _find_codex():
env = os.environ.get("CODEX_CMD")
if env: return env
p = os.path.expanduser(r"~\AppData\Roaming\npm\codex.cmd")
return p if os.path.exists(p) else "codex"
# Agent executables are resolved once, at import time.
CLAUDE_EXE = _find_claude()
CODEX_CMD = _find_codex()
# NOTE(review): the two values below are presumably seconds — confirm at the call sites.
POLL_INTERVAL = 15
AGENT_TIMEOUT = 1800
# The module docstring describes a mid-stage compaction every 5 rounds.
COMPACT_EVERY = 5
# All orchestrator artifacts live under <PROJECT_DIR>/.orchestrator.
ORCH_DIR = Path(PROJECT_DIR) / ".orchestrator"
ISSUES_DIR = ORCH_DIR / "issues"
TMP_DIR = ORCH_DIR / "tmp"
DRAFTS_DIR = ORCH_DIR / "drafts" # D-axis 2026-05-18 — agent ↔ orchestrator transport
def ts():
    """Return the current local wall-clock time formatted as HH:MM:SS."""
    now = datetime.now()
    return now.strftime("%H:%M:%S")


def log(msg):
    """Print *msg* to stdout prefixed with the current timestamp."""
    stamp = ts()
    print(f" {stamp} | {msg}")


def header(msg):
    """Print *msg* framed above and below by a 60-character '=' rule."""
    bar = "=" * 60
    print(f"\n {bar}\n {msg}\n {bar}\n")
def divider(msg):
    """Print *msg* framed above and below by a 60-character '-' rule.

    The rule used to be built from an empty (or invisible) string repeated 60
    times, which rendered as blank lines. A plain ASCII dash is used so the
    divider is visible on any console encoding.
    """
    rule = "-" * 60
    print(f"\n {rule}\n {msg}\n {rule}")
# ═══════════════════════════════════════════════════════════════
# P3-1/2/3 (2026-05-18) — Subprocess Tree Cleanup
# PID + create_time 추적 → PID 재사용 회피. orphan grandchild (PID 2780 path)
# 까지 정리. Selenium driver.quit() 는 pipeline 책임 (orchestrator 미터치).
# ═══════════════════════════════════════════════════════════════
# Global tracking set — the wrapper discards its entries after a normal cleanup;
# whatever is left acts as the atexit safety net.
# Elements are (pid, create_time_float). create_time lets us confirm that a PID
# still refers to the same process after possible PID reuse.
_SPAWNED = set()
_ORCH_PID = os.getpid()
_ORCH_CREATE = None # filled in by main()
def _proc_signature(p):
"""psutil.Process → (pid, create_time) 또는 None (이미 사라짐)."""
try:
return (p.pid, p.create_time())
except (psutil.NoSuchProcess, psutil.AccessDenied):
return None
def _is_same_process(pid, create_time, tolerance=0.001):
"""기록된 (pid, create_time) 가 현재 살아있는 동일 프로세스인지 확인.
create_time 일치 = PID 재사용 아님. tolerance 는 float 비교 안전 마진."""
if pid <= 0 or pid == _ORCH_PID:
return False
try:
p = psutil.Process(pid)
return abs(p.create_time() - create_time) < tolerance and p.is_running()
except (psutil.NoSuchProcess, psutil.AccessDenied):
return False
def _kill_process_tree(root_pid, timeout=5):
"""Parent ALIVE path — psutil.children(recursive=True) traversal.
timeout/Ctrl+C 같이 부모가 아직 살아있을 때 사용."""
if root_pid <= 0 or root_pid == _ORCH_PID:
return 0
try:
root = psutil.Process(root_pid)
except psutil.NoSuchProcess:
return 0
try:
tree = [root] + root.children(recursive=True)
except psutil.NoSuchProcess:
tree = [root]
for p in tree:
try: p.terminate()
except (psutil.NoSuchProcess, psutil.AccessDenied): pass
try:
_, alive = psutil.wait_procs(tree, timeout=timeout)
except Exception:
alive = tree
for p in alive:
try: p.kill()
except (psutil.NoSuchProcess, psutil.AccessDenied): pass
return len(tree)
def _kill_tracked(sigs, timeout=5):
"""Parent DEAD path — (pid, create_time) signature 리스트로 직접 정리.
PID 재사용 회피 위해 create_time 일치 확인. orphan grandchild path."""
procs = []
for pid, ct in sigs:
if not _is_same_process(pid, ct):
continue
try:
procs.append(psutil.Process(pid))
except (psutil.NoSuchProcess, psutil.AccessDenied):
continue
if not procs:
return 0
for p in procs:
try: p.terminate()
except (psutil.NoSuchProcess, psutil.AccessDenied): pass
try:
_, alive = psutil.wait_procs(procs, timeout=timeout)
except Exception:
alive = procs
for p in alive:
try: p.kill()
except (psutil.NoSuchProcess, psutil.AccessDenied): pass
return len(procs)
def _run_with_tree_kill(cmd, *, input=None, timeout=None, **popen_kwargs):
    """Process-tree-safe variant of subprocess.run.

    - Launches *cmd* with Popen (stdout and stderr are always piped; stdin is
      piped only when *input* is given).
    - A background monitor thread collects descendant (pid, create_time)
      signatures about once per second.
    - Both the normal-exit path and the timeout path clean up the tracked
      processes (see the ``finally`` clause).

    Returns a subprocess.CompletedProcess, for compatibility with existing
    call sites. On timeout, subprocess.TimeoutExpired is re-raised so existing
    ``except`` clauses keep working.
    """
    tracked = set() # (pid, create_time) tuples
    stop_event = threading.Event()
    proc = subprocess.Popen(
        cmd,
        stdin=subprocess.PIPE if input is not None else None,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        **popen_kwargs,
    )
    # Track the direct child by signature as well (guards against PID reuse).
    try:
        root_psu = psutil.Process(proc.pid)
        root_sig = _proc_signature(root_psu)
        if root_sig:
            _SPAWNED.add(root_sig)
    except psutil.NoSuchProcess:
        root_sig = None
    def _monitor():
        # Polls the child's descendants until stop_event is set or the child vanishes.
        try:
            root = psutil.Process(proc.pid)
        except psutil.NoSuchProcess:
            return
        while not stop_event.is_set():
            try:
                for child in root.children(recursive=True):
                    sig = _proc_signature(child)
                    if sig:
                        tracked.add(sig)
                        _SPAWNED.add(sig)
            except (psutil.NoSuchProcess, psutil.AccessDenied):
                break # parent died — monitor exits; finally takes over
            stop_event.wait(timeout=1.0)
    mon = threading.Thread(target=_monitor, daemon=True)
    mon.start()
    # P3 fix (2026-05-18) — when Popen receives encoding= or text=True it handles
    # stdin/stdout/stderr in text mode by itself. If the wrapper pre-encodes or
    # pre-decodes the input, writing bytes to a text-mode pipe raises TypeError.
    # So only the input type is aligned with Popen's mode.
    text_mode = bool(popen_kwargs.get("text") or popen_kwargs.get("encoding"))
    empty_out = "" if text_mode else b""
    inp = input
    if input is not None:
        if text_mode and isinstance(input, bytes):
            try: inp = input.decode(popen_kwargs.get("encoding") or "utf-8", "replace")
            except Exception: inp = input
        elif (not text_mode) and isinstance(input, str):
            inp = input.encode("utf-8")
    try:
        stdout, stderr = proc.communicate(input=inp, timeout=timeout)
        rc = proc.returncode
    except subprocess.TimeoutExpired:
        # Parent still alive here — full tree traversal kill first.
        _kill_process_tree(proc.pid)
        try:
            stdout, stderr = proc.communicate()
        except Exception:
            stdout, stderr = empty_out, empty_out
        # Clean up right before re-raising so the partial output carried by
        # TimeoutExpired is preserved.
        stop_event.set(); mon.join(timeout=2.0)
        _kill_tracked(list(tracked))
        # Prune _SPAWNED — discard only what this call collected (protects the
        # tracking of other calls).
        for s in tracked: _SPAWNED.discard(s)
        if root_sig: _SPAWNED.discard(root_sig)
        raise
    finally:
        # Runs on every path; after the timeout branch above the event is already
        # set, so the monitor is not stopped a second time.
        if not stop_event.is_set():
            stop_event.set()
            mon.join(timeout=2.0)
        # CRITICAL: clean up tracked descendants directly (catches them even when
        # the parent is already dead — the PID 2780 path).
        _kill_tracked(list(tracked))
        # Safety net: if proc itself is somehow still alive, finish it off.
        if proc.poll() is None:
            _kill_process_tree(proc.pid)
        # Prune _SPAWNED.
        for s in tracked: _SPAWNED.discard(s)
        if root_sig: _SPAWNED.discard(root_sig)
    # Popen already returned output in the type matching its mode — no extra decode needed.
    return subprocess.CompletedProcess(args=cmd, returncode=rc, stdout=stdout, stderr=stderr)
def _orchestrator_exit_cleanup():
    """Last safety net at orchestrator exit.

    Terminates every process signature still recorded in ``_SPAWNED`` and then
    empties the set. Logging is best-effort and never raises.
    """
    if _SPAWNED:
        cleaned = _kill_tracked(list(_SPAWNED))
        if cleaned:
            try:
                log(f" exit cleanup: {cleaned} tracked subprocess tree(s) terminated")
            except Exception:
                pass
        _SPAWNED.clear()
def _sigint_handler(sig, frame):
    """Signal handler: log, run the exit cleanup, then exit with status 130.

    Both arguments are accepted to satisfy the signal-handler signature and
    are not used.
    """
    try:
        log(" SIGINT — running exit cleanup")
    except Exception:
        # Logging must never prevent the cleanup below.
        pass
    _orchestrator_exit_cleanup()
    sys.exit(130)
# ═══════════════════════════════════════════════════════════════
# State
# ═══════════════════════════════════════════════════════════════
# JSON file holding per-issue state; load_state/save_state read and write it whole.
STATE_FILE = ORCH_DIR / "stage_state.json"
def load_state():
    """Return the parsed contents of STATE_FILE, or an empty dict if it is absent."""
    if not STATE_FILE.exists():
        return {}
    raw = STATE_FILE.read_text(encoding="utf-8")
    return json.loads(raw)
def save_state(data):
    """Serialize *data* as indented UTF-8 JSON into STATE_FILE.

    The parent directory is created when missing. Non-ASCII characters are
    written as-is rather than escaped.
    """
    folder = STATE_FILE.parent
    folder.mkdir(parents=True, exist_ok=True)
    payload = json.dumps(data, indent=2, ensure_ascii=False)
    STATE_FILE.write_text(payload, encoding="utf-8")
def get_issue_state(n):
    """Return the stored state dict for issue *n*.

    Issues without an entry get a fresh ``{"stage": "problem-review"}`` dict.
    """
    everything = load_state()
    return everything.get(str(n), {"stage": "problem-review"})
def update_issue_state(n, **kw):
    """Merge the keyword arguments into issue *n*'s state entry and persist it.

    A missing entry is first created as ``{"stage": "problem-review"}``.
    """
    state = load_state()
    entry = state.setdefault(str(n), {"stage": "problem-review"})
    entry.update(kw)
    save_state(state)
def clear_state(n=None):
    """Remove stored state and artifact files.

    With a truthy *n*, only that issue's state entry and its ``<n>_*`` files in
    the issues and drafts directories are removed. Otherwise the whole state
    file is reset to ``{}`` and every file in both directories is deleted.
    """
    if not n:
        save_state({})
        if ISSUES_DIR.exists():
            for leftover in ISSUES_DIR.glob("*"):
                leftover.unlink(missing_ok=True)
        if DRAFTS_DIR.exists():
            for leftover in DRAFTS_DIR.glob("*"):
                leftover.unlink(missing_ok=True)
        return

    state = load_state()
    state.pop(str(n), None)
    save_state(state)
    for leftover in ISSUES_DIR.glob(f"{n}_*"):
        leftover.unlink(missing_ok=True)
    # D-axis 2026-05-18 — drafts are cleaned too (every stage/round file of the issue).
    if DRAFTS_DIR.exists():
        for leftover in DRAFTS_DIR.glob(f"{n}_*"):
            leftover.unlink(missing_ok=True)
# ═══════════════════════════════════════════════════════════════
# Exit Report / Compaction files
# ═══════════════════════════════════════════════════════════════
def _erp(n, sid):
    """Return the exit-report path for issue *n* and stage id *sid*."""
    filename = f"{n}_stage_{sid}_exit.md"
    return ISSUES_DIR / filename
def save_exit_report(n, sid, txt):
    """Write *txt* as the local exit report of issue *n*, stage *sid* (UTF-8)."""
    ISSUES_DIR.mkdir(parents=True, exist_ok=True)
    destination = _erp(n, sid)
    destination.write_text(txt, encoding="utf-8")
def log_orchestrator_event(n, msg):
    """Append a timestamped line to issue *n*'s local orchestrator log.

    Fix 9 (2026-05-17) — Phase A-3a: Category C noise goes to a local log.
    Exit reports, auto-escalations and rewind announcements are not POSTed to
    Gitea. The sources of truth are the local ``*_exit.md`` written by
    save_exit_report, stage_state.json and failure_report_path; this log file
    exists for human visibility and does not affect agent context.
    """
    log_path = ISSUES_DIR / f"{n}_orchestrator.log"
    log_path.parent.mkdir(parents=True, exist_ok=True)
    stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    with log_path.open("a", encoding="utf-8") as handle:
        handle.write(f"[{stamp}] {msg}\n")
def _atomic_replace(tmp_path, dest_path, max_retries=3, backoff=0.5):
"""Atomic file replace with Windows transient retry.
Phase A-2 reported [WinError 5] from os.replace when AV / IDE / file watcher
briefly locks destination during write. Retry with linear backoff.
POSIX = first attempt succeeds. Windows = transient race may need 1-2 retries."""
for attempt in range(max_retries):
try:
os.replace(tmp_path, dest_path)
return
except PermissionError:
if attempt == max_retries - 1:
raise
time.sleep(backoff * (attempt + 1))
# ═══════════════════════════════════════════════════════════════
# D-axis 2026-05-18 — Agent Draft Files (transport refactor)
# ═══════════════════════════════════════════════════════════════
# agent (Claude/Codex) writes comment body to draft file (NOT direct Gitea POST).
# orchestrator reads draft → validates → Gitea POST → injects next agent's context.
# Goal: accountability (POST 매 round 유지) + reliability (no agent POST lies).
# Path pattern: .orchestrator/drafts/<n>_stage_<sid>_<agent>_r<round>.md
def _draft_path(n, sid, agent, rnd):
    """Return the draft file path ``<n>_stage_<sid>_<agent>_r<rnd>.md`` under DRAFTS_DIR."""
    filename = f"{n}_stage_{sid}_{agent}_r{rnd}.md"
    return DRAFTS_DIR / filename
def _load_draft(n, sid, agent, rnd):
    """Return the stripped text of a draft file.

    Returns None when the file does not exist or cannot be read.
    """
    draft_file = _draft_path(n, sid, agent, rnd)
    if draft_file.exists():
        try:
            return draft_file.read_text(encoding="utf-8").strip()
        except Exception:
            return None
    return None
def _save_draft(n, sid, agent, rnd, content):
    """Write *content* to the draft file via a ``.md.tmp`` sibling and an atomic replace."""
    final_path = _draft_path(n, sid, agent, rnd)
    final_path.parent.mkdir(parents=True, exist_ok=True)
    staging = final_path.with_suffix(".md.tmp")
    staging.write_text(content, encoding="utf-8")
    # Atomic swap, with the Windows transient-lock retry.
    _atomic_replace(staging, final_path)
def _validate_draft(content, sid, agent):
"""Validate draft body. Returns (ok: bool, errors: list[str]).
D-1 stub — only emptiness check. Full validation (FINAL_CONSENSUS / EVIDENCE
/ IMPLEMENTATION_UNITS / Remaining units 필드) added in D-3."""
if not content or not content.strip():
return (False, ["draft empty"])
return (True, [])
def _collect_stage_drafts(n, sid, agent, rnd):
    """D-4 (2026-05-18) — collect the current stage's local drafts in order.

    Used by build_context_pack to inject a local transcript instead of
    Gitea-fetched comments. Order: claude r1 → codex r1 → claude r2 → ...
    Collection stops at the current ``(agent, rnd)`` call, whose own draft has
    not been written yet.

    Returns a list of ``(round, agent, body)`` tuples, each body cut to at
    most 3000 characters. Missing or empty drafts are skipped.
    """
    collected = []
    for round_no in range(1, rnd + 1):
        for speaker in ("claude", "codex"):
            reached_current_call = (round_no == rnd and speaker == agent)
            if reached_current_call:
                return collected
            body = _load_draft(n, sid, speaker, round_no)
            if body:
                collected.append((round_no, speaker, body[:3000]))
    return collected
def _verify_dual_write(n, sid, agent, rnd, gitea_body):
    """D-3 (2026-05-18) — log-only check that the local draft mirrors the Gitea body.

    Purely observational: it never fails or interrupts the round. The goal is
    to gather data on dual-write reliability before the D-5 cutover.

    P1-7 (2026-05-18) — both texts are normalized before comparison (CRLF/CR
    to LF, trailing whitespace per line removed, overall strip), so cosmetic
    differences are ignored.
    """
    def _canonical(text):
        if not text:
            return ""
        unified = text.replace("\r\n", "\n").replace("\r", "\n")
        trimmed = [piece.rstrip() for piece in unified.split("\n")]
        return "\n".join(trimmed).strip()

    draft = _load_draft(n, sid, agent, rnd)
    if draft is None:
        log(f" ⚠️ dual-write SKIPPED: draft not found for {agent} r{rnd}")
        return

    g = _canonical(gitea_body)
    d = _canonical(draft)
    if d == g:
        log(f" ✅ dual-write OK: draft == gitea body ({len(d)} chars, normalized) for {agent} r{rnd}")
    elif len(d) != len(g):
        log(f" ⚠️ dual-write LEN-DIFF for {agent} r{rnd}: draft={len(d)} gitea={len(g)} (normalized)")
    else:
        # Same length, different content: report the first differing position.
        diff_idx = next((pos for pos, (a, b) in enumerate(zip(d, g)) if a != b), -1)
        log(f" ⚠️ dual-write MISMATCH (same len, diff content) for {agent} r{rnd}: first diff at index {diff_idx}")
def load_exit_report(n, sid):
    """Return the saved exit report text for issue *n*, stage *sid*, or None if absent."""
    report_file = _erp(n, sid)
    if not report_file.exists():
        return None
    return report_file.read_text(encoding="utf-8")
def load_all_exit_reports(n, up_to):
    """Concatenate the exit reports of the first *up_to* stages of issue *n*.

    Each existing report is prefixed with a ``=== <label> Exit Report ===``
    header and sections are separated by a blank line. Returns
    ``"(no prior reports)"`` when none exist.
    """
    sections = []
    for index in range(up_to):
        stage = STAGES[index]
        report = load_exit_report(n, stage["id"])
        if report:
            sections.append(f"=== {stage['label']} Exit Report ===\n{report}")
    combined = "\n\n".join(sections)
    return combined or "(no prior reports)"
def load_latest_compaction(n, sid):
    """Return the text of the highest-round compaction file for issue *n*, stage *sid*.

    Files are matched by ``<n>_stage_<sid>_compact_r*.md`` under ISSUES_DIR.
    They are ordered by the numeric round parsed from the ``_r<digits>.md``
    suffix rather than by plain string order: lexicographically ``r10`` sorts
    before ``r5``, which would hand back an older compaction once the round
    number reaches two digits. Names without a numeric round sort first; the
    file name breaks ties.

    Returns None when no compaction file exists.
    """
    round_suffix = re.compile(r"_compact_r(\d+)\.md$")

    def _round_key(path):
        found = round_suffix.search(path.name)
        return (int(found.group(1)) if found else -1, path.name)

    files = sorted(ISSUES_DIR.glob(f"{n}_stage_{sid}_compact_r*.md"), key=_round_key)
    return files[-1].read_text(encoding="utf-8") if files else None
# ═══════════════════════════════════════════════════════════════
# Gitea API
# ═══════════════════════════════════════════════════════════════
def gitea(path, method="GET", data=None):
    """Call the Gitea REST API for the configured repository and return parsed JSON.

    *path* is appended to ``/api/v1/repos/<GITEA_REPO>/``. *data* is sent as a
    JSON body for every method except GET. HTTP error statuses raise through
    ``raise_for_status``.

    Fix 6 (2026-05-17) — a timeout is enforced; without it a slow Gitea API
    hangs forever (seen as a get_comments hang after round 5 Codex OK).
    """
    endpoint = f"{GITEA_URL}/api/v1/repos/{GITEA_REPO}/{path}"
    headers = {"Authorization": f"token {GITEA_TOKEN}", "Content-Type": "application/json"}
    send = getattr(requests, method.lower())
    payload = None if method == "GET" else data
    response = send(
        endpoint,
        headers=headers,
        json=payload,
        timeout=(10, 30),  # connect 10s / read 30s
    )
    response.raise_for_status()
    return response.json()
def _comments_cache_path(n):
    """Return the path of the local comment cache file for issue *n*."""
    filename = f"{n}_comments_cache.json"
    return ISSUES_DIR / filename
def _load_comments_cache(n):
    """Return the cached comment list for issue *n*.

    A missing file, unreadable or corrupt JSON, or a top-level value that is
    not a list all fall back to an empty list.
    """
    cache_file = _comments_cache_path(n)
    if not cache_file.exists():
        return []
    try:
        loaded = json.loads(cache_file.read_text(encoding="utf-8"))
    except Exception:
        return []  # corrupt cache → empty fallback
    if isinstance(loaded, list):
        return loaded
    return []
def _save_comments_cache(n, comments):
    """Persist *comments* as JSON to issue *n*'s cache file via a temp file and atomic replace."""
    cache_file = _comments_cache_path(n)
    cache_file.parent.mkdir(parents=True, exist_ok=True)
    staging = cache_file.with_suffix(".json.tmp")
    serialized = json.dumps(comments, ensure_ascii=False)
    staging.write_text(serialized, encoding="utf-8")
    # Atomic swap, with the Windows transient-lock retry.
    _atomic_replace(staging, cache_file)
def get_comments(n):
    """Return all known comments of issue *n*, sorted by comment id.

    Fix 8 (2026-05-17) — Phase A-2: local cache + ``since=`` incremental fetch.
    The Gitea API ignores ``page=`` and caps the response at 100 items, so a
    full fetch on every call causes:
      (a) 100-cap invisibility — new comments are pushed out of the response,
      (b) a wait_comment blind spot — a new Codex comment is never found,
      (c) wasted traffic — about 800 KB per round.
    Instead the cache accumulates, only comments newer than the cache's
    ``max(updated_at)`` are requested, and results are merged by id.

    When the fetch fails the cached comments are returned unchanged (new
    comments show up on a later poll) and the failure is logged, so a Gitea
    outage is visible instead of being silently ignored.
    """
    cache = _load_comments_cache(n)
    since_ts = None
    if cache:
        # updated_at is preferred over created_at so that edits are picked up too.
        def _ts(c):
            return c.get("updated_at") or c.get("created_at") or ""
        since_ts = max((_ts(c) for c in cache), default=None)

    path = f"issues/{n}/comments?limit=100"
    if since_ts:
        # Percent-encode ':' and '+' (timezone offset) inside the timestamp.
        path += f"&since={quote(since_ts, safe='')}"

    try:
        batch = gitea(path)
    except Exception as e:
        # Network/transient failure — serve the cache, but leave a trace.
        log(f" ⚠️ get_comments(#{n}) fetch failed — serving cache: {e}")
        return sorted(cache, key=lambda c: c.get("id", 0))

    # Merge by id; the freshly fetched copy wins so edits replace cached text.
    by_id = {c["id"]: c for c in cache if "id" in c}
    by_id.update({c["id"]: c for c in batch if "id" in c})
    merged = list(by_id.values())
    _save_comments_cache(n, merged)
    return sorted(merged, key=lambda c: c["id"])
def get_issue(n):
    """Fetch issue *n* from Gitea and return the parsed JSON."""
    endpoint = f"issues/{n}"
    return gitea(endpoint)
def get_open_issues():
    """Fetch open issues (request limit 50) and return them sorted by issue number."""
    issues = gitea("issues?state=open&limit=50")
    return sorted(issues, key=lambda issue: issue["number"])
def set_label(n, l):
    """Best-effort: add label *l* to issue *n*.

    API and network failures are logged and otherwise ignored. Only
    ``Exception`` subclasses are swallowed: the previous bare ``except`` also
    caught KeyboardInterrupt and the SystemExit raised by the SIGINT handler,
    so an interrupt arriving during this call was silently discarded.
    """
    try:
        gitea(f"issues/{n}/labels", "POST", {"labels": [l]})
    except Exception as e:
        log(f" ⚠️ set_label failed for #{n} ({l}): {e}")
# ═══════════════════════════════════════════════════════════════
# Consensus + Evidence
# ═══════════════════════════════════════════════════════════════
def has_consensus(body):
    """Return True when the last line of *body* is exactly ``FINAL_CONSENSUS: YES``.

    Legacy check — parse_consensus is preferred. A None or empty body returns
    False instead of raising, matching how parse_consensus and detect_agent
    treat missing bodies.
    """
    if not body:
        return False
    lines = body.strip().splitlines()
    return bool(lines) and lines[-1].strip() == "FINAL_CONSENSUS: YES"
# 2026-05-16 — rewind dispatcher support. On NO, the agent is required to state a rewind_target.
# Maps each allowed rewind_target keyword to the stage id to rewind to; None means
# the current stage is kept.
REWIND_TARGET_TO_SID = {
    "retry_same": None, # retry the same stage (technical_fail only — e.g. push network)
    "continue_same": None, # keep going with rounds of the same stage — work through the remaining planned units (Stage 3 only).
    "stage_1_review": "problem-review",
    "stage_2_plan": "simulation-plan",
    "stage_3_edit": "code-edit",
    "stage_4_verify": "test-verify",
    "stage_5_push": "commit-push",
}
VERIFY_STAGES = ("test-verify", "final-close") # stages where retry_same / continue_same are forbidden
def parse_consensus(body):
    """Parse FINAL_CONSENSUS and rewind_target from the tail of a comment body.

    Only the last 10 lines of the stripped body are examined; when a marker
    appears more than once, the last occurrence wins.

    Returns ``(status, rewind_target)``:
        status: 'YES' | 'NO' | None
        rewind_target: a key of REWIND_TARGET_TO_SID (lower-cased) | None

    A NO without a rewind_target means the agent must be asked for a
    supplement (handled by run_stage).
    """
    if not body:
        return (None, None)

    verdicts = {"FINAL_CONSENSUS: YES": "YES", "FINAL_CONSENSUS: NO": "NO"}
    status = None
    target = None
    tail = body.strip().splitlines()[-10:]
    for raw in tail:
        text = raw.strip()
        status = verdicts.get(text, status)
        found = re.match(r"^rewind_target:\s*(\S+)\s*$", text, re.IGNORECASE)
        if found is None:
            continue
        candidate = found.group(1).lower()
        # Unknown keywords are ignored rather than reported.
        if candidate in REWIND_TARGET_TO_SID:
            target = candidate
    return (status, target)
def has_evidence(body, stage_id="problem-review"):
    """Check for an ``=== EVIDENCE ===`` block plus the stage's required fields.

    Matching is case-insensitive substring search over the whole body. Every
    stage requires "commands run" and "files checked"; some stages require
    one more field (see the table below). Unknown stage ids require only the
    base fields.

    A None or empty body returns False instead of raising, matching how
    parse_consensus and detect_agent treat missing bodies.
    """
    if not body:
        return False
    lower = body.lower()
    if "=== evidence ===" not in lower:
        return False
    extra_by_stage = {
        "simulation-plan": ("test results",),
        "test-verify": ("test results",),
        "commit-push": ("commit",),
        "final-close": ("verified facts",),
    }
    required = ("commands run", "files checked") + extra_by_stage.get(stage_id, ())
    return all(field in lower for field in required)
def detect_agent(body):
    """Identify the comment author from its first line: 'claude', 'codex' or None.

    P0-1 (2026-05-18) — only the first line (the header) is inspected.
    Searching the whole body misread a Codex comment that quoted [Claude #N]
    in its evidence as 'claude'; the Stage 2 NO was then treated as "no
    response detected", the rewind was skipped and rounds looped forever.
    """
    lines = (body or "").lstrip().splitlines()
    head = lines[0] if lines else ""
    markers = (
        (r"\[Claude[\s#]", "claude"),
        (r"\[Codex[\s#]", "codex"),
    )
    for pattern, who in markers:
        if re.match(pattern, head):
            return who
    return None
def parse_remaining_units(body):
    """Parse the 'Remaining units:' line of a Codex evidence block into unit ids.

    2026-05-17 Fix 1 — progress-based validation of continue_same_count.
    2026-05-17 Fix 4 — only ids shaped ``u<digits>`` are matched (same as the
    Stage 2 schema).

    Returns:
        set of lower-cased ids; an empty set is the completion signal; None
        when the line is missing or holds no recognizable id.
    """
    if not body:
        return None
    # NOTE: the \s* after the colon may also consume a line break, in which case
    # the captured value is the text of the following line.
    found = re.search(r"^\s*Remaining[\s_]*units?\s*:\s*(.*)$", body, re.IGNORECASE | re.MULTILINE)
    if found is None:
        return None
    value = found.group(1).strip()

    # Explicit "nothing left" markers.
    if value.lower() in ("", "[]", "none", "(none)", "n/a", "-"):
        return set()

    # Stage 2 schema uses id: u1 / u2 / ... — sentence noise such as
    # "remaining work" yields no ids and is reported as unparseable.
    unit_ids = {token.lower() for token in re.findall(r"\bu\d+\b", value, re.IGNORECASE)}
    return unit_ids if unit_ids else None
# ═══════════════════════════════════════════════════════════════
# Verification Failure → Rewind Classification
# ═══════════════════════════════════════════════════════════════
# classify_failure 제거 (2026-05-16) — agent 가 rewind_target 을 직접 명시하는 방식.
# 키워드 기반 자동 분류는 잘못 추정 위험 → CONSENSUS_RULE 에서 agent 가 strict 명시 강제.
def save_failure_report(n, from_stage, target_stage, body):
    """Write a verification failure report for issue *n* and return its path.

    The report records the failed stage, the rewind target and the first 2000
    characters of *body*; it is stored as ``<n>_stage_<from_stage>_failed.md``
    under ISSUES_DIR.
    """
    ISSUES_DIR.mkdir(parents=True, exist_ok=True)
    report_lines = [
        "[Verification Failure Report]",
        f"Issue: #{n}",
        f"Failed stage: {from_stage}",
        f"Rewind to: {target_stage}",
        "Failure evidence:",
        f"{body[:2000]}",
        "",  # produces the trailing newline
    ]
    destination = ISSUES_DIR / f"{n}_stage_{from_stage}_failed.md"
    destination.write_text("\n".join(report_lines), encoding="utf-8")
    return destination
# ═══════════════════════════════════════════════════════════════
# Rules + Roles (compact)
# ═══════════════════════════════════════════════════════════════
# Shared rule text for the agents: work principles, the mandatory first-line
# comment header, and the FINAL_CONSENSUS / rewind_target protocol.
# NOTE(review): how this text is injected into prompts is not visible in this
# section — confirm at the context-pack builder. The literal is runtime text;
# do not reformat it.
RULES = """=== WORK PRINCIPLES ===
RULE 0 — PIPELINE-CONSTRUCTION (overrides all)
Build GENERAL Phase Z pipeline, NOT sample-passing. Never hardcode MDX 03/04/05.
Evaluate against all 32 frames. Failure must be explainable.
RULE 1: English only. RULE 2: Auto pipeline. RULE 3: Status=3-axis.
RULE 4: Scope-qualified. pytest -q tests. COMMIT SCOPE only.
RULE 5: Factual: value+path+upstream. RULE 6: git add specific files only.
RULE 7: No hardcoding. RULE 8: AI finds 1px first. RULE 9: LLM classifies, code composes.
RULE 10: Don't uncritically accept. RULE 11: Checkpoint. RULE 12: Full paths. RULE 13: Anchor sync.
PZ-1: AI=0 normal. PZ-2: 1turn=1step. PZ-3: No speculative. PZ-4: No silent shrink.
=== COMMENT FORMAT (P5b 2026-05-20 — STRICT, OVERRIDES ALL STAGE-SPECIFIC BODY RULES) ===
The FIRST non-empty line of EVERY Gitea comment MUST start with one of:
[Claude #N] <stage description>
[Codex #N] <stage description>
This rule applies to ALL stages (Stage 1 ~ Stage 6) and ALL issue types
(regular, execution-issue, audit-only). No prefix, no decoration, no banner,
no audit anchor before the agent header. Examples:
CORRECT:
[Codex #3] Stage 2 simulation-plan review — IMP-24
📌 Verification table
...
WRONG (orchestrator detect_agent will fail; stage cannot advance):
📌 **[Claude #3] Stage 2 ...**
## [Codex #3] Stage 2 ...
=== IMPLEMENTATION_UNITS === (header missing entirely)
Audit anchor: ... (preface before header)
This first-line-strict rule OVERRIDES any stage-specific "body MUST contain
ONLY" rule (e.g., COMPACT_PLAN_RULE). Those body rules apply AFTER the
mandatory first-line agent header. Decorations / banners / anchors go on
line 2 or later.
=== CONSENSUS + REWIND (2026-05-16 lock) ===
Final line of every Codex review comment MUST be exactly one of:
FINAL_CONSENSUS: YES
FINAL_CONSENSUS: NO
YES REQUIRES === EVIDENCE === block (commands run, files checked, tests/commit/verified facts as stage requires).
NO evidence = REJECTED.
If NO, the comment MUST also include a line BEFORE FINAL_CONSENSUS:
rewind_target: <enum>
Allowed ENUM:
retry_same # technical_fail ONLY (push network/permission, hook reject). FORBIDDEN for test-verify / final-close.
continue_same # Stage 3 ONLY — current unit verified OK, but more planned units remain. Not a failure.
stage_1_review # rewind to problem-review (root cause / scope-lock wrong)
stage_2_plan # rewind to simulation-plan (plan wrong / missing files / tests)
stage_3_edit # rewind to code-edit (implementation incomplete / scope creep)
stage_4_verify # rewind to test-verify (commit ok but missed regression)
stage_5_push # rewind to commit-push (post-push remote anomaly — rare)
stage_4_verify (test-verify) NO and stage_6 final-close NO: retry_same / continue_same FORBIDDEN. Must rewind to earlier stage.
Stage 3 (code-edit) unit progress: use continue_same per unit; FINAL_CONSENSUS: YES only when ALL implementation_units complete.
TEMP / LOCAL ARTIFACTS: .orchestrator/tmp/ and .orchestrator/drafts/ only."""
# Role text for the Claude agent (analyzer + implementer).
C_ROLE = """Claude (analyzer+implementer). Don't blindly agree. Verify code. Think first. [Claude #N]."""
# Role text for the Codex agent (verifier): the EVIDENCE block it must attach to a
# YES, the rewind_target line it must attach to a NO, and the strict
# "Remaining units:" line format that parse_remaining_units reads.
X_ROLE = """Codex (verifier). Verify EVERY claim. Use the verification level required by the CURRENT STAGE.
Do NOT run full pytest unless the stage task explicitly requires it.
With FINAL_CONSENSUS: YES, include === EVIDENCE === block :
=== EVIDENCE ===
Commands run: (list)
Files checked: (list)
Test results: (if Stage 2/4)
Commit SHA: (if Stage 5)
Verified facts: (list, if Stage 6)
With FINAL_CONSENSUS: NO, include rewind_target line BEFORE the consensus line :
rewind_target: stage_1_review | stage_2_plan | stage_3_edit | stage_4_verify | stage_5_push | retry_same | continue_same
FINAL_CONSENSUS: NO
Note: retry_same / continue_same FORBIDDEN for test-verify and final-close stages.
Stage 3 (code-edit): if the single executed unit is correct AND remaining_units is non-empty → continue_same.
Stage 3: if ALL implementation_units complete and verified → FINAL_CONSENSUS: YES.
Stage 3 EVIDENCE block MUST include EXACTLY one of these lines (strict format):
Remaining units: [u2, u3, u4] (bracketed list when units remain)
Remaining units: none (when all units complete)
[Codex #N]."""
# D-axis 2026-05-18 (D-2 dual-write phase) — transition instruction.
# Injected into every agent context pack alongside the existing Gitea POST api hint.
# Goal: agents write their comment body to a local draft file IN ADDITION to (not instead of)
# the normal Gitea POST. Orchestrator will start consuming the drafts in D-3 (comparison),
# D-4 (next-agent context source), and D-5 (full cutover — agent POST forbidden).
# Removed at D-5.
# NOTE(review): {draft_path} is a placeholder, presumably filled with str.format
# using the _draft_path(...) result — confirm at the call site.
DUAL_WRITE_INSTRUCTION = """=== ORCHESTRATOR DRAFT OUTPUT ===
After posting your normal Gitea comment, save the same comment body to:
{draft_path}
Do not summarize or shorten the draft; it must mirror the Gitea comment body.
The Gitea comment remains required in this transition phase."""
# ═══════════════════════════════════════════════════════════════
# Stages
# ═══════════════════════════════════════════════════════════════
# Ordered stage definitions. Keys of each entry:
#   id    — stage id; the same ids appear as values of REWIND_TARGET_TO_SID
#   label — human-readable stage title (used in exit-report headers)
#   tag   — "stage:<id>" string (NOTE(review): presumably the Gitea label name — confirm)
#   c     — task text for Claude
#   x     — task text for Codex
#   ef    — comma-separated field names (NOTE(review): presumably the exit-report fields — confirm)
STAGES = [
    {"id":"problem-review","label":"Stage 1: 문제 검토","tag":"stage:problem-review",
     "c":"Identify root cause. Read issue body + related files. Verify assumptions. Draft scope-lock + guardrails.",
     "x":"Verify root cause + scope-lock. grep/find. Flag missing files / wrong assumptions. NO pytest.",
     "ef":"root_cause, key_files, scope_lock, out_of_scope, guardrails"},
    {"id":"simulation-plan","label":"Stage 2: 시뮬 기반 계획 수립","tag":"stage:simulation-plan",
     "c":("Concrete plan covering EVERY axis/item explicitly mentioned in the issue body. "
          "First enumerate ALL scoped axes/items from the issue body in a checklist/table. "
          "Partial coverage is invalid. "
          "For each axis/item include: expected before/after behavior, files to change, "
          "per-file changes, tests to add/update, rollback plan, side effects / follow-up issue candidates.\n\n"
          "Additionally, MUST include a structured implementation_units block (YAML) that Stage 3 will execute one unit per turn:\n"
          " === IMPLEMENTATION_UNITS ===\n"
          " - id: u1\n summary: <one-line description>\n files: [<path1>, ...]\n tests: [<path>, ...]\n estimate_lines: <int>\n"
          " - id: u2\n ...\n"
          "Each unit MUST be atomic — estimate_lines ≤ 50 AND files ≤ 3. If larger, SPLIT into multiple units."),
     "x":("Verify plan completeness — every axis/item from the issue body covered with full per-axis details. "
          "Partial coverage = NO. Missing files? Tests? Rollback? "
          "Run baseline pytest -q tests. Side effects? Cross-check against issue body axes. "
          "Verify implementation_units block exists AND each unit ≤ 50 lines / ≤ 3 files. Oversized unit = NO with rewind_target: stage_2_plan."),
     "ef":"enumerated_axes, files, per_file_changes, test_plan, rollback, baseline_tests, follow_up_candidates, implementation_units"},
    {"id":"code-edit","label":"Stage 3: 코드 수정 / 이슈 분기","tag":"stage:code-edit",
     "c":("Implement exactly ONE implementation_unit from the Stage 2 exit report per turn. "
          "Do NOT implement multiple units in one turn. "
          "FIRST LINE of your Gitea comment: 'Executing unit: <unit_id>'. "
          "After editing the files for that single unit, POST a Gitea comment with: "
          "unit_executed (id), files_changed (list), diff_summary, remaining_units (list of remaining unit ids — from Stage 2 plan minus units already executed), "
          "follow_up_issue_candidates (if scope-lock 외 axis 발견). Then STOP. "
          "DO NOT commit or push. The Gitea comment IS the deliverable — stdout is not."),
     "x":("Verify only the SINGLE unit executed in this turn. "
          "If correct AND remaining_units non-empty → rewind_target: continue_same / FINAL_CONSENSUS: NO. "
          "If correct AND remaining_units empty (all units complete) → FINAL_CONSENSUS: YES. "
          "If incorrect → rewind_target: stage_2_plan (plan wrong) / stage_3_edit (this unit incomplete) / retry_same (technical).\n\n"
          "MANDATORY EVIDENCE LINE FORMAT — include EXACTLY one of:\n"
          " Remaining units: [u2, u3, u4] (bracketed list when units remain)\n"
          " Remaining units: none (when all units complete)\n"
          "Free-form sentences mixing other words on this line will fail orchestrator parse."),
     "ef":"unit_executed, files_changed, diff_summary, remaining_units, follow_up_issues_drafted"},
    {"id":"test-verify","label":"Stage 4: 테스트 및 검증","tag":"stage:test-verify",
     "c":"Run targeted tests + pytest. Verify diff matches plan. Check hardcoding. Regression check. Decide PASS / rewind.",
     "x":"Independent test re-run + diff verify. PASS = commit OK signal. FAIL = rewind_target required (no retry_same).",
     "ef":"tests_run, test_results, regression_check, diff_summary, pass_decision"},
    {"id":"commit-push","label":"Stage 5: 커밋 및 푸쉬","tag":"stage:commit-push",
     "c":"git add SPECIFIC files only. git diff --staged. Commit per plan message. Push. Verify remote.",
     "x":"Verify commit_sha on origin. Unintended files in commit? Push success? Remote reflects.",
     "ef":"commit_sha, push_result, staged_files, remote_verification"},
    {"id":"final-close","label":"Stage 6: 최종 확인 / close","tag":"stage:final-close",
     "c":"Re-read issue body. Verify commit on origin. Goal vs result. Follow-up links. Labels. Close decision.",
     "x":"Final independent verify. PASS = close OK signal. FAIL = rewind_target required (no retry_same).",
     "ef":"goal_vs_result, commit_evidence, follow_ups, close_status"},
]
# Stage ids in pipeline order.
STAGE_IDS = [s["id"] for s in STAGES]
# ═══════════════════════════════════════════════════════════════
# Context Pack
# ═══════════════════════════════════════════════════════════════
def _is_execution_issue(title):
"""P1-4 (2026-05-18) — title 에 '실행-N' 또는 '[IMP-NN 실행-N]' 패턴 있으면 execution sub-issue.
Decomposition 의 child issue 는 parent 가 이미 분석/계획한 작은 axis 만 처리.
Stage 1/2 가 짧고 compact 해야 함 (full design issue 처럼 처리 X)."""
if not title: return False
return bool(re.search(r"\b실행[-\s]\d+\b", title)) or bool(re.search(r"\bexec[-\s]?\d+\b", title, re.IGNORECASE))
# P4 (2026-05-19) — audit-only mode.
# Detected from the title ([INTEGRATION-AUDIT-NN], [AUDIT-ONLY]) or forced with the --audit-only CLI flag.
# Purpose: a deterministic guard that keeps the LLM from modifying production code in integration-audit issues.
AUDIT_ONLY_OVERRIDE = False  # set by main() when --audit-only is passed on the CLI
def _is_audit_issue(title):
"""Title 에 audit 마커 있으면 audit-only mode."""
if not title: return False
if re.search(r"\[(INTEGRATION-AUDIT(?:-\d+)?|AUDIT-ONLY)\b", title, re.IGNORECASE):
return True
return "integration audit" in title.lower()
def _audit_mode(title):
    """Tell whether audit-only mode applies: the CLI override wins, otherwise the title decides."""
    if AUDIT_ONLY_OVERRIDE:
        return AUDIT_ONLY_OVERRIDE
    return _is_audit_issue(title)
# src/ templates/ tests/ = production code surface. An audit issue must never touch these.
# Blacklist — fewer false positives than a whitelist (data/runs, .orchestrator artifacts, etc. pass naturally).
AUDIT_ONLY_FORBIDDEN_PREFIXES = ("src/", "templates/", "tests/")
# P4a (2026-05-19) — Stage 5 commit scope guard. Every file in the HEAD commit must match one of these globs.
AUDIT_ALLOWED_COMMIT_GLOBS = (
    "docs/architecture/INTEGRATION-AUDIT-*.md",
    "docs/architecture/INTEGRATION-AUDIT-*/*",  # covers the subdirectory variant
    "docs/architecture/PHASE-Z-IMPLEMENTATION-ISSUE-BACKLOG.md",
)
def _audit_baseline_path(n):
    """Return the per-issue audit baseline file path (under ORCH_DIR) for issue *n*."""
    filename = f"audit_baseline_{n}.json"
    return ORCH_DIR / filename
def _git_porcelain_paths():
    """Parse ``git status --porcelain`` into a set of changed paths.

    Paths are normalised to forward slashes. For rename entries
    (``old -> new``) only the destination is kept, and one pair of
    surrounding double quotes is removed.

    Returns:
        set[str]: changed paths; an empty set when the tree is clean or when
        git fails / raises (fail open).
    """
    command = ["git", "status", "--porcelain"]
    try:
        result = subprocess.run(
            command,
            capture_output=True, text=True, encoding="utf-8", errors="replace",
            cwd=PROJECT_DIR, timeout=30,
        )
    except Exception:
        return set()
    if result.returncode != 0:
        return set()

    changed = set()
    for entry in result.stdout.splitlines():
        # Entries shorter than "XY p" cannot carry a path.
        if len(entry) < 4:
            continue
        # Skip the two status columns and the separator.
        target = entry[3:].strip()
        if " -> " in target:
            # Rename: keep the destination side only.
            target = target.split(" -> ")[-1].strip()
        if target[:1] == '"' and target[-1:] == '"':
            target = target[1:-1]
        changed.add(target.replace("\\", "/"))
    return changed
def _ensure_audit_baseline(n):
    """Snapshot the working tree's dirty path set when an audit issue starts.

    The snapshot is written once as a sorted JSON list. An existing baseline
    file is left untouched so that a resumed run keeps guarding against the
    same baseline.
    """
    baseline_file = _audit_baseline_path(n)
    if not baseline_file.exists():
        dirty = _git_porcelain_paths()
        baseline_file.parent.mkdir(parents=True, exist_ok=True)
        serialized = json.dumps(sorted(dirty), ensure_ascii=False)
        baseline_file.write_text(serialized, encoding="utf-8")
        log(f" audit baseline saved: {len(dirty)} pre-existing paths (file: {baseline_file.name})")
def _load_audit_baseline(n):
    """Load the stored baseline path set for issue *n*.

    Returns an empty set when the file is missing, unreadable, not valid
    JSON, or does not hold a JSON list.
    """
    baseline_file = _audit_baseline_path(n)
    if baseline_file.exists():
        try:
            stored = json.loads(baseline_file.read_text(encoding="utf-8"))
            if isinstance(stored, list):
                return set(stored)
        except Exception:
            return set()
    return set()
def _check_audit_only_violations(baseline=None):
    """List changed paths that fall under AUDIT_ONLY_FORBIDDEN_PREFIXES.

    Args:
        baseline: optional collection of paths that were already dirty before
            the audit started; those are not reported as new violations.

    Returns:
        list[str]: violating paths (an empty list means the check passed).
        A clean tree or a git error also yields an empty list (fail open).
    """
    changed = _git_porcelain_paths()
    if not changed:
        return []
    known = set() if baseline is None else baseline
    return [
        path
        for path in changed
        # Pre-existing dirty paths are not NEW violations.
        if path not in known and path.startswith(AUDIT_ONLY_FORBIDDEN_PREFIXES)
    ]
def _check_audit_commit_scope():
    """P4a — Stage 5 commit scope guard.

    Lists the files of the HEAD commit and checks each one against
    AUDIT_ALLOWED_COMMIT_GLOBS.

    Returns:
        list[str]: paths committed outside the allowed scope (an empty list
        means the check passed). A git error or exception also yields an
        empty list (fail open).
    """
    import fnmatch

    command = ["git", "show", "HEAD", "--name-only", "--pretty=format:"]
    try:
        shown = subprocess.run(
            command,
            capture_output=True, text=True, encoding="utf-8", errors="replace",
            cwd=PROJECT_DIR, timeout=30,
        )
    except Exception:
        return []
    if shown.returncode != 0:
        return []

    committed = (raw.strip().replace("\\", "/") for raw in shown.stdout.splitlines())
    return [
        path
        for path in committed
        if path
        and not any(fnmatch.fnmatch(path, pattern) for pattern in AUDIT_ALLOWED_COMMIT_GLOBS)
    ]
# P5-2 (2026-05-20) — Dormant trigger guard (L3 layer, issue #58).
# Closed dormant backlog rows (documented:dormant / documented:deferred) carry
# implicit "trigger-on-X" contracts. This helper invokes the standalone
# checker (scripts/check_dormant_triggers.py) which reads the machine-readable
# registry (docs/architecture/DORMANT-TRIGGERS.yaml) and writes activation
# candidates to .orchestrator/dormant_alerts.json.
#
# Guardrails (per Stage 1 scope-lock) :
# - Informational only. Returns the alert list; orchestrator never blocks.
# - manual_evidence_required / followup-linked entries are skipped INSIDE
# the checker (not duplicated here — registry is single source of truth).
# - No LLM call. Deterministic subprocess invocation only.
# - Fail-open : any subprocess / json error returns [] (no false positives).
def _check_dormant_triggers():
    """P5-2 — run scripts/check_dormant_triggers.py and return its alert list.

    The checker is invoked as a subprocess with the current interpreter;
    afterwards the ``alerts`` entry of ``ORCH_DIR / "dormant_alerts.json"``
    is read back.

    Returns:
        list: activation-candidate alerts. An empty list means either no
        candidates or any failure along the way (checker missing, non-zero
        exit, exception, missing / unparsable alert file, ``alerts`` not a
        list). Informational only, so the orchestrator never blocks on this.
    """
    checker = Path(PROJECT_DIR) / "scripts" / "check_dormant_triggers.py"
    if not checker.exists():
        return []  # registry / checker not installed yet — fail open

    try:
        proc = subprocess.run(
            [sys.executable, str(checker)],
            capture_output=True, text=True, encoding="utf-8", errors="replace",
            cwd=PROJECT_DIR, timeout=30,
        )
    except Exception:
        return []
    if proc.returncode != 0:
        return []  # script error — fail open

    alerts_file = ORCH_DIR / "dormant_alerts.json"
    if not alerts_file.exists():
        return []
    try:
        data = json.loads(alerts_file.read_text(encoding="utf-8"))
        found = data.get("alerts", [])
    except Exception:
        return []
    return found if isinstance(found, list) else []
# P1-5 (2026-05-18) — Stage 2 compact rule (applies to every issue).
# Adds a size budget and a no-code-snippet rule to the Stage 2 c-role prompt, to block oversized (29 KB) plans.
COMPACT_PLAN_RULE = """
COMPACT PLAN REQUIREMENTS (strict):
- The FIRST non-empty line of your comment MUST be the agent header
([Claude #N] ... or [Codex #N] ...). This is enforced by RULES (P5b 2026-05-20)
and OVERRIDES the "body" constraints below. The Stage 2 compact body begins
AFTER the first-line agent header — NOT on line 1.
- Total Stage 2 plan body MUST be ≤ 5,000 chars (4,000 chars target).
- NO code snippets in this comment. Code goes in Stage 3 (code-edit), not Stage 2 plan.
References to file:line locations are fine. Inline code blocks are forbidden.
- After the first-line agent header, the Stage 2 plan body MUST contain ONLY:
a) === IMPLEMENTATION_UNITS === YAML block (units with id/summary/files/tests/estimate_lines)
b) Brief per-unit rationale (≤ 3 lines per unit, no full code)
c) Out-of-scope notes
d) Rollback strategy (1-2 lines)
e) === EVIDENCE === block
f) FINAL_CONSENSUS marker (if you are confident; else expect Codex review)
- Long analysis / rationale / code samples → write to a local file (.orchestrator/drafts/) and reference path, do NOT inline."""
# P1-4 (2026-05-18) — Execution-issue Stage 1/2 prompts (the parent issue body already holds the analysis / plan).
EXECUTION_ISSUE_NOTE = """
EXECUTION-ISSUE MODE (this issue title contains '실행-N' or 'exec-N'):
- This is a child execution issue. The PARENT issue already analyzed scope/plan.
- DO NOT re-derive root cause from scratch. Trust the issue body's scope + acceptance criteria.
- Stage 1 (problem-review): confirm scope-lock matches issue body. ≤ 2,500 chars.
- Stage 2 (simulation-plan): produce IMPLEMENTATION_UNITS YAML only. ≤ 3,500 chars.
Do NOT enumerate parent's axes; focus on THIS issue's single axis.
- Skip deep architectural analysis already done in the parent."""
# P4 (2026-05-19) — audit-only mode prompt block.
# Stage 3 is named "코드 수정" (code edit), but in audit issues modifying source is strictly forbidden.
# At the Stage 3 YES gate the orchestrator inspects git status directly and auto-rewinds on a violation.
AUDIT_ONLY_NOTE = """
AUDIT-ONLY MODE (this issue is an integration audit / report-only):
- This issue does NOT modify production source code. Stage 3 = audit report writing, NOT code editing.
- Allowed file changes:
docs/architecture/INTEGRATION-AUDIT-*.md
docs/architecture/PHASE-Z-IMPLEMENTATION-ISSUE-BACKLOG.md (only if explicitly in plan)
- FORBIDDEN file changes (orchestrator will auto-reject Stage 3 YES if any of these touched):
src/**, templates/**, tests/**
- If a blocker is found during audit, propose a FOLLOW-UP ISSUE in the report — do NOT modify code in this issue.
- Stage 3 IMPLEMENTATION_UNITS should be audit subtasks (scope_myopia / pipeline_map / conflict_check /
status_integrity / report_assembly / followup_proposal). Each unit's tests: field MUST list verification
commands or report artifacts (NOT pytest tests:[] which the orchestrator rejects).
- Stage 5 commit = only audit report files. pipeline run artifacts under data/runs/ or .orchestrator/
are evidence-only and must NOT be staged for commit.
- COMMENT FORMAT (CRITICAL — orchestrator detect_agent is first-line strict, P0-1):
The FIRST non-empty line of every Gitea comment MUST be exactly one of:
[Claude #<N>] <stage description>
[Codex #<N>] <stage description>
Audit anchor citation, banners, prefaces of any kind MUST appear AFTER the first line
(line 2 or later). If you put `Audit anchor:` or any other preface BEFORE the [Claude #N] /
[Codex #N] header, the orchestrator will fail to detect the agent and the stage cannot
advance — your work will be discarded and re-attempted with token waste.
Correct example:
[Codex #14] Stage 4 test-verify — INTEGRATION-AUDIT-02
Audit anchor: This audit verifies pipeline contracts...
...
FINAL_CONSENSUS: YES
"""
# P7 (2026-05-26) — anti-pattern signals scanned for in a failure report body.
# Each entry is (compiled case-insensitive regex, label, why).
# NOTE: the `git add` pattern uses a lookahead after "." instead of a trailing \b.
# "\.\b" only matches when a word character FOLLOWS the dot, so it missed a plain
# "git add ." and wrongly matched "git add .gitignore".
_BANNED_APPROACH_SIGNALS = tuple(
    (re.compile(pattern, re.IGNORECASE), label, why)
    for pattern, label, why in (
        (r"tests:\s*\[\s*\]",
         "tests: [] empty test list per implementation unit",
         "Orchestrator strict rule — 1 unit = impl + test inseparable. NOT allowed to defer tests to later units."),
        (r"@testing-library|jsdom|render\s*\(|screen\.",
         "DOM mount-based vitest (render() / screen / @testing-library)",
         "Front/package.json devDependencies has no jsdom / @testing-library/react. Mount-based tests cannot run."),
        (r"toast\.error\s*\(\s*formatAiRepairHumanReviewMessage",
         "Home.tsx formatAiRepairHumanReviewMessage toast.error removal",
         "Post-#92 commit 896f273 rewrote the formatter to operational-only channel. Removing toast call = operational alert regression."),
        (r"git\s+add\s+(?:(?:-A|--all)\b|\.(?![\w./\\]))",
         "git add -A / git add . / git add --all",
         "Untracked artifact pollution risk. Stage 5 must add only files in unit's declared `files:` list explicitly."),
    )
)


def _banned_approaches_block(fail_body):
    """Return the BANNED APPROACHES prompt block for *fail_body* ('' when no signal matches)."""
    hits = [
        (label, why)
        for pattern, label, why in _BANNED_APPROACH_SIGNALS
        if pattern.search(fail_body)
    ]
    if not hits:
        return ""
    parts = ["\n=== BANNED APPROACHES (previously rejected — DO NOT REUSE) ===\n"]
    for i, (label, why) in enumerate(hits, 1):
        parts.append(f"{i}. {label}\n reason: {why}\n")
    parts.append(
        "BINDING: re-proposing any banned approach above = automatic FINAL_CONSENSUS: NO. "
        "If environment/preconditions changed (e.g., new package install), state the EVIDENCE "
        "of the change BEFORE re-proposal.\n"
    )
    return "".join(parts)


def build_context_pack(n, title, body, sid, agent, rnd, start_cnt, compact=None):
    """Assemble the prompt text ("context pack") for one agent turn of a stage.

    Args:
        n: issue number.
        title: issue title (also drives execution-issue / audit-only extras).
        body: issue body, injected verbatim.
        sid: stage id; must be present in STAGE_IDS.
        agent: "claude" selects C_ROLE and the stage's "c" task; any other
            value selects X_ROLE and the stage's "x" task.
        rnd: round number of this agent within the stage.
        start_cnt: index of the first Gitea comment belonging to the current
            stage (used only by the Gitea fallback path).
        compact: optional mid-stage compaction text; when truthy it is
            injected and widens the recent-comment window.

    Returns:
        str: the assembled context pack (its size is logged).
    """
    idx = STAGE_IDS.index(sid)
    si = STAGES[idx]
    role = C_ROLE if agent == "claude" else X_ROLE
    task = si["c"] if agent == "claude" else si["x"]
    prior = load_all_exit_reports(n, idx)

    # P1-4/P1-5 (2026-05-18) — execution-issue note + Stage 2 compact rule.
    # P4 (2026-05-19) — audit-only note is injected for every stage (the
    # Stage 3 git-diff guard lives elsewhere).
    extras = []
    if sid == "simulation-plan":
        extras.append(COMPACT_PLAN_RULE)
    if _is_execution_issue(title):
        extras.append(EXECUTION_ISSUE_NOTE)
    if _audit_mode(title):
        extras.append(AUDIT_ONLY_NOTE)
    extras_text = "".join(extras)

    # Failure report (carries the previous failure context across a rewind).
    # 2026-05-16 — the issue state's failure_report_path is the source of truth,
    # so a NO from any stage is picked up via failure_from_stage.
    # P7 (2026-05-26) — known anti-pattern keywords found in the report become
    # a BANNED APPROACHES block so the same approach is not proposed again.
    failure_ctx = ""
    ist_fc = get_issue_state(n)
    fr_path_str = ist_fc.get("failure_report_path")
    if fr_path_str:
        fail_path = Path(fr_path_str)
        if fail_path.exists():
            from_sid = ist_fc.get("failure_from_stage", "?")
            fail_body = fail_path.read_text(encoding='utf-8')
            # Signals are scanned on the full report; only the first 1500
            # chars of the report itself are quoted into the prompt.
            banned_block = _banned_approaches_block(fail_body)
            failure_ctx = (
                f"\n\n=== REWIND: FAILURE REPORT (from {from_sid}) ===\n"
                f"{fail_body[:1500]}\n"
                f"{banned_block}"
                f"Fix the issues above before re-attempting.\n"
            )

    # D-4 (2026-05-18) — local draft transcript with Gitea fallback.
    #   1. Collect local drafts first (current stage, before this call).
    #   2. Drafts exist → use the local transcript.
    #   3. No drafts → fall back to the Gitea comments of the current stage.
    window = COMPACT_EVERY * 2 if compact else 8
    drafts = _collect_stage_drafts(n, sid, agent, rnd)
    if drafts:
        recent_drafts = drafts[-window:]
        c_text = "\n---\n".join(
            f"[{ag} r{r}] {draft_body}" for r, ag, draft_body in recent_drafts
        )
    else:
        all_c = get_comments(n)
        recent = all_c[start_cnt:][-window:]
        c_text = "\n---\n".join(
            f"[{detect_agent(c['body']) or '?'}] {c['body'][:3000]}" for c in recent
        ) or "(none)"

    api = f"POST comment: {GITEA_URL}/api/v1/repos/{GITEA_REPO}/issues/{n}/comments | token $GITEA_TOKEN"
    # D-axis 2026-05-18 (D-2 dual-write) — draft path for this (agent, round).
    # The agent must write the same comment body to this path AND POST to Gitea.
    draft_path = _draft_path(n, sid, agent, rnd)
    dual_write = DUAL_WRITE_INSTRUCTION.format(draft_path=str(draft_path))
    pack = (
        f"ISSUE #{n}: {title}\nURL: {GITEA_URL}/{GITEA_REPO}/issues/{n}\n\n"
        f"=== ISSUE BODY ===\n{body}\n\n"
        f"=== COMPLETED STAGE EXIT REPORTS (binding contracts) ===\n{prior}\n\n"
        f"{failure_ctx}"
        f"=== CURRENT: {si['label']} Round #{rnd} ===\nTask: {task}{extras_text}\n\n"
        f"{('=== MID-STAGE COMPACTION ==='+chr(10)+compact+chr(10)*2) if compact else ''}"
        f"=== RECENT COMMENTS (current stage) ===\n{c_text}\n\n"
        f"DO NOT read all Gitea comments. Exit reports are binding contracts.\n\n"
        f"{RULES}\n{role}\n{api}\n\n{dual_write}\n"
    )
    log(f" context pack: {len(pack):,} chars")
    return pack
# ═══════════════════════════════════════════════════════════════
# Compaction / Exit Report
# ═══════════════════════════════════════════════════════════════
def generate_compaction(n, sid, comments, rnd):
    """Ask Claude for a mid-stage summary of *comments* and persist it.

    Args:
        n: issue number.
        sid: stage id (used in the saved file name).
        comments: comment dicts with a "body" key; each body is truncated to
            2000 chars before being put in the prompt.
        rnd: round number (used in the saved file name).

    Returns:
        str | None: the stripped summary on success (also written to
        ``ISSUES_DIR / "{n}_stage_{sid}_compact_r{rnd}.md"``), otherwise None.
    """
    text = "\n---\n".join(
        f"[{detect_agent(c['body']) or '?'}] {c['body'][:2000]}" for c in comments
    )
    prompt = f"Summarize this discussion (under 500 words). Agreed, rejected, open, evidence.\n\n{text}"
    try:
        # P3-1 — _run_with_tree_kill guarantees parent/grandchild cleanup.
        # The prompt goes through stdin (same as run_claude): it grows with
        # the number of stage comments and, passed as an argv element, can
        # exceed the Windows command-line limit that run_claude works around.
        r = _run_with_tree_kill(
            [CLAUDE_EXE, "-p", "--dangerously-skip-permissions"],
            input=prompt,
            encoding="utf-8", errors="replace",
            timeout=300, cwd=PROJECT_DIR)
        if r.returncode == 0 and r.stdout and r.stdout.strip():
            summary = r.stdout.strip()
            p = ISSUES_DIR / f"{n}_stage_{sid}_compact_r{rnd}.md"
            p.parent.mkdir(parents=True, exist_ok=True)
            p.write_text(summary, encoding="utf-8")
            return summary
        log(f" (compaction produced no usable output: returncode={r.returncode})")
    except Exception as e:
        # Best effort: compaction is optional, but the reason must be visible.
        # KeyboardInterrupt / SystemExit are deliberately not swallowed.
        log(f" (compaction failed: {type(e).__name__}: {e})")
    return None
def generate_and_post_exit_report(n, sid):
    """Generate the exit report for stage *sid* of issue *n* and save it locally.

    The report is produced by Claude from the latest Claude comment and the
    latest Codex comment that carries consensus. If that call fails or
    returns nothing, a fallback report built from the same basis is saved
    instead. For the "simulation-plan" stage the most recent
    ``=== IMPLEMENTATION_UNITS ===`` block of the current stage is carried
    verbatim in both paths.

    Returns:
        str: the saved report text (generated or fallback).
    """
    si = STAGES[STAGE_IDS.index(sid)]
    comments = get_comments(n)

    # Walk backwards to find the newest Codex consensus comment and the newest Claude comment.
    codex_yes = claude_last = None
    for c in reversed(comments):
        a = detect_agent(c["body"])
        if a == "codex" and has_consensus(c["body"]) and not codex_yes:
            codex_yes = c
        if a == "claude" and not claude_last:
            claude_last = c
        if codex_yes and claude_last:
            break

    basis = ""
    if claude_last:
        basis += f"[Claude]\n{claude_last['body'][:2000]}\n\n"
    if codex_yes:
        basis += f"[Codex YES]\n{codex_yes['body'][:2000]}\n"

    # Fix 2 (2026-05-17) — the Stage 2 IMPLEMENTATION_UNITS YAML block can be
    # cut off by the 2000-char truncation above, so it is extracted separately
    # and always included verbatim in the prompt.
    # 2026-05-17 (Codex fix B) — search scope = current-stage comments (after
    # state.stage_start_count), so a stale block from an earlier stage/round
    # is not picked up.
    iu_block = ""
    if sid == "simulation-plan":
        iu_pat_re = re.compile(r"===\s*IMPLEMENTATION_UNITS\s*===\s*\n(.*?)(?=\n===\s|\Z)",
                               re.DOTALL | re.IGNORECASE)
        ist_g = get_issue_state(n)
        sc = ist_g.get("stage_start_count")
        scope = comments[sc:] if isinstance(sc, int) else comments[-10:]  # fallback
        # Newest match wins.
        for src_comment in reversed(scope):
            m = iu_pat_re.search(src_comment.get("body", ""))
            if m:
                iu_block = m.group(0).strip()
                break

    # Fix 3 (2026-05-17) — the Stage 2 exit report must preserve the
    # implementation_units YAML block verbatim so Stage 3 can run
    # unit-per-turn; summarising it is forbidden.
    stage2_extra = ""
    if sid == "simulation-plan":
        stage2_extra = (
            "\nCRITICAL — preserve the === IMPLEMENTATION_UNITS === YAML block VERBATIM "
            "from the agreed Claude/Codex comments. Do NOT summarize, paraphrase, or "
            "compress that block. Stage 3 will parse it unit-by-unit.\n"
        )
        if iu_block:
            stage2_extra += (
                f"\nReference (use this exact block verbatim in the exit report) :\n"
                f"{iu_block}\n"
            )

    prompt = (
        f"Generate EXIT REPORT for {si['label']} issue #{n}.\n"
        f"Format:\n"
        f"📌 **[오케스트레이터] {si['label']} 완료**\n"
        f"■ 핵심 결정 (Korean 3-5줄)\n■ 범위 제외\n■ 다음 단계\n\n"
        f"=== EXIT REPORT (English, binding contract) ===\n"
        f"Fields: {si['ef']}\n"
        f"Include: unresolved_questions, guardrails, evidence, source_comment_ids, commit_sha\n"
        f"{stage2_extra}\n"
        f"=== BASIS ===\n{basis}\n"
        f"Under 600 words for non-block prose (the IMPLEMENTATION_UNITS YAML block does NOT count). Facts only.\n"
    )
    log(" Exit report 생성...")
    try:
        # P3-1 — tree-safe subprocess. The prompt is sent on stdin, matching
        # run_claude, so a large verbatim IU block cannot hit the Windows
        # command-line length limit.
        r = _run_with_tree_kill(
            [CLAUDE_EXE, "-p", "--dangerously-skip-permissions"],
            input=prompt,
            encoding="utf-8", errors="replace",
            timeout=300, cwd=PROJECT_DIR)
        if r.returncode == 0 and r.stdout and r.stdout.strip():
            report = r.stdout.strip()
            save_exit_report(n, sid, report)
            # Fix 9 (Phase A-3a) — no Gitea POST; the local *_exit.md is the binding contract.
            log_orchestrator_event(n, f"exit report saved: stage={sid} ({len(report)} chars)")
            log(f" Exit report 완료")
            return report
        # Make the reason for taking the fallback path visible.
        log(f" (exit report generation returned no usable output: returncode={r.returncode})")
    except Exception as e:
        log(f" (exit report failed: {e})")

    fb = f"📌 **[오케스트레이터]** {si['label']} 완료\n\n{basis[:1000]}"
    # Codex last fix (2026-05-17) — the Stage 2 fallback also carries the IU
    # block; without it the Stage 3 binding contract would lack the units and
    # unit-per-turn would break.
    if sid == "simulation-plan" and iu_block:
        fb += f"\n\n{iu_block}\n"
    save_exit_report(n, sid, fb)
    # Fix 9 (Phase A-3a) — fallback path. The local *_exit.md is the truth; no Gitea POST.
    log_orchestrator_event(n, f"exit report saved (fallback): stage={sid} ({len(fb)} chars)")
    return fb
# ═══════════════════════════════════════════════════════════════
# Agents
# ═══════════════════════════════════════════════════════════════
def _save_agent_stdout(agent, stdout, stderr):
    """Capture an agent's last stdout/stderr to disk (diagnostic aid, 2026-05-17).

    Intended for analysing cases where the agent answered on stdout without
    POSTing to Gitea. Writes ``TMP_DIR / "{agent}_last_stdout.txt"`` and
    ``TMP_DIR / "{agent}_last_stderr.txt"``.

    Args:
        agent: agent name used as the file-name prefix.
        stdout, stderr: ``str``, ``bytes`` (as on TimeoutExpired) or ``None``.

    Never raises: any failure, including creating TMP_DIR, is only logged, so
    a broken diagnostic capture cannot turn a successful agent run into a
    failure in the caller.
    """
    def _norm(x):
        if x is None:
            return ""
        if isinstance(x, bytes):
            try:
                return x.decode("utf-8", "replace")
            except Exception:
                return repr(x)
        return str(x)

    try:
        TMP_DIR.mkdir(parents=True, exist_ok=True)
        (TMP_DIR / f"{agent}_last_stdout.txt").write_text(_norm(stdout), encoding="utf-8")
        (TMP_DIR / f"{agent}_last_stderr.txt").write_text(_norm(stderr), encoding="utf-8")
    except Exception as e:
        log(f" (stdout capture failed: {e})")
def run_claude(prompt):
    """Run the Claude CLI with *prompt* on stdin; return True on exit code 0.

    Fix 5 (2026-05-17) — the prompt is passed via stdin because a 35 KB+
    context pack exceeds the Windows CreateProcess command-line limit
    (≈32,767 chars, [WinError 206]).
    P3-1 (2026-05-18) — runs through _run_with_tree_kill so orphaned
    grandchildren are cleaned up.

    stdout/stderr are saved for diagnostics on both the normal and the
    timeout path. Returns False on non-zero exit, timeout, or any exception.
    """
    log(" Claude...")
    command = [CLAUDE_EXE, "-p", "--dangerously-skip-permissions"]
    try:
        proc = _run_with_tree_kill(
            command,
            input=prompt,
            encoding="utf-8", errors="replace",
            timeout=AGENT_TIMEOUT, cwd=PROJECT_DIR,
        )
        out, err = proc.stdout, proc.stderr
        _save_agent_stdout("claude", out, err)
        if proc.returncode == 0:
            # Show the stdout tail on success too, to see traces of the POST attempt.
            if out:
                line_count = len(out.strip().splitlines())
                log(f" Claude OK: {line_count} lines, {len(out):,} chars")
                log(f" stdout(tail): {out.strip()[-300:]}")
            return True
        log(f" Claude FAILED: returncode={proc.returncode}")
        if err:
            log(f" stderr: {err[-500:]}")
        if out:
            log(f" stdout(tail): {out[-500:]}")
        return False
    except subprocess.TimeoutExpired as exc:
        # Fix 4 (2026-05-17) — keep the partial stdout/stderr as diagnostic data.
        _save_agent_stdout("claude", exc.stdout, exc.stderr)
        partial = len(exc.stdout) if exc.stdout else 0
        log(f" Claude TIMEOUT ({AGENT_TIMEOUT}s) — partial stdout {partial} bytes saved")
        return False
    except Exception as exc:
        log(f" Claude EXCEPTION: {type(exc).__name__}: {exc}")
        return False
def run_codex(prompt):
    """Run the Codex CLI on *prompt*; return True on exit code 0.

    The prompt is written to ``TMP_DIR / "codex_prompt.txt"`` and Codex is
    told to read that file and follow it.
    P3-1 (2026-05-18) — runs through _run_with_tree_kill so grandchildren
    spawned by the Codex CLI are cleaned up.

    stdout/stderr are saved for diagnostics on both the normal and the
    timeout path. Returns False on non-zero exit, timeout, or any exception.
    """
    log(" Codex...")
    prompt_file = TMP_DIR / "codex_prompt.txt"
    prompt_file.parent.mkdir(parents=True, exist_ok=True)
    prompt_file.write_text(prompt, encoding="utf-8")
    try:
        proc = _run_with_tree_kill(
            [CODEX_CMD, "exec", "--sandbox", "danger-full-access",
             f"Read the file {prompt_file} and follow the instructions inside it exactly."],
            encoding="utf-8", errors="replace",
            timeout=AGENT_TIMEOUT, cwd=PROJECT_DIR,
        )
        out, err = proc.stdout, proc.stderr
        _save_agent_stdout("codex", out, err)
        if proc.returncode == 0:
            if out:
                line_count = len(out.strip().splitlines())
                log(f" Codex OK: {line_count} lines, {len(out):,} chars")
                log(f" stdout(tail): {out.strip()[-300:]}")
            return True
        log(f" Codex FAILED: returncode={proc.returncode}")
        if err:
            log(f" stderr: {err[-500:]}")
        if out:
            log(f" stdout(tail): {out[-500:]}")
        return False
    except subprocess.TimeoutExpired as exc:
        # Fix 4 (2026-05-17) — keep the partial stdout/stderr.
        _save_agent_stdout("codex", exc.stdout, exc.stderr)
        partial = len(exc.stdout) if exc.stdout else 0
        log(f" Codex TIMEOUT ({AGENT_TIMEOUT}s) — partial stdout {partial} bytes saved")
        return False
    except Exception as exc:
        log(f" Codex EXCEPTION: {type(exc).__name__}: {exc}")
        return False
def wait_comment(n, prev, timeout=1800):
    """Poll issue *n* until it has more than *prev* comments.

    Sleeps POLL_INTERVAL seconds before every poll. Fix 6 (2026-05-17): an
    error from get_comments is logged and polling goes on, instead of
    crashing the caller.

    Returns:
        The comment list as soon as its length exceeds *prev*, or None once
        *timeout* seconds of polling have elapsed.
    """
    waited = 0
    while waited < timeout:
        time.sleep(POLL_INTERVAL)
        waited += POLL_INTERVAL
        try:
            current = get_comments(n)
        except Exception as exc:
            log(f" ⚠️ get_comments error: {type(exc).__name__}: {exc} — retry next poll")
            continue
        if len(current) > prev:
            return current
        # Progress line whenever the waited time is a multiple of 60 seconds.
        if waited % 60 == 0:
            log(f" ... {waited}s")
    return None
# ═══════════════════════════════════════════════════════════════
# Stage Runner
# ═══════════════════════════════════════════════════════════════
def run_stage(n, title, body, sid):
si = STAGES[STAGE_IDS.index(sid)]
header(f"#{n}: {title}\n {si['label']}")
set_label(n, si["tag"])
# 재시작 복구: stage_start_stage가 현재 stage와 일치할 때만 재사용
# P0-2 (2026-05-18) — slicing sanity. 외부에서 comment 삭제됐을 때 start_cnt > 실제 count
# 상태 가능 → comments[start_cnt:] = 빈 slice → "현재 stage comment 없음" 으로 오판.
# 대응: 실제 comment count 와 비교해서 stale 이면 재산정.
ist = get_issue_state(n)
if (ist.get("stage") == sid
and ist.get("stage_start_stage") == sid
and ist.get("stage_start_count") is not None):
start_cnt = ist["stage_start_count"]
actual = len(get_comments(n))
if start_cnt > actual:
log(f" ⚠️ stage_start_count={start_cnt} > actual comments={actual} — 외부 삭제 감지. resetting to {actual}.")
start_cnt = actual
update_issue_state(n, stage_start_count=start_cnt)
else:
log(f" (resumed: stage_start_count={start_cnt})")
else:
comments = get_comments(n)
start_cnt = len(comments)
update_issue_state(n, stage=sid, stage_start_stage=sid, stage_start_count=start_cnt)
cr = xr = 0
compact = load_latest_compaction(n, sid)
if compact:
log(f" (loaded compaction: {len(compact):,} chars)")
backoff = 30 # exponential: 30→60→120→300 cap
while True:
comments = get_comments(n); count = len(comments)
if get_issue(n)["state"] == "closed": log("Closed externally"); return False
rnd = cr + 1
log(f" round={rnd} stage_comments={count - start_cnt}")
# Mid-stage compaction
if rnd > 1 and (rnd - 1) % COMPACT_EVERY == 0:
log(" Compaction...")
compact = generate_compaction(n, sid, comments[start_cnt:], rnd)
if compact: log(f" Compacted: {len(compact):,} chars")
# Claude
cr += 1; divider(f"{si['label']} — Claude #{cr}")
p = build_context_pack(n, title, body, sid, "claude", cr, start_cnt, compact)
if not run_claude(p):
log(f" retry in {backoff}s"); time.sleep(backoff); backoff = min(backoff * 2, 300); continue
backoff = 30
updated = wait_comment(n, count)
if not updated:
log(f" no comment, retry in {backoff}s"); time.sleep(backoff); backoff = min(backoff * 2, 300); continue
backoff = 30
comments = updated; count = len(comments)
# D-3 (2026-05-18) — log-only dual-write verification. Does NOT interrupt flow.
_verify_dual_write(n, sid, "claude", cr, comments[-1].get("body", ""))
# Codex
xr += 1; divider(f"{si['label']} — Codex #{xr}")
p = build_context_pack(n, title, body, sid, "codex", xr, start_cnt, compact)
if not run_codex(p):
log(f" retry in {backoff}s"); time.sleep(backoff); backoff = min(backoff * 2, 300); continue
backoff = 30
updated = wait_comment(n, count)
if not updated:
log(f" no comment, retry in {backoff}s"); time.sleep(backoff); backoff = min(backoff * 2, 300); continue
backoff = 30
comments = updated
# D-3 (2026-05-18) — log-only dual-write verification. Does NOT interrupt flow.
_verify_dual_write(n, sid, "codex", xr, comments[-1].get("body", ""))
# Consensus + Evidence check (2026-05-16 rewind dispatcher)
last = comments[-1]["body"]
is_codex = detect_agent(last) == "codex"
if not is_codex:
log(f" Codex 응답 미감지 — first line: {last.lstrip().splitlines()[0][:80]!r}" if last and last.strip() else " Codex 응답 미감지 — empty body")
# P5b (2026-05-20) — detect_agent None 시 supplement 가드.
# 범위 변경: audit-only 제한 해제 — 모든 issue 에서 작동 (#24 같은 일반 이슈 silent loop fix).
# Throttle: 현재 stage 안에 이미 N (=2) 회 supplement 가 누적되면 stop + user-action-required.
# 직전 N supplement 가 박혀도 LLM 이 또 위반하면 4 번째 round 부터는 hard stop.
SUPP_MAX = 2
SUPP_MARKER = "⚠️ **[Orchestrator]** Agent header missing"
stage_cmts = comments[start_cnt:]
supp_count = sum(1 for c in stage_cmts if (c.get("body") or "").lstrip().startswith(SUPP_MARKER))
if supp_count >= SUPP_MAX:
log(f"⛔ Agent header supplement {supp_count}/{SUPP_MAX} reached — STOP (user action required)")
try: gitea(f"issues/{n}/comments", "POST", {"body":
f"⛔ **[Orchestrator]** STOP — Stage `{sid}` cannot advance.\n\n"
f"`detect_agent` failed {supp_count}+ times in this stage. The LLM is not honoring "
f"the first-line agent header contract despite supplements.\n\n"
"**Action required (human)**: review last few comments, ensure FIRST non-empty line is "
"`[Claude #N]` or `[Codex #N]`, then restart `python -u .\\orchestrator.py --issue {n}`.\n\n"
"Orchestrator run is exiting this issue to prevent further token waste."})
except: pass
return False # exit run_stage → run_issue treats as external close → moves on
try: gitea(f"issues/{n}/comments", "POST", {"body":
f"{SUPP_MARKER} — orchestrator `detect_agent` could not find "
"`[Claude #N]` or `[Codex #N]` on the first non-empty line.\n\n"
"**Comment format contract (P5b 2026-05-20, see RULES)**:\n"
"The FIRST non-empty line of EVERY Gitea comment (both Claude and Codex, ALL stages) MUST be:\n"
" `[Claude #N] <stage description>`\n"
" `[Codex #N] <stage description>`\n\n"
"No prefix. No decoration. No banner. No audit anchor before the header.\n"
"Decorations (`📌`, `##`, `**`, audit anchor, etc.) go on line 2 or later.\n\n"
"This rule OVERRIDES any stage-specific 'body MUST contain ONLY' rule (e.g., COMPACT_PLAN_RULE) — "
"those body rules apply AFTER the mandatory first-line agent header.\n\n"
f"Supplement count for this stage: {supp_count + 1}/{SUPP_MAX}. "
f"At {SUPP_MAX}+ violations the orchestrator will hard-stop this issue."})
except: pass
continue
status, target = parse_consensus(last)
# YES 처리 — evidence 검증
if status == "YES":
if has_evidence(last, sid):
# Fix 1 (2026-05-17 A안) — Stage 3 YES 는 Remaining units: none 강제.
# remaining_units 가 비어있어야 모든 unit 완료. non-empty/parse-fail YES = 모순.
if sid == "code-edit":
cur_remaining_yes = parse_remaining_units(last)
if cur_remaining_yes is None:
log("⚠️ Stage 3 YES but Remaining units line missing — supplement requested")
try: gitea(f"issues/{n}/comments", "POST", {"body":
"⚠️ **[Orchestrator]** Stage 3 FINAL_CONSENSUS: YES requires a parseable line in the EVIDENCE block:\n\n"
" Remaining units: none (when all implementation_units complete)\n\n"
"Without this, orchestrator cannot verify all units were executed."})
except: pass
continue
if cur_remaining_yes:
log(f"⚠️ Stage 3 YES but Remaining units non-empty ({sorted(cur_remaining_yes)}) — contradiction")
try: gitea(f"issues/{n}/comments", "POST", {"body":
f"⚠️ **[Orchestrator]** Contradiction: FINAL_CONSENSUS: YES but Remaining units: {sorted(cur_remaining_yes)}.\n\n"
"If units remain → rewind_target: continue_same / FINAL_CONSENSUS: NO.\n"
"If all complete → `Remaining units: none`."})
except: pass
continue
# Fix 2 (2026-05-17 A안) — Stage 2 YES 는 IMPLEMENTATION_UNITS YAML block 존재 강제.
# Stage 3 의 unit-per-turn 동작은 이 block parse 에 의존.
# 2026-05-17 (Codex 추가 fix A) — 검색 범위 = current stage comments 만
# (comments[start_cnt:]). 이전 round / stage 의 stale block 으로 통과 방지.
# 2026-05-17 (Codex 추가 fix B) — 헤더만으로는 부족. block body 안에
# 최소 1 개의 `- id: u\d+` entry 가 있어야 통과. 빈 block silent pass 차단.
if sid == "simulation-plan":
iu_block_pat = re.compile(
r"===\s*IMPLEMENTATION_UNITS\s*===\s*\n(.*?)(?=\n===\s|\Z)",
re.IGNORECASE | re.DOTALL,
)
iu_unit_pat = re.compile(r"^\s*-\s*id:\s*u\d+", re.IGNORECASE | re.MULTILINE)
# P1-6 (2026-05-18) — tests:[] 단위 금지 직접 강제.
# #45 Codex #2 가 catch 한 violation 을 orchestrator 가 *Codex 가기 전에* 차단.
# 패턴: 'tests: []' 또는 'tests:[]' (whitespace 변형 포함)
iu_tests_empty_pat = re.compile(
r"^\s*tests\s*:\s*\[\s*\]\s*$", re.IGNORECASE | re.MULTILINE)
def _iu_valid(text):
m = iu_block_pat.search(text or "")
if not m: return (False, "block missing")
block_body = m.group(1)
if not iu_unit_pat.search(block_body):
return (False, "no `- id: u<N>` entry")
if iu_tests_empty_pat.search(block_body):
return (False, "unit with `tests: []` (forbidden — implementation + tests = same unit)")
return (True, "")
ok, reason = _iu_valid(last)
# P7 (2026-05-26) — fallback skip when last YES body itself is invalid.
# 이전: last invalid → comments[start_cnt:] 에서 valid block 찾아 구제 →
# orchestrator 자기 supplement comment 의 Example block 이 valid 로 통과 (#84 round 5 슬립).
# 변경: last 가 진짜 invalid 면 fallback 자체 skip. 단 last 의 _iu_valid 실패가
# "block missing" 인 경우만 (Codex 가 YAML block 을 안 echo 한 경우) 이전 round 의
# Claude plan 으로 fallback — 단 orchestrator-authored supplement 는 제외.
if not ok and reason == "block missing":
for c in comments[start_cnt:]:
body = c.get("body", "") or ""
# exclude orchestrator-authored supplement comments (own example block trap)
ls = body.lstrip()
if ls.startswith("⚠️ **[Orchestrator]**") or \
ls.startswith("📌 **[오케스트레이터]**") or \
ls.startswith(" **[Orchestrator]**"):
continue
ok2, _ = _iu_valid(body)
if ok2:
ok = True; break
if not ok:
log(f"⚠️ Stage 2 YES but IMPLEMENTATION_UNITS invalid ({reason}) — supplement requested")
try: gitea(f"issues/{n}/comments", "POST", {"body":
f"⚠️ **[Orchestrator]** Stage 2 FINAL_CONSENSUS: YES rejected: {reason}.\n\n"
"Requirements (strict):\n"
"- `=== IMPLEMENTATION_UNITS ===` block with at least one `- id: u<N>` entry\n"
"- Each unit MUST include `tests: [<path>, ...]` (NOT `tests: []`)\n"
"- Implementation + tests = same unit (no deferring tests to later units)\n\n"
"Example:\n"
" === IMPLEMENTATION_UNITS ===\n"
" - id: u1\n summary: ...\n files: [...]\n tests: [tests/.../test_xxx.py]\n estimate_lines: <int>\n"})
except: pass
continue
# P4 (2026-05-19) — AUDIT-ONLY guard: Stage 3 (code-edit) YES 직전 git status 검사.
# src/templates/tests 변경 있으면 자동 reject + supplement 요청. LLM 양심 무관 deterministic.
# P4a (2026-05-19) — baseline subtraction. audit 시작 시점 dirty path 는 제외 —
# Claude 가 새로 만든 forbidden 변경만 잡음.
if sid == "code-edit" and _audit_mode(title):
baseline = _load_audit_baseline(n)
bad = _check_audit_only_violations(baseline)
if bad:
log(f"⚠️ AUDIT-ONLY violation — Stage 3 YES rejected: {len(bad)} forbidden file change(s)")
log(f" violations (first 5): {bad[:5]}")
try: gitea(f"issues/{n}/comments", "POST", {"body":
"⚠️ **[Orchestrator]** AUDIT-ONLY mode violation: Stage 3 YES rejected.\n\n"
"This issue is in audit-only mode. Production code changes are forbidden.\n\n"
f"NEW forbidden file changes detected ({len(bad)} file(s), beyond pre-existing baseline):\n" +
"\n".join(f" - `{v}`" for v in bad[:20]) +
("\n - ... (truncated)" if len(bad) > 20 else "") + "\n\n"
"Revert these changes and limit Stage 3 outputs to:\n"
"- `docs/architecture/INTEGRATION-AUDIT-*.md`\n"
"- `docs/architecture/PHASE-Z-IMPLEMENTATION-ISSUE-BACKLOG.md` (only if planned)\n\n"
"If a blocker was found, propose a follow-up issue in the audit report — "
"do NOT modify production code in this audit issue."})
except: pass
continue
# P4a (2026-05-19) — Stage 5 commit scope guard.
# 'git add -A' 같은 명령으로 dirty WIP 가 audit commit 에 섞이는 사고 방지.
# HEAD commit 의 파일 list 가 AUDIT_ALLOWED_COMMIT_GLOBS 안에만 있어야 함.
if sid == "commit-push" and _audit_mode(title):
out_of_scope = _check_audit_commit_scope()
if out_of_scope:
log(f"⚠️ AUDIT-ONLY violation — Stage 5 YES rejected: HEAD commit includes {len(out_of_scope)} out-of-scope file(s)")
log(f" out-of-scope (first 5): {out_of_scope[:5]}")
try: gitea(f"issues/{n}/comments", "POST", {"body":
"⚠️ **[Orchestrator]** AUDIT-ONLY mode violation: Stage 5 YES rejected.\n\n"
"The HEAD commit includes files outside the audit-allowed scope.\n\n"
f"Out-of-scope files in HEAD commit ({len(out_of_scope)} file(s)):\n" +
"\n".join(f" - `{v}`" for v in out_of_scope[:20]) +
("\n - ... (truncated)" if len(out_of_scope) > 20 else "") + "\n\n"
"Allowed commit scope:\n"
"- `docs/architecture/INTEGRATION-AUDIT-*.md`\n"
"- `docs/architecture/INTEGRATION-AUDIT-*/*` (subdirectory variants)\n"
"- `docs/architecture/PHASE-Z-IMPLEMENTATION-ISSUE-BACKLOG.md`\n\n"
"Remediation (use --force-with-lease, NOT plain --force):\n"
"```\n"
"git reset --soft HEAD~1\n"
"git restore --staged <out-of-scope files>\n"
"git commit -m '<audit commit message>'\n"
"git push --force-with-lease origin <branch>\n"
"```\n\n"
"Do NOT use `git add -A` or `git add .` in audit-only mode. "
"Stage only the audit report files explicitly."})
except: pass
continue
# P5-2 (2026-05-20) — Dormant trigger guard (L3 layer, issue #58).
# Stage 4 (test-verify) PASS → run dormant trigger checker against the
# current change surface. If alerts written, post INFORMATIONAL supplement
# comment. NEVER blocks Stage 5 entry (checker is exit 0; helper fail-open).
# Audit-only issues skip — their change surface is restricted to audit docs,
# which the registry does not watch.
if sid == "test-verify" and not _audit_mode(title):
alerts = _check_dormant_triggers()
if alerts:
log(f" Dormant trigger guard: {len(alerts)} activation candidate(s) detected (informational)")
try: gitea(f"issues/{n}/comments", "POST", {"body":
" **[Orchestrator]** Dormant trigger guard — informational alert (does NOT block Stage 5).\n\n"
"The following closed dormant backlog axes have changed-file evidence matching their "
"activation triggers. Registry: `docs/architecture/DORMANT-TRIGGERS.yaml`. "
"Alert artifact: `.orchestrator/dormant_alerts.json`.\n\n" +
"\n".join(
f"- **#{a.get('issue')}** {a.get('title')}"
f"`{(a.get('on_trigger') or {}).get('action', '?')}` "
f"({len(((a.get('match') or {}).get('files')) or [])} file(s))"
for a in alerts[:10]
) +
("\n - ... (truncated)" if len(alerts) > 10 else "") + "\n\n"
"Recommended next step : open a follow-up issue using the `template:` field in the "
"registry, OR acknowledge in the next stage comment. Stage 5 proceeds regardless."})
except: pass
# Never `continue` — checker is informational only (Stage 1 guardrail).
# P7 (2026-05-26) — final-close YES casual self-contradiction inline guard.
# parse_consensus 는 건드리지 않음 (다른 caller 영향 차단). YES 처리 block 안에서
# sid == "final-close" 인 경우만 casual contradiction 검사.
#
# 설계 의도 분기 (Patch B 와 분담) :
# - explicit `disposition: KEEP_OPEN_*` line 이 있으면 = 의도된 keep-open
# → 이 guard 통과 → Patch B (close PATCH skip) 가 처리.
# - explicit disposition line 없이 "NO close signal" 또는 "DO NOT CLOSE"
# casual 표현 만 있으면 = self-contradiction → supplement + continue.
#
# cf. #83 IMP-83 case = YES + explicit `disposition: KEEP_OPEN_AS_UMBRELLA_ANCHOR`
# → 통과 (Patch B 가 close skip).
if sid == "final-close":
has_explicit_disposition = bool(re.search(
r"^\s*disposition\s*:\s*KEEP_OPEN",
last, re.IGNORECASE | re.MULTILINE))
if not has_explicit_disposition:
casual_contradiction_patterns = [
(r"NO\s+close\s+signal", "NO close signal"),
(r"DO\s*NOT\s*CLOSE", "DO NOT CLOSE"),
]
hit = None
for p, label in casual_contradiction_patterns:
if re.search(p, last, re.IGNORECASE):
hit = label; break
if hit:
log(f"⚠️ Stage 6 YES casual self-contradiction ({hit}) — supplement requested")
try: gitea(f"issues/{n}/comments", "POST", {"body":
f"⚠️ **[Orchestrator]** Stage 6 FINAL_CONSENSUS: YES rejected — casual self-contradiction.\n\n"
f"YES marker 와 동시에 본문에 `{hit}` 등장 — 명시적 `disposition:` line 없음.\n\n"
"Resolution:\n"
" (a) If close intended → remove `{hit}` and re-state YES with close evidence.\n"
" (b) If keep-open intended → add explicit line:\n"
" `disposition: KEEP_OPEN_AS_UMBRELLA_ANCHOR` (or similar)\n"
" then orchestrator will honor keep-open at close PATCH (Patch B).\n"
" (c) Or switch to `FINAL_CONSENSUS: NO` with appropriate rewind_target."})
except: pass
continue
log(f"{si['label']} — YES (evidence verified)")
# stage 완료 = unit counter + remaining tracker 모두 reset
update_issue_state(n, continue_same_count=0, last_remaining_units=None)
return True
else:
log("⚠️ YES without sufficient evidence — supplement requested")
try: gitea(f"issues/{n}/comments", "POST", {"body":
"⚠️ **[Orchestrator]** FINAL_CONSENSUS: YES was not accepted yet.\n\n"
"Reason: The comment did not include the required evidence block for this stage.\n\n"
"Please supplement:\n"
"- === EVIDENCE === block header\n"
"- Commands run\n"
"- Files checked\n"
"- Test results (if Stage 2/4)\n"
"- Commit SHA (if Stage 5)\n"
"- Verified facts (if Stage 6)\n\n"
"The stage remains open and will continue."})
except: pass
continue
# NO 처리 — rewind dispatcher
if status == "NO":
# (a) NO 도 evidence 필요 (Codex fix #2 — RULE: NO evidence = REJECTED)
if not has_evidence(last, sid):
log("⚠️ NO without sufficient evidence — supplement requested")
try: gitea(f"issues/{n}/comments", "POST", {"body":
"⚠️ **[Orchestrator]** FINAL_CONSENSUS: NO also requires an === EVIDENCE === block.\n\n"
"Please supplement evidence (commands run, files checked, tests/commit/verified facts as stage requires) "
"BEFORE the rewind_target line."})
except: pass
continue
# (b) rewind_target 누락 → supplement 요청
if not target:
log("⚠️ NO without rewind_target — supplement requested")
try: gitea(f"issues/{n}/comments", "POST", {"body":
"⚠️ **[Orchestrator]** FINAL_CONSENSUS: NO requires a rewind_target line.\n\n"
"Add one of:\n"
" rewind_target: retry_same (technical_fail ONLY — push network/permission)\n"
" rewind_target: stage_1_review\n"
" rewind_target: stage_2_plan\n"
" rewind_target: stage_3_edit\n"
" rewind_target: stage_4_verify\n"
" rewind_target: stage_5_push\n\n"
"Stage 4 (test-verify) and Stage 6 (final-close) FORBID retry_same — must rewind to earlier stage."})
except: pass
continue
# (c) retry_same — verification stage 에서는 금지 (사용자 lock F)
if target == "retry_same":
if sid in VERIFY_STAGES:
log(f"⚠️ retry_same forbidden for {sid} — supplement requested")
try: gitea(f"issues/{n}/comments", "POST", {"body":
f"⚠️ **[Orchestrator]** retry_same is forbidden for Stage {sid}.\n\n"
"Verification stage NO must rewind to an earlier stage:\n"
" stage_1_review / stage_2_plan / stage_3_edit / stage_5_push"})
except: pass
continue
log(f"🔁 retry_same — same stage round (technical retry)")
continue
# (c2) continue_same — Stage 3 (code-edit) ONLY (2026-05-17 lock).
# RULES 의 "Stage 3 ONLY" spec 와 정합 — code-edit 가 아니면 supplement 요청.
# progress-based counter (Fix 1) : remaining_units 가 줄지 않을 때만 증가.
if target == "continue_same":
if sid != "code-edit":
log(f"⚠️ continue_same forbidden for {sid} (Stage 3 only) — supplement requested")
try: gitea(f"issues/{n}/comments", "POST", {"body":
f"⚠️ **[Orchestrator]** continue_same is allowed ONLY for Stage 3 (code-edit).\n\n"
f"Current stage: {sid}. Choose another rewind_target:\n"
" stage_1_review / stage_2_plan / stage_3_edit / stage_5_push / retry_same"})
except: pass
continue
# Fix 1 — counter 는 *progress-based*. remaining_units 가 줄지 않을 때만 증가.
# 정상 진행 (u1→u2→u3 …) 은 매 round remaining 줄어듦 → counter reset.
# u1 stuck (3 round remaining 동일) = 진짜 progress 없음 → escalate.
ist_cs = get_issue_state(n)
cur_remaining = parse_remaining_units(last)
prev_remaining_list = ist_cs.get("last_remaining_units")
prev_remaining = set(prev_remaining_list) if prev_remaining_list is not None else None
if cur_remaining is None:
# parse fail — Codex evidence 에 'Remaining units:' 줄 없음/잘못된 format
log("⚠️ continue_same but Remaining units line not parseable — supplement requested")
try: gitea(f"issues/{n}/comments", "POST", {"body":
"⚠️ **[Orchestrator]** continue_same requires a parseable line in the EVIDENCE block:\n\n"
" Remaining units: [u2, u3, u4] (or comma list / `none` if all complete)\n\n"
"Without this, orchestrator cannot verify progress between rounds."})
except: pass
continue
# Fix 1 (2026-05-17) — empty set + continue_same = 모순.
# 모든 unit 완료 = FINAL_CONSENSUS: YES 여야 함. continue_same X.
if not cur_remaining:
log("⚠️ continue_same with empty Remaining units — contradiction, supplement requested")
try: gitea(f"issues/{n}/comments", "POST", {"body":
"⚠️ **[Orchestrator]** Contradiction: rewind_target: continue_same but Remaining units: none.\n\n"
"If all implementation_units complete → FINAL_CONSENSUS: YES (not NO + continue_same).\n"
"If units remain → list them: `Remaining units: [uN, ...]`."})
except: pass
continue
# progress 판정 : remaining 가 *prev 와 같으면* 진행 없음 → counter+1
if prev_remaining is not None and cur_remaining == prev_remaining:
cnt = (ist_cs.get("continue_same_count") or 0) + 1
else:
cnt = 0 # remaining 변화 = progress. counter reset.
update_issue_state(n,
continue_same_count=cnt,
last_remaining_units=sorted(cur_remaining))
if cnt >= 3:
log(f"⚠️ continue_same stuck — remaining_units unchanged {cnt}× → auto-escalate to stage_2_plan")
update_issue_state(n, continue_same_count=0, last_remaining_units=None)
fp = save_failure_report(n, sid, "simulation-plan",
last + f"\n\n[Auto-escalate: continue_same×{cnt} with remaining_units unchanged = plan/implementation stuck]")
update_issue_state(n,
failure_report_path=str(fp),
failure_from_stage=sid)
# Fix 9 (Phase A-3a) — Gitea POST 제거. state + failure_report 가 truth.
log_orchestrator_event(n,
f"AUTO-ESCALATE: continue_same stuck in {sid}, "
f"remaining_units={sorted(cur_remaining)} unchanged for {cnt} rounds. "
f"Rewinding to simulation-plan.")
return "rewind:simulation-plan"
log(f"➡️ continue_same — remaining_units={sorted(cur_remaining)} (counter={cnt})")
continue
# (d) target stage 로 rewind
rewind_sid = REWIND_TARGET_TO_SID.get(target)
if not rewind_sid:
log(f"⚠️ unknown rewind_target: {target}")
continue
# (e) verification stage 의 self-rewind 금지 (Codex fix #3)
# test-verify → stage_4_verify / final-close → 자기자신 = 같은 stage 반복.
# retry_same 금지의 spirit 위배.
if sid in VERIFY_STAGES and rewind_sid == sid:
log(f"⚠️ self-rewind forbidden for verification stage {sid} — supplement requested")
try: gitea(f"issues/{n}/comments", "POST", {"body":
f"⚠️ **[Orchestrator]** rewind_target pointing to the same verification stage ({sid}) is forbidden.\n\n"
"Choose an EARLIER stage:\n"
" stage_1_review / stage_2_plan / stage_3_edit / stage_5_push (for final-close)"})
except: pass
continue
log(f"🔄 Codex NO → rewind to {rewind_sid}")
fp = save_failure_report(n, sid, rewind_sid, last)
log(f" Failure report: {fp}")
# state 에 failure_report_path 기록 (Codex fix #1 — context pack 가 read)
# rewind 시 continue_same_count reset (다른 stage 로 이동 = 새 cycle)
update_issue_state(n,
failure_report_path=str(fp),
failure_from_stage=sid,
continue_same_count=0,
last_remaining_units=None)
return f"rewind:{rewind_sid}"
# status 가 None — 합의 마커 자체 미부착
log(" no FINAL_CONSENSUS marker — continuing")
# ═══════════════════════════════════════════════════════════════
# Issue / Batch / Status / Main
# ═══════════════════════════════════════════════════════════════
def _final_close_keep_open(n):
    """Return True when Stage 6 output carries a keep-open / no-close signal.

    Two sources are inspected, in order: the body of the most recent comment
    on issue ``n`` and the locally saved final-close exit report. A source
    that cannot be read is logged and treated as carrying no signal.
    """
    keep_open_patterns = [
        r"KEEP_OPEN_AS_UMBRELLA_ANCHOR",
        r"DO\s*NOT\s*CLOSE",
        r"disposition\s*:\s*KEEP_OPEN",
        r"^\s*action\s*:\s*NONE",
        r"^\s*state_after\s*:\s*open",
        r"NO\s+close\s+signal",
    ]

    def _has_signal(text):
        # `text or ""` guards against a null comment body.
        return any(re.search(p, text or "", re.IGNORECASE | re.MULTILINE)
                   for p in keep_open_patterns)

    # The comment list is not in scope in run_issue (it lives inside the stage
    # loop), so it has to be fetched here explicitly.
    try:
        comments = get_comments(n)
    except Exception as e:
        log(f"⚠️ keep-open check: could not fetch comments for #{n}: {e}")
        comments = []
    last_body = (comments[-1].get("body") or "") if comments else ""
    if _has_signal(last_body):
        return True

    exit_path = _erp(n, "final-close")
    try:
        if exit_path.exists():
            return _has_signal(exit_path.read_text(encoding="utf-8", errors="ignore"))
    except OSError as e:
        log(f"⚠️ keep-open check: could not read {exit_path}: {e}")
    return False


def run_issue(n, until=None):
    """Drive issue ``n`` through the stage pipeline, handling rewinds and close.

    Execution starts at the stage recorded in the issue state (default
    "problem-review") and walks STAGES in order. For each stage, run_stage()
    is called and its result is interpreted as:

    * ``False``             -> stage interrupted; stop processing this issue.
    * ``"rewind:<stage>"``  -> move the cursor back to ``<stage>`` and retry.
    * anything else         -> stage complete: post the exit report, advance
      the persisted stage, and (for "final-close") either close the issue or
      honor a keep-open signal.

    Args:
        n: Issue number.
        until: Optional stage id; the run stops after that stage completes.
            A missing or unknown value runs through the last stage.

    Returns:
        None.
    """
    issue = get_issue(n)
    if issue["state"] == "closed":
        log(f"#{n} closed, skip")
        return
    title = issue["title"]
    body = issue.get("body", "")
    header(f"Issue #{n}: {title}")

    # P4a (2026-05-19) — store the audit baseline (an existing file is kept on
    # a resumed run). Called only in audit mode, so regular issues are unaffected.
    if _audit_mode(title):
        _ensure_audit_baseline(n)

    st = get_issue_state(n)
    cur = st.get("stage", "problem-review")
    # "done" is what gets persisted after the last stage. It is not a stage id,
    # so without this guard the start index would fall back to 0 and an issue
    # deliberately left open (keep-open disposition) would be re-run from
    # Stage 1 on the next invocation.
    if cur == "done":
        log(f"#{n} already completed all stages — skip (use --reset {n} to re-run)")
        return
    si = STAGE_IDS.index(cur) if cur in STAGE_IDS else 0
    ei = STAGE_IDS.index(until) + 1 if until and until in STAGE_IDS else len(STAGES)

    i = si
    while i < ei:
        s = STAGES[i]
        result = run_stage(n, title, body, s["id"])

        # Issue was closed externally while the stage was running.
        if result is False:
            log(" Stage interrupted (issue closed externally)")
            return

        # Rewind (verification failed) — Codex fix #4: comment English only.
        if isinstance(result, str) and result.startswith("rewind:"):
            target_stage = result.split(":", 1)[1]
            target_idx = STAGE_IDS.index(target_stage) if target_stage in STAGE_IDS else si
            fp = ISSUES_DIR / f"{n}_stage_{s['id']}_failed.md"
            if fp.exists():
                # Fix 9 (Phase A-3a) — no Gitea POST; state + failure report are the truth.
                log_orchestrator_event(n,
                    f"STAGE FAILED — rewinding: "
                    f"{s['id']} ({s['label']}) → {STAGES[target_idx]['id']} ({STAGES[target_idx]['label']}). "
                    f"Failure report: {fp.name}")
            # Move the persisted stage and reset stage_start. failure_report_path
            # is left in place so the next stage's context pack can read it.
            update_issue_state(n, stage=target_stage, stage_start_count=None, stage_start_stage=None)
            log(f" Rewind: {s['label']}{STAGES[target_idx]['label']}")
            i = target_idx
            continue

        # Normal completion — clear failure report, counter and remaining tracker.
        generate_and_post_exit_report(n, s["id"])
        nxt = STAGE_IDS[i + 1] if i + 1 < len(STAGE_IDS) else "done"
        update_issue_state(n, stage=nxt, stage_start_count=None, stage_start_stage=None,
                           failure_report_path=None, failure_from_stage=None,
                           continue_same_count=0, last_remaining_units=None)

        if s["id"] == "final-close":
            # P7 (2026-05-26) — KEEP_OPEN guard. A successful Stage 6 means the
            # disposition was settled, and that disposition may be "keep open"
            # (e.g. an umbrella anchor), in which case the close PATCH is skipped.
            if _final_close_keep_open(n):
                log(f"Stage 6 KEEP_OPEN signal — issue #{n} NOT closed (umbrella/governance anchor honored)")
                try:
                    gitea(f"issues/{n}/comments", "POST", {"body":
                        " **[Orchestrator]** Stage 6 KEEP_OPEN signal honored — issue not closed.\n\n"
                        "Detected one of: `KEEP_OPEN_AS_UMBRELLA_ANCHOR`, `DO NOT CLOSE`, "
                        "`disposition: KEEP_OPEN`, `action: NONE`, `state_after: open`, `NO close signal`.\n\n"
                        "Orchestrator abstains from `PATCH state=closed` per user-decision-first lock. "
                        "Final-close stage marked done; issue state preserved as `open`."})
                except Exception as e:
                    # Best-effort notice; the keep-open decision itself stands.
                    log(f"⚠️ failed to post KEEP_OPEN notice on #{n}: {e}")
            else:
                try:
                    gitea(f"issues/{n}", "PATCH", {"state": "closed"})
                    log("Closed")
                except Exception as e:
                    # Still best-effort, but surface the failure instead of hiding it.
                    log(f"⚠️ failed to close #{n}: {e}")
        i += 1

    log(f"#{n} done: {STAGE_IDS[min(ei-1, len(STAGE_IDS)-1)]}")
def run_all(start_from=None, until=None):
    """Run every open issue through the pipeline, one after another.

    Args:
        start_from: Optional issue number; issues with a lower number are
            skipped. A falsy value disables the filter.
        until: Optional stage id forwarded to run_issue() for each issue.
    """
    pending = get_open_issues()
    if start_from:
        pending = [entry for entry in pending if entry["number"] >= start_from]
    if not pending:
        log("No issues")
        return

    header(f"Running {len(pending)} issues")
    for entry in pending:
        run_issue(entry["number"], until)
        log(f"#{entry['number']} → next")
    header("Complete")
def show_status(n=None):
    """Print pipeline status for one issue, or a one-line summary per open issue.

    Args:
        n: Issue number. When falsy, all open issues are listed instead.
    """
    state = load_state()

    if not n:
        # Summary mode: one row per open issue with its stage and exit-file count.
        issues = get_open_issues()
        header(f"{GITEA_REPO}{len(issues)} open")
        for i in issues:
            ist = state.get(str(i["number"]), {})
            stage = ist.get("stage", "problem-review")
            exits = len([s for s in STAGES if _erp(i["number"], s["id"]).exists()])
            print(f" #{i['number']:>3} {i['title'][:40]:<40} [{stage}] exits:{exits}")
        print()
        return

    # Detail mode: title, persisted stage, comment count, and the stage ids
    # for which the _erp() path exists on disk.
    issue = get_issue(n)
    cs = get_comments(n)
    ist = state.get(str(n), {})
    exits = []
    for s in STAGES:
        if _erp(n, s["id"]).exists():
            exits.append(s["id"])
    print(f"\n #{n}: {issue['title']}\n stage={ist.get('stage','problem-review')} comments={len(cs)}")
    print(f" exits: {', '.join(exits) or 'none'}\n")
def main():
    """CLI entry point: check the token, install exit hooks, parse flags, dispatch.

    Exits with status 1 when GITEA_TOKEN is not set. Otherwise exactly one
    action runs, chosen in this priority order: --reset, --reset-all,
    --status, --issue, --from, and finally "run all open issues".
    """
    global _ORCH_CREATE, AUDIT_ONLY_OVERRIDE

    if not GITEA_TOKEN:
        print("\n GITEA_TOKEN required\n")
        sys.exit(1)
    for work_dir in (ORCH_DIR, ISSUES_DIR, TMP_DIR, DRAFTS_DIR):
        work_dir.mkdir(parents=True, exist_ok=True)

    # P3-3 (2026-05-18) — safety net that cleans up leftover _SPAWNED entries
    # when the orchestrator exits.
    try:
        _ORCH_CREATE = psutil.Process(_ORCH_PID).create_time()
    except Exception:
        _ORCH_CREATE = None
    atexit.register(_orchestrator_exit_cleanup)
    try:
        signal.signal(signal.SIGINT, _sigint_handler)
    except (ValueError, AttributeError):
        pass  # non-main thread or unsupported platform

    log(f"Claude: {CLAUDE_EXE}")
    log(f"Codex: {CODEX_CMD}")
    log(f"Repo: {GITEA_REPO}")
    print()

    parser = argparse.ArgumentParser(description="Orchestrator v6")
    parser.add_argument("--issue", "-i", type=int)
    parser.add_argument("--status", "-s", action="store_true")
    parser.add_argument("--from", dest="sf", type=int)
    parser.add_argument("--until", choices=STAGE_IDS)
    parser.add_argument("--reset", type=int, metavar="N")
    parser.add_argument("--reset-all", action="store_true")
    parser.add_argument(
        "--audit-only", action="store_true",
        help="P4: force audit-only mode (no src/templates/tests edits, Stage 3 guard active)")
    opts = parser.parse_args()

    # P4 — the CLI flag overrides the title check: audit-only is enforced even
    # when the issue title carries no marker.
    if opts.audit_only:
        AUDIT_ONLY_OVERRIDE = True
        log(" --audit-only flag: audit mode forced (src/templates/tests changes will be blocked)")

    if opts.reset:
        clear_state(opts.reset)
        log(f"Cleared #{opts.reset}")
    elif opts.reset_all:
        clear_state()
        log("All cleared")
    elif opts.status:
        show_status(opts.issue)
    elif opts.issue:
        run_issue(opts.issue, opts.until)
    elif opts.sf:
        run_all(opts.sf, opts.until)
    else:
        run_all(until=opts.until)
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()