From e32f632464d33cb40afe7ae58e38e13c9e20f5a3 Mon Sep 17 00:00:00 2001
From: kyeongmin
Date: Tue, 19 May 2026 10:29:15 +0900
Subject: [PATCH] fix(orchestrator): P4a baseline-diff guard + Stage 5 commit scope
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

P4 had two production issues blocking #50 integration audit deployment:

1. Stage 3 guard had no baseline awareness — flagged ALL forbidden-path
   changes including pre-existing dirty WIP. Empirical: 328 such files
   already in current working tree (tests/matching/ artifacts etc). #50
   would have hit reject loops immediately without Claude doing anything
   wrong.

2. Stage 5 had no commit-scope guard — if Claude ran `git add -A` and
   committed user's existing WIP, audit commit would be polluted with
   unrelated production changes.

P4a additions:
- _audit_baseline_path / _ensure_audit_baseline / _load_audit_baseline:
  snapshot working-tree dirty paths at run_issue entry for audit issues.
  Resumed runs preserve existing baseline (no overwrite).
- _check_audit_only_violations(baseline=None): accept baseline set,
  subtract from violations — only flags NEW forbidden changes introduced
  after audit start.
- _check_audit_commit_scope: verify HEAD commit's file list matches
  AUDIT_ALLOWED_COMMIT_GLOBS (INTEGRATION-AUDIT-*.md, BACKLOG.md).
- run_issue: save baseline on audit-mode entry only — no impact on normal
  issues.
- Stage 5 (commit-push) YES gate: new guard rejects on out-of-scope files
  with remediation prompt (git reset --soft + force-with-lease).

19 new tests:
- baseline subtraction (5): pre-existing removed, None=keep-all,
  empty-set=catch-all, full-coverage filter, Windows path normalize.
- baseline persist (5): roundtrip, no-overwrite on resume, missing
  fallback, corrupt JSON fallback, non-list fallback.
- commit scope detection (7): report-only allowed, backlog allowed, src/
  rejected, unrelated docs rejected, git error fail-open, Windows
  backslash, empty commit pass.
- allowed globs sanity (2): every glob has audit marker, all under docs/architecture/. Total: 94/94 pytest pass (75 prior + 19 new). Co-Authored-By: Claude Opus 4.7 (1M context) --- orchestrator.py | 131 +++++++++++++-- tests/orchestrator_unit/test_audit_mode.py | 180 +++++++++++++++++++++ 2 files changed, 297 insertions(+), 14 deletions(-) diff --git a/orchestrator.py b/orchestrator.py index b7425d9..89c7a03 100644 --- a/orchestrator.py +++ b/orchestrator.py @@ -756,9 +756,20 @@ def _audit_mode(title): # 블랙리스트 — 화이트리스트보다 false positive 적음 (data/runs, .orchestrator artifacts 등 자연 통과). AUDIT_ONLY_FORBIDDEN_PREFIXES = ("src/", "templates/", "tests/") -def _check_audit_only_violations(): - """git status --porcelain 검사. AUDIT_ONLY_FORBIDDEN_PREFIXES 매치 변경 list 반환. - Returns: list of violating paths (빈 list = 통과).""" +# P4a (2026-05-19) — Stage 5 commit scope guard. HEAD commit 의 file list 가 이 glob 안에만 있어야. +AUDIT_ALLOWED_COMMIT_GLOBS = ( + "docs/architecture/INTEGRATION-AUDIT-*.md", + "docs/architecture/INTEGRATION-AUDIT-*/*", # subdirectory 변형 대응 + "docs/architecture/PHASE-Z-IMPLEMENTATION-ISSUE-BACKLOG.md", +) + +def _audit_baseline_path(n): + """Per-issue baseline 파일 경로.""" + return ORCH_DIR / f"audit_baseline_{n}.json" + +def _git_porcelain_paths(): + """git status --porcelain 출력 파싱 — 변경 path set 반환. forward-slash 정규화. + Empty 또는 git 에러 시 빈 set (fail open).""" try: r = subprocess.run( ["git", "status", "--porcelain"], @@ -766,29 +777,84 @@ def _check_audit_only_violations(): cwd=PROJECT_DIR, timeout=30, ) if r.returncode != 0: - # git error — fail open (가드 자체 실패는 false 알람 만들지 않음). - return [] + return set() except Exception: - return [] - bad = [] + return set() + paths = set() for line in r.stdout.splitlines(): if len(line) < 4: continue - # porcelain format: "XY path" — XY 는 staged/unstaged 2-char. path = line[3:].strip() - # rename: "XY old -> new" — destination 만 검사. 
if " -> " in path: path = path.split(" -> ")[-1].strip() - # quoted path (special chars) — strip wrapping quotes. if path.startswith('"') and path.endswith('"'): path = path[1:-1] - # forward-slash 통일 (Windows backslash 도 처리). - norm = path.replace("\\", "/") + paths.add(path.replace("\\", "/")) + return paths + +def _ensure_audit_baseline(n): + """Audit issue 시작 시점 working tree 의 dirty path set 스냅샷 저장. + 이미 baseline 파일 있으면 보존 (resumed run 의 가드 일관성 유지).""" + p = _audit_baseline_path(n) + if p.exists(): + return + paths = _git_porcelain_paths() + p.parent.mkdir(parents=True, exist_ok=True) + p.write_text(json.dumps(sorted(paths), ensure_ascii=False), encoding="utf-8") + log(f" audit baseline saved: {len(paths)} pre-existing paths (file: {p.name})") + +def _load_audit_baseline(n): + """저장된 baseline path set 로드. 파일 없으면 빈 set.""" + p = _audit_baseline_path(n) + if not p.exists(): + return set() + try: + data = json.loads(p.read_text(encoding="utf-8")) + return set(data) if isinstance(data, list) else set() + except Exception: + return set() + +def _check_audit_only_violations(baseline=None): + """git status --porcelain 검사. AUDIT_ONLY_FORBIDDEN_PREFIXES 매치 변경 list 반환. + baseline (set of paths) 가 주어지면 그 path 는 violation 에서 제외 — pre-existing dirty 무시. + Returns: list of violating paths (빈 list = 통과).""" + paths = _git_porcelain_paths() + if not paths: + return [] # clean tree or git error — fail open + base = baseline if baseline is not None else set() + bad = [] + for norm in paths: + if norm in base: + continue # pre-existing — not a NEW violation for prefix in AUDIT_ONLY_FORBIDDEN_PREFIXES: if norm.startswith(prefix): bad.append(norm) break return bad +def _check_audit_commit_scope(): + """P4a — Stage 5 commit scope guard. + HEAD commit 의 file list 가 AUDIT_ALLOWED_COMMIT_GLOBS 안에만 있는지 검증. 
+ Returns: list of paths committed outside allowed scope (빈 list = 통과).""" + import fnmatch + try: + r = subprocess.run( + ["git", "show", "HEAD", "--name-only", "--pretty=format:"], + capture_output=True, text=True, encoding="utf-8", errors="replace", + cwd=PROJECT_DIR, timeout=30, + ) + if r.returncode != 0: + return [] # git error — fail open + except Exception: + return [] + bad = [] + for line in r.stdout.splitlines(): + path = line.strip().replace("\\", "/") + if not path: + continue + if not any(fnmatch.fnmatch(path, g) for g in AUDIT_ALLOWED_COMMIT_GLOBS): + bad.append(path) + return bad + # P1-5 (2026-05-18) — Stage 2 compact rule (모든 issue 적용). # Stage 2 의 c-role 에 size budget + code snippet 금지 명시. 29 KB plan 차단. COMPACT_PLAN_RULE = """ @@ -1284,15 +1350,18 @@ def run_stage(n, title, body, sid): # P4 (2026-05-19) — AUDIT-ONLY guard: Stage 3 (code-edit) YES 직전 git status 검사. # src/templates/tests 변경 있으면 자동 reject + supplement 요청. LLM 양심 무관 deterministic. + # P4a (2026-05-19) — baseline subtraction. audit 시작 시점 dirty path 는 제외 — + # Claude 가 새로 만든 forbidden 변경만 잡음. if sid == "code-edit" and _audit_mode(title): - bad = _check_audit_only_violations() + baseline = _load_audit_baseline(n) + bad = _check_audit_only_violations(baseline) if bad: log(f"⚠️ AUDIT-ONLY violation — Stage 3 YES rejected: {len(bad)} forbidden file change(s)") log(f" violations (first 5): {bad[:5]}") try: gitea(f"issues/{n}/comments", "POST", {"body": "⚠️ **[Orchestrator]** AUDIT-ONLY mode violation: Stage 3 YES rejected.\n\n" "This issue is in audit-only mode. Production code changes are forbidden.\n\n" - f"Forbidden file changes detected ({len(bad)} file(s)):\n" + + f"NEW forbidden file changes detected ({len(bad)} file(s), beyond pre-existing baseline):\n" + "\n".join(f" - `{v}`" for v in bad[:20]) + ("\n - ... 
(truncated)" if len(bad) > 20 else "") + "\n\n" "Revert these changes and limit Stage 3 outputs to:\n" @@ -1303,6 +1372,36 @@ def run_stage(n, title, body, sid): except: pass continue + # P4a (2026-05-19) — Stage 5 commit scope guard. + # 'git add -A' 같은 명령으로 dirty WIP 가 audit commit 에 섞이는 사고 방지. + # HEAD commit 의 파일 list 가 AUDIT_ALLOWED_COMMIT_GLOBS 안에만 있어야 함. + if sid == "commit-push" and _audit_mode(title): + out_of_scope = _check_audit_commit_scope() + if out_of_scope: + log(f"⚠️ AUDIT-ONLY violation — Stage 5 YES rejected: HEAD commit includes {len(out_of_scope)} out-of-scope file(s)") + log(f" out-of-scope (first 5): {out_of_scope[:5]}") + try: gitea(f"issues/{n}/comments", "POST", {"body": + "⚠️ **[Orchestrator]** AUDIT-ONLY mode violation: Stage 5 YES rejected.\n\n" + "The HEAD commit includes files outside the audit-allowed scope.\n\n" + f"Out-of-scope files in HEAD commit ({len(out_of_scope)} file(s)):\n" + + "\n".join(f" - `{v}`" for v in out_of_scope[:20]) + + ("\n - ... (truncated)" if len(out_of_scope) > 20 else "") + "\n\n" + "Allowed commit scope:\n" + "- `docs/architecture/INTEGRATION-AUDIT-*.md`\n" + "- `docs/architecture/INTEGRATION-AUDIT-*/*` (subdirectory variants)\n" + "- `docs/architecture/PHASE-Z-IMPLEMENTATION-ISSUE-BACKLOG.md`\n\n" + "Remediation (use --force-with-lease, NOT plain --force):\n" + "```\n" + "git reset --soft HEAD~1\n" + "git restore --staged \n" + "git commit -m ''\n" + "git push --force-with-lease origin \n" + "```\n\n" + "Do NOT use `git add -A` or `git add .` in audit-only mode. 
" + "Stage only the audit report files explicitly."}) + except: pass + continue + log(f"✅ {si['label']} — YES (evidence verified)") # stage 완료 = unit counter + remaining tracker 모두 reset update_issue_state(n, continue_same_count=0, last_remaining_units=None) @@ -1475,6 +1574,10 @@ def run_issue(n, until=None): if issue["state"] == "closed": log(f"#{n} closed, skip"); return title = issue["title"]; body = issue.get("body", "") header(f"Issue #{n}: {title}") + # P4a (2026-05-19) — audit baseline 저장 (resumed run 시 기존 파일 보존). + # audit mode 일 때만 호출 — 일반 이슈 경로 영향 0. + if _audit_mode(title): + _ensure_audit_baseline(n) st = get_issue_state(n); cur = st.get("stage", "problem-review") si = STAGE_IDS.index(cur) if cur in STAGE_IDS else 0 ei = STAGE_IDS.index(until)+1 if until and until in STAGE_IDS else len(STAGES) diff --git a/tests/orchestrator_unit/test_audit_mode.py b/tests/orchestrator_unit/test_audit_mode.py index cada121..69ee378 100644 --- a/tests/orchestrator_unit/test_audit_mode.py +++ b/tests/orchestrator_unit/test_audit_mode.py @@ -22,8 +22,13 @@ from orchestrator import ( # noqa: E402 _is_audit_issue, _audit_mode, _check_audit_only_violations, + _check_audit_commit_scope, + _ensure_audit_baseline, + _load_audit_baseline, + _audit_baseline_path, AUDIT_ONLY_FORBIDDEN_PREFIXES, AUDIT_ONLY_NOTE, + AUDIT_ALLOWED_COMMIT_GLOBS, ) @@ -199,6 +204,181 @@ class TestCheckAuditOnlyViolations: # AUDIT_ONLY_NOTE constants — sanity # ───────────────────────────────────────────────────────────────── +# ───────────────────────────────────────────────────────────────── +# P4a: baseline-aware violations +# ───────────────────────────────────────────────────────────────── + +class TestBaselineAwareViolations: + def test_baseline_subtraction_removes_preexisting(self, monkeypatch): + """pre-existing forbidden path 는 baseline 에 있으면 violation 에서 제외.""" + stdout = ( + " M src/already_dirty.py\n" # baseline 안에 있음 — 제외돼야 함 + " M src/new_violation.py\n" # baseline 밖 — violation + ) + 
monkeypatch.setattr(subprocess, "run", lambda *a, **kw: _FakeCompleted(stdout=stdout)) + baseline = {"src/already_dirty.py"} + v = _check_audit_only_violations(baseline=baseline) + assert v == ["src/new_violation.py"] + + def test_baseline_none_keeps_all(self, monkeypatch): + """baseline=None 이면 기존 동작 — 모든 forbidden 잡음.""" + stdout = " M src/a.py\n M src/b.py\n" + monkeypatch.setattr(subprocess, "run", lambda *a, **kw: _FakeCompleted(stdout=stdout)) + v = _check_audit_only_violations(baseline=None) + assert set(v) == {"src/a.py", "src/b.py"} + + def test_baseline_empty_set_keeps_all(self, monkeypatch): + """baseline=set() 이면 모두 새 violation 으로 잡음.""" + stdout = " M src/a.py\n" + monkeypatch.setattr(subprocess, "run", lambda *a, **kw: _FakeCompleted(stdout=stdout)) + v = _check_audit_only_violations(baseline=set()) + assert v == ["src/a.py"] + + def test_baseline_filters_all_violations(self, monkeypatch): + """모든 violation 이 baseline 에 있으면 빈 list 반환 — clean 으로 판정.""" + stdout = " M src/a.py\n M templates/b.html\n M tests/c.py\n" + monkeypatch.setattr(subprocess, "run", lambda *a, **kw: _FakeCompleted(stdout=stdout)) + baseline = {"src/a.py", "templates/b.html", "tests/c.py"} + v = _check_audit_only_violations(baseline=baseline) + assert v == [] + + def test_baseline_path_normalized_match(self, monkeypatch): + """baseline 의 path 는 forward-slash 정규화 형태. 
Windows backslash 도 매치.""" + stdout = " M src\\windows_path.py\n" + monkeypatch.setattr(subprocess, "run", lambda *a, **kw: _FakeCompleted(stdout=stdout)) + baseline = {"src/windows_path.py"} # baseline 도 forward-slash 형태로 저장 + v = _check_audit_only_violations(baseline=baseline) + assert v == [] + + +# ───────────────────────────────────────────────────────────────── +# P4a: _ensure_audit_baseline / _load_audit_baseline +# ───────────────────────────────────────────────────────────────── + +class TestAuditBaselinePersist: + def test_save_and_load_roundtrip(self, monkeypatch, tmp_path): + """baseline 저장 → 로드 → 동일 path set 반환.""" + # Redirect ORCH_DIR to tmp_path for isolation. + monkeypatch.setattr(orchestrator, "ORCH_DIR", tmp_path) + # Mock git status output. + stdout = " M src/a.py\n?? src/b.py\n M docs/c.md\n" + monkeypatch.setattr(subprocess, "run", lambda *a, **kw: _FakeCompleted(stdout=stdout)) + _ensure_audit_baseline(999) + loaded = _load_audit_baseline(999) + assert loaded == {"src/a.py", "src/b.py", "docs/c.md"} + + def test_ensure_does_not_overwrite_existing(self, monkeypatch, tmp_path): + """이미 baseline 파일 있으면 덮어쓰지 않음 — resumed run 의 가드 일관성.""" + monkeypatch.setattr(orchestrator, "ORCH_DIR", tmp_path) + # First save with one set. + stdout1 = " M src/original.py\n" + monkeypatch.setattr(subprocess, "run", lambda *a, **kw: _FakeCompleted(stdout=stdout1)) + _ensure_audit_baseline(999) + # Second call with DIFFERENT git status — should NOT overwrite. + stdout2 = " M src/different.py\n" + monkeypatch.setattr(subprocess, "run", lambda *a, **kw: _FakeCompleted(stdout=stdout2)) + _ensure_audit_baseline(999) + loaded = _load_audit_baseline(999) + # Original baseline preserved. 
+ assert loaded == {"src/original.py"} + + def test_load_missing_returns_empty_set(self, monkeypatch, tmp_path): + monkeypatch.setattr(orchestrator, "ORCH_DIR", tmp_path) + assert _load_audit_baseline(8888) == set() + + def test_load_corrupt_returns_empty_set(self, monkeypatch, tmp_path): + monkeypatch.setattr(orchestrator, "ORCH_DIR", tmp_path) + # Manually write corrupt JSON. + p = tmp_path / "audit_baseline_7777.json" + p.write_text("not valid json {{{", encoding="utf-8") + assert _load_audit_baseline(7777) == set() + + def test_load_non_list_returns_empty_set(self, monkeypatch, tmp_path): + """baseline 파일이 list 가 아닌 다른 JSON (예: dict) 이면 empty set.""" + monkeypatch.setattr(orchestrator, "ORCH_DIR", tmp_path) + p = tmp_path / "audit_baseline_6666.json" + p.write_text('{"unexpected": "shape"}', encoding="utf-8") + assert _load_audit_baseline(6666) == set() + + +# ───────────────────────────────────────────────────────────────── +# P4a: _check_audit_commit_scope — Stage 5 guard +# ───────────────────────────────────────────────────────────────── + +class TestAuditCommitScope: + def test_clean_commit_audit_report_only(self, monkeypatch): + """audit report 파일만 commit 되면 통과.""" + stdout = ( + "docs/architecture/INTEGRATION-AUDIT-01-REPORT.md\n" + "docs/architecture/INTEGRATION-AUDIT-01-MATRIX.md\n" + ) + monkeypatch.setattr(subprocess, "run", lambda *a, **kw: _FakeCompleted(stdout=stdout)) + assert _check_audit_commit_scope() == [] + + def test_backlog_update_allowed(self, monkeypatch): + stdout = ( + "docs/architecture/INTEGRATION-AUDIT-01-REPORT.md\n" + "docs/architecture/PHASE-Z-IMPLEMENTATION-ISSUE-BACKLOG.md\n" + ) + monkeypatch.setattr(subprocess, "run", lambda *a, **kw: _FakeCompleted(stdout=stdout)) + assert _check_audit_commit_scope() == [] + + def test_src_file_in_commit_detected(self, monkeypatch): + """audit commit 에 src/ 파일이 끼면 violation.""" + stdout = ( + "docs/architecture/INTEGRATION-AUDIT-01-REPORT.md\n" + "src/phase_z2_pipeline.py\n" + ) + 
monkeypatch.setattr(subprocess, "run", lambda *a, **kw: _FakeCompleted(stdout=stdout)) + v = _check_audit_commit_scope() + assert v == ["src/phase_z2_pipeline.py"] + + def test_unrelated_doc_detected(self, monkeypatch): + """docs/ 라도 audit 관련 아닌 doc 은 violation.""" + stdout = ( + "docs/architecture/INTEGRATION-AUDIT-01-REPORT.md\n" + "docs/some_other_doc.md\n" # 다른 doc + "docs/architecture/PHASE-Z-PIPELINE-OVERVIEW.md\n" # audit 와 무관한 doc + ) + monkeypatch.setattr(subprocess, "run", lambda *a, **kw: _FakeCompleted(stdout=stdout)) + v = _check_audit_commit_scope() + assert set(v) == {"docs/some_other_doc.md", + "docs/architecture/PHASE-Z-PIPELINE-OVERVIEW.md"} + + def test_git_error_fails_open(self, monkeypatch): + """git show 자체 실패 → 빈 list (가드가 false positive 만들지 않음).""" + monkeypatch.setattr(subprocess, "run", + lambda *a, **kw: _FakeCompleted(stdout="", returncode=128)) + assert _check_audit_commit_scope() == [] + + def test_windows_backslash_normalized(self, monkeypatch): + """Windows backslash path 도 forward-slash 정규화 후 glob 매치.""" + stdout = "docs\\architecture\\INTEGRATION-AUDIT-01-REPORT.md\n" + monkeypatch.setattr(subprocess, "run", lambda *a, **kw: _FakeCompleted(stdout=stdout)) + assert _check_audit_commit_scope() == [] + + def test_empty_commit_passes(self, monkeypatch): + """commit 에 파일 변경 없음 (보통 안 일어나지만) — 위반 없음.""" + monkeypatch.setattr(subprocess, "run", lambda *a, **kw: _FakeCompleted(stdout="")) + assert _check_audit_commit_scope() == [] + + +# ───────────────────────────────────────────────────────────────── +# P4a: allowed-glob shape sanity +# ───────────────────────────────────────────────────────────────── + +class TestAuditCommitAllowedGlobs: + def test_globs_have_audit_marker(self): + """모든 allowed glob 에 INTEGRATION-AUDIT 또는 BACKLOG 마커 존재.""" + for g in AUDIT_ALLOWED_COMMIT_GLOBS: + assert ("INTEGRATION-AUDIT" in g) or ("BACKLOG" in g) + + def test_globs_under_docs_architecture(self): + """모든 allowed path 가 docs/architecture/ 산하 — src/ 등 
우발적 허용 차단.""" + for g in AUDIT_ALLOWED_COMMIT_GLOBS: + assert g.startswith("docs/architecture/"), f"glob escapes docs/architecture/: {g}" + + class TestAuditOnlyConstants: def test_note_mentions_forbidden_prefixes(self): for p in AUDIT_ONLY_FORBIDDEN_PREFIXES: