diff --git a/Makefile b/Makefile index 510a7e12..962eb4b8 100644 --- a/Makefile +++ b/Makefile @@ -31,6 +31,10 @@ endif DUMP_SERVICES ?= all RESTORE_SERVICES ?= all +DUMP_DATASET ?= full +RESTORE_DATASET ?= +FILTER_SERVICES ?= postgres,ory-postgres +OUTPUT_BACKUP ?= FILE_PATH ?= RESTORE_INPUT ?= $(or $(FILE_PATH),$(word 2,$(MAKECMDGOALS))) CONFIRM_RESTORE ?= @@ -51,7 +55,7 @@ DOCKER_IMAGE_REF ?= WORKS_DOCKER_COMMIT_CONTAINER ?= WORKS_DOCKER_IMAGE_ARCHIVE_DIR ?= /tmp/baron-sso-docker-image-upload -.PHONY: help build-auth-config validate-auth-config verify-auth-config render-ory-config up up-all up-infra up-ory up-app up-backend ensure-networks ensure-infra ensure-ory ensure-restore-containers up-dev up-front-dev dev dev-debug down drop down-app down-backend down-infra down-ory check-infra ps logs-infra logs-ory logs-app backup-tools-build dump restore dump-verify restore-verify dump-list restore-plan upload-cloud works-drive-refresh-token dump-upload-cloud docker-image-upload-works docker-image-verify-works +.PHONY: help build-auth-config validate-auth-config verify-auth-config render-ory-config up up-all up-infra up-ory up-app up-backend ensure-networks ensure-infra ensure-ory ensure-restore-containers up-dev up-front-dev dev dev-debug down drop down-app down-backend down-infra down-ory check-infra ps logs-infra logs-ory logs-app backup-tools-build dump filter-personnel-dump restore dump-verify restore-verify dump-list restore-plan upload-cloud works-drive-refresh-token dump-upload-cloud docker-image-upload-works docker-image-verify-works help: ## 생성된 타깃과 옵션 목록 표시 @printf "Usage:\n make [OPTION=value ...]\n\n" @@ -314,10 +318,14 @@ backup-tools-build: ## 백업 도구 Docker 이미지 빌드 ifeq ($(BACKUP_USE_DOCKER),true) dump: backup-tools-build ## 백업 덤프 생성 - $(BACKUP_DOCKER_RUN) bash -lc 'DUMP_SERVICES="$(DUMP_SERVICES)" DUMP_MODE="$(DUMP_MODE)" BACKUP="$(BACKUP)" BACKUP_ROOT="$(BACKUP_ROOT)" scripts/backup/dump.sh' + $(BACKUP_DOCKER_RUN) bash -lc 
'DUMP_SERVICES="$(DUMP_SERVICES)" DUMP_DATASET="$(DUMP_DATASET)" DUMP_MODE="$(DUMP_MODE)" BACKUP="$(BACKUP)" BACKUP_ROOT="$(BACKUP_ROOT)" scripts/backup/dump.sh' + +filter-personnel-dump: backup-tools-build ## 전체 백업에서 personnel dataset 백업 추출 + $(MAKE) --no-print-directory ensure-restore-containers RESTORE_SERVICES="$(FILTER_SERVICES)" CONFIRM_RESTORE=baron-sso + $(BACKUP_DOCKER_RUN) bash -lc 'BACKUP="$(BACKUP)" OUTPUT_BACKUP="$(OUTPUT_BACKUP)" FILTER_SERVICES="$(FILTER_SERVICES)" scripts/backup/filter_personnel_dump.sh' restore: backup-tools-build ensure-restore-containers ## 백업 덤프 복구 - $(BACKUP_DOCKER_RUN) bash -lc 'RESTORE_INPUT="$(RESTORE_INPUT)" BACKUP="$(BACKUP)" DUMP_FILE="$(DUMP_FILE)" RESTORE_SERVICES="$(RESTORE_SERVICES)" CONFIRM_RESTORE="$(CONFIRM_RESTORE)" ALLOW_NON_EMPTY_RESTORE="$(ALLOW_NON_EMPTY_RESTORE)" RESTORE_REPORT="$(RESTORE_REPORT)" scripts/backup/restore.sh' + $(BACKUP_DOCKER_RUN) bash -lc 'RESTORE_INPUT="$(RESTORE_INPUT)" BACKUP="$(BACKUP)" DUMP_FILE="$(DUMP_FILE)" RESTORE_SERVICES="$(RESTORE_SERVICES)" RESTORE_DATASET="$(RESTORE_DATASET)" CONFIRM_RESTORE="$(CONFIRM_RESTORE)" ALLOW_NON_EMPTY_RESTORE="$(ALLOW_NON_EMPTY_RESTORE)" RESTORE_REPORT="$(RESTORE_REPORT)" scripts/backup/restore.sh' dump-verify: backup-tools-build ## 백업 덤프 검증 $(BACKUP_DOCKER_RUN) bash -lc 'BACKUP="$(BACKUP)" scripts/backup/verify-dump.sh' @@ -329,7 +337,7 @@ dump-list: backup-tools-build ## 사용 가능한 백업 덤프 목록 조회 $(BACKUP_DOCKER_RUN) bash -lc 'BACKUP_ROOT="$(BACKUP_ROOT)" scripts/backup/dump-list.sh' restore-plan: backup-tools-build ## 복구 실행 계획 출력 - $(BACKUP_DOCKER_RUN) bash -lc 'RESTORE_INPUT="$(RESTORE_INPUT)" BACKUP="$(BACKUP)" DUMP_FILE="$(DUMP_FILE)" RESTORE_SERVICES="$(RESTORE_SERVICES)" CONFIRM_RESTORE="$(CONFIRM_RESTORE)" RESTORE_REPORT="$(RESTORE_REPORT)" scripts/backup/restore-plan.sh' + $(BACKUP_DOCKER_RUN) bash -lc 'RESTORE_INPUT="$(RESTORE_INPUT)" BACKUP="$(BACKUP)" DUMP_FILE="$(DUMP_FILE)" RESTORE_SERVICES="$(RESTORE_SERVICES)" 
RESTORE_DATASET="$(RESTORE_DATASET)" CONFIRM_RESTORE="$(CONFIRM_RESTORE)" RESTORE_REPORT="$(RESTORE_REPORT)" scripts/backup/restore-plan.sh' upload-cloud: backup-tools-build ## 백업 덤프 클라우드 업로드 $(BACKUP_DOCKER_RUN) bash -lc '$(if $(WORKS_DRIVE_DRY_RUN),WORKS_DRIVE_DRY_RUN="$(WORKS_DRIVE_DRY_RUN)" )$(if $(WORKS_DRIVE_AUTH_MODE),WORKS_DRIVE_AUTH_MODE="$(WORKS_DRIVE_AUTH_MODE)" )BACKUP="$(BACKUP)" scripts/backup/upload_cloud.sh' @@ -338,10 +346,14 @@ works-drive-refresh-token: ## WORKS Drive OAuth refresh token 갱신 WORKS_DRIVE_TOKEN_GRANT="$(WORKS_DRIVE_TOKEN_GRANT)" WORKS_DRIVE_AUTH_CODE="$(WORKS_DRIVE_AUTH_CODE)" WORKS_DRIVE_AUTH_CALLBACK_URL="$(WORKS_DRIVE_AUTH_CALLBACK_URL)" scripts/backup/refresh_works_drive_token.sh else dump: ## 백업 덤프 생성 - DUMP_SERVICES="$(DUMP_SERVICES)" DUMP_MODE="$(DUMP_MODE)" BACKUP="$(BACKUP)" BACKUP_ROOT="$(BACKUP_ROOT)" scripts/backup/dump.sh + DUMP_SERVICES="$(DUMP_SERVICES)" DUMP_DATASET="$(DUMP_DATASET)" DUMP_MODE="$(DUMP_MODE)" BACKUP="$(BACKUP)" BACKUP_ROOT="$(BACKUP_ROOT)" scripts/backup/dump.sh + +filter-personnel-dump: ## 전체 백업에서 personnel dataset 백업 추출 + $(MAKE) --no-print-directory ensure-restore-containers RESTORE_SERVICES="$(FILTER_SERVICES)" CONFIRM_RESTORE=baron-sso + BACKUP="$(BACKUP)" OUTPUT_BACKUP="$(OUTPUT_BACKUP)" FILTER_SERVICES="$(FILTER_SERVICES)" scripts/backup/filter_personnel_dump.sh restore: ensure-restore-containers ## 백업 덤프 복구 - RESTORE_INPUT="$(RESTORE_INPUT)" BACKUP="$(BACKUP)" DUMP_FILE="$(DUMP_FILE)" RESTORE_SERVICES="$(RESTORE_SERVICES)" CONFIRM_RESTORE="$(CONFIRM_RESTORE)" ALLOW_NON_EMPTY_RESTORE="$(ALLOW_NON_EMPTY_RESTORE)" RESTORE_REPORT="$(RESTORE_REPORT)" scripts/backup/restore.sh + RESTORE_INPUT="$(RESTORE_INPUT)" BACKUP="$(BACKUP)" DUMP_FILE="$(DUMP_FILE)" RESTORE_SERVICES="$(RESTORE_SERVICES)" RESTORE_DATASET="$(RESTORE_DATASET)" CONFIRM_RESTORE="$(CONFIRM_RESTORE)" ALLOW_NON_EMPTY_RESTORE="$(ALLOW_NON_EMPTY_RESTORE)" RESTORE_REPORT="$(RESTORE_REPORT)" scripts/backup/restore.sh dump-verify: ## 백업 덤프 
검증 BACKUP="$(BACKUP)" scripts/backup/verify-dump.sh @@ -353,7 +365,7 @@ dump-list: ## 사용 가능한 백업 덤프 목록 조회 BACKUP_ROOT="$(BACKUP_ROOT)" scripts/backup/dump-list.sh restore-plan: ## 복구 실행 계획 출력 - RESTORE_INPUT="$(RESTORE_INPUT)" BACKUP="$(BACKUP)" DUMP_FILE="$(DUMP_FILE)" RESTORE_SERVICES="$(RESTORE_SERVICES)" CONFIRM_RESTORE="$(CONFIRM_RESTORE)" RESTORE_REPORT="$(RESTORE_REPORT)" scripts/backup/restore-plan.sh + RESTORE_INPUT="$(RESTORE_INPUT)" BACKUP="$(BACKUP)" DUMP_FILE="$(DUMP_FILE)" RESTORE_SERVICES="$(RESTORE_SERVICES)" RESTORE_DATASET="$(RESTORE_DATASET)" CONFIRM_RESTORE="$(CONFIRM_RESTORE)" RESTORE_REPORT="$(RESTORE_REPORT)" scripts/backup/restore-plan.sh upload-cloud: ## 백업 덤프 클라우드 업로드 $(if $(WORKS_DRIVE_DRY_RUN),WORKS_DRIVE_DRY_RUN="$(WORKS_DRIVE_DRY_RUN)" )$(if $(WORKS_DRIVE_AUTH_MODE),WORKS_DRIVE_AUTH_MODE="$(WORKS_DRIVE_AUTH_MODE)" )BACKUP="$(BACKUP)" scripts/backup/upload_cloud.sh diff --git a/docs/personnel-dataset-backup-staging-restore-design-2026-06-22.md b/docs/personnel-dataset-backup-staging-restore-design-2026-06-22.md new file mode 100644 index 00000000..2506a496 --- /dev/null +++ b/docs/personnel-dataset-backup-staging-restore-design-2026-06-22.md @@ -0,0 +1,225 @@ +# Production Personnel Dataset Backup and Staging Restore Design + +## Estimate Time + +Estimate Time: 2.5d + +## 목적 + +프로덕션의 "인력정보" 관련 데이터만 주기적으로 백업하고, 스테이징에 복구해 운영 데이터에 가까운 검증 환경을 구축한다. 이 기능은 재해 복구용 full backup이 아니라 staging rehearsal용 논리 데이터셋 이관 기능이다. + +기존 `make dump`/`make restore`는 저장소 단위 full backup을 목표로 한다. 이번 기능은 Hydra와 RP 정보를 제외해야 하므로 저장소 단위 필터인 `DUMP_SERVICES=postgres,ory-postgres`만으로는 충분하지 않다. 별도의 dataset profile을 두어 테이블/행/민감값을 명시적으로 제한해야 한다. + +## 정책 전제 + +- Ory Stack은 identity, authorization, OAuth/OIDC 상태의 SoT다. +- Backend DB의 `users`는 Ory에 저장되지 않거나 Ory API만으로 조회하기 어려운 Baron 운영 read model이다. +- Hydra client, consent, token/session state와 Baron RP metadata는 이번 데이터셋 대상이 아니다. +- staging restore는 운영 복구가 아니므로 production credential/session을 그대로 들고 오지 않는다. 
+- Wiki는 사용 중이지만, 정책 업데이트 초안은 `docs/` 문서로 남기고 사람이 검토 후 Wiki에 반영한다. + +## 제안 인터페이스 + +기존 백업 명령을 유지하면서 dataset profile을 추가한다. + +```bash +make dump DUMP_SERVICES=postgres,ory-postgres DUMP_DATASET=personnel DUMP_MODE=maintenance +make restore RESTORE_SERVICES=postgres,ory-postgres RESTORE_DATASET=personnel CONFIRM_RESTORE=baron-sso +make restore-plan RESTORE_DATASET=personnel BACKUP=backups/baron-sso-backup-YYYYMMDD-HHMMSSZ +``` + +운영 자동화에서는 production에서 full backup만 만들고, staging에서 받은 full backup을 personnel dataset으로 필터링한 뒤 복구한다. + +```bash +# Production +make dump DUMP_SERVICES=all DUMP_MODE=maintenance + +# Staging +make filter-personnel-dump \ + BACKUP=backups/prod-full-backup-YYYYMMDD-HHMMSSZ \ + OUTPUT_BACKUP=backups/prod-personnel-filtered-YYYYMMDD-HHMMSSZ + +make restore \ + BACKUP=backups/prod-personnel-filtered-YYYYMMDD-HHMMSSZ \ + RESTORE_DATASET=personnel \ + RESTORE_SERVICES=postgres \ + CONFIRM_RESTORE=baron-sso +``` + +`filter-personnel-dump`는 full backup의 `baron.dump`, `ory_kratos.dump`, `ory_keto.dump`를 staging scratch DB에 일시 복원한 뒤 `personnel` JSONL dataset을 생성한다. Hydra dump와 RP metadata는 filtered backup에 복사하지 않는다. 
+ +추가 환경 변수: + +| 변수 | 기본값 | 의미 | +| --- | --- | --- | +| `DUMP_DATASET` | `full` | `full`은 기존 동작, `personnel`은 인력정보 논리 데이터셋 | +| `RESTORE_DATASET` | manifest 기준 | 복구할 dataset profile | +| `FILTER_SERVICES` | `postgres,ory-postgres` | full backup에서 personnel dataset으로 필터링할 서비스 범위 | +| `OUTPUT_BACKUP` | empty | `filter-personnel-dump`의 personnel dataset 출력 경로 | +| `PERSONNEL_TENANT_ROOT_SLUGS` | empty | 비어 있으면 전체 인력정보, 값이 있으면 지정 tenant root 하위만 | +| `PERSONNEL_INCLUDE_KRATOS_IDENTITIES` | `true` | staging 로그인/subject 일치 검증이 필요할 때 Kratos identity subset 포함 | +| `PERSONNEL_RESET_CREDENTIALS` | `true` | Kratos credential/session을 production 그대로 복구하지 않도록 강제 | +| `PERSONNEL_INCLUDE_WORKSMOBILE_MAPPING` | `true` | WORKS externalKey 비교와 조직도 검증용 mapping 포함 | +| `PERSONNEL_INCLUDE_OUTBOX` | `false` | queue state는 기본 제외, 장애 재현 시에만 별도 허용 | + +`DUMP_DATASET=personnel`은 `DUMP_SERVICES=all`에서도 Hydra DB와 RP metadata를 포함하지 않도록 내부적으로 차단한다. 사용자가 `DUMP_DATASET=personnel DUMP_SERVICES=ory-postgres`를 지정해도 `ory_hydra` dump는 생성하지 않는다. + +## 백업 산출물 구조 + +```text +baron-sso-backup-YYYYMMDD-HHMMSSZ/ + manifest.json + checksums.sha256 + datasets/ + personnel/ + dataset-manifest.json + postgres/ + users.jsonl + user_login_ids.jsonl + tenants.jsonl + tenant_domains.jsonl + user_groups.jsonl + worksmobile_resource_mappings.jsonl + ory_kratos/ + identities.jsonl + identity_credentials.reset-plan.jsonl + ory_keto/ + relation_tuples.jsonl + reports/ + row-counts.json + exclusions.json + restore-plan.md +``` + +`personnel` dataset은 `pg_dump -Fc`만으로 만들지 않는다. 행 필터, 민감값 제거, RP/Hydra 제외를 안전하게 보장하려면 `COPY (SELECT ...) TO STDOUT` 또는 `psql` JSONL export를 사용한다. full backup은 기존 `postgres/baron.dump`, `postgres/ory_*.dump` 형식을 그대로 유지한다. + +## 포함 범위 + +### Baron Postgres 포함 + +| 테이블/데이터 | 포함 이유 | 처리 | +| --- | --- | --- | +| `users` | 인력 기본 정보, 상태, 조직 표시 read model | 포함. 
`relying_party_id`는 null 처리 또는 제외 검증 | +| `user_login_ids` | 사번/로그인 ID 등 인력 식별자 | 포함 | +| `tenants` | 회사/조직/사용자 그룹 계층 | `COMPANY_GROUP`, `COMPANY`, `ORGANIZATION`, `USER_GROUP` 중심 포함 | +| `tenant_domains` | 회사 도메인 기반 소속 판단 | 포함 | +| `user_groups` | 조직도/부서 계층 | 포함 | +| `worksmobile_resource_mappings` | WORKS externalKey 기반 비교/동기화 기준 | `USER`, `ORGUNIT`만 포함 | + +### Ory Kratos 포함 + +`PERSONNEL_INCLUDE_KRATOS_IDENTITIES=true`일 때만 포함한다. + +- `identities`: subject UUID와 traits 기반 식별을 위해 포함한다. +- credential/session/recovery/verifiable address는 production 값을 그대로 복구하지 않는다. +- restore 단계에서 staging용 임시 credential 정책 또는 password reset-required 상태를 만든다. +- Kratos DB 직접 조작은 일반 write path가 아니므로 maintenance guard와 별도 확인값을 요구한다. + +### Ory Keto 포함 + +- 사용자, tenant, user group membership/ownership 관계 tuple만 포함한다. +- RP namespace 또는 RP object를 참조하는 tuple은 제외한다. +- restore 후 Keto relation tuple subject/object가 복구된 user/tenant/group만 참조하는지 검증한다. + +## 제외 범위 + +| 제외 대상 | 이유 | +| --- | --- | +| Hydra DB 전체 | OAuth2 client, consent, token/session은 staging RP 상태를 오염시킬 수 있음 | +| Baron RP metadata | 사용자가 명시한 비대상이며 staging RP 설정은 별도 관리 대상 | +| `rp_user_metadata` | RP별 custom claim 데이터라 personnel 공통 데이터가 아님 | +| `client_consents` | Hydra/RP consent read model 성격 | +| API key/client secret류 | staging secret과 충돌 위험 | +| audit/clickhouse logs | 인력정보 환경 구축의 필수 원장이 아님 | +| `worksmobile_outbox` | 큐 처리 상태라 반복 restore 시 중복 작업 위험 | +| Redis | 휘발성 cache/session | + +## Restore 전략 + +staging restore는 DB 전체 drop/restore가 아니라 scoped replace 방식으로 설계한다. + +1. restore 전 `restore-plan`을 생성해 포함/제외 테이블, row count, tenant scope, Hydra/RP exclusion을 표시한다. +2. backend worker, WORKS relay, Keto outbox relay를 중지한다. +3. dataset을 staging scratch schema 또는 임시 DB에 적재한다. +4. row count, foreign key, soft-delete, tenant hierarchy, user-login collision을 검증한다. +5. staging의 Hydra/RP 관련 테이블과 설정은 건드리지 않는다. +6. Baron personnel tables를 dependency order에 따라 replace/upsert한다. +7. 
Kratos identity subset을 포함한 경우 production credential/session을 제거하고 staging credential policy를 적용한다. +8. Keto personnel tuple만 replace/upsert하고 RP tuple은 보존한다. +9. `restore-verify`에서 다음 항목을 검증한다. + +검증 항목: + +- `users.id`와 Kratos `identities.id` 일치 여부 +- `user_login_ids.user_id`, `tenant_id` 참조 무결성 +- `tenants.parent_id`, `user_groups.parent_id` 계층 무결성 +- `users.relying_party_id`가 남아 있지 않은지 +- `rp_user_metadata`, Hydra dump 파일, Hydra restore step이 생성되지 않았는지 +- Keto tuple이 복구 대상 user/tenant/group만 참조하는지 +- WORKS mapping의 Baron resource 참조가 존재하는지 + +## 구현 위치 + +제안 파일: + +- `scripts/backup/lib/dataset.sh`: dataset profile validation과 공통 manifest helper +- `scripts/backup/lib/personnel_dataset.sh`: personnel export/import SQL, exclusion guard +- `scripts/backup/dump.sh`: `DUMP_DATASET` 분기 추가 +- `scripts/backup/restore.sh`: `RESTORE_DATASET` 분기와 scoped restore 추가 +- `scripts/backup/restore-plan.sh`: dataset restore plan 출력 +- `scripts/backup/lib/report.sh`: dataset row-count/exclusion report 표시 +- `Makefile`: `DUMP_DATASET`, `RESTORE_DATASET`, `PERSONNEL_*` 변수 전달 + +## 테스트 계획 + +구현 전 RED 테스트를 먼저 추가한다. + +1. `test/personnel_dataset_backup_policy_test.sh` + - `DUMP_DATASET=personnel` dry-run 또는 fixture run에서 Hydra dump가 계획되지 않는지 확인한다. + - `rp_user_metadata`, `client_consents`, `relying_parties` 계열 데이터가 dataset manifest에 포함되지 않는지 확인한다. + - unknown dataset profile을 거부하는지 확인한다. + +2. `test/personnel_dataset_restore_policy_test.sh` + - `RESTORE_DATASET=personnel`이 `CONFIRM_RESTORE=baron-sso` 없이 실패하는지 확인한다. + - restore-plan에 포함/제외 테이블과 credential reset policy가 표시되는지 확인한다. + - non-empty target guard와 scoped restore guard가 동시에 동작하는지 확인한다. + +3. `scripts/backup/lib/personnel_dataset.sh` 단위 shell test + - tenant root scope가 있을 때 users/tenants/user_groups 쿼리가 같은 scope로 제한되는지 확인한다. + - `PERSONNEL_INCLUDE_OUTBOX=false` 기본값에서 outbox가 제외되는지 확인한다. + +4. 통합 테스트 + - fixture Postgres에 users/tenants/RP/Hydra 유사 데이터를 넣는다. + - personnel dataset dump를 실행한다. + - 빈 staging fixture DB에 restore한다. 
+ - 인력정보는 들어오고 Hydra/RP/consent/custom claim 데이터는 남지 않는지 검증한다. + +5. E2E 또는 smoke + - staging restore 후 AdminFront/User list와 OrgFront 조직도 조회가 정상인지 확인한다. + - 화면 변화가 있는 기능은 아니므로 스냅샷 업로드는 구현 범위에서 제외한다. 단, 복구 후 조직도 화면 검증이 필요하면 별도 E2E 이슈로 분리한다. + +## 위험과 결정 필요 사항 + +1. Kratos identity 포함 여부 + - 포함하면 staging에서 production과 같은 subject UUID로 검증할 수 있다. + - 대신 credential/session 민감값 제거가 필수이고, Kratos DB maintenance 예외 정책을 문서화해야 한다. + - 제외하면 인력/조직 화면은 검증 가능하지만 실제 로그인 subject 일치 검증은 제한된다. + +2. restore 방식 + - table replace는 단순하지만 staging 고유 사용자와 충돌할 수 있다. + - upsert는 staging 고유 데이터 보존에 유리하지만 삭제/퇴사 반영 정책이 복잡해진다. + - 기본안은 `PERSONNEL_RESTORE_MODE=replace-scoped`로 두고, staging 고유 tenant slug allowlist는 보호한다. + +3. WORKS mapping 포함 여부 + - externalKey 비교를 위해 포함하는 것이 좋다. + - outbox는 중복 실행 위험이 있어 기본 제외가 맞다. + +## 구현 순서 + +1. Gitea 이슈에 본 설계와 테스트 RED 계획을 등록한다. +2. RED 테스트를 먼저 추가하고 실패를 확인한다. +3. dataset profile validation과 manifest/exclusion guard를 구현한다. +4. Baron Postgres personnel export/import를 구현한다. +5. Kratos/Keto subset 처리는 guard와 reset policy를 먼저 구현한 뒤 활성화한다. +6. `make restore-plan`, report, verification을 보강한다. +7. 테스트 통과 후 문서와 Wiki 반영 필요 여부를 검토한다. 
diff --git a/scripts/backup/dump.sh b/scripts/backup/dump.sh index 8ad7cfed..029a692c 100755 --- a/scripts/backup/dump.sh +++ b/scripts/backup/dump.sh @@ -8,15 +8,17 @@ source "$script_dir/lib/postgres.sh" source "$script_dir/lib/clickhouse.sh" source "$script_dir/lib/config.sh" source "$script_dir/lib/report.sh" +source "$script_dir/lib/personnel_dataset.sh" repo_root="$(backup_repo_root)" services="$(normalize_service_filter "${DUMP_SERVICES:-all}")" +dataset="$(normalize_dataset_profile "${DUMP_DATASET:-full}")" mode="${DUMP_MODE:-maintenance}" backup_root="${BACKUP_ROOT:-$repo_root/backups}" backup_dir="${BACKUP:-$backup_root/baron-sso-backup-$(backup_timestamp)}" mkdir -p "$backup_dir/reports" -create_manifest "$backup_dir" "$mode" "$services" +create_manifest "$backup_dir" "$mode" "$services" "$dataset" service_timings_json="[]" run_backup_step() { @@ -40,26 +42,31 @@ run_backup_step() { backup_log "Creating backup at $backup_dir" backup_log "Backup mode: $mode" +backup_log "Dataset: $dataset" backup_log "Services: $services" -if service_enabled postgres "$services"; then - run_backup_step postgres dump_baron_postgres "$backup_dir" -fi +if [[ "$dataset" == "personnel" ]]; then + run_backup_step personnel dump_personnel_dataset "$backup_dir" "$services" +else + if service_enabled postgres "$services"; then + run_backup_step postgres dump_baron_postgres "$backup_dir" + fi -if service_enabled ory-postgres "$services"; then - run_backup_step ory-postgres dump_ory_postgres "$backup_dir" -fi + if service_enabled ory-postgres "$services"; then + run_backup_step ory-postgres dump_ory_postgres "$backup_dir" + fi -if service_enabled clickhouse "$services"; then - run_backup_step clickhouse dump_baron_clickhouse "$backup_dir" -fi + if service_enabled clickhouse "$services"; then + run_backup_step clickhouse dump_baron_clickhouse "$backup_dir" + fi -if service_enabled ory-clickhouse "$services"; then - run_backup_step ory-clickhouse dump_ory_clickhouse "$backup_dir" -fi 
+ if service_enabled ory-clickhouse "$services"; then + run_backup_step ory-clickhouse dump_ory_clickhouse "$backup_dir" + fi -if service_enabled config "$services"; then - run_backup_step config dump_config_snapshot "$backup_dir" + if service_enabled config "$services"; then + run_backup_step config dump_config_snapshot "$backup_dir" + fi fi write_backup_markdown_report "$backup_dir" "succeeded" "$services" "$service_timings_json" diff --git a/scripts/backup/filter_personnel_dump.sh b/scripts/backup/filter_personnel_dump.sh new file mode 100755 index 00000000..cb5b5fb5 --- /dev/null +++ b/scripts/backup/filter_personnel_dump.sh @@ -0,0 +1,106 @@ +#!/usr/bin/env bash +set -Eeuo pipefail + +script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +source "$script_dir/lib/common.sh" +source "$script_dir/lib/manifest.sh" +source "$script_dir/lib/postgres.sh" +source "$script_dir/lib/config.sh" +source "$script_dir/lib/report.sh" +source "$script_dir/lib/personnel_dataset.sh" + +repo_root="$(backup_repo_root)" +source_backup="${BACKUP:-${RESTORE_INPUT:-${FILE_PATH:-}}}" +services="$(normalize_service_filter "${FILTER_SERVICES:-postgres,ory-postgres}")" +output_backup="${OUTPUT_BACKUP:-$repo_root/backups/baron-sso-personnel-filtered-$(backup_timestamp)}" +scratch_suffix="$(date -u '+%Y%m%d%H%M%S')_$$" +scratch_baron="baron_personnel_filter_${scratch_suffix}" +scratch_kratos="ory_kratos_personnel_filter_${scratch_suffix}" +scratch_keto="ory_keto_personnel_filter_${scratch_suffix}" + +cleanup_filter_scratch() { + if [[ -n "${scratch_baron:-}" ]]; then + docker exec -e "PGPASSWORD=${DB_PASSWORD:-password}" baron_postgres \ + psql -U "${DB_USER:-baron}" -d postgres -v ON_ERROR_STOP=1 \ + -c "drop database if exists ${scratch_baron} with (force)" >/dev/null 2>&1 || true + fi + if [[ -n "${scratch_kratos:-}" || -n "${scratch_keto:-}" ]]; then + docker exec -e "PGPASSWORD=${ORY_POSTGRES_PASSWORD:-secret}" ory_postgres \ + psql -U "${ORY_POSTGRES_USER:-ory}" -d postgres -v 
ON_ERROR_STOP=1 \ + -c "drop database if exists ${scratch_kratos} with (force)" \ + -c "drop database if exists ${scratch_keto} with (force)" >/dev/null 2>&1 || true + fi +} + +trap cleanup_filter_scratch EXIT + +quote_pg_database() { + local raw="$1" + printf '"%s"' "${raw//\"/\"\"}" +} + +create_scratch_database() { + local container="$1" + local user="$2" + local password="$3" + local database="$4" + local database_ident + + database_ident="$(quote_pg_database "$database")" + docker exec -e "PGPASSWORD=$password" "$container" \ + psql -U "$user" -d postgres -v ON_ERROR_STOP=1 \ + -c "drop database if exists ${database_ident} with (force)" \ + -c "create database ${database_ident}" +} + +restore_custom_dump_to_scratch() { + local container="$1" + local user="$2" + local password="$3" + local database="$4" + local dump_path="$5" + + backup_require_path "$dump_path" + docker exec -i -e "PGPASSWORD=$password" "$container" \ + pg_restore -U "$user" -d "$database" --clean --if-exists <"$dump_path" +} + +[[ -n "$source_backup" ]] || backup_die "BACKUP is required. 
Example: make filter-personnel-dump BACKUP=backups/full OUTPUT_BACKUP=backups/personnel" +backup_require_path "$source_backup/manifest.json" +backup_require_command docker +backup_require_container baron_postgres +backup_require_container ory_postgres + +if [[ "$(jq -r '.dataset // "full"' "$source_backup/manifest.json")" == "personnel" ]]; then + backup_die "source BACKUP is already a personnel dataset: $source_backup" +fi + +backup_log "Filtering personnel dataset from full backup: $source_backup" +backup_log "Output backup: $output_backup" +backup_log "Services: $services" + +mkdir -p "$output_backup/reports" +create_manifest "$output_backup" "filtered-from-full" "$services" "personnel" +write_personnel_dataset_manifest "$output_backup" "$services" + +if service_enabled postgres "$services"; then + backup_log "Restoring Baron full dump to scratch DB: $scratch_baron" + create_scratch_database baron_postgres "${DB_USER:-baron}" "${DB_PASSWORD:-password}" "$scratch_baron" + restore_custom_dump_to_scratch baron_postgres "${DB_USER:-baron}" "${DB_PASSWORD:-password}" "$scratch_baron" "$source_backup/postgres/baron.dump" + DB_NAME="$scratch_baron" dump_personnel_baron_postgres "$output_backup" +fi + +if service_enabled ory-postgres "$services"; then + backup_log "Restoring Kratos/Keto full dumps to scratch DBs: $scratch_kratos, $scratch_keto" + create_scratch_database ory_postgres "${ORY_POSTGRES_USER:-ory}" "${ORY_POSTGRES_PASSWORD:-secret}" "$scratch_kratos" + create_scratch_database ory_postgres "${ORY_POSTGRES_USER:-ory}" "${ORY_POSTGRES_PASSWORD:-secret}" "$scratch_keto" + restore_custom_dump_to_scratch ory_postgres "${ORY_POSTGRES_USER:-ory}" "${ORY_POSTGRES_PASSWORD:-secret}" "$scratch_kratos" "$source_backup/postgres/${KRATOS_DB:-ory_kratos}.dump" + restore_custom_dump_to_scratch ory_postgres "${ORY_POSTGRES_USER:-ory}" "${ORY_POSTGRES_PASSWORD:-secret}" "$scratch_keto" "$source_backup/postgres/${KETO_DB:-ory_keto}.dump" + KRATOS_DB="$scratch_kratos" 
KETO_DB="$scratch_keto" dump_personnel_ory_postgres "$output_backup" +fi + +write_backup_markdown_report "$output_backup" "succeeded" "$services" "[]" +backup_checksum_file "$output_backup" +BACKUP="$output_backup" "$script_dir/verify-dump.sh" + +backup_log "Personnel filtered backup complete: $output_backup" diff --git a/scripts/backup/lib/common.sh b/scripts/backup/lib/common.sh index e9917376..0908cdf9 100644 --- a/scripts/backup/lib/common.sh +++ b/scripts/backup/lib/common.sh @@ -1,6 +1,7 @@ #!/usr/bin/env bash BACKUP_SUPPORTED_SERVICES="postgres ory-postgres clickhouse ory-clickhouse config" +BACKUP_SUPPORTED_DATASETS="full personnel" backup_repo_root() { if [[ -n "${BACKUP_REPO_ROOT:-}" ]]; then @@ -65,6 +66,20 @@ normalize_service_filter() { printf '%s\n' "$normalized" } +# Validate a dataset profile name against BACKUP_SUPPORTED_DATASETS and print it; an empty name means "full". +# The name is compared literally as one whole word, so regex-like or multi-word input (".*", "full personnel") is rejected. +normalize_dataset_profile() { + local raw="${1:-full}" + + [[ -n "$raw" ]] || raw="full" + if [[ "$raw" == *[[:space:]]* || " $BACKUP_SUPPORTED_DATASETS " != *" $raw "* ]]; then + backup_die "unknown backup dataset: $raw" + return 1 + fi + + printf '%s\n' "$raw" +} + service_enabled() { local service="$1" local services="$2" diff --git a/scripts/backup/lib/manifest.sh b/scripts/backup/lib/manifest.sh index c67a83ae..6e97129c 100644 --- a/scripts/backup/lib/manifest.sh +++ b/scripts/backup/lib/manifest.sh @@ -4,11 +4,17 @@ create_manifest() { local backup_dir="$1" local mode="$2" local services="$3" + local dataset="${4:-full}" local repo_root local created_at local git_commit local service local first=1 + local environment_scope="same-env-only" + + if [[ "$dataset" == "personnel" ]]; then + environment_scope="staging-rehearsal" + fi repo_root="$(backup_repo_root)" created_at="$(backup_utc_now)" @@ -20,7 +26,8 @@ create_manifest() { printf ' "created_at": "%s",\n' "$created_at" printf ' "git_commit": "%s",\n' "$git_commit" printf ' "mode": "%s",\n' "$mode" - printf ' "environment_scope": "same-env-only",\n' + printf ' "dataset": "%s",\n' "$dataset" + printf ' "environment_scope": "%s",\n' "$environment_scope" 
printf ' "services": [' for service in $services; do if [[ "$first" -eq 1 ]]; then diff --git a/scripts/backup/lib/personnel_dataset.sh b/scripts/backup/lib/personnel_dataset.sh new file mode 100644 index 00000000..77dc3c08 --- /dev/null +++ b/scripts/backup/lib/personnel_dataset.sh @@ -0,0 +1,367 @@ +#!/usr/bin/env bash + +personnel_dataset_dir() { + local backup_dir="$1" + printf '%s\n' "$backup_dir/datasets/personnel" +} + +write_personnel_dataset_manifest() { + local backup_dir="$1" + local services="$2" + local dataset_dir + local reports_dir + local include_kratos + local reset_credentials + local include_worksmobile_mapping + local include_outbox + local tenant_roots + + dataset_dir="$(personnel_dataset_dir "$backup_dir")" + reports_dir="$dataset_dir/reports" + include_kratos="${PERSONNEL_INCLUDE_KRATOS_IDENTITIES:-true}" + reset_credentials="${PERSONNEL_RESET_CREDENTIALS:-true}" + include_worksmobile_mapping="${PERSONNEL_INCLUDE_WORKSMOBILE_MAPPING:-true}" + include_outbox="${PERSONNEL_INCLUDE_OUTBOX:-false}" + tenant_roots="${PERSONNEL_TENANT_ROOT_SLUGS:-}" + + mkdir -p "$reports_dir" + jq -n \ + --arg dataset "personnel" \ + --arg services "$services" \ + --arg tenant_roots "$tenant_roots" \ + --arg include_kratos "$include_kratos" \ + --arg reset_credentials "$reset_credentials" \ + --arg include_worksmobile_mapping "$include_worksmobile_mapping" \ + --arg include_outbox "$include_outbox" \ + '{ + dataset: $dataset, + format_version: "1", + services: ($services | split(" ") | map(select(length > 0))), + scope: { + tenant_root_slugs: (if $tenant_roots == "" then [] else ($tenant_roots | split(",") | map(gsub("^\\s+|\\s+$"; ""))) end) + }, + included: { + baron_postgres_tables: [ + "public.tenants", + "public.tenant_domains", + "public.user_groups", + "public.users", + "public.user_login_ids", + "public.worksmobile_resource_mappings" + ], + ory_kratos_tables: (if $include_kratos == "true" then ["public.identities"] else [] end), + ory_keto_tables: 
["public.keto_relation_tuples"] + }, + excluded: { + databases: ["ory_hydra"], + tables: [ + "public.relying_parties", + "public.rp_user_metadata", + "public.client_consents", + "public.client_secrets", + "public.api_keys", + "public.worksmobile_outboxes" + ], + volatile: ["redis", "sessions", "oauth2_tokens", "audit_logs"] + }, + restore_policy: { + reset_credentials: ($reset_credentials == "true"), + include_worksmobile_mapping: ($include_worksmobile_mapping == "true"), + include_outbox: ($include_outbox == "true"), + hydra_restore: false, + rp_metadata_restore: false, + default_mode: "replace-dataset-rows" + } + }' >"$dataset_dir/dataset-manifest.json" +} + +personnel_psql_jsonl() { + local container="$1" + local user="$2" + local password="$3" + local database="$4" + local sql="$5" + local output_file="$6" + + docker exec -e "PGPASSWORD=$password" "$container" \ + psql -U "$user" -d "$database" -At -v ON_ERROR_STOP=1 -c "$sql" \ + >"$output_file" +} + +personnel_write_count() { + local report_file="$1" + local key="$2" + local file_path="$3" + local count + + if [[ -f "$file_path" ]]; then + count="$(wc -l <"$file_path" | tr -d '[:space:]')" + else + count="0" + fi + printf '%s:%s\n' "$key" "$count" >>"$report_file" +} + +dump_personnel_baron_postgres() { + local backup_dir="$1" + local db_user="${DB_USER:-baron}" + local db_password="${DB_PASSWORD:-password}" + local db_name="${DB_NAME:-baron_sso}" + local dataset_dir + local output_dir + local report_file + + backup_require_command docker + backup_require_container baron_postgres + + dataset_dir="$(personnel_dataset_dir "$backup_dir")" + output_dir="$dataset_dir/postgres" + report_file="$dataset_dir/reports/row-counts.txt" + mkdir -p "$output_dir" "$dataset_dir/reports" + : >"$report_file" + + backup_log "Dumping personnel Baron Postgres dataset: $db_name" + personnel_psql_jsonl baron_postgres "$db_user" "$db_password" "$db_name" \ + "select to_jsonb(t)::text from public.tenants t order by t.id" \ + 
"$output_dir/tenants.jsonl" + personnel_psql_jsonl baron_postgres "$db_user" "$db_password" "$db_name" \ + "select to_jsonb(t)::text from public.tenant_domains t order by t.id" \ + "$output_dir/tenant_domains.jsonl" + personnel_psql_jsonl baron_postgres "$db_user" "$db_password" "$db_name" \ + "select to_jsonb(t)::text from public.user_groups t order by t.id" \ + "$output_dir/user_groups.jsonl" + personnel_psql_jsonl baron_postgres "$db_user" "$db_password" "$db_name" \ + "select (to_jsonb(t) - 'password_hash' - 'relying_party_id' || jsonb_build_object('relying_party_id', null))::text from public.users t order by t.id" \ + "$output_dir/users.jsonl" + personnel_psql_jsonl baron_postgres "$db_user" "$db_password" "$db_name" \ + "select to_jsonb(t)::text from public.user_login_ids t order by t.id" \ + "$output_dir/user_login_ids.jsonl" + + if [[ "${PERSONNEL_INCLUDE_WORKSMOBILE_MAPPING:-true}" == "true" ]]; then + personnel_psql_jsonl baron_postgres "$db_user" "$db_password" "$db_name" \ + "select to_jsonb(t)::text from public.worksmobile_resource_mappings t where t.baron_resource_type in ('USER', 'ORGUNIT') order by t.id" \ + "$output_dir/worksmobile_resource_mappings.jsonl" + else + : >"$output_dir/worksmobile_resource_mappings.jsonl" + fi + + personnel_write_count "$report_file" "public.tenants" "$output_dir/tenants.jsonl" + personnel_write_count "$report_file" "public.tenant_domains" "$output_dir/tenant_domains.jsonl" + personnel_write_count "$report_file" "public.user_groups" "$output_dir/user_groups.jsonl" + personnel_write_count "$report_file" "public.users" "$output_dir/users.jsonl" + personnel_write_count "$report_file" "public.user_login_ids" "$output_dir/user_login_ids.jsonl" + personnel_write_count "$report_file" "public.worksmobile_resource_mappings" "$output_dir/worksmobile_resource_mappings.jsonl" +} + +dump_personnel_ory_postgres() { + local backup_dir="$1" + local db_user="${ORY_POSTGRES_USER:-ory}" + local 
db_password="${ORY_POSTGRES_PASSWORD:-secret}" + local kratos_db="${KRATOS_DB:-ory_kratos}" + local keto_db="${KETO_DB:-ory_keto}" + local dataset_dir + local report_file + + backup_require_command docker + backup_require_container ory_postgres + + dataset_dir="$(personnel_dataset_dir "$backup_dir")" + report_file="$dataset_dir/reports/row-counts.txt" + mkdir -p "$dataset_dir/ory_kratos" "$dataset_dir/ory_keto" "$dataset_dir/reports" + + if [[ "${PERSONNEL_INCLUDE_KRATOS_IDENTITIES:-true}" == "true" ]]; then + backup_log "Dumping personnel Kratos identity subset: $kratos_db" + personnel_psql_jsonl ory_postgres "$db_user" "$db_password" "$kratos_db" \ + "select (to_jsonb(i) - 'metadata_admin')::text from public.identities i order by i.id" \ + "$dataset_dir/ory_kratos/identities.jsonl" + else + : >"$dataset_dir/ory_kratos/identities.jsonl" + fi + jq -cn '{policy:"reset_credentials", credentials:[]}' >"$dataset_dir/ory_kratos/identity_credentials.reset-plan.jsonl" + + backup_log "Dumping personnel Keto relation tuple subset: $keto_db" + personnel_psql_jsonl ory_postgres "$db_user" "$db_password" "$keto_db" \ + "select to_jsonb(t)::text from public.keto_relation_tuples t where t.namespace <> 'RelyingParty' and coalesce(t.subject_set_namespace, '') <> 'RelyingParty' order by t.namespace, t.object, t.relation, t.subject_id" \ + "$dataset_dir/ory_keto/relation_tuples.jsonl" + + personnel_write_count "$report_file" "public.identities" "$dataset_dir/ory_kratos/identities.jsonl" + personnel_write_count "$report_file" "public.keto_relation_tuples" "$dataset_dir/ory_keto/relation_tuples.jsonl" +} + +dump_personnel_dataset() { + local backup_dir="$1" + local services="$2" + + write_personnel_dataset_manifest "$backup_dir" "$services" + if service_enabled postgres "$services"; then + dump_personnel_baron_postgres "$backup_dir" + fi + if service_enabled ory-postgres "$services"; then + dump_personnel_ory_postgres "$backup_dir" + fi + if service_enabled config "$services"; then + 
dump_config_snapshot "$backup_dir"
+  fi
+}
+
+personnel_dataset_manifest_path() { # Print the path of the personnel dataset manifest inside backup dir $1.
+  local backup_dir="$1"
+  printf '%s\n' "$backup_dir/datasets/personnel/dataset-manifest.json"
+}
+
+restore_personnel_plan_policy_json() { # Print the manifest's policy fields as one compact JSON object, or {} when the manifest is absent.
+  local backup_dir="$1"
+  local manifest_path
+
+  manifest_path="$(personnel_dataset_manifest_path "$backup_dir")"
+  if [[ -f "$manifest_path" ]]; then
+    jq -c '{dataset, included, excluded, restore_policy, scope}' "$manifest_path"
+  else
+    printf '{}\n'
+  fi
+}
+
+personnel_restore_jsonl_id_table() { # Upsert JSONL rows into public.<table> keyed on id. Args: container user password database table input_file.
+  local container="$1"
+  local user="$2"
+  local password="$3"
+  local database="$4"
+  local table="$5"
+  local input_file="$6"
+  local scratch
+  local columns
+  local updates
+
+  backup_require_path "$input_file"
+  if [[ ! -s "$input_file" ]]; then # an empty export is a valid "nothing to restore"
+    backup_log "Skipping empty personnel dataset table: public.$table"
+    return 0
+  fi
+
+  scratch="_personnel_restore_${table}_json" # staging table; each docker exec below is a separate session
+  columns="$(docker exec -e "PGPASSWORD=$password" "$container" \
+    psql -U "$user" -d "$database" -At -v ON_ERROR_STOP=1 \
+    -c "select string_agg(quote_ident(column_name), ', ' order by ordinal_position) from information_schema.columns where table_schema = 'public' and table_name = '$table'")"
+  updates="$(docker exec -e "PGPASSWORD=$password" "$container" \
+    psql -U "$user" -d "$database" -At -v ON_ERROR_STOP=1 \
+    -c "select string_agg(format('%I = excluded.%I', column_name, column_name), ', ' order by ordinal_position) from information_schema.columns where table_schema = 'public' and table_name = '$table' and column_name <> 'id'")"
+
+  [[ -n "$columns" ]] || backup_die "cannot resolve columns for personnel restore table: public.$table"
+  [[ -n "$updates" ]] || backup_die "cannot resolve update columns for personnel restore table: public.$table"
+
+  backup_log "Restoring personnel dataset table: public.$table"
+  docker exec -e "PGPASSWORD=$password" "$container" \
+    psql -U "$user" -d "$database" -v ON_ERROR_STOP=1 \
+    -c "drop table if exists public.${scratch}" \
+    -c "create table public.${scratch} (line jsonb not null)"
+  docker exec -i -e "PGPASSWORD=$password" "$container" \
+    psql -U "$user" -d "$database" -v ON_ERROR_STOP=1 \
+    -c "\\copy public.${scratch}(line) from stdin" <"$input_file"
+  docker exec -e "PGPASSWORD=$password" "$container" \
+    psql -U "$user" -d "$database" -v ON_ERROR_STOP=1 \
+    -c "insert into public.${table} (${columns}) select ${columns} from (select (jsonb_populate_record(null::public.${table}, line)).* from public.${scratch}) r on conflict (id) do update set ${updates}" \
+    -c "drop table public.${scratch}"
+}
+
+restore_personnel_baron_postgres() { # Restore the personnel tables of the Baron database from the dataset's postgres/*.jsonl files; $1 = backup dir.
+  local backup_dir="$1"
+  local db_user="${DB_USER:-baron}"
+  local db_password="${DB_PASSWORD:-password}"
+  local db_name="${DB_NAME:-baron_sso}"
+  local input_dir
+
+  input_dir="$(personnel_dataset_dir "$backup_dir")/postgres"
+  backup_require_command docker
+  backup_require_container baron_postgres
+
+  personnel_restore_jsonl_id_table baron_postgres "$db_user" "$db_password" "$db_name" tenants "$input_dir/tenants.jsonl"
+  personnel_restore_jsonl_id_table baron_postgres "$db_user" "$db_password" "$db_name" tenant_domains "$input_dir/tenant_domains.jsonl"
+  personnel_restore_jsonl_id_table baron_postgres "$db_user" "$db_password" "$db_name" user_groups "$input_dir/user_groups.jsonl"
+  personnel_restore_jsonl_id_table baron_postgres "$db_user" "$db_password" "$db_name" users "$input_dir/users.jsonl"
+  personnel_restore_jsonl_id_table baron_postgres "$db_user" "$db_password" "$db_name" user_login_ids "$input_dir/user_login_ids.jsonl"
+  personnel_restore_jsonl_id_table baron_postgres "$db_user" "$db_password" "$db_name" worksmobile_resource_mappings "$input_dir/worksmobile_resource_mappings.jsonl"
+}
+
+restore_personnel_keto_relation_tuples() { # Replace every non-RelyingParty Keto relation tuple with the dataset's tuples; $1 = backup dir.
+  local backup_dir="$1"
+  local db_user="${ORY_POSTGRES_USER:-ory}"
+  local db_password="${ORY_POSTGRES_PASSWORD:-secret}"
+  local keto_db="${KETO_DB:-ory_keto}"
+  local input_file
+  local scratch="_personnel_restore_keto_relation_tuples_json"
+
+  input_file="$(personnel_dataset_dir "$backup_dir")/ory_keto/relation_tuples.jsonl"
+  backup_require_path "$input_file"
+  if [[ ! -s "$input_file" ]]; then
+    backup_log "Skipping empty personnel Keto relation tuple dataset"
+    return 0
+  fi
+
+  backup_log "Restoring personnel Keto relation tuple subset: $keto_db" # delete+insert below share one transaction, so a failed insert cannot leave the tuples deleted
+  docker exec -e "PGPASSWORD=$db_password" ory_postgres \
+    psql -U "$db_user" -d "$keto_db" -v ON_ERROR_STOP=1 \
+    -c "drop table if exists public.${scratch}" \
+    -c "create table public.${scratch} (line jsonb not null)"
+  docker exec -i -e "PGPASSWORD=$db_password" ory_postgres \
+    psql -U "$db_user" -d "$keto_db" -v ON_ERROR_STOP=1 \
+    -c "\\copy public.${scratch}(line) from stdin" <"$input_file"
+  docker exec -e "PGPASSWORD=$db_password" ory_postgres \
+    psql -U "$db_user" -d "$keto_db" -v ON_ERROR_STOP=1 --single-transaction \
+    -c "delete from public.keto_relation_tuples where namespace <> 'RelyingParty' and coalesce(subject_set_namespace, '') <> 'RelyingParty'" \
+    -c "insert into public.keto_relation_tuples select (jsonb_populate_record(null::public.keto_relation_tuples, line)).* from public.${scratch}" \
+    -c "drop table public.${scratch}"
+}
+
+restore_personnel_kratos_identities() { # Upsert the dataset's Kratos identities into public.identities; $1 = backup dir.
+  local backup_dir="$1"
+  local db_user="${ORY_POSTGRES_USER:-ory}"
+  local db_password="${ORY_POSTGRES_PASSWORD:-secret}"
+  local kratos_db="${KRATOS_DB:-ory_kratos}"
+  local input_file
+
+  input_file="$(personnel_dataset_dir "$backup_dir")/ory_kratos/identities.jsonl"
+  backup_require_command docker
+  backup_require_container ory_postgres
+  personnel_restore_jsonl_id_table ory_postgres "$db_user" "$db_password" "$kratos_db" identities "$input_file"
+}
+
+restore_personnel_ory_postgres() { # Restore Kratos identities first (which also checks docker and the container), then Keto tuples.
+  local backup_dir="$1"
+
+  restore_personnel_kratos_identities "$backup_dir"
+  restore_personnel_keto_relation_tuples "$backup_dir"
+}
+
+restore_personnel_dataset() { # Restore each enabled service of the personnel dataset; $1 = backup dir, $2 = service filter. Sets the target_verification_* globals.
+  local backup_dir="$1"
+  local services="$2"
+  local report_items=()
+  local item
+
+  backup_require_path "$(personnel_dataset_manifest_path "$backup_dir")"
+
+  if service_enabled postgres "$services"; then
+    restore_personnel_baron_postgres "$backup_dir"
+    item="$(jq -n --arg label "personnel/postgres" --arg status "passed" '{label:$label, status:$status}')"
+    report_items+=("$item")
+  fi
+
+  if service_enabled ory-postgres "$services"; then
+    restore_personnel_ory_postgres "$backup_dir"
+    item="$(jq -n --arg label "personnel/ory-postgres" --arg status "passed" '{label:$label, status:$status}')"
+    report_items+=("$item")
+  fi
+
+  if service_enabled config "$services"; then
+    restore_config_snapshot "$backup_dir"
+    item="$(jq -n --arg label "personnel/config" --arg status "passed" '{label:$label, status:$status}')"
+    report_items+=("$item")
+  fi
+
+  if [[ "${#report_items[@]}" -eq 0 ]]; then
+    target_verification_reports="[]"
+  else
+    target_verification_reports="$(printf '%s\n' "${report_items[@]}" | jq -s '.')"
+  fi
+  target_verification_status="passed" # NOTE(review): set once the restore commands succeed; no row-count comparison happens in this function
+}
diff --git a/scripts/backup/lib/report.sh b/scripts/backup/lib/report.sh
index 3f396ac4..fc0e0ccf 100644
--- a/scripts/backup/lib/report.sh
+++ b/scripts/backup/lib/report.sh
@@ -130,6 +130,7 @@ write_restore_markdown_report() {
       "| 입력 유형 | \(.backup_source // "unknown") |",
       "| 백업 경로 | `\(.backup_dir // "")` |",
       "| Dump 파일 | `\(.dump_file // "")` |",
+      "| Dataset | `\(.dataset // "full")` |",
       "| 서비스 | `\(services)` |",
       "",
       "## 검증",
@@ -138,6 +139,9 @@ write_restore_markdown_report() {
       "| --- | --- |",
       "| Dump checksum | \(.verification.dump_checksum // "not_run") |",
       "| 대상 row count | \(.verification.target_row_counts // "not_run") |",
+      (if (.dataset // "full") == "personnel" then
+        "| Personnel exclusions | `\((.restore_policy.excluded.databases // []) | join(", "))` / `\((.restore_policy.excluded.tables // []) | join(", "))` |"
+      else empty end),
       "",
       "## 대상별 검증 결과",
       "",
diff --git a/scripts/backup/restore.sh b/scripts/backup/restore.sh
index 3d35657f..a2ebd09a 100755
--- a/scripts/backup/restore.sh
+++ b/scripts/backup/restore.sh
@@ -7,6 +7,7 @@ source "$script_dir/lib/postgres.sh"
 source "$script_dir/lib/clickhouse.sh"
 source "$script_dir/lib/config.sh"
 source "$script_dir/lib/report.sh"
+source "$script_dir/lib/personnel_dataset.sh"
 
 dry_run=false
 if [[ "${1:-}" == "--dry-run" ]]; then
@@ -26,6 +27,7 @@ report_message=""
 dump_checksum_status="not_run"
 target_verification_status="not_run"
 target_verification_reports="[]"
+dataset="full"
 
 json_array_from_words() {
   local words="$1"
@@ -43,6 +45,7 @@ write_restore_report() {
   local finished_at
   local services_json
   local restore_policy_json="{}"
+  local personnel_policy_json="{}"
 
   [[ -n "$report_path" ]] || return 0
 
@@ -51,6 +54,9 @@ write_restore_report() {
   if [[ -n "${backup_dir:-}" && -f "$backup_dir/manifest.json" ]]; then
     restore_policy_json="$(jq -c '.restore_policy // {}' "$backup_dir/manifest.json")"
   fi
+  if [[ "${dataset:-full}" == "personnel" ]]; then
+    personnel_policy_json="$(restore_personnel_plan_policy_json "$backup_dir")"
+  fi
 
   mkdir -p "$(dirname "$report_path")"
   jq -n \
@@ -69,6 +75,8 @@ write_restore_report() {
     --arg target_row_counts "$target_verification_status" \
     --argjson target_reports "$target_verification_reports" \
     --argjson restore_policy "$restore_policy_json" \
+    --arg dataset "${dataset:-full}" \
+    --argjson personnel_policy "$personnel_policy_json" \
     '{
       format_version: $format_version,
       started_at: $started_at,
@@ -78,10 +86,11 @@ write_restore_report() {
       backup_source: $backup_source,
       backup_dir: $backup_dir,
       dump_file: (if $dump_file == "" then null else $dump_file end),
+      dataset: $dataset,
       services: $services,
       allow_non_empty_restore: ($allow_non_empty_restore == "true"),
       dry_run: ($dry_run == "true"),
-      restore_policy: $restore_policy,
+      restore_policy: (if $dataset == "personnel" then $personnel_policy else $restore_policy end),
       verification: {
         dump_checksum: $dump_checksum,
         target_row_counts: $target_row_counts,
@@ -439,6 +448,13 @@ if [[ "${CONFIRM_RESTORE:-}" != "baron-sso" ]]; then
 fi
 
 services="$(normalize_service_filter "${RESTORE_SERVICES:-all}")"
+if [[ -n "${RESTORE_DATASET:-}" ]]; then
+  dataset="$(normalize_dataset_profile "$RESTORE_DATASET")"
+elif [[ -f "$backup_dir/manifest.json" ]]; then
+  dataset="$(normalize_dataset_profile "$(jq -r '.dataset // "full"' "$backup_dir/manifest.json")")"
+else
+  dataset="full"
+fi
 allow_non_empty="${ALLOW_NON_EMPTY_RESTORE:-false}"
 
 if [[ "${RESTORE_TEST_NON_EMPTY:-}" == "1" && "$allow_non_empty" != "true" ]]; then
@@ -447,6 +463,7 @@ fi
 
 if [[ "$dry_run" == "true" ]]; then
   backup_log "Restore plan for $backup_dir"
+  backup_log "Dataset: $dataset"
   backup_log "Services: $services"
   backup_log "ALLOW_NON_EMPTY_RESTORE=$allow_non_empty"
   backup_log "RESTORE_REPORT=$report_path"
@@ -466,27 +483,32 @@ fi
 BACKUP="$backup_dir" "$script_dir/verify-dump.sh"
 dump_checksum_status="passed"
 
-if service_enabled postgres "$services"; then
-  restore_baron_postgres "$backup_dir"
+if [[ "$dataset" == "personnel" ]]; then
+  restore_personnel_dataset "$backup_dir" "$services"
+else
+  if service_enabled postgres "$services"; then
+    restore_baron_postgres "$backup_dir"
+  fi
+
+  if service_enabled ory-postgres "$services"; then
+    restore_ory_postgres "$backup_dir"
+  fi
+
+  if service_enabled clickhouse "$services"; then
+    restore_baron_clickhouse "$backup_dir"
+  fi
+
+  if service_enabled ory-clickhouse "$services"; then
+    restore_ory_clickhouse "$backup_dir"
+  fi
+
+  if service_enabled config "$services"; then
+    restore_config_snapshot "$backup_dir"
+  fi
+
+  verify_restored_targets
 fi
 
-if service_enabled ory-postgres "$services"; then
-  restore_ory_postgres "$backup_dir"
-fi
-
-if service_enabled clickhouse "$services"; then
-  restore_baron_clickhouse "$backup_dir"
-fi
-
-if service_enabled ory-clickhouse "$services"; then
-  restore_ory_clickhouse "$backup_dir"
-fi
-
-if service_enabled config "$services"; then
-  restore_config_snapshot "$backup_dir"
-fi
-
-verify_restored_targets
write_restore_report "succeeded" "restore completed and target row-count verification passed"
 
 backup_log "Restore complete. Keep WORKS relay disabled until comparison dry-run passes."
diff --git a/test/make_help_target_test.sh b/test/make_help_target_test.sh
index be8c39da..31f7ae02 100755
--- a/test/make_help_target_test.sh
+++ b/test/make_help_target_test.sh
@@ -23,11 +23,11 @@ assert_contains "$help_output" "Targets:"
 assert_contains "$help_output" "Options:"
 assert_contains "$help_output" "Restore Safety:"
 
-for target in up dev code-check dump restore-plan code-check-userfront-e2e-tests; do
+for target in up dev code-check dump filter-personnel-dump restore-plan code-check-userfront-e2e-tests; do
   assert_contains "$help_output" "$target"
 done
 
-for option in DEV_SERVICES CODE_CHECK_TEST_JOBS PLAYWRIGHT_WORKERS BACKUP_USE_DOCKER DUMP_SERVICES RESTORE_SERVICES; do
+for option in DEV_SERVICES CODE_CHECK_TEST_JOBS PLAYWRIGHT_WORKERS BACKUP_USE_DOCKER DUMP_SERVICES RESTORE_SERVICES DUMP_DATASET RESTORE_DATASET; do
   assert_contains "$help_output" "$option"
 done
 
diff --git a/test/personnel_dataset_backup_policy_test.sh b/test/personnel_dataset_backup_policy_test.sh
new file mode 100755
index 00000000..a641df7c
--- /dev/null
+++ b/test/personnel_dataset_backup_policy_test.sh
@@ -0,0 +1,85 @@
+#!/usr/bin/env bash
+# Policy test for the personnel backup dataset: checks dataset-profile
+# normalization, the exclusions recorded in the manifests, and that the make
+# dry-run output carries DUMP_DATASET / RESTORE_DATASET to the backup scripts.
+set -euo pipefail
+
+repo_root="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+
+# Print an error to stderr and abort the test.
+fail() {
+  echo "ERROR: $*" >&2
+  exit 1
+}
+
+# Fail unless output $1 contains the fixed string $2.
+assert_contains() {
+  local output="$1"
+  local expected="$2"
+  grep -Fq -- "$expected" <<<"$output" || fail "output must contain: $expected"
+}
+
+# Fail if path $1 exists.
+assert_not_exists() {
+  local path="$1"
+  [[ ! -e "$path" ]] || fail "path must not exist: $path"
+}
+
+source "$repo_root/scripts/backup/lib/common.sh"
+source "$repo_root/scripts/backup/lib/manifest.sh"
+source "$repo_root/scripts/backup/lib/personnel_dataset.sh"
+
+# Create the scratch directory first so that every artifact, including the
+# captured error output below, is removed when the test exits.
+tmp_dir="$(mktemp -d /tmp/baron-sso-personnel-dataset.XXXXXX)"
+trap 'rm -rf "$tmp_dir"' EXIT INT TERM
+
+[[ "$(normalize_dataset_profile full)" == "full" ]] || fail "full dataset profile must be accepted"
+[[ "$(normalize_dataset_profile personnel)" == "personnel" ]] || fail "personnel dataset profile must be accepted"
+
+# Run the rejection check in a subshell: it then works whether the helper
+# reports the error by returning non-zero or by exiting.
+profile_out="$tmp_dir/dataset-profile.out"
+if (normalize_dataset_profile unknown) >"$profile_out" 2>&1; then
+  fail "unknown dataset profile must be rejected"
+fi
+assert_contains "$(cat "$profile_out")" "unknown backup dataset"
+
+create_manifest "$tmp_dir" "maintenance" "postgres ory-postgres" "personnel"
+jq -e '.dataset == "personnel" and .environment_scope == "staging-rehearsal"' "$tmp_dir/manifest.json" >/dev/null \
+  || fail "personnel manifest must mark the staging rehearsal dataset scope"
+
+mkdir -p "$tmp_dir/datasets/personnel/reports"
+write_personnel_dataset_manifest "$tmp_dir" "postgres ory-postgres"
+
+dataset_manifest="$tmp_dir/datasets/personnel/dataset-manifest.json"
+jq -e '
+  .dataset == "personnel"
+  and (.excluded.databases | index("ory_hydra"))
+  and (.excluded.tables | index("public.relying_parties"))
+  and (.excluded.tables | index("public.rp_user_metadata"))
+  and (.excluded.tables | index("public.client_consents"))
+  and (.restore_policy.reset_credentials == true)
+' "$dataset_manifest" >/dev/null || fail "personnel dataset manifest must document Hydra/RP exclusions and credential reset policy"
+
+assert_not_exists "$tmp_dir/postgres/ory_hydra.dump"
+assert_not_exists "$tmp_dir/postgres/baron.dump"
+
+dump_dry_run="$(
+  make --dry-run --always-make -C "$repo_root" dump \
+    DUMP_SERVICES="postgres,ory-postgres" \
+    DUMP_DATASET="personnel" \
+    DUMP_MODE="maintenance" 2>&1
+)"
+assert_contains "$dump_dry_run" 'DUMP_DATASET="personnel"'
+
+restore_dry_run="$(
+  make --dry-run --always-make -C "$repo_root" restore \
+    BACKUP="backups/example" \
+    RESTORE_SERVICES="postgres,ory-postgres" \
+    RESTORE_DATASET="personnel" \
+    CONFIRM_RESTORE="baron-sso" 2>&1
+)"
+assert_contains "$restore_dry_run" 'RESTORE_DATASET="personnel"'
+
+echo "OK: personnel dataset backup policy excludes Hydra/RP data and exposes Makefile controls"
diff --git a/test/personnel_dataset_live_e2e_test.sh b/test/personnel_dataset_live_e2e_test.sh
new file mode 100755
index 00000000..0114777d
--- /dev/null
+++ b/test/personnel_dataset_live_e2e_test.sh
@@ -0,0 +1,131 @@
+#!/usr/bin/env bash
+# Docker-backed end-to-end test for the personnel dataset: dumps it directly,
+# filters it out of a full backup, writes a restore plan, then restores the
+# Baron tables into a scratch database. Opt in with RUN_PERSONNEL_DATASET_LIVE_E2E=1.
+set -euo pipefail
+
+repo_root="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+
+# Print an error to stderr and abort the test.
+fail() {
+  echo "ERROR: $*" >&2
+  exit 1
+}
+
+if [[ "${RUN_PERSONNEL_DATASET_LIVE_E2E:-}" != "1" ]]; then
+  echo "SKIP: set RUN_PERSONNEL_DATASET_LIVE_E2E=1 to run the Docker-backed personnel dataset E2E test"
+  exit 0
+fi
+
+tmp_dir="$(mktemp -d /tmp/baron-sso-personnel-live-e2e.XXXXXX)"
+trap 'rm -rf "$tmp_dir"' EXIT INT TERM
+
+backup_dir="$tmp_dir/backup"
+filtered_backup_dir="$tmp_dir/filtered-personnel"
+source_full_backup="${PERSONNEL_FILTER_SOURCE_BACKUP:-backups/baron-sso-backup-20260622-023904Z}"
+restore_report="$tmp_dir/restore-report.json"
+restore_exec_report="$tmp_dir/restore-exec-report.json"
+restore_db="baron_personnel_restore_e2e_$$"
+# Connection settings for the direct psql/pg_dump calls below; the defaults
+# match the ones restore_personnel_baron_postgres uses.
+db_user="${DB_USER:-baron}"
+db_password="${DB_PASSWORD:-password}"
+source_db="${DB_NAME:-baron_sso}"
+
+BACKUP_USE_DOCKER=false \
+BACKUP="$backup_dir" \
+DUMP_SERVICES="postgres,ory-postgres" \
+DUMP_DATASET="personnel" \
+DUMP_MODE="maintenance" \
+"$repo_root/scripts/backup/dump.sh"
+
+BACKUP_USE_DOCKER=false \
+BACKUP="$source_full_backup" \
+OUTPUT_BACKUP="$filtered_backup_dir" \
+"$repo_root/scripts/backup/filter_personnel_dump.sh"
+
+[[ -f "$filtered_backup_dir/datasets/personnel/dataset-manifest.json" ]] || fail "filtered dataset manifest must be created"
+[[ ! -f "$filtered_backup_dir/postgres/ory_hydra.dump" ]] || fail "filtered personnel backup must not contain Hydra dump"
+[[ ! -f "$filtered_backup_dir/postgres/baron.dump" ]] || fail "filtered personnel backup must not contain full Baron dump"
+
+filtered_users="$(wc -l <"$filtered_backup_dir/datasets/personnel/postgres/users.jsonl" | tr -d '[:space:]')"
+direct_users="$(wc -l <"$backup_dir/datasets/personnel/postgres/users.jsonl" | tr -d '[:space:]')"
+[[ "$filtered_users" == "$direct_users" ]] || fail "filtered personnel users count mismatch: got $filtered_users, want $direct_users"
+
+[[ -f "$backup_dir/datasets/personnel/dataset-manifest.json" ]] || fail "dataset manifest must be created"
+[[ -f "$backup_dir/datasets/personnel/postgres/users.jsonl" ]] || fail "users JSONL must be created"
+[[ -f "$backup_dir/datasets/personnel/postgres/tenants.jsonl" ]] || fail "tenants JSONL must be created"
+[[ ! -f "$backup_dir/postgres/ory_hydra.dump" ]] || fail "Hydra dump must not be created for personnel dataset"
+[[ ! -f "$backup_dir/postgres/baron.dump" ]] || fail "full Baron dump must not be created for personnel dataset"
+
+# -q: never echo a matching row (it would include the password_hash field) into the test log.
+if grep -q '"password_hash"' "$backup_dir/datasets/personnel/postgres/users.jsonl"; then
+  fail "personnel users export must not contain password_hash"
+fi
+
+# jsonb text output has a space after the colon, so allow optional whitespace
+# between the key and a non-empty string value.
+if grep -Eq '"relying_party_id":[[:space:]]*"[^"]' "$backup_dir/datasets/personnel/postgres/users.jsonl"; then
+  fail "personnel users export must not keep RP ownership"
+fi
+
+BACKUP_USE_DOCKER=false \
+BACKUP="$filtered_backup_dir" \
+RESTORE_SERVICES="postgres,ory-postgres" \
+RESTORE_DATASET="personnel" \
+CONFIRM_RESTORE="baron-sso" \
+RESTORE_REPORT="$restore_report" \
+"$repo_root/scripts/backup/restore-plan.sh"
+
+jq -e '
+  .status == "planned"
+  and .dataset == "personnel"
+  and (.restore_policy.excluded.databases | index("ory_hydra"))
+  and (.restore_policy.excluded.tables | index("public.relying_parties"))
+' "$restore_report" >/dev/null || fail "restore plan report must describe personnel exclusions"
+
+# Drop the scratch database. Registered before the database is created so a
+# failure while copying the schema cannot leave it behind.
+cleanup_restore_db() {
+  docker exec -e "PGPASSWORD=$db_password" baron_postgres \
+    psql -U "$db_user" -d postgres -v ON_ERROR_STOP=1 \
+    -c "drop database if exists ${restore_db} with (force)" >/dev/null || true
+}
+trap 'cleanup_restore_db; rm -rf "$tmp_dir"' EXIT INT TERM
+
+docker exec -e "PGPASSWORD=$db_password" baron_postgres \
+  psql -U "$db_user" -d postgres -v ON_ERROR_STOP=1 \
+  -c "drop database if exists ${restore_db} with (force)" \
+  -c "create database ${restore_db}"
+docker exec -e "PGPASSWORD=$db_password" baron_postgres \
+  pg_dump -U "$db_user" -d "$source_db" --schema-only \
+  | docker exec -i -e "PGPASSWORD=$db_password" baron_postgres \
+    psql -U "$db_user" -d "$restore_db" -v ON_ERROR_STOP=1 >/dev/null
+
+BACKUP_USE_DOCKER=false \
+BACKUP="$filtered_backup_dir" \
+RESTORE_SERVICES="postgres" \
+RESTORE_DATASET="personnel" \
+CONFIRM_RESTORE="baron-sso" \
+ALLOW_NON_EMPTY_RESTORE="true" \
+RESTORE_REPORT="$restore_exec_report" \
+DB_NAME="$restore_db" \
+"$repo_root/scripts/backup/restore.sh"
+
+restored_users="$(
+  docker exec -e "PGPASSWORD=$db_password" baron_postgres \
+    psql -U "$db_user" -d "$restore_db" -Atc "select count(*) from public.users"
+)"
+source_users="$(wc -l <"$filtered_backup_dir/datasets/personnel/postgres/users.jsonl" | tr -d '[:space:]')"
+[[ "$restored_users" == "$source_users" ]] || fail "restored users count mismatch: got $restored_users, want $source_users"
+
+rp_link_count="$(
+  docker exec -e "PGPASSWORD=$db_password" baron_postgres \
+    psql -U "$db_user" -d "$restore_db" -Atc "select count(*) from public.users where relying_party_id is not null"
+)"
+[[ "$rp_link_count" == "0" ]] || fail "restored personnel users must not keep relying_party_id"
+
+jq -e '.status == "succeeded" and .dataset == "personnel"' "$restore_exec_report" >/dev/null \
+  || fail "personnel restore execution report must succeed"
+
+echo "OK: personnel dataset live E2E dump, restore-plan, and scoped restore passed"
diff --git a/test/personnel_filter_from_full_dump_policy_test.sh b/test/personnel_filter_from_full_dump_policy_test.sh
new file mode 100755
index 00000000..23e9359a
--- /dev/null
+++ b/test/personnel_filter_from_full_dump_policy_test.sh
@@ -0,0 +1,36 @@
+#!/usr/bin/env bash
+# Policy test: the Makefile exposes a filter-personnel-dump target whose dry-run
+# invokes filter_personnel_dump.sh with the given BACKUP and OUTPUT_BACKUP.
+set -euo pipefail
+
+repo_root="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+
+# Print an error to stderr and abort the test.
+fail() {
+  echo "ERROR: $*" >&2
+  exit 1
+}
+
+# Fail unless output $1 contains the fixed string $2.
+assert_contains() {
+  local output="$1"
+  local expected="$2"
+  grep -Fq -- "$expected" <<<"$output" || fail "output must contain: $expected"
+}
+
+[[ -f "$repo_root/scripts/backup/filter_personnel_dump.sh" ]] \
+  || fail "filter_personnel_dump.sh must exist"
+grep -Fq "filter_personnel_dump.sh" "$repo_root/Makefile" \
+  || fail "Makefile must expose the personnel filter script"
+
+dry_run="$(
+  make --dry-run --always-make -C "$repo_root" filter-personnel-dump \
+    BACKUP="backups/full-example" \
+    OUTPUT_BACKUP="backups/personnel-example" 2>&1
+)"
+
+assert_contains "$dry_run" "filter_personnel_dump.sh"
+assert_contains "$dry_run" 'BACKUP="backups/full-example"'
+assert_contains "$dry_run" 'OUTPUT_BACKUP="backups/personnel-example"'
+
+echo "OK: personnel filter-from-full dump target is exposed"