1
0
forked from baron/baron-sso

Add personnel dataset backup filtering

This commit is contained in:
2026-06-22 12:38:29 +09:00
parent 95485632a8
commit 1351c981a8
13 changed files with 1031 additions and 45 deletions

View File

@@ -23,11 +23,11 @@ assert_contains "$help_output" "Targets:"
assert_contains "$help_output" "Options:"
assert_contains "$help_output" "Restore Safety:"
for target in up dev code-check dump restore-plan code-check-userfront-e2e-tests; do
for target in up dev code-check dump filter-personnel-dump restore-plan code-check-userfront-e2e-tests; do
assert_contains "$help_output" "$target"
done
for option in DEV_SERVICES CODE_CHECK_TEST_JOBS PLAYWRIGHT_WORKERS BACKUP_USE_DOCKER DUMP_SERVICES RESTORE_SERVICES; do
for option in DEV_SERVICES CODE_CHECK_TEST_JOBS PLAYWRIGHT_WORKERS BACKUP_USE_DOCKER DUMP_SERVICES RESTORE_SERVICES DUMP_DATASET RESTORE_DATASET; do
assert_contains "$help_output" "$option"
done

View File

@@ -0,0 +1,74 @@
#!/usr/bin/env bash
# Policy test for the "personnel" backup dataset: exercises the backup library
# helpers sourced from scripts/backup/lib and the Makefile dump/restore targets
# (via `make --dry-run`).
# Abort on any failing command, unset variable, or failing pipeline stage.
set -euo pipefail
# Repository root, resolved as the parent of the directory holding this script.
repo_root="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
# Report a fatal test failure and stop.
# Arguments: message words; they are joined into one string via "$*".
# Outputs:   "ERROR: <message>" on stderr.
# Exits:     terminates the current shell with status 1.
fail() {
  printf 'ERROR: %s\n' "$*" >&2
  exit 1
}
# Assert that a captured output contains a literal string.
# Arguments: $1 - text to search
#            $2 - fixed string that must occur in it (not a regex)
# On a miss, reports the missing string through fail.
assert_contains() {
  local haystack="$1" needle="$2"
  # -F: literal match; --: keep a needle starting with '-' from being
  # parsed as a grep option.
  if ! grep -Fq -- "$needle" <<<"$haystack"; then
    fail "output must contain: $needle"
  fi
}
# Assert that nothing exists at the given filesystem path.
# Arguments: $1 - path that must be absent
# If the path exists (per the -e test), reports it through fail.
assert_not_exists() {
  local candidate="$1"
  if [[ -e "$candidate" ]]; then
    fail "path must not exist: $candidate"
  fi
}
# Load the backup libraries whose helpers are exercised below
# (normalize_dataset_profile, create_manifest, write_personnel_dataset_manifest).
source "$repo_root/scripts/backup/lib/common.sh"
source "$repo_root/scripts/backup/lib/manifest.sh"
source "$repo_root/scripts/backup/lib/personnel_dataset.sh"

# --- Dataset profile validation ---------------------------------------------
[[ "$(normalize_dataset_profile full)" == "full" ]] || fail "full dataset profile must be accepted"
[[ "$(normalize_dataset_profile personnel)" == "personnel" ]] || fail "personnel dataset profile must be accepted"

# Capture the rejection message in a variable instead of a fixed-name file
# under /tmp: no predictable path, and nothing is left behind after the run.
if unknown_profile_output="$(normalize_dataset_profile unknown 2>&1)"; then
  fail "unknown dataset profile must be rejected"
fi
assert_contains "$unknown_profile_output" "unknown backup dataset"

# --- Manifest generation ------------------------------------------------------
tmp_dir="$(mktemp -d /tmp/baron-sso-personnel-dataset.XXXXXX)"
# Clean up exactly once, on exit. INT/TERM are turned into an exit so the
# script stops (and the EXIT trap fires) instead of carrying on with a
# scratch directory that has already been deleted.
trap 'rm -rf -- "$tmp_dir"' EXIT
trap 'exit 130' INT
trap 'exit 143' TERM

create_manifest "$tmp_dir" "maintenance" "postgres ory-postgres" "personnel"
jq -e '.dataset == "personnel" and .environment_scope == "staging-rehearsal"' "$tmp_dir/manifest.json" >/dev/null \
  || fail "personnel manifest must mark the staging rehearsal dataset scope"

mkdir -p "$tmp_dir/datasets/personnel/reports"
write_personnel_dataset_manifest "$tmp_dir" "postgres ory-postgres"
dataset_manifest="$tmp_dir/datasets/personnel/dataset-manifest.json"
# The dataset manifest must list the Hydra database and the RP tables as
# excluded, and must require a credential reset on restore.
jq -e '
.dataset == "personnel"
and (.excluded.databases | index("ory_hydra"))
and (.excluded.tables | index("public.relying_parties"))
and (.excluded.tables | index("public.rp_user_metadata"))
and (.excluded.tables | index("public.client_consents"))
and (.restore_policy.reset_credentials == true)
' "$dataset_manifest" >/dev/null || fail "personnel dataset manifest must document Hydra/RP exclusions and credential reset policy"

# Writing the manifests must not have produced any full database dump.
assert_not_exists "$tmp_dir/postgres/ory_hydra.dump"
assert_not_exists "$tmp_dir/postgres/baron.dump"

# --- Makefile wiring (dry run only; nothing is executed) ----------------------
dump_dry_run="$(
  make --dry-run --always-make -C "$repo_root" dump \
    DUMP_SERVICES="postgres,ory-postgres" \
    DUMP_DATASET="personnel" \
    DUMP_MODE="maintenance" 2>&1
)"
assert_contains "$dump_dry_run" 'DUMP_DATASET="personnel"'

restore_dry_run="$(
  make --dry-run --always-make -C "$repo_root" restore \
    BACKUP="backups/example" \
    RESTORE_SERVICES="postgres,ory-postgres" \
    RESTORE_DATASET="personnel" \
    CONFIRM_RESTORE="baron-sso" 2>&1
)"
assert_contains "$restore_dry_run" 'RESTORE_DATASET="personnel"'

echo "OK: personnel dataset backup policy excludes Hydra/RP data and exposes Makefile controls"

View File

@@ -0,0 +1,117 @@
#!/usr/bin/env bash
# Docker-backed live E2E test for the "personnel" backup dataset: runs a dump,
# filters a full backup, produces a restore plan, and performs a scoped restore
# into a throwaway database. Skipped unless RUN_PERSONNEL_DATASET_LIVE_E2E=1.
# Abort on any failing command, unset variable, or failing pipeline stage.
set -euo pipefail
# Repository root, resolved as the parent of the directory holding this script.
repo_root="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
# Abort the test run with an error.
# Arguments: message words; they are joined into one string via "$*".
# Outputs:   "ERROR: <message>" on stderr.
# Exits:     terminates the current shell with status 1.
fail() {
  printf 'ERROR: %s\n' "$*" >&2
  exit 1
}
# Opt-in guard: this test needs the running Docker stack, so it is skipped
# (successfully) unless explicitly enabled.
if [[ "${RUN_PERSONNEL_DATASET_LIVE_E2E:-}" != "1" ]]; then
  echo "SKIP: set RUN_PERSONNEL_DATASET_LIVE_E2E=1 to run the Docker-backed personnel dataset E2E test"
  exit 0
fi

tmp_dir="$(mktemp -d /tmp/baron-sso-personnel-live-e2e.XXXXXX)"
backup_dir="$tmp_dir/backup"
filtered_backup_dir="$tmp_dir/filtered-personnel"
# Full backup used as input for the filter step; override with
# PERSONNEL_FILTER_SOURCE_BACKUP. The default is a relative path to one
# specific dated backup.
source_full_backup="${PERSONNEL_FILTER_SOURCE_BACKUP:-backups/baron-sso-backup-20260622-023904Z}"
restore_report="$tmp_dir/restore-report.json"
restore_exec_report="$tmp_dir/restore-exec-report.json"
# The PID suffix keeps the scratch database name unique per run.
restore_db="baron_personnel_restore_e2e_$$"
# Flipped to true right before the scratch database is created, so cleanup
# only talks to Docker when there may be something to drop.
restore_db_created=false

# Drop the scratch restore database. Best effort: a failure here must not
# mask the real test result, hence the trailing `|| true`.
cleanup_restore_db() {
  docker exec -e PGPASSWORD=password baron_postgres \
    psql -U baron -d postgres -v ON_ERROR_STOP=1 \
    -c "drop database if exists ${restore_db} with (force)" >/dev/null || true
}

# Single exit handler, registered before any resource is created so that a
# failure at any later step (including the schema copy) cannot leak the
# scratch database or the temp directory.
cleanup() {
  if [[ "$restore_db_created" == "true" ]]; then
    cleanup_restore_db
  fi
  rm -rf -- "$tmp_dir"
}
trap cleanup EXIT
# Turn signals into an exit so the script stops and the EXIT handler runs
# once, instead of cleaning up and then continuing.
trap 'exit 130' INT
trap 'exit 143' TERM

# Print the number of lines in file $1 with surrounding whitespace removed.
count_lines() {
  wc -l <"$1" | tr -d '[:space:]'
}

# --- 1. Direct personnel dump -------------------------------------------------
BACKUP_USE_DOCKER=false \
  BACKUP="$backup_dir" \
  DUMP_SERVICES="postgres,ory-postgres" \
  DUMP_DATASET="personnel" \
  DUMP_MODE="maintenance" \
  "$repo_root/scripts/backup/dump.sh"

[[ -f "$backup_dir/datasets/personnel/dataset-manifest.json" ]] || fail "dataset manifest must be created"
[[ -f "$backup_dir/datasets/personnel/postgres/users.jsonl" ]] || fail "users JSONL must be created"
[[ -f "$backup_dir/datasets/personnel/postgres/tenants.jsonl" ]] || fail "tenants JSONL must be created"
[[ ! -f "$backup_dir/postgres/ory_hydra.dump" ]] || fail "Hydra dump must not be created for personnel dataset"
[[ ! -f "$backup_dir/postgres/baron.dump" ]] || fail "full Baron dump must not be created for personnel dataset"

# Matches are deliberately not echoed (-q): the offending lines would be user
# records, including the very password hashes this check guards against.
if grep -Fq '"password_hash"' "$backup_dir/datasets/personnel/postgres/users.jsonl"; then
  fail "personnel users export must not contain password_hash"
fi
# A non-empty relying_party_id string means RP ownership survived the export.
if grep -q '"relying_party_id":"[^"]' "$backup_dir/datasets/personnel/postgres/users.jsonl"; then
  fail "personnel users export must not keep RP ownership"
fi

# --- 2. Filter an existing full backup down to the personnel dataset -----------
BACKUP_USE_DOCKER=false \
  BACKUP="$source_full_backup" \
  OUTPUT_BACKUP="$filtered_backup_dir" \
  "$repo_root/scripts/backup/filter_personnel_dump.sh"

[[ -f "$filtered_backup_dir/datasets/personnel/dataset-manifest.json" ]] || fail "filtered dataset manifest must be created"
[[ ! -f "$filtered_backup_dir/postgres/ory_hydra.dump" ]] || fail "filtered personnel backup must not contain Hydra dump"
[[ ! -f "$filtered_backup_dir/postgres/baron.dump" ]] || fail "filtered personnel backup must not contain full Baron dump"
# Checked explicitly so a missing file yields a clear failure rather than a
# bare redirection error from the line count below.
[[ -f "$filtered_backup_dir/datasets/personnel/postgres/users.jsonl" ]] || fail "users JSONL must be created"

# Both routes to a personnel dataset must export the same number of users.
filtered_users="$(count_lines "$filtered_backup_dir/datasets/personnel/postgres/users.jsonl")"
direct_users="$(count_lines "$backup_dir/datasets/personnel/postgres/users.jsonl")"
[[ "$filtered_users" == "$direct_users" ]] || fail "filtered personnel users count mismatch: got $filtered_users, want $direct_users"

# --- 3. Restore plan -------------------------------------------------------------
BACKUP_USE_DOCKER=false \
  BACKUP="$filtered_backup_dir" \
  RESTORE_SERVICES="postgres,ory-postgres" \
  RESTORE_DATASET="personnel" \
  CONFIRM_RESTORE="baron-sso" \
  RESTORE_REPORT="$restore_report" \
  "$repo_root/scripts/backup/restore-plan.sh"

jq -e '
.status == "planned"
and .dataset == "personnel"
and (.restore_policy.excluded.databases | index("ory_hydra"))
and (.restore_policy.excluded.tables | index("public.relying_parties"))
' "$restore_report" >/dev/null || fail "restore plan report must describe personnel exclusions"

# --- 4. Scoped restore into a scratch database -------------------------------------
restore_db_created=true
docker exec -e PGPASSWORD=password baron_postgres \
  psql -U baron -d postgres -v ON_ERROR_STOP=1 \
  -c "drop database if exists ${restore_db} with (force)" \
  -c "create database ${restore_db}"

# Seed the scratch database with the schema (no data) of baron_sso.
docker exec -e PGPASSWORD=password baron_postgres \
  pg_dump -U baron -d baron_sso --schema-only \
  | docker exec -i -e PGPASSWORD=password baron_postgres \
    psql -U baron -d "$restore_db" -v ON_ERROR_STOP=1 >/dev/null

BACKUP_USE_DOCKER=false \
  BACKUP="$filtered_backup_dir" \
  RESTORE_SERVICES="postgres" \
  RESTORE_DATASET="personnel" \
  CONFIRM_RESTORE="baron-sso" \
  ALLOW_NON_EMPTY_RESTORE="true" \
  RESTORE_REPORT="$restore_exec_report" \
  DB_NAME="$restore_db" \
  "$repo_root/scripts/backup/restore.sh"

# --- 5. Verify the restored data -----------------------------------------------------
restored_users="$(
  docker exec -e PGPASSWORD=password baron_postgres \
    psql -U baron -d "$restore_db" -Atc "select count(*) from public.users"
)"
source_users="$(count_lines "$filtered_backup_dir/datasets/personnel/postgres/users.jsonl")"
[[ "$restored_users" == "$source_users" ]] || fail "restored users count mismatch: got $restored_users, want $source_users"

rp_link_count="$(
  docker exec -e PGPASSWORD=password baron_postgres \
    psql -U baron -d "$restore_db" -Atc "select count(*) from public.users where relying_party_id is not null"
)"
[[ "$rp_link_count" == "0" ]] || fail "restored personnel users must not keep relying_party_id"

jq -e '.status == "succeeded" and .dataset == "personnel"' "$restore_exec_report" >/dev/null \
  || fail "personnel restore execution report must succeed"

echo "OK: personnel dataset live E2E dump, restore-plan, and scoped restore passed"

View File

@@ -0,0 +1,32 @@
#!/usr/bin/env bash
# Checks that scripts/backup/filter_personnel_dump.sh exists and that the
# Makefile's filter-personnel-dump target invokes it (via `make --dry-run`).
# Abort on any failing command, unset variable, or failing pipeline stage.
set -euo pipefail
# Repository root, resolved as the parent of the directory holding this script.
repo_root="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
# Print an error and terminate the test.
# Arguments: message words; they are joined into one string via "$*".
# Outputs:   "ERROR: <message>" on stderr.
# Exits:     terminates the current shell with status 1.
fail() {
  printf 'ERROR: %s\n' "$*" >&2
  exit 1
}
# Require that a block of text includes a given literal string.
# Arguments: $1 - text to search
#            $2 - fixed string that must occur in it (not a regex)
# On a miss, reports the missing string through fail.
assert_contains() {
  local text="$1" wanted="$2"
  # -F: literal match; --: tolerate a wanted string that begins with '-'.
  if grep -Fq -- "$wanted" <<<"$text"; then
    return 0
  fi
  fail "output must contain: $wanted"
}
# The filter script must be present in the repository...
if [[ ! -f "$repo_root/scripts/backup/filter_personnel_dump.sh" ]]; then
  fail "filter_personnel_dump.sh must exist"
fi

# ...and the Makefile must reference it.
if ! grep -Fq "filter_personnel_dump.sh" "$repo_root/Makefile"; then
  fail "Makefile must expose the personnel filter script"
fi

# Dry-run the make target (nothing is executed) and capture the commands it
# would run, stderr included.
make_plan="$(
  make --dry-run --always-make -C "$repo_root" filter-personnel-dump \
    BACKUP="backups/full-example" \
    OUTPUT_BACKUP="backups/personnel-example" 2>&1
)"

# The planned commands must invoke the script and forward both variables.
for required in \
  "filter_personnel_dump.sh" \
  'BACKUP="backups/full-example"' \
  'OUTPUT_BACKUP="backups/personnel-example"'; do
  assert_contains "$make_plan" "$required"
done

echo "OK: personnel filter-from-full dump target is exposed"