1
0
forked from baron/baron-sso

fix: improve keto sync reliability and initial rebac permissions for super admin

This commit is contained in:
2026-04-06 10:10:27 +09:00
parent bd296f9425
commit 583755c189
11 changed files with 254 additions and 81 deletions

View File

@@ -234,9 +234,9 @@ func main() {
}
// [New] Sync existing data to Keto
if ketoService != nil {
if err := bootstrap.SyncKetoRelations(db, ketoService); err != nil {
slog.Warn("⚠️ Keto synchronization failed during startup", "error", err)
if ketoOutboxRepo != nil {
if err := bootstrap.SyncKetoRelations(db, ketoOutboxRepo); err != nil {
slog.Warn("⚠️ Keto synchronization queueing failed during startup", "error", err)
}
}
}

View File

@@ -2,52 +2,91 @@ package bootstrap
import (
"baron-sso-backend/internal/domain"
"baron-sso-backend/internal/service"
"baron-sso-backend/internal/repository"
"context"
"log/slog"
"gorm.io/gorm"
)
// SyncKetoRelations synchronizes all existing DB users and tenants to Ory Keto.
// SyncKetoRelations synchronizes all existing DB users, tenants and RPs to Ory Keto via Outbox.
// This ensures data consistency for existing data when ReBAC is introduced.
func SyncKetoRelations(db *gorm.DB, keto service.KetoService) error {
slog.Info("🚀 Starting Keto ReBAC relation synchronization...")
func SyncKetoRelations(db *gorm.DB, outbox repository.KetoOutboxRepository) error {
slog.Info("🚀 Starting Keto ReBAC relation synchronization (via Outbox)...")
ctx := context.Background()
// 1. Sync All Tenants (Ensure they exist in Keto if needed)
// 1. Sync All Tenants
var tenants []domain.Tenant
if err := db.Find(&tenants).Error; err != nil {
return err
}
slog.Info("Syncing tenants to Keto", "count", len(tenants))
slog.Info("Syncing tenants to Keto Outbox", "count", len(tenants))
for _, t := range tenants {
// Global Super Admin access to every tenant
_ = outbox.Create(ctx, &domain.KetoOutbox{
Namespace: "Tenant",
Object: t.ID,
Relation: "admins",
Subject: "System:global#super_admins",
Action: domain.KetoOutboxActionCreate,
})
if t.ParentID != nil {
_ = keto.CreateRelation(ctx, "Tenant", t.ID, "parents", "Tenant:"+*t.ParentID)
_ = outbox.Create(ctx, &domain.KetoOutbox{
Namespace: "Tenant",
Object: t.ID,
Relation: "parents",
Subject: "Tenant:" + *t.ParentID,
Action: domain.KetoOutboxActionCreate,
})
}
}
// 2. Sync All Users
// 2. Sync All RelyingParties (if needed)
// NOTE: RelyingParty sync is not implemented yet, so this step is currently a no-op.
// Hydra is the source of truth (SSOT) for relying parties, and the local domain.RelyingParty
// table appears to have been removed, so there is nothing to enumerate from the DB here.
// TODO: list clients from Hydra (or from a local metadata table, if one exists) and queue their relations.
// 3. Sync All Users Roles and Tenant Memberships
var users []domain.User
if err := db.Find(&users).Error; err != nil {
return err
}
slog.Info("Syncing users to Keto", "count", len(users))
slog.Info("Syncing users to Keto Outbox", "count", len(users))
for _, u := range users {
role := domain.NormalizeRole(u.Role)
// Membership
// Tenant Membership
if u.TenantID != nil {
_ = keto.CreateRelation(ctx, "Tenant", *u.TenantID, "members", "User:"+u.ID)
_ = outbox.Create(ctx, &domain.KetoOutbox{
Namespace: "Tenant",
Object: *u.TenantID,
Relation: "members",
Subject: "User:" + u.ID,
Action: domain.KetoOutboxActionCreate,
})
}
// Roles
role := domain.NormalizeRole(u.Role)
if role == domain.RoleSuperAdmin {
_ = keto.CreateRelation(ctx, "System", "global", "super_admins", "User:"+u.ID)
_ = outbox.Create(ctx, &domain.KetoOutbox{
Namespace: "System",
Object: "global",
Relation: "super_admins",
Subject: "User:" + u.ID,
Action: domain.KetoOutboxActionCreate,
})
} else if role == domain.RoleTenantAdmin && u.TenantID != nil {
_ = keto.CreateRelation(ctx, "Tenant", *u.TenantID, "admins", "User:"+u.ID)
_ = outbox.Create(ctx, &domain.KetoOutbox{
Namespace: "Tenant",
Object: *u.TenantID,
Relation: "admins",
Subject: "User:" + u.ID,
Action: domain.KetoOutboxActionCreate,
})
}
}
slog.Info("✅ Keto ReBAC synchronization completed.")
slog.Info("✅ Keto ReBAC synchronization items added to Outbox.")
return nil
}

View File

@@ -363,8 +363,8 @@ func (h *DevHandler) checkAppManagerPermission(c *fiber.Ctx) (bool, error) {
return false, nil
}
// Check with Keto: System:AppManager#member
allowed, err := h.Keto.CheckPermission(c.Context(), subject, "System", "AppManager", "member")
// Check with Keto: System:global#manage_all
allowed, err := h.Keto.CheckPermission(c.Context(), subject, "System", "global", "manage_all")
if err != nil {
// Fail closed for dev private endpoints: deny on permission backend error.
slog.Warn("Dev private permission check failed; denying access", "subject", subject, "error", err)
@@ -442,8 +442,8 @@ func (h *DevHandler) checkAppManagerPermission(c *fiber.Ctx) (bool, error) {
}
}
// Check with Keto: System:AppManager#member
allowed, err := h.Keto.CheckPermission(c.Context(), tokenSubject, "System", "AppManager", "member")
// Check with Keto: System:global#manage_all
allowed, err := h.Keto.CheckPermission(c.Context(), tokenSubject, "System", "global", "manage_all")
if err != nil {
// Fail closed for dev private endpoints: deny on permission backend error.
slog.Warn("Dev private permission check failed; denying access", "subject", tokenSubject, "error", err)

View File

@@ -89,7 +89,7 @@ func TestDevHandler_Isolation(t *testing.T) {
})
app.Get("/api/v1/dev/clients", h.ListClients)
mockKeto.On("CheckPermission", mock.Anything, "user-a", "System", "AppManager", "member").Return(true, nil)
mockKeto.On("CheckPermission", mock.Anything, "user-a", "System", "global", "manage_all").Return(true, nil)
req := httptest.NewRequest(http.MethodGet, "/api/v1/dev/clients", nil)
resp, _ := app.Test(req, -1)

View File

@@ -121,7 +121,7 @@ func TestListClients_Success(t *testing.T) {
})
mockKeto := new(devMockKetoService)
mockKeto.On("CheckPermission", mock.Anything, mock.Anything, "System", "AppManager", "member").Return(true, nil)
mockKeto.On("CheckPermission", mock.Anything, mock.Anything, "System", "global", "manage_all").Return(true, nil)
h := &DevHandler{
Hydra: &service.HydraAdminService{
@@ -235,7 +235,7 @@ func TestListClients_ProtectedSystemClientHidden(t *testing.T) {
})
mockKeto := new(devMockKetoService)
mockKeto.On("CheckPermission", mock.Anything, mock.Anything, "System", "AppManager", "member").Return(true, nil)
mockKeto.On("CheckPermission", mock.Anything, mock.Anything, "System", "global", "manage_all").Return(true, nil)
h := &DevHandler{
Hydra: &service.HydraAdminService{
@@ -276,7 +276,7 @@ func TestListClients_ReservedSystemNameAliasHidden(t *testing.T) {
})
mockKeto := new(devMockKetoService)
mockKeto.On("CheckPermission", mock.Anything, mock.Anything, "System", "AppManager", "member").Return(true, nil)
mockKeto.On("CheckPermission", mock.Anything, mock.Anything, "System", "global", "manage_all").Return(true, nil)
h := &DevHandler{
Hydra: &service.HydraAdminService{
@@ -602,7 +602,7 @@ func TestGetStats_Success(t *testing.T) {
assert.Equal(t, int64(2), res.TotalClients)
assert.Equal(t, int64(7), res.AuthFailures)
assert.Equal(t, int64(3), res.ActiveSessions)
mockKeto.AssertNotCalled(t, "CheckPermission", mock.Anything, mock.Anything, "System", "AppManager", "member")
mockKeto.AssertNotCalled(t, "CheckPermission", mock.Anything, mock.Anything, "System", "global", "manage_all")
}
func TestDevHandler_NoAuditNoAction(t *testing.T) {

View File

@@ -106,27 +106,39 @@ func (s *ketoService) CheckPermission(ctx context.Context, subject, namespace, o
q.Set("subject_id", subject)
u.RawQuery = q.Encode()
req, _ := http.NewRequestWithContext(ctx, "GET", u.String(), nil)
resp, err := s.client.Do(req)
if err != nil {
return false, err
}
defer resp.Body.Close()
var lastErr error
maxRetries := 5
backoff := 200 * time.Millisecond
if resp.StatusCode == http.StatusForbidden {
return false, nil
}
if resp.StatusCode != http.StatusOK {
body, _ := io.ReadAll(resp.Body)
return false, fmt.Errorf("keto returned status %d: %s", resp.StatusCode, string(body))
for i := 0; i < maxRetries; i++ {
req, _ := http.NewRequestWithContext(ctx, "GET", u.String(), nil)
resp, err := s.client.Do(req)
if err == nil {
defer resp.Body.Close()
if resp.StatusCode == http.StatusOK {
var res checkResponse
if err := json.NewDecoder(resp.Body).Decode(&res); err != nil {
return false, err
}
return res.Allowed, nil
}
if resp.StatusCode == http.StatusForbidden {
return false, nil
}
body, _ := io.ReadAll(resp.Body)
lastErr = fmt.Errorf("keto returned status %d: %s", resp.StatusCode, string(body))
} else {
lastErr = err
}
if i < maxRetries-1 {
slog.Debug("Retrying Keto CheckPermission...", "attempt", i+1, "error", lastErr)
time.Sleep(backoff)
backoff *= 2
}
}
var res checkResponse
if err := json.NewDecoder(resp.Body).Decode(&res); err != nil {
return false, err
}
return res.Allowed, nil
return false, lastErr
}
func (s *ketoService) CreateRelation(ctx context.Context, namespace, object, relation, subject string) error {
@@ -141,8 +153,8 @@ func (s *ketoService) CreateRelation(ctx context.Context, namespace, object, rel
// Exponential Backoff Retry Logic
var lastErr error
maxRetries := 3
backoff := 100 * time.Millisecond
maxRetries := 5
backoff := 200 * time.Millisecond
for i := 0; i < maxRetries; i++ {
req, _ := http.NewRequestWithContext(ctx, "PUT", u, bytes.NewReader(body))
@@ -152,7 +164,7 @@ func (s *ketoService) CreateRelation(ctx context.Context, namespace, object, rel
if err == nil {
defer resp.Body.Close()
if resp.StatusCode == http.StatusCreated || resp.StatusCode == http.StatusNoContent || resp.StatusCode == http.StatusOK {
slog.Info("Keto relation created", "namespace", namespace, "object", object, "relation", relation, "subject", subject)
slog.Debug("Keto relation created", "namespace", namespace, "object", object, "relation", relation, "subject", subject)
return nil
}
resBody, _ := io.ReadAll(resp.Body)
@@ -161,11 +173,14 @@ func (s *ketoService) CreateRelation(ctx context.Context, namespace, object, rel
lastErr = err
}
time.Sleep(backoff)
backoff *= 2
if i < maxRetries-1 {
slog.Debug("Retrying Keto CreateRelation...", "attempt", i+1, "error", lastErr)
time.Sleep(backoff)
backoff *= 2
}
}
slog.Error("Keto create relation failed after retries", "error", lastErr)
slog.Error("Keto create relation failed after retries", "error", lastErr, "namespace", namespace, "object", object, "relation", relation, "subject", subject)
return lastErr
}
@@ -178,20 +193,34 @@ func (s *ketoService) DeleteRelation(ctx context.Context, namespace, object, rel
q.Set("subject_id", subject)
u.RawQuery = q.Encode()
req, _ := http.NewRequestWithContext(ctx, "DELETE", u.String(), nil)
resp, err := s.client.Do(req)
if err != nil {
return err
}
defer resp.Body.Close()
var lastErr error
maxRetries := 5
backoff := 200 * time.Millisecond
if resp.StatusCode != http.StatusNoContent && resp.StatusCode != http.StatusOK {
resBody, _ := io.ReadAll(resp.Body)
return fmt.Errorf("keto returned status %d: %s", resp.StatusCode, string(resBody))
for i := 0; i < maxRetries; i++ {
req, _ := http.NewRequestWithContext(ctx, "DELETE", u.String(), nil)
resp, err := s.client.Do(req)
if err == nil {
defer resp.Body.Close()
if resp.StatusCode == http.StatusNoContent || resp.StatusCode == http.StatusOK {
slog.Debug("Keto relation deleted", "namespace", namespace, "object", object, "relation", relation, "subject", subject)
return nil
}
resBody, _ := io.ReadAll(resp.Body)
lastErr = fmt.Errorf("keto returned status %d: %s", resp.StatusCode, string(resBody))
} else {
lastErr = err
}
if i < maxRetries-1 {
slog.Debug("Retrying Keto DeleteRelation...", "attempt", i+1, "error", lastErr)
time.Sleep(backoff)
backoff *= 2
}
}
slog.Info("Keto relation deleted", "namespace", namespace, "object", object, "relation", relation, "subject", subject)
return nil
slog.Error("Keto delete relation failed after retries", "error", lastErr)
return lastErr
}
func (s *ketoService) ListObjects(ctx context.Context, namespace, relation, subject string) ([]string, error) {

View File

@@ -120,6 +120,15 @@ func (s *tenantService) RegisterTenant(ctx context.Context, name, slug, tenantTy
// [Keto] Sync hierarchy and ownership via Outbox
if s.outboxRepo != nil {
// Global Super Admin access to every tenant
_ = s.outboxRepo.Create(ctx, &domain.KetoOutbox{
Namespace: "Tenant",
Object: tenant.ID,
Relation: "admins",
Subject: "System:global#super_admins",
Action: domain.KetoOutboxActionCreate,
})
// Sync hierarchy
if tenant.ParentID != nil {
if err := s.outboxRepo.Create(ctx, &domain.KetoOutbox{
@@ -198,6 +207,17 @@ func (s *tenantService) RequestRegistration(ctx context.Context, name, slug, des
return nil, err
}
// [Keto] Global Super Admin access to every tenant (even pending ones)
if s.outboxRepo != nil {
_ = s.outboxRepo.Create(ctx, &domain.KetoOutbox{
Namespace: "Tenant",
Object: tenant.ID,
Relation: "admins",
Subject: "System:global#super_admins",
Action: domain.KetoOutboxActionCreate,
})
}
// Add Domain as unverified
if err := s.repo.AddDomain(ctx, tenant.ID, domainName, false); err != nil {
return nil, err

View File

@@ -32,7 +32,7 @@ test.describe("DevFront security and isolation", () => {
).toBeVisible();
});
test("RBAC: non-AppManager user should not see private apps", async ({
test("RBAC: user without manage_all permission should not see private apps", async ({
page,
}) => {
const state = {

View File

@@ -2,11 +2,23 @@ import { Namespace, Subject, Context, SubjectSet } from "@ory/keto-definitions"
// User is the subject namespace: it declares no relations or permissions of its
// own and is only referenced as a subject type by the other namespaces.
class User implements Namespace {}
// System holds platform-wide relations and permissions.
// NOTE(review): the Go callers visible in this change address it through the
// single object "global" (System:global) — confirm no other objects are used.
class System implements Namespace {
related: {
// Users holding the global super admin role. Other namespaces reference this
// relation as the subject set System#super_admins.
super_admins: User[]
// Users referenced by RelyingParty.access through the subject set
// System#authenticated_users.
authenticated_users: User[]
}
permits = {
// manage_all is granted only when the subject is directly listed in super_admins.
manage_all: (ctx: Context): boolean =>
this.related.super_admins.includes(ctx.subject)
}
}
class Tenant implements Namespace {
related: {
owners: User[]
admins: User[]
members: User[]
owners: (User | SubjectSet<System, "super_admins">)[]
admins: (User | SubjectSet<System, "super_admins">)[]
members: (User | SubjectSet<System, "super_admins"> | SubjectSet<Tenant, "admins"> | SubjectSet<Tenant, "owners">)[]
parents: Tenant[]
}
@@ -33,9 +45,9 @@ class Tenant implements Namespace {
class RelyingParty implements Namespace {
related: {
admins: User[]
admins: (User | SubjectSet<System, "super_admins"> | SubjectSet<Tenant, "admins"> | SubjectSet<Tenant, "owners">)[]
parents: Tenant[]
access: (User | SubjectSet<Tenant, "members"> | SubjectSet<System, "authenticated_users">)[]
access: (User | SubjectSet<Tenant, "members"> | SubjectSet<System, "authenticated_users"> | SubjectSet<System, "super_admins">)[]
}
permits = {
@@ -52,15 +64,3 @@ class RelyingParty implements Namespace {
this.permits.manage(ctx)
}
}
class System implements Namespace {
related: {
super_admins: User[]
authenticated_users: User[]
}
permits = {
manage_all: (ctx: Context): boolean =>
this.related.super_admins.includes(ctx.subject)
}
}

View File

@@ -0,0 +1,85 @@
# 스테이징 배포 및 DB 초기화 프로세스 분석
현재 Gitea Actions(`staging_release.yml`)를 통해 스테이징 서버에 배포가 진행될 때 발생하는 **데이터 초기화(Wipe)**와 **초기 관리자 계정 생성(Seed)** 과정을 설명하는 다이어그램입니다.
---
## 1. 스테이징 배포 파이프라인 (데이터가 날아가는 이유)
배포 스크립트에 포함된 `docker compose down -v` 명령어의 `-v` 옵션으로 인해, 컨테이너가 내려갈 때 영구 저장소(Volumes)까지 통째로 삭제되는 흐름입니다.
```mermaid
graph TD
Start[Gitea Action 수동 실행<br/>Release Baron SSO to Staging] --> SSH(Staging 서버 SSH 접속)
SSH --> Env[최신 환경변수 및 .env 파일 생성]
Env --> Pull[docker compose pull<br/>최신 이미지 다운로드]
Pull --> DownV{docker compose down -v}
style DownV fill:#ffebee,stroke:#ff0000,stroke-width:2px
DownV -->|1. 컨테이너 종료| StopC[Backend, Frontend, Kratos 등<br/>모든 컨테이너 중지]
DownV -->|2. 볼륨 완전 삭제| WipeDB[(데이터베이스 볼륨 파괴<br/>postgres_data<br/>ory_postgres_data<br/>clickhouse_data)]
StopC --> Up[docker compose up -d]
WipeDB --> Up
Up --> CleanState[새로운 컨테이너 시작<br/>완전히 텅 빈 Clean DB 상태]
```
**📌 분석 포인트:**
* 배포할 때마다 붉은색으로 칠해진 `down -v` 단계가 실행됩니다.
* 이 단계에서 기존에 생성해두었던 **테넌트, 일반 유저, 조직도, 권한 등 모든 데이터가 공장 초기화**됩니다. (Dev 서버와 DB를 공유하지 않습니다)
---
## 2. 백엔드 Bootstrap (어드민 계정이 새로 생성되는 이유)
데이터베이스가 완전히 텅 빈 상태로 컨테이너가 새로 시작된 직후, 백엔드 서버가 부팅되면서 `.env`에 정의된 시스템 관리자 계정을 자동으로 생성(Seed)하는 흐름입니다.
```mermaid
sequenceDiagram
autonumber
participant Docker as Staging Server
participant BE as Backend (kratos_seed.go)
participant Kratos as Ory Kratos (DB)
participant Keto as Ory Keto (ReBAC)
Docker->>BE: 1. 컨테이너 시작 (Bootstrapping)
activate BE
Note over BE: 백엔드 서버 구동 전 초기화 스크립트 실행
BE->>BE: 2. .env 읽기<br/>(ADMIN_EMAIL, ADMIN_PASSWORD)
BE->>Kratos: 3. CreateUser API 호출<br/>(email, password, role: "super_admin")
activate Kratos
Note over Kratos: 텅 빈 DB에<br/>최초의 계정 1개 생성
Kratos-->>BE: 4. Identity ID 반환
deactivate Kratos
BE->>Keto: 5. 권한 동기화 API 호출<br/>(System:global의 super_admins 관계에 계정 추가)
activate Keto
Keto-->>BE: 6. 권한 부여 완료
deactivate Keto
Note over BE: 백엔드 서버 HTTP 요청 수신 준비 완료
deactivate BE
```
**📌 분석 포인트:**
* 이전 단계에서 DB가 모두 날아갔기 때문에 기존 계정은 하나도 없습니다.
* 하지만 백엔드가 구동되면서 **(3)번 과정**을 통해 Gitea Secrets에 저장된 `STG_ADMIN_PASSWORD` 정보로 **가장 권한이 높은 슈퍼 어드민 계정 단 1개**를 Kratos DB에 밀어 넣습니다.
* 그래서 방금 전 배포가 끝난 스테이징 서버에 들어가면, 예전에 만든 데이터는 없지만 **이 스크립트가 방금 만들어준 어드민 계정으로는 로그인이 성공**하게 되는 것입니다.
---
### 💡 (참고) 데이터를 유지하고 싶다면?
스테이징 배포 시마다 데이터가 날아가는 것을 방지하려면, `.gitea/workflows/staging_release.yml` 파일 내부의 배포 스크립트에서 `-v` 옵션을 제거해야 합니다.
```bash
# 수정 전 (데이터 완전 삭제)
docker compose -f compose.infra.yml -f compose.ory.yml -f docker-compose.yml down -v || true
# 수정 후 (컨테이너만 재시작, DB 볼륨 유지)
docker compose -f compose.infra.yml -f compose.ory.yml -f docker-compose.yml down || true
```