Backup table operations

This commit is contained in:
Lectom C Han
2025-12-09 19:29:34 +09:00
parent c4bb3525d3
commit 8762db2c0e
17 changed files with 88913 additions and 13 deletions

.gitignore vendored
View File

@@ -1,3 +1,5 @@
update_data
# Go build artifacts
bin/
*.exe

View File

@@ -3,7 +3,7 @@
## Project Structure & Module Organization
- `cmd/server/main.go` is the Fiber entrypoint that wires config, routes, and startup logging.
- `internal/geo` owns GeoLite2 lookups, IP validation, and response shaping.
- `docker-compose.yml` defines the container entry; `Dockerfile` builds a static binary. `GeoLite2-City.mmdb` sits at the repo root and is mounted to `/data/GeoLite2-City.mmdb`.
- `docker-compose.yml` defines the container entry; `Dockerfile` builds a static binary. `GeoLite2-City.mmdb` sits at the repo root and is mounted to `/initial_data/GeoLite2-City.mmdb`.
- Keep `cmd/server` thin; place new logic in `internal/<domain>` with clear boundaries.
## Build, Test, and Development Commands

View File

@@ -9,19 +9,29 @@ RUN go mod download
COPY . .
RUN CGO_ENABLED=0 go build -o /bin/geoip ./cmd/server && \
CGO_ENABLED=0 go build -o /bin/geoip-loader ./cmd/loader
CGO_ENABLED=0 go build -o /bin/geoip-loader ./cmd/loader && \
CGO_ENABLED=0 go build -o /bin/user-program-import ./cmd/user_program_import
FROM debian:trixie-slim
RUN apt-get update && \
apt-get install -y --no-install-recommends default-mysql-client python3 && \
rm -rf /var/lib/apt/lists/*
RUN useradd --create-home --shell /usr/sbin/nologin appuser
WORKDIR /app
COPY --from=builder /bin/geoip /usr/local/bin/geoip
COPY --from=builder /bin/geoip-loader /usr/local/bin/geoip-loader
COPY GeoLite2-City.mmdb /data/GeoLite2-City.mmdb
COPY --from=builder /bin/user-program-import /usr/local/bin/user-program-import
COPY initial_data /app/initial_data
COPY scripts /app/scripts
RUN mkdir -p /app/update_data /app/log && \
chmod 0755 /app/scripts/dump_and_import.sh && \
chmod -R 0755 /app/scripts
ENV GEOIP_DB_PATH=/data/GeoLite2-City.mmdb
ENV GEOIP_DB_PATH=/app/initial_data/GeoLite2-City.mmdb
USER appuser
EXPOSE 8080

View File

@@ -27,7 +27,7 @@ docker compose up --build
- `postgres` (5432): `Dockerfile.postgres` builds and installs the `maxminddb_fdw` extension, reads `GeoLite2-City.mmdb` through the FDW, and loads it into local tables. After the initial load completes, lookups can be served from the DB without the mmdb file.
- `api` (8080): uses the Postgres backend (`GEOIP_BACKEND=postgres`) for lookups by default.
- Volumes
- `./GeoLite2-City.mmdb:/data/GeoLite2-City.mmdb:ro` (for the initial Postgres load)
- `./GeoLite2-City.mmdb:/initial_data/GeoLite2-City.mmdb:ro` (for the initial Postgres load)
- `pgdata` (persists DB data)
## Environment Variables
@@ -35,7 +35,7 @@ docker compose up --build
- `PORT` (default `8080`): server listen port
- `GEOIP_BACKEND` (`mmdb`|`postgres`, default `mmdb`)
- MMDB mode
- `GEOIP_DB_PATH` (default `/data/GeoLite2-City.mmdb`): path to the GeoIP database
- `GEOIP_DB_PATH` (default `/initial_data/GeoLite2-City.mmdb`): path to the GeoIP database
- Postgres mode
- `DATABASE_URL`: e.g. `postgres://geoip_readonly:geoip_readonly@postgres:5432/geoip?sslmode=disable`
- `GEOIP_LOOKUP_QUERY` (optional): defaults to `geoip.lookup_city($1)`
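
For reference, a minimal sketch of a Postgres-mode lookup using the variables above. Wrapping the default function in `SELECT * FROM geoip.lookup_city($1)` and printing rows generically are assumptions for illustration only, not the server's actual wiring:

```go
package main

import (
	"context"
	"fmt"
	"log"
	"os"

	"github.com/jackc/pgx/v5"
)

func main() {
	// DATABASE_URL and GEOIP_LOOKUP_QUERY as described in the section above.
	dsn := os.Getenv("DATABASE_URL")
	query := os.Getenv("GEOIP_LOOKUP_QUERY")
	if query == "" {
		// Assumed invocation of the default geoip.lookup_city($1) function.
		query = "SELECT * FROM geoip.lookup_city($1)"
	}

	ctx := context.Background()
	conn, err := pgx.Connect(ctx, dsn)
	if err != nil {
		log.Fatalf("connect: %v", err)
	}
	defer conn.Close(ctx)

	rows, err := conn.Query(ctx, query, "8.8.8.8")
	if err != nil {
		log.Fatalf("lookup: %v", err)
	}
	defer rows.Close()
	for rows.Next() {
		// Print each result row generically, since the lookup's column set
		// depends on the geoip.lookup_city definition.
		vals, err := rows.Values()
		if err != nil {
			log.Fatalf("scan: %v", err)
		}
		fmt.Println(vals)
	}
	if err := rows.Err(); err != nil {
		log.Fatalf("rows: %v", err)
	}
}
```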

View File

@@ -18,7 +18,7 @@ import (
)
const (
defaultMMDBPath = "/data/GeoLite2-City.mmdb"
defaultMMDBPath = "/initial_data/GeoLite2-City.mmdb"
defaultSchema = "geoip"
defaultLoaderTimeout = 30 * time.Minute
)

View File

@@ -1,19 +1,24 @@
package main
import (
"context"
"errors"
"log"
"net/url"
"os"
"time"
"github.com/gofiber/fiber/v2"
"geoip-rest/internal/geo"
"geoip-rest/internal/schedule"
)
const (
defaultPort = "8080"
defaultDBPath = "/data/GeoLite2-City.mmdb"
defaultDBPath = "/initial_data/GeoLite2-City.mmdb"
defaultCron = ""
defaultScript = "./scripts/dump_and_import.sh"
)
func main() {
@@ -87,6 +92,14 @@ func main() {
log.Printf("using mmdb path %s", dbPath)
}
stopScheduler := maybeStartScheduler()
defer func() {
if stopScheduler != nil {
ctx := stopScheduler()
<-ctx.Done()
}
}()
if err := app.Listen(":" + port); err != nil {
log.Fatalf("server stopped: %v", err)
}
@@ -106,3 +119,33 @@ func sanitizeDBURL(raw string) string {
}
return u.Redacted()
}
func maybeStartScheduler() func() context.Context {
cronExpr := env("USER_PROGRAM_CRON", defaultCron)
if cronExpr == "" {
return nil
}
script := env("USER_PROGRAM_SCRIPT", defaultScript)
sched, err := schedule.Start(schedule.Config{
CronExpr: cronExpr,
ScriptPath: script,
Logger: log.Default(),
})
if err != nil {
log.Printf("scheduler not started (error=%v)", err)
return nil
}
return func() context.Context {
ctx := sched.Stop()
timer := time.NewTimer(2 * time.Second)
select {
case <-ctx.Done():
timer.Stop()
return ctx
case <-timer.C:
return ctx
}
}
}

View File

@@ -0,0 +1,59 @@
package main
import (
"context"
"log"
"os"
"time"
"github.com/jackc/pgx/v5"
"geoip-rest/internal/importer"
)
const (
defaultCSVPath = "./initial_data/user_program_info_init_20251208.csv"
defaultUpdateDir = "./update_data"
defaultTimeout = 10 * time.Minute
defaultSchema = "public"
defaultLogDir = "./log"
targetTableName = "user_program_info_replica"
)
func main() {
dbURL := os.Getenv("DATABASE_URL")
if dbURL == "" {
log.Fatal("DATABASE_URL is required")
}
csvPath := env("USER_PROGRAM_INFO_CSV", defaultCSVPath)
updateDir := env("USER_PROGRAM_UPDATE_DIR", defaultUpdateDir)
schema := env("USER_PROGRAM_INFO_SCHEMA", defaultSchema)
logDir := env("USER_PROGRAM_IMPORT_LOG_DIR", defaultLogDir)
ctx, cancel := context.WithTimeout(context.Background(), defaultTimeout)
defer cancel()
conn, err := pgx.Connect(ctx, dbURL)
if err != nil {
log.Fatalf("failed to connect to database: %v", err)
}
defer conn.Close(context.Background())
if err := importer.EnsureUserProgramReplica(ctx, conn, csvPath, schema, logDir); err != nil {
log.Fatalf("failed to ensure %s table: %v", targetTableName, err)
}
if err := importer.ImportUserProgramUpdates(ctx, conn, updateDir, schema, logDir); err != nil {
log.Fatalf("failed to import updates from %s: %v", updateDir, err)
}
log.Printf("%s is ready in schema %s using data from %s (updates: %s)", targetTableName, schema, csvPath, updateDir)
}
func env(key, fallback string) string {
if val := os.Getenv(key); val != "" {
return val
}
return fallback
}

View File

@@ -10,7 +10,7 @@ services:
- "${PORT:-8080}:8080"
environment:
- PORT=${PORT:-8080}
- GEOIP_DB_PATH=${GEOIP_DB_PATH:-/data/GeoLite2-City.mmdb}
- GEOIP_DB_PATH=${GEOIP_DB_PATH:-/app/initial_data/GeoLite2-City.mmdb}
- GEOIP_BACKEND=${GEOIP_BACKEND:-mmdb}
- GEOIP_LOADER_TIMEOUT=${GEOIP_LOADER_TIMEOUT:-30m}
- DATABASE_URL=postgres://${POSTGRES_USER}:${POSTGRES_PASSWORD}@${POSTGRES_HOST:-db}:${POSTGRES_PORT:-5432}/${POSTGRES_DB}?sslmode=disable
@@ -26,7 +26,9 @@ services:
exec geoip
'
volumes:
- ./GeoLite2-City.mmdb:/data/GeoLite2-City.mmdb:ro
- ./initial_data:/app/initial_data:ro
- ./update_data:/app/update_data
- ./log:/app/log
networks:
- geo-ip
@@ -41,7 +43,6 @@ services:
ports:
- "${POSTGRES_PORT:-5432}:5432"
volumes:
- ./GeoLite2-City.mmdb:/data/GeoLite2-City.mmdb:ro
- ./deploy/postgres/init:/docker-entrypoint-initdb.d:ro
- postgres_data:/var/lib/postgresql/data
healthcheck:

go.mod
View File

@@ -6,6 +6,8 @@ require (
github.com/gofiber/fiber/v2 v2.52.8
github.com/jackc/pgx/v5 v5.7.6
github.com/oschwald/geoip2-golang v1.9.0
github.com/oschwald/maxminddb-golang v1.11.0
github.com/robfig/cron/v3 v3.0.1
)
require (
@@ -18,7 +20,6 @@ require (
github.com/mattn/go-colorable v0.1.13 // indirect
github.com/mattn/go-isatty v0.0.20 // indirect
github.com/mattn/go-runewidth v0.0.16 // indirect
github.com/oschwald/maxminddb-golang v1.11.0 // indirect
github.com/rivo/uniseg v0.2.0 // indirect
github.com/valyala/bytebufferpool v1.0.0 // indirect
github.com/valyala/fasthttp v1.51.0 // indirect

go.sum
View File

@@ -32,6 +32,8 @@ github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZb
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY=
github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
github.com/robfig/cron/v3 v3.0.1 h1:WdRxkvbJztn8LMz/QEvLN5sBU+xKpSqwwUO1Pjr4qDs=
github.com/robfig/cron/v3 v3.0.1/go.mod h1:eQICP3HwyT7UooqI/z+Ov+PtYAWygg1TEWWzGIFLtro=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=

View File

Binary file; Size: 60 MiB (before and after).

File diff suppressed because it is too large.

View File

@@ -0,0 +1,507 @@
package importer
import (
"context"
"database/sql"
"encoding/csv"
"errors"
"fmt"
"io"
"os"
"path/filepath"
"regexp"
"slices"
"strconv"
"strings"
"time"
"github.com/jackc/pgx/v5"
)
const (
defaultSchema = "public"
replicaTable = "user_program_info_replica"
)
var (
kstLocation = func() *time.Location {
loc, err := time.LoadLocation("Asia/Seoul")
if err != nil {
return time.FixedZone("KST", 9*60*60)
}
return loc
}()
userProgramColumns = []string{
"id",
"product_name",
"login_id",
"user_employee_id",
"login_version",
"login_public_ip",
"login_local_ip",
"user_company",
"user_department",
"user_position",
"user_login_time",
"created_at",
"user_family_flag",
}
timeLayouts = []string{
"2006-01-02 15:04:05.000",
"2006-01-02 15:04:05",
}
)
// EnsureUserProgramReplica ensures the target table exists, then imports one or more CSVs.
// csvPath can point to a single file or a directory (all *.csv will be processed in name order).
// Logs are written to logDir for every processed file.
func EnsureUserProgramReplica(ctx context.Context, conn *pgx.Conn, csvPath, schema, logDir string) error {
if schema == "" {
schema = defaultSchema
}
if logDir == "" {
logDir = "log"
}
if err := createReplicaTable(ctx, conn, schema, replicaTable); err != nil {
return err
}
files, err := resolveCSVTargets(csvPath)
if err != nil {
return err
}
if len(files) == 0 {
return fmt.Errorf("no csv files found at %s", csvPath)
}
for _, file := range files {
if err := importSingle(ctx, conn, file, schema, logDir); err != nil {
return err
}
}
return nil
}
// ImportUserProgramUpdates imports all CSV files under updateDir (non-recursive) into an existing replica table.
// Each file is processed independently; failure stops the sequence and logs the error.
func ImportUserProgramUpdates(ctx context.Context, conn *pgx.Conn, updateDir, schema, logDir string) error {
if updateDir == "" {
return nil
}
files, err := resolveCSVTargets(updateDir)
if err != nil {
return err
}
if len(files) == 0 {
return nil
}
latestDate, err := latestCreatedDate(ctx, conn, schema, replicaTable)
if err != nil {
return err
}
for _, file := range files {
fileDate, err := dateFromFilename(file)
if err != nil {
_ = writeImportLog(logDir, importLog{
StartedAt: time.Now(),
CSVPath: file,
Status: "skipped",
Error: fmt.Sprintf("cannot parse date from filename: %v", err),
LatestDate: latestDate,
})
continue
}
if !fileDate.After(latestDate) {
_ = writeImportLog(logDir, importLog{
StartedAt: time.Now(),
CSVPath: file,
Status: "skipped",
Error: fmt.Sprintf("file date %s not after latest date %s", fileDate.Format("2006-01-02"), latestDate.Format("2006-01-02")),
LatestDate: latestDate,
})
continue
}
if err := importSingle(ctx, conn, file, schema, logDir); err != nil {
return err
}
latestDate = fileDate
}
return nil
}
func tableExists(ctx context.Context, conn *pgx.Conn, schema, table string) (bool, error) {
const q = `
SELECT EXISTS (
SELECT 1
FROM information_schema.tables
WHERE table_schema = $1 AND table_name = $2
);`
var exists bool
if err := conn.QueryRow(ctx, q, schema, table).Scan(&exists); err != nil {
return false, err
}
return exists, nil
}
func createReplicaTable(ctx context.Context, conn *pgx.Conn, schema, table string) error {
identifier := pgx.Identifier{schema, table}.Sanitize()
ddl := fmt.Sprintf(`
CREATE TABLE IF NOT EXISTS %s (
id bigint PRIMARY KEY,
product_name text,
login_id text,
user_employee_id text,
login_version text,
login_public_ip text,
login_local_ip text,
user_company text,
user_department text,
user_position text,
user_login_time timestamp,
created_at timestamp,
user_family_flag boolean
);`, identifier)
_, err := conn.Exec(ctx, ddl)
return err
}
type importResult struct {
rowsCopied int64
rowsUpserted int64
finishedAt time.Time
}
func copyAndUpsertCSV(ctx context.Context, conn *pgx.Conn, path, schema, table string) (importResult, error) {
res := importResult{}
f, err := os.Open(path)
if err != nil {
return res, err
}
defer f.Close()
reader := csv.NewReader(f)
reader.FieldsPerRecord = -1
header, err := reader.Read()
if err != nil {
return res, err
}
if len(header) != len(userProgramColumns) {
return res, fmt.Errorf("unexpected column count in CSV: got %d, want %d", len(header), len(userProgramColumns))
}
tx, err := conn.Begin(ctx)
if err != nil {
return res, err
}
defer func() {
_ = tx.Rollback(ctx)
}()
tempTable := fmt.Sprintf("%s_import_tmp", table)
if _, err := tx.Exec(ctx, fmt.Sprintf(`CREATE TEMP TABLE %s (LIKE %s INCLUDING ALL) ON COMMIT DROP;`, quoteIdent(tempTable), pgx.Identifier{schema, table}.Sanitize())); err != nil {
return res, err
}
source := &csvSource{
reader: reader,
}
copied, err := tx.CopyFrom(ctx, pgx.Identifier{tempTable}, userProgramColumns, source)
if err != nil {
return res, err
}
if copied == 0 {
return res, errors.New("no rows were copied from CSV")
}
quotedColumns := quoteColumns(userProgramColumns)
upsertSQL := fmt.Sprintf(`
INSERT INTO %s (%s)
SELECT %s FROM %s
ON CONFLICT (id) DO UPDATE SET
product_name = EXCLUDED.product_name,
login_id = EXCLUDED.login_id,
user_employee_id = EXCLUDED.user_employee_id,
login_version = EXCLUDED.login_version,
login_public_ip = EXCLUDED.login_public_ip,
login_local_ip = EXCLUDED.login_local_ip,
user_company = EXCLUDED.user_company,
user_department = EXCLUDED.user_department,
user_position = EXCLUDED.user_position,
user_login_time = EXCLUDED.user_login_time,
created_at = EXCLUDED.created_at,
user_family_flag = EXCLUDED.user_family_flag;
`, pgx.Identifier{schema, table}.Sanitize(), strings.Join(quotedColumns, ", "), strings.Join(quotedColumns, ", "), quoteIdent(tempTable))
upsertRes, err := tx.Exec(ctx, upsertSQL)
if err != nil {
return res, err
}
if err := tx.Commit(ctx); err != nil {
return res, err
}
res.rowsCopied = copied
res.rowsUpserted = upsertRes.RowsAffected()
res.finishedAt = time.Now()
return res, nil
}
type csvSource struct {
reader *csv.Reader
record []string
err error
}
func (s *csvSource) Next() bool {
if s.err != nil {
return false
}
rec, err := s.reader.Read()
if err != nil {
if errors.Is(err, io.EOF) {
return false
}
s.err = err
return false
}
s.record = rec
return true
}
func (s *csvSource) Values() ([]any, error) {
if len(s.record) != len(userProgramColumns) {
return nil, fmt.Errorf("unexpected record length: got %d, want %d", len(s.record), len(userProgramColumns))
}
id, err := strconv.ParseInt(s.record[0], 10, 64)
if err != nil {
return nil, fmt.Errorf("parse id: %w", err)
}
loginTime, err := parseTimestamp(s.record[10])
if err != nil {
return nil, fmt.Errorf("parse user_login_time: %w", err)
}
createdAt, err := parseTimestamp(s.record[11])
if err != nil {
return nil, fmt.Errorf("parse created_at: %w", err)
}
var familyFlag any
if v := s.record[12]; v == "" {
familyFlag = nil
} else {
switch v {
case "1", "true", "TRUE", "t", "T":
familyFlag = true
case "0", "false", "FALSE", "f", "F":
familyFlag = false
default:
parsed, err := strconv.ParseBool(v)
if err != nil {
return nil, fmt.Errorf("parse user_family_flag: %w", err)
}
familyFlag = parsed
}
}
return []any{
id,
nullOrString(s.record[1]),
nullOrString(s.record[2]),
nullOrString(s.record[3]),
nullOrString(s.record[4]),
nullOrString(s.record[5]),
nullOrString(s.record[6]),
nullOrString(s.record[7]),
nullOrString(s.record[8]),
nullOrString(s.record[9]),
loginTime,
createdAt,
familyFlag,
}, nil
}
func (s *csvSource) Err() error {
return s.err
}
func parseTimestamp(raw string) (any, error) {
if raw == "" {
return nil, nil
}
for _, layout := range timeLayouts {
if t, err := time.ParseInLocation(layout, raw, kstLocation); err == nil {
return t, nil
}
}
return nil, fmt.Errorf("unsupported timestamp format: %s", raw)
}
func nullOrString(val string) any {
if val == "" {
return nil
}
return val
}
func importSingle(ctx context.Context, conn *pgx.Conn, csvPath, schema, logDir string) error {
startedAt := time.Now()
res, err := copyAndUpsertCSV(ctx, conn, csvPath, schema, replicaTable)
logStatus := "succeeded"
logErrMsg := ""
if err != nil {
logStatus = "failed"
logErrMsg = err.Error()
}
_ = writeImportLog(logDir, importLog{
StartedAt: startedAt,
FinishedAt: res.finishedAt,
CSVPath: csvPath,
Status: logStatus,
RowsCopied: res.rowsCopied,
RowsUpserted: res.rowsUpserted,
Error: logErrMsg,
})
return err
}
func resolveCSVTargets(path string) ([]string, error) {
info, err := os.Stat(path)
if err != nil {
return nil, err
}
if info.IsDir() {
entries, err := os.ReadDir(path)
if err != nil {
return nil, err
}
var files []string
for _, e := range entries {
if e.IsDir() {
continue
}
if strings.HasSuffix(strings.ToLower(e.Name()), ".csv") {
files = append(files, filepath.Join(path, e.Name()))
}
}
slices.Sort(files)
return files, nil
}
return []string{path}, nil
}
type importLog struct {
StartedAt time.Time
FinishedAt time.Time
CSVPath string
Status string
RowsCopied int64
RowsUpserted int64
Error string
LatestDate time.Time
}
func writeImportLog(logDir string, entry importLog) error {
if err := os.MkdirAll(logDir, 0o755); err != nil {
return err
}
now := time.Now().In(kstLocation)
if entry.StartedAt.IsZero() {
entry.StartedAt = now
}
filename := fmt.Sprintf("user_program_import_%s.log", now.Format("20060102_150405"))
path := filepath.Join(logDir, filename)
f, err := os.Create(path)
if err != nil {
return err
}
defer f.Close()
start := entry.StartedAt.In(kstLocation).Format(time.RFC3339)
finish := ""
if !entry.FinishedAt.IsZero() {
finish = entry.FinishedAt.In(kstLocation).Format(time.RFC3339)
}
lines := []string{
fmt.Sprintf("status=%s", entry.Status),
fmt.Sprintf("csv_path=%s", entry.CSVPath),
fmt.Sprintf("started_at=%s", start),
fmt.Sprintf("finished_at=%s", finish),
fmt.Sprintf("rows_copied=%d", entry.RowsCopied),
fmt.Sprintf("rows_upserted=%d", entry.RowsUpserted),
}
if entry.Error != "" {
lines = append(lines, fmt.Sprintf("error=%s", entry.Error))
}
if !entry.LatestDate.IsZero() {
lines = append(lines, fmt.Sprintf("latest_date=%s", entry.LatestDate.In(kstLocation).Format("2006-01-02")))
}
for _, line := range lines {
if _, err := f.WriteString(line + "\n"); err != nil {
return err
}
}
return nil
}
func quoteIdent(s string) string {
return `"` + strings.ReplaceAll(s, `"`, `""`) + `"`
}
func quoteColumns(cols []string) []string {
out := make([]string, len(cols))
for i, c := range cols {
out[i] = quoteIdent(c)
}
return out
}
func latestCreatedDate(ctx context.Context, conn *pgx.Conn, schema, table string) (time.Time, error) {
var ts sql.NullTime
query := fmt.Sprintf("SELECT MAX(created_at) FROM %s", pgx.Identifier{schema, table}.Sanitize())
if err := conn.QueryRow(ctx, query).Scan(&ts); err != nil {
return time.Time{}, err
}
if !ts.Valid {
return time.Time{}, nil
}
return truncateToKSTDate(ts.Time), nil
}
func truncateToKSTDate(t time.Time) time.Time {
kst := t.In(kstLocation)
return time.Date(kst.Year(), kst.Month(), kst.Day(), 0, 0, 0, 0, kstLocation)
}
func dateFromFilename(path string) (time.Time, error) {
base := filepath.Base(path)
re := regexp.MustCompile(`(\d{8})`)
match := re.FindStringSubmatch(base)
if len(match) < 2 {
return time.Time{}, fmt.Errorf("no date in filename: %s", base)
}
return time.ParseInLocation("20060102", match[1], kstLocation)
}

View File

@@ -0,0 +1,95 @@
package schedule
import (
"context"
"errors"
"log"
"os"
"os/exec"
"time"
"github.com/robfig/cron/v3"
)
type Config struct {
CronExpr string
ScriptPath string
Logger *log.Logger
}
type Scheduler struct {
cron *cron.Cron
logger *log.Logger
}
// Start configures and starts the cron scheduler. It runs the given script at the
// specified cron expression (KST). The caller owns the returned scheduler and must
// call Stop on shutdown.
func Start(cfg Config) (*Scheduler, error) {
if cfg.CronExpr == "" {
return nil, errors.New("CronExpr is required")
}
if cfg.ScriptPath == "" {
return nil, errors.New("ScriptPath is required")
}
if cfg.Logger == nil {
cfg.Logger = log.Default()
}
if _, err := os.Stat(cfg.ScriptPath); err != nil {
return nil, err
}
kst, err := time.LoadLocation("Asia/Seoul")
if err != nil {
kst = time.FixedZone("KST", 9*60*60)
}
parser := cron.NewParser(cron.Minute | cron.Hour | cron.Dom | cron.Month | cron.Dow)
spec, err := parser.Parse(cfg.CronExpr)
if err != nil {
return nil, err
}
c := cron.New(cron.WithLocation(kst), cron.WithParser(parser))
c.Schedule(spec, cron.FuncJob(func() {
runScript(cfg.Logger, cfg.ScriptPath)
}))
c.Start()
cfg.Logger.Printf("scheduler started with cron=%s script=%s tz=%s", cfg.CronExpr, cfg.ScriptPath, kst)
return &Scheduler{
cron: c,
logger: cfg.Logger,
}, nil
}
// Stop halts the scheduler. It does not cancel a currently running job.
func (s *Scheduler) Stop() context.Context {
if s == nil || s.cron == nil {
return context.Background()
}
return s.cron.Stop()
}
func runScript(logger *log.Logger, script string) {
start := time.Now()
logger.Printf("scheduler: running %s", script)
cmd := exec.Command("/bin/bash", script)
cmd.Env = os.Environ()
out, err := cmd.CombinedOutput()
duration := time.Since(start)
if len(out) > 0 {
logger.Printf("scheduler: output:\n%s", string(out))
}
if err != nil {
logger.Printf("scheduler: %s failed after %s: %v", script, duration, err)
return
}
logger.Printf("scheduler: %s completed in %s", script, duration)
}

View File

@@ -0,0 +1,7 @@
status=failed
csv_path=import/user_program_info_20251208.csv
started_at=2025-12-09T18:47:58+09:00
finished_at=
rows_copied=0
rows_upserted=0
error=open import/user_program_info_20251208.csv: no such file or directory

View File

@@ -0,0 +1,67 @@
#!/usr/bin/env bash
set -euo pipefail
LOG_DIR="${USER_PROGRAM_IMPORT_LOG_DIR:-/app/log}"
UPDATE_DIR="${USER_PROGRAM_UPDATE_DIR:-/app/update_data}"
SCHEMA="${USER_PROGRAM_INFO_SCHEMA:-public}"
CSV_DIR="${USER_PROGRAM_INFO_CSV_DIR:-/app/initial_data}"
MYSQL_HOST="${USER_PROGRAM_INFO_HOST:?USER_PROGRAM_INFO_HOST is required}"
MYSQL_PORT="${USER_PROGRAM_INFO_PORT:-3306}"
MYSQL_USER="${USER_PROGRAM_INFO_USERNAME:?USER_PROGRAM_INFO_USERNAME is required}"
MYSQL_PASS="${USER_PROGRAM_INFO_PASSWORD:?USER_PROGRAM_INFO_PASSWORD is required}"
MYSQL_DB="${USER_PROGRAM_INFO_DB:-user_program_info}"
MYSQL_TABLE="${USER_PROGRAM_INFO_TABLE:-user_program_info}"
mkdir -p "${LOG_DIR}" "${UPDATE_DIR}"
# Target date: yesterday in KST unless USER_PROGRAM_TARGET_DATE=YYYY-MM-DD is provided.
TARGET_DATE="${USER_PROGRAM_TARGET_DATE:-$(TZ=Asia/Seoul date -d 'yesterday' +%Y-%m-%d)}"
TARGET_DATE_COMPACT="${TARGET_DATE//-/}"
OUT_FILE="${UPDATE_DIR}/user_program_info_${TARGET_DATE_COMPACT}.csv"
TMP_FILE="${OUT_FILE}.tmp"
QUERY=$(cat <<SQL
SET time_zone = '+00:00';
SELECT
id,
product_name,
login_id,
user_employee_id,
login_version,
login_public_ip,
login_local_ip,
user_company,
user_department,
user_position,
user_login_time,
created_at,
user_family_flag
FROM ${MYSQL_TABLE}
WHERE DATE(CONVERT_TZ(created_at, '+00:00', '+09:00')) = '${TARGET_DATE}';
SQL
)
echo "[scheduler] dumping data for ${TARGET_DATE} to ${OUT_FILE}"
# Convert the tab-separated mysql output into a headed CSV.
# The converter is passed with -c (not a here-doc) so the pipe stays on stdin,
# and TMP_FILE is passed explicitly because it is not exported.
mysql --host="${MYSQL_HOST}" --port="${MYSQL_PORT}" --user="${MYSQL_USER}" --password="${MYSQL_PASS}" \
--database="${MYSQL_DB}" --batch --raw --silent --skip-column-names -e "${QUERY}" \
| TMP_FILE="${TMP_FILE}" python3 -c '
import csv, sys, os

out_path = os.environ["TMP_FILE"]
with open(out_path, "w", newline="") as out:
    writer = csv.writer(out)
    writer.writerow([
        "id","product_name","login_id","user_employee_id","login_version",
        "login_public_ip","login_local_ip","user_company","user_department",
        "user_position","user_login_time","created_at","user_family_flag",
    ])
    for line in sys.stdin:
        row = line.rstrip("\n").split("\t")
        writer.writerow(row)
'
mv "${TMP_FILE}" "${OUT_FILE}"
echo "[scheduler] running import for ${OUT_FILE}"
DATABASE_URL="${DATABASE_URL:?DATABASE_URL is required}" USER_PROGRAM_UPDATE_DIR="${UPDATE_DIR}" USER_PROGRAM_IMPORT_LOG_DIR="${LOG_DIR}" \
user-program-import

View File

@@ -1,6 +1,6 @@
# TODO Log
- Updated at (KST): 2025-12-09 13:49:09 KST
- Updated at (KST): 2025-12-09 19:28:55 KST
## Completed
- [x] Decided the Go Fiber-based GeoIP API structure and implemented the entrypoint (`cmd/server`)
@@ -10,6 +10,9 @@
- [x] Switched the Dockerfile builder/runtime images to 1.25.5-trixie and removed unneeded packages
- [x] Wrote the README and added response samples
- [x] Added resolver unit tests (`internal/geo/resolver_test.go`)
- [x] Added the `user_program_info_replica` DDL/CSV importer (`id bigint`, text columns, KST timestamp parsing, bool flag); completed 2025-12-09 18:32 KST
- [x] Directory-based import of the initial/daily CSVs plus log file output (`log/`), and updated the upsert logic; completed 2025-12-09 19:06 KST
- [x] Added an in-process cron scheduler to the Fiber server (runs the previous-day dump script and applies `update_data`, KST cron support); completed 2025-12-09 19:28 KST
## Planned
- [x] Add a `postgres` service that installs `maxminddb_fdw` in a dedicated PostgreSQL Docker image (or build stage), mounts the `GeoLite2-City.mmdb` volume at `/data`, and exposes 5432 externally
@@ -28,3 +31,7 @@
- [ ] Remove the standalone loader service from compose and run the loader from the api entrypoint → document as a post-start hook and review the wait strategy
- [ ] Tune initial Postgres settings: raise `max_wal_size` to 4GB to avoid checkpoint storms during the initial bulk load (reflected in deploy/postgres/init/01_tuning.sql)
- [ ] api startup in compose is delayed while waiting for the loader to finish → relax to the loader `service_started` condition; document later whether API startup and data loading should run in parallel
- [ ] Design incremental backup of MySQL `user_program_info`: Postgres backup table DDL (same columns, PK=id, index on `created_at`), `login_public_ip varchar(45)`, UTC-based
- [ ] Create a `sync_meta(last_synced_at)` table and define the watermark query: `created_at > last_synced_at - interval '5 minutes'`, then update the metadata with `max(created_at)` (see the sketch after this list)
- [ ] Implement the incremental load pipeline: MySQL pull → Postgres upsert (ON CONFLICT id) in batches, with empty-batch handling, timezone conversion, and consistency logging
- [ ] Design operational triggers: default 15-minute cron, decide whether to add a manual API trigger (including health), and wire failure retries and alerting
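
For the incremental-sync items above, a minimal sketch of the watermark read and batched upsert, assuming a single-row `sync_meta(last_synced_at timestamp)` table and reusing `user_program_info_replica` as a stand-in for the planned backup table. The MySQL pull and the full column list are elided, and all names here are illustrative:

```go
// Sketch only: watermark-driven incremental sync into the Postgres backup table.
package backupsync

import (
	"context"
	"time"

	"github.com/jackc/pgx/v5"
)

// Row mirrors the MySQL user_program_info columns; only a few are shown here.
type Row struct {
	ID            int64
	LoginPublicIP string
	CreatedAt     time.Time
}

// LoadWatermark returns last_synced_at minus the 5-minute overlap window,
// assuming sync_meta holds exactly one row.
func LoadWatermark(ctx context.Context, conn *pgx.Conn) (time.Time, error) {
	var since time.Time
	err := conn.QueryRow(ctx,
		`SELECT last_synced_at - interval '5 minutes' FROM sync_meta`).Scan(&since)
	return since, err
}

// UpsertBatch writes one batch pulled from MySQL and advances the watermark
// to max(created_at). An empty batch leaves sync_meta untouched.
func UpsertBatch(ctx context.Context, conn *pgx.Conn, rows []Row) error {
	if len(rows) == 0 {
		return nil
	}
	tx, err := conn.Begin(ctx)
	if err != nil {
		return err
	}
	defer func() { _ = tx.Rollback(ctx) }()

	maxCreated := rows[0].CreatedAt
	for _, r := range rows {
		if _, err := tx.Exec(ctx,
			`INSERT INTO user_program_info_replica (id, login_public_ip, created_at)
			 VALUES ($1, $2, $3)
			 ON CONFLICT (id) DO UPDATE SET
			   login_public_ip = EXCLUDED.login_public_ip,
			   created_at      = EXCLUDED.created_at`,
			r.ID, r.LoginPublicIP, r.CreatedAt); err != nil {
			return err
		}
		if r.CreatedAt.After(maxCreated) {
			maxCreated = r.CreatedAt
		}
	}
	// GREATEST keeps the watermark monotonic even if batches arrive out of order.
	if _, err := tx.Exec(ctx,
		`UPDATE sync_meta SET last_synced_at = GREATEST(last_synced_at, $1)`, maxCreated); err != nil {
		return err
	}
	return tx.Commit(ctx)
}
```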