경로 통일

This commit is contained in:
Lectom C Han
2025-12-10 10:46:21 +09:00
parent b1a9204e22
commit 199bc29115
11 changed files with 612 additions and 359 deletions

View File

@@ -0,0 +1,35 @@
package importer
import (
"context"
"database/sql"
"fmt"
"github.com/jackc/pgx/v5"
)
// LatestID returns the maximum id in the replica table.
// An empty table (MAX(id) IS NULL) yields 0 with no error.
func LatestID(ctx context.Context, conn *pgx.Conn, schema, table string) (int64, error) {
	stmt := fmt.Sprintf("SELECT MAX(id) FROM %s", pgx.Identifier{schema, table}.Sanitize())
	var maxID sql.NullInt64
	err := conn.QueryRow(ctx, stmt).Scan(&maxID)
	switch {
	case err != nil:
		return 0, err
	case !maxID.Valid:
		// MAX over zero rows is NULL: treat as "no rows yet".
		return 0, nil
	default:
		return maxID.Int64, nil
	}
}
// CountUpToID returns the number of rows with id <= maxID.
//
// Identifiers are sanitized via pgx.Identifier; maxID is bound as a
// query parameter.
func CountUpToID(ctx context.Context, conn *pgx.Conn, schema, table string, maxID int64) (int64, error) {
	query := fmt.Sprintf("SELECT COUNT(*) FROM %s WHERE id <= $1", pgx.Identifier{schema, table}.Sanitize())
	// COUNT(*) always yields a non-NULL bigint, so scan straight into
	// int64 — the previous sql.NullInt64 dance was dead code.
	var count int64
	if err := conn.QueryRow(ctx, query, maxID).Scan(&count); err != nil {
		return 0, err
	}
	return count, nil
}

View File

@@ -20,7 +20,7 @@ import (
const (
defaultSchema = "public"
replicaTable = "user_program_info_replica"
ReplicaTable = "user_program_info_replica"
)
var (
@@ -63,7 +63,7 @@ func EnsureUserProgramReplica(ctx context.Context, conn *pgx.Conn, csvPath, sche
logDir = "log"
}
if err := createReplicaTable(ctx, conn, schema, replicaTable); err != nil {
if err := createReplicaTable(ctx, conn, schema, ReplicaTable); err != nil {
return err
}
@@ -97,38 +97,10 @@ func ImportUserProgramUpdates(ctx context.Context, conn *pgx.Conn, updateDir, sc
return nil
}
latestDate, err := latestCreatedDate(ctx, conn, schema, replicaTable)
if err != nil {
return err
}
for _, file := range files {
fileDate, err := dateFromFilename(file)
if err != nil {
_ = writeImportLog(logDir, importLog{
StartedAt: time.Now(),
CSVPath: file,
Status: "skipped",
Error: fmt.Sprintf("cannot parse date from filename: %v", err),
LatestDate: latestDate,
})
continue
}
if !fileDate.After(latestDate) {
_ = writeImportLog(logDir, importLog{
StartedAt: time.Now(),
CSVPath: file,
Status: "skipped",
Error: fmt.Sprintf("file date %s not after latest date %s", fileDate.Format("2006-01-02"), latestDate.Format("2006-01-02")),
LatestDate: latestDate,
})
continue
}
if err := importSingle(ctx, conn, file, schema, logDir); err != nil {
return err
}
latestDate = fileDate
}
return nil
}
@@ -362,7 +334,7 @@ func nullOrString(val string) any {
func importSingle(ctx context.Context, conn *pgx.Conn, csvPath, schema, logDir string) error {
startedAt := time.Now()
res, err := copyAndUpsertCSV(ctx, conn, csvPath, schema, replicaTable)
res, err := copyAndUpsertCSV(ctx, conn, csvPath, schema, ReplicaTable)
logStatus := "succeeded"
logErrMsg := ""
if err != nil {
@@ -479,7 +451,7 @@ func quoteColumns(cols []string) []string {
return out
}
func latestCreatedDate(ctx context.Context, conn *pgx.Conn, schema, table string) (time.Time, error) {
func LatestCreatedDate(ctx context.Context, conn *pgx.Conn, schema, table string) (time.Time, error) {
var ts sql.NullTime
query := fmt.Sprintf("SELECT MAX(created_at) FROM %s", pgx.Identifier{schema, table}.Sanitize())
if err := conn.QueryRow(ctx, query).Scan(&ts); err != nil {

View File

@@ -0,0 +1,138 @@
package userprogram
import (
"fmt"
"os"
"path/filepath"
"regexp"
"strconv"
"time"
)
const (
	// DefaultUpdateDir is the directory that receives daily dump CSVs.
	DefaultUpdateDir = "/update_data"
	// DefaultLogDir is the directory that receives import logs.
	DefaultLogDir = "/log"
	// DefaultSchema is the Postgres schema holding the replica table.
	DefaultSchema = "public"
	// DefaultInitialCSV is the bootstrap CSV used for the initial import.
	// NOTE(review): presumably baked into the image — confirm it exists at runtime.
	DefaultInitialCSV = "/initial_data/user_program_info_init_20251208.csv"
	// DefaultTable is the source MySQL table name.
	DefaultTable = "user_program_info"
	// DefaultDatabase is the source MySQL database name.
	DefaultDatabase = "user_program_info"
	// defaultTargetRange is the time layout (YYYYMMDD) used in dump
	// filenames; despite the name it is a format layout, not a range.
	defaultTargetRange = "20060102"
)
// MySQLConfig holds connection settings for the source MySQL database.
type MySQLConfig struct {
	Host     string // server hostname or IP
	Port     int    // TCP port (env default 3306)
	User     string
	Password string
	Database string // database name (defaults to DefaultDatabase)
	Table    string // source table name (defaults to DefaultTable)
}
// Paths groups the filesystem locations used by the sync process.
type Paths struct {
	UpdateDir  string // directory receiving daily dump CSVs
	LogDir     string // directory receiving import logs
	InitialCSV string // bootstrap CSV for the initial import
	Schema     string // Postgres schema of the replica table
}
// NewMySQLConfigFromEnv builds a MySQLConfig from USER_PROGRAM_INFO_* env
// vars. Host, username and password are required; the port defaults to 3306
// and database/table names fall back to package defaults.
func NewMySQLConfigFromEnv() (MySQLConfig, error) {
	port, err := strconv.Atoi(env("USER_PROGRAM_INFO_PORT", "3306"))
	if err != nil {
		return MySQLConfig{}, fmt.Errorf("invalid USER_PROGRAM_INFO_PORT: %w", err)
	}
	// Atoi happily accepts negatives and huge values; reject anything
	// outside the valid TCP port range up front.
	if port < 1 || port > 65535 {
		return MySQLConfig{}, fmt.Errorf("invalid USER_PROGRAM_INFO_PORT: %d out of range", port)
	}
	host, err := envRequiredValue("USER_PROGRAM_INFO_HOST")
	if err != nil {
		return MySQLConfig{}, err
	}
	user, err := envRequiredValue("USER_PROGRAM_INFO_USERNAME")
	if err != nil {
		return MySQLConfig{}, err
	}
	password, err := envRequiredValue("USER_PROGRAM_INFO_PASSWORD")
	if err != nil {
		return MySQLConfig{}, err
	}
	// envRequiredValue already rejects empty values, so the old trailing
	// "is any of these empty" re-check was unreachable and is dropped.
	return MySQLConfig{
		Host:     host,
		Port:     port,
		User:     user,
		Password: password,
		Database: env("USER_PROGRAM_INFO_DB", DefaultDatabase),
		Table:    env("USER_PROGRAM_INFO_TABLE", DefaultTable),
	}, nil
}
// NewPathsFromEnv resolves filesystem paths from environment variables and
// ensures the update and log directories exist.
func NewPathsFromEnv() (Paths, error) {
	p := Paths{
		UpdateDir:  env("USER_PROGRAM_UPDATE_DIR", DefaultUpdateDir),
		LogDir:     env("USER_PROGRAM_IMPORT_LOG_DIR", DefaultLogDir),
		InitialCSV: env("USER_PROGRAM_INFO_CSV", DefaultInitialCSV),
		Schema:     env("USER_PROGRAM_INFO_SCHEMA", DefaultSchema),
	}
	// Create the writable directories; an empty path means "disabled".
	ensure := func(dir string) error {
		if dir == "" {
			return nil
		}
		if err := os.MkdirAll(dir, 0o755); err != nil {
			return fmt.Errorf("create dir %s: %w", dir, err)
		}
		return nil
	}
	if err := ensure(p.UpdateDir); err != nil {
		return Paths{}, err
	}
	if err := ensure(p.LogDir); err != nil {
		return Paths{}, err
	}
	return p, nil
}
// ParseTargetDate parses raw as a YYYY-MM-DD date in KST. An empty string
// selects yesterday (KST) as the default target.
func ParseTargetDate(raw string) (time.Time, error) {
	if raw == "" {
		return yesterdayKST(), nil
	}
	t, err := time.ParseInLocation("2006-01-02", raw, kst())
	if err != nil {
		// Wrap the underlying parse error instead of discarding it, so
		// callers (and logs) can see why the value was rejected.
		return time.Time{}, fmt.Errorf("invalid date %q (expected YYYY-MM-DD): %w", raw, err)
	}
	return t, nil
}
// filenameDateRe matches the first 8-digit run (YYYYMMDD) in a file name.
// Compiled once at package scope instead of on every call.
var filenameDateRe = regexp.MustCompile(`(\d{8})`)

// DateFromFilename extracts an 8-digit YYYYMMDD date from the base name of
// path and returns it as a time in KST.
func DateFromFilename(path string) (time.Time, error) {
	base := filepath.Base(path)
	match := filenameDateRe.FindStringSubmatch(base)
	if len(match) < 2 {
		return time.Time{}, fmt.Errorf("no date in filename: %s", base)
	}
	return time.ParseInLocation(defaultTargetRange, match[1], kst())
}
// yesterdayKST returns midnight (00:00 KST) of the previous calendar day.
func yesterdayKST() time.Time {
	y := time.Now().In(kst()).AddDate(0, 0, -1)
	return time.Date(y.Year(), y.Month(), y.Day(), 0, 0, 0, 0, kst())
}
func kst() *time.Location {
loc, err := time.LoadLocation("Asia/Seoul")
if err != nil {
return time.FixedZone("KST", 9*60*60)
}
return loc
}
// env returns the value of key, or fallback when the variable is unset or
// empty.
func env(key, fallback string) string {
	v := os.Getenv(key)
	if v == "" {
		return fallback
	}
	return v
}
// envRequiredValue returns the value of key, or an error when the variable
// is unset or empty.
func envRequiredValue(key string) (string, error) {
	if v := os.Getenv(key); v != "" {
		return v, nil
	}
	return "", fmt.Errorf("%s is required", key)
}

View File

@@ -0,0 +1,243 @@
package userprogram
import (
	"context"
	"database/sql"
	"encoding/csv"
	"fmt"
	"net"
	"os"
	"path/filepath"
	"strconv"
	"time"

	"github.com/go-sql-driver/mysql"
)
// Dumper streams rows from the source MySQL table into CSV dump files.
type Dumper struct {
	cfg       MySQLConfig // source connection settings
	updateDir string      // destination directory for dump CSVs
	db        *sql.DB     // pooled MySQL handle; released by Close
}
// NewDumper opens a pooled MySQL connection for cfg and ensures updateDir
// exists (falling back to DefaultUpdateDir). The caller must Close the
// returned Dumper.
func NewDumper(cfg MySQLConfig, updateDir string) (*Dumper, error) {
	if updateDir == "" {
		updateDir = DefaultUpdateDir
	}
	if err := os.MkdirAll(updateDir, 0o755); err != nil {
		return nil, err
	}
	dsn := (&mysql.Config{
		User:   cfg.User,
		Passwd: cfg.Password,
		Net:    "tcp",
		Addr:   netAddr(cfg.Host, cfg.Port),
		DBName: cfg.Database,
		// time_zone is set in the DSN rather than via a one-off Exec:
		// db.Exec("SET time_zone ...") only configures the single pooled
		// connection that happens to run it, leaving the other pool
		// connections on the server default. A DSN param applies to every
		// new connection.
		Params: map[string]string{
			"parseTime": "true",
			"loc":       "UTC",
			"charset":   "utf8mb4",
			"time_zone": "'+00:00'",
		},
		AllowNativePasswords: true,
	}).FormatDSN()
	db, err := sql.Open("mysql", dsn)
	if err != nil {
		return nil, fmt.Errorf("open mysql: %w", err)
	}
	db.SetMaxOpenConns(5)
	db.SetMaxIdleConns(2)
	db.SetConnMaxIdleTime(5 * time.Minute)
	// sql.Open does not dial; verify connectivity before returning.
	if err := db.Ping(); err != nil {
		_ = db.Close()
		return nil, fmt.Errorf("ping mysql: %w", err)
	}
	return &Dumper{
		cfg:       cfg,
		updateDir: updateDir,
		db:        db,
	}, nil
}
// Close releases the underlying MySQL pool; it is a no-op when the pool
// was never opened.
func (d *Dumper) Close() error {
	if d.db != nil {
		return d.db.Close()
	}
	return nil
}
// MaxIDUntil returns the maximum id with created_at up to and including cutoff (KST).
//
// created_at is stored in UTC and converted to +09:00 before the calendar-
// date comparison; cutoff is formatted as a KST date.
func (d *Dumper) MaxIDUntil(ctx context.Context, cutoff time.Time) (int64, error) {
	// NOTE(review): d.cfg.Table is interpolated into the SQL text; it comes
	// from configuration, not user input — keep it that way.
	query := fmt.Sprintf(`SELECT COALESCE(MAX(id), 0) FROM %s WHERE DATE(CONVERT_TZ(created_at, '+00:00', '+09:00')) <= ?`, d.cfg.Table)
	// COALESCE(..., 0) guarantees a non-NULL result, so the previous
	// sql.NullInt64 validity check was dead code.
	var maxID int64
	if err := d.db.QueryRowContext(ctx, query, cutoff.In(kst()).Format("2006-01-02")).Scan(&maxID); err != nil {
		return 0, err
	}
	return maxID, nil
}
// CountUpToID returns count(*) where id <= maxID in source.
func (d *Dumper) CountUpToID(ctx context.Context, maxID int64) (int64, error) {
	query := fmt.Sprintf(`SELECT COUNT(*) FROM %s WHERE id <= ?`, d.cfg.Table)
	// COUNT(*) is always non-NULL, so scan straight into int64 — the
	// sql.NullInt64 handling was unnecessary.
	var count int64
	if err := d.db.QueryRowContext(ctx, query, maxID).Scan(&count); err != nil {
		return 0, err
	}
	return count, nil
}
// DumpRange exports rows with id in (startID, endID] to a CSV file.
//
// The data is written to a ".tmp" sibling and renamed into place so readers
// never observe a partial file. It returns the final path, or "" when the
// range is empty.
func (d *Dumper) DumpRange(ctx context.Context, startID, endID int64, label time.Time) (string, error) {
	if endID <= startID {
		return "", nil
	}
	query := fmt.Sprintf(`
SELECT
	id,
	product_name,
	login_id,
	user_employee_id,
	login_version,
	login_public_ip,
	login_local_ip,
	user_company,
	user_department,
	user_position,
	user_login_time,
	created_at,
	user_family_flag
FROM %s
WHERE id > ? AND id <= ?
ORDER BY id;`, d.cfg.Table)
	rows, err := d.db.QueryContext(ctx, query, startID, endID)
	if err != nil {
		return "", fmt.Errorf("query: %w", err)
	}
	defer rows.Close()

	filename := fmt.Sprintf("user_program_info_%s.csv", label.In(kst()).Format(defaultTargetRange))
	outPath := filepath.Join(d.updateDir, filename)
	tmpPath := outPath + ".tmp"
	if err := writeRowsCSV(tmpPath, rows); err != nil {
		// Remove the partial temp file so failed runs don't leak *.tmp.
		_ = os.Remove(tmpPath)
		return "", err
	}
	if err := os.Rename(tmpPath, outPath); err != nil {
		_ = os.Remove(tmpPath)
		return "", err
	}
	return outPath, nil
}

// writeRowsCSV streams rows into a CSV file at path (header row first).
// The file is closed — with its error checked — before returning, so the
// subsequent rename only happens on fully written data.
func writeRowsCSV(path string, rows *sql.Rows) (err error) {
	f, err := os.Create(path)
	if err != nil {
		return err
	}
	defer func() {
		// Surface the Close error unless an earlier error already won.
		if cerr := f.Close(); cerr != nil && err == nil {
			err = cerr
		}
	}()
	writer := csv.NewWriter(f)
	header := []string{
		"id",
		"product_name",
		"login_id",
		"user_employee_id",
		"login_version",
		"login_public_ip",
		"login_local_ip",
		"user_company",
		"user_department",
		"user_position",
		"user_login_time",
		"created_at",
		"user_family_flag",
	}
	if err := writer.Write(header); err != nil {
		return err
	}
	for rows.Next() {
		record, err := scanRow(rows)
		if err != nil {
			return err
		}
		if err := writer.Write(record); err != nil {
			return err
		}
	}
	if err := rows.Err(); err != nil {
		return err
	}
	writer.Flush()
	return writer.Error()
}
// scanRow converts the current row of the dump query into a CSV record.
// Column order must match the SELECT list in DumpRange: id first, then
// twelve nullable string columns.
func scanRow(rows *sql.Rows) ([]string, error) {
	var id sql.NullInt64
	// Every non-id column is read as a nullable string; NULL maps to "".
	cols := make([]sql.NullString, 12)
	dest := make([]any, 0, 13)
	dest = append(dest, &id)
	for i := range cols {
		dest = append(dest, &cols[i])
	}
	if err := rows.Scan(dest...); err != nil {
		return nil, err
	}
	if !id.Valid {
		return nil, fmt.Errorf("row missing id")
	}
	record := make([]string, 0, 13)
	record = append(record, strconv.FormatInt(id.Int64, 10))
	for _, c := range cols {
		record = append(record, nullToString(c))
	}
	return record, nil
}
func nullToString(v sql.NullString) string {
if v.Valid {
return v.String
}
return ""
}
// netAddr formats host and port as a dialable "host:port" address.
// net.JoinHostPort is used so IPv6 literal hosts are bracketed correctly
// (e.g. "[::1]:3306"); plain Sprintf would produce the unparseable
// "::1:3306".
func netAddr(host string, port int) string {
	return net.JoinHostPort(host, strconv.Itoa(port))
}

View File

@@ -0,0 +1,118 @@
package userprogram
import (
"context"
"fmt"
"log"
"time"
"github.com/jackc/pgx/v5"
"geoip-rest/internal/importer"
)
// SyncConfig carries everything Sync needs: the MySQL source, the Postgres
// target, filesystem paths, and a logger. Zero-valued optional fields are
// filled in by defaults().
type SyncConfig struct {
	MySQL       MySQLConfig // source MySQL connection settings
	DatabaseURL string      // Postgres connection URL for the replica
	InitialCSV  string      // bootstrap CSV (defaults to DefaultInitialCSV)
	UpdateDir   string      // dump directory (defaults to DefaultUpdateDir)
	LogDir      string      // import-log directory (defaults to DefaultLogDir)
	Schema      string      // Postgres schema (defaults to DefaultSchema)
	Logger      *log.Logger // defaults to log.Default()
}
// defaults fills any unset optional fields with the package defaults.
func (c *SyncConfig) defaults() {
	fallback := func(field *string, def string) {
		if *field == "" {
			*field = def
		}
	}
	fallback(&c.InitialCSV, DefaultInitialCSV)
	fallback(&c.UpdateDir, DefaultUpdateDir)
	fallback(&c.LogDir, DefaultLogDir)
	fallback(&c.Schema, DefaultSchema)
	if c.Logger == nil {
		c.Logger = log.Default()
	}
}
// Sync ensures replica table exists and imports initial data, then dumps and imports
// updates using the primary key high-water mark up to yesterday (KST).
func Sync(ctx context.Context, cfg SyncConfig) error {
	cfg.defaults()
	// Source side: MySQL pool we dump new rows from.
	dumper, err := NewDumper(cfg.MySQL, cfg.UpdateDir)
	if err != nil {
		return fmt.Errorf("init dumper: %w", err)
	}
	defer dumper.Close()
	// Target side: Postgres replica we import into.
	conn, err := pgx.Connect(ctx, cfg.DatabaseURL)
	if err != nil {
		return fmt.Errorf("connect postgres: %w", err)
	}
	// Close with a fresh context so the connection is still released when
	// ctx has already been cancelled.
	defer conn.Close(context.Background())
	if err := importer.EnsureUserProgramReplica(ctx, conn, cfg.InitialCSV, cfg.Schema, cfg.LogDir); err != nil {
		return fmt.Errorf("ensure replica: %w", err)
	}
	// High-water mark: the largest id already present in the replica.
	lastID, err := importer.LatestID(ctx, conn, cfg.Schema, importer.ReplicaTable)
	if err != nil {
		return fmt.Errorf("read latest id: %w", err)
	}
	endDate := yesterdayKST()
	// Upper bound: the largest upstream id created up to yesterday (KST).
	upperID, err := dumper.MaxIDUntil(ctx, endDate)
	if err != nil {
		return fmt.Errorf("read upstream max id: %w", err)
	}
	if upperID <= lastID {
		cfg.Logger.Printf("no dump needed (last_id=%d upstream_max=%d)", lastID, upperID)
		return nil
	}
	cfg.Logger.Printf("dumping ids (%d, %d] to %s", lastID, upperID, cfg.UpdateDir)
	csvPath, err := dumper.DumpRange(ctx, lastID, upperID, endDate)
	if err != nil {
		return fmt.Errorf("dump range: %w", err)
	}
	if csvPath == "" {
		// DumpRange returns "" when the id range contained no rows.
		cfg.Logger.Printf("no rows dumped (last_id=%d upstream_max=%d)", lastID, upperID)
		return nil
	}
	if err := importer.ImportUserProgramUpdates(ctx, conn, csvPath, cfg.Schema, cfg.LogDir); err != nil {
		return fmt.Errorf("import updates: %w", err)
	}
	cfg.Logger.Printf("sync complete (last_id=%d -> %d)", lastID, upperID)
	// Verification is advisory: a count mismatch is logged as a warning,
	// never returned, so a completed import is not failed retroactively.
	if err := verifyCounts(ctx, cfg, dumper, conn, upperID); err != nil {
		cfg.Logger.Printf("sync verification warning: %v", err)
	}
	return nil
}
// toKST converts t to the Asia/Seoul (KST) location.
func toKST(t time.Time) time.Time {
	loc := kst()
	return t.In(loc)
}
// verifyCounts compares row counts up to upperID between the MySQL source
// and the Postgres replica, returning an error when they diverge.
func verifyCounts(ctx context.Context, cfg SyncConfig, dumper *Dumper, conn *pgx.Conn, upperID int64) error {
	src, err := dumper.CountUpToID(ctx, upperID)
	if err != nil {
		return fmt.Errorf("source count: %w", err)
	}
	dst, err := importer.CountUpToID(ctx, conn, cfg.Schema, importer.ReplicaTable, upperID)
	if err != nil {
		return fmt.Errorf("target count: %w", err)
	}
	if src != dst {
		return fmt.Errorf("count mismatch up to id %d (source=%d target=%d)", upperID, src, dst)
	}
	return nil
}