1
0
forked from baron/baron-sso
Files
baron-sso/backend/cmd/server/health_monitor.go
2026-01-28 20:07:52 +09:00

133 lines
2.5 KiB
Go

package main
import (
"context"
"fmt"
"log/slog"
"net/http"
"sync"
"time"
)
type HTTPProbe struct {
name string
url string
interval time.Duration
timeout time.Duration
client *http.Client
mu sync.RWMutex
status string
lastError string
lastChecked time.Time
lastSuccess time.Time
}
type ProbeSnapshot struct {
Status string
Error string
LastChecked time.Time
LastSuccess time.Time
}
func NewHTTPProbe(name, url string, interval, timeout time.Duration) *HTTPProbe {
if interval <= 0 {
interval = 10 * time.Second
}
if timeout <= 0 {
timeout = 2 * time.Second
}
return &HTTPProbe{
name: name,
url: url,
interval: interval,
timeout: timeout,
client: &http.Client{
Timeout: timeout,
},
}
}
// Start는 프로브를 백그라운드에서 주기적으로 실행합니다.
func (p *HTTPProbe) Start() {
go func() {
p.checkOnce()
ticker := time.NewTicker(p.interval)
defer ticker.Stop()
for range ticker.C {
p.checkOnce()
}
}()
}
func (p *HTTPProbe) Snapshot() ProbeSnapshot {
p.mu.RLock()
defer p.mu.RUnlock()
return ProbeSnapshot{
Status: p.status,
Error: p.lastError,
LastChecked: p.lastChecked,
LastSuccess: p.lastSuccess,
}
}
func (p *HTTPProbe) StatusText() string {
s := p.Snapshot()
if s.Status == "ok" {
return "ok"
}
if s.Status == "" {
return "unknown"
}
if s.Error == "" {
return "error"
}
return "error: " + s.Error
}
func (p *HTTPProbe) checkOnce() {
ctx, cancel := context.WithTimeout(context.Background(), p.timeout)
defer cancel()
req, err := http.NewRequestWithContext(ctx, http.MethodGet, p.url, nil)
if err != nil {
p.update("error", fmt.Sprintf("request build failed: %v", err), false)
return
}
resp, err := p.client.Do(req)
if err != nil {
p.update("error", err.Error(), false)
return
}
defer resp.Body.Close()
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
p.update("error", fmt.Sprintf("status=%d", resp.StatusCode), false)
return
}
p.update("ok", "", true)
}
func (p *HTTPProbe) update(status, errMsg string, success bool) {
p.mu.Lock()
prevStatus := p.status
p.status = status
p.lastError = errMsg
p.lastChecked = time.Now()
if success {
p.lastSuccess = p.lastChecked
}
p.mu.Unlock()
if prevStatus == status {
return
}
if status == "ok" {
slog.Info("Service probe recovered", "name", p.name, "url", p.url)
return
}
slog.Error("Service probe failed", "name", p.name, "url", p.url, "error", errMsg)
}