feat: MySQL DB 정규화(Master/History) 및 시계열 데이터 수집 시스템 통합

1. 마스터/히스토리 테이블 분리 및 마이그레이션 완료
2. 날짜별 데이터 축적 및 대시보드 필터링 기능 추가
3. Playwright 수집 로직(날짜필터, 좌표클릭, 정밀합산) 완전 복구
This commit is contained in:
2026-03-10 16:24:13 +09:00
parent 743cce543b
commit 4a995c11f4
9 changed files with 268 additions and 89 deletions

Binary file not shown.

View File

@@ -96,27 +96,26 @@ def crawler_thread_worker(msg_queue, user_id, password):
await page.wait_for_selector("h4.list__contents_aria_group_body_list_item_label", timeout=60000)
await asyncio.sleep(3)
# [Phase 1] DB 기초 정보 동기화 (엄격한 매칭)
# [Phase 1] DB 기초 정보 동기화 (마스터 테이블)
if captured_data["project_list"]:
conn = get_db_connection()
try:
with conn.cursor() as cursor:
for p_info in captured_data["project_list"]:
p_nm = p_info.get("project_nm")
try:
sql = """
INSERT INTO overseas_projects (project_id, project_nm, short_nm, master, continent, country)
INSERT INTO projects_master (project_id, project_nm, short_nm, master, continent, country)
VALUES (%s, %s, %s, %s, %s, %s)
ON DUPLICATE KEY UPDATE
project_id = VALUES(project_id), project_nm = VALUES(project_nm),
short_nm = VALUES(short_nm), master = VALUES(master),
continent = VALUES(continent), country = VALUES(country)
project_nm = VALUES(project_nm), short_nm = VALUES(short_nm),
master = VALUES(master), continent = VALUES(continent), country = VALUES(country)
"""
cursor.execute(sql, (p_info.get("project_id"), p_nm, p_info.get("short_nm", "").strip(),
p_info.get("master"), p_info.get("large_class"), p_info.get("mid_class")))
cursor.execute(sql, (p_info.get("project_id"), p_info.get("project_nm"),
p_info.get("short_nm", "").strip(), p_info.get("master"),
p_info.get("large_class"), p_info.get("mid_class")))
except: continue
conn.commit()
msg_queue.put(json.dumps({'type': 'log', 'message': f'DB 기초 정보 동기화 완료 ({len(captured_data["project_list"])}개)'}))
msg_queue.put(json.dumps({'type': 'log', 'message': f'DB 마스터 정보 동기화 완료.'}))
finally: conn.close()
# [Phase 2] h4 태그 기반 수집 루프
@@ -207,15 +206,21 @@ def crawler_thread_worker(msg_queue, user_id, password):
msg_queue.put(json.dumps({'type': 'log', 'message': f' - [구성] 데이터 채택 성공: ...{captured_data.get("_tree_url", "")[-40:]}'}))
msg_queue.put(json.dumps({'type': 'log', 'message': f' - [구성] 최종 정밀 합산 성공 ({file_count}개)'}))
# 4. DB 실시간 저장 (ID 기반)
# 4. DB 실시간 저장 (히스토리 테이블)
if current_p_id:
conn = get_db_connection()
try:
with conn.cursor() as cursor:
sql = "UPDATE overseas_projects SET recent_log = %s, file_count = %s WHERE project_id = %s"
cursor.execute(sql, (recent_log, file_count, current_p_id))
# 오늘 날짜 히스토리 데이터 삽입 또는 업데이트
sql = """
INSERT INTO projects_history (project_id, crawl_date, recent_log, file_count)
VALUES (%s, CURRENT_DATE(), %s, %s)
ON DUPLICATE KEY UPDATE
recent_log = VALUES(recent_log), file_count = VALUES(file_count)
"""
cursor.execute(sql, (current_p_id, recent_log, file_count))
conn.commit()
msg_queue.put(json.dumps({'type': 'log', 'message': f' - [DB] 업데이트 완료 (ID: {current_p_id})'}))
msg_queue.put(json.dumps({'type': 'log', 'message': f' - [DB] 히스토리 업데이트 완료 (ID: {current_p_id})'}))
finally: conn.close()
await page.goto("https://overseas.projectmastercloud.com/dashboard", wait_until="domcontentloaded")

View File

@@ -18,27 +18,60 @@ const continentOrder = {
/**
 * Dashboard entry point.
 *
 * 1. Asks the server for the list of crawl dates and, when at least one is
 *    available, replaces the contents of `.base-date-info` with a `<select>`
 *    that reloads the dashboard for the chosen date.
 * 2. Loads the data for the most recent date.
 *
 * Does nothing when the `#projectAccordion` container is not on the page.
 * A failure while building the date selector is logged and does not prevent
 * the default data load.
 */
async function init() {
    const container = document.getElementById('projectAccordion');
    if (!container) return;

    // 1. Fetch the available dates and build the date selector.
    try {
        const datesRes = await fetch('/available-dates');
        const dates = await datesRes.json();

        // On failure the endpoint answers with an `{error: ...}` object rather
        // than an array, so only a non-empty array produces a selector.
        if (Array.isArray(dates) && dates.length > 0) {
            const baseDateInfo = document.querySelector('.base-date-info');
            if (baseDateInfo) {
                // Build the control with DOM APIs instead of string-concatenated
                // HTML so server-provided values can never be parsed as markup.
                const selector = document.createElement('select');
                selector.id = 'dateSelector';
                selector.style.cssText = 'margin-left:10px; border:none; background:none; font-weight:700; cursor:pointer;';

                dates.forEach(d => {
                    const option = document.createElement('option');
                    option.value = d;
                    option.textContent = d;
                    selector.appendChild(option);
                });
                selector.addEventListener('change', () => loadDataByDate(selector.value));

                // Replace the static base-date text with the selector.
                baseDateInfo.textContent = '기준날짜: ';
                baseDateInfo.appendChild(selector);
            }
        }
    } catch (e) {
        console.error("날짜 목록 로드 실패:", e);
    }

    // 2. Load the default data set (latest date). The first <option> is the
    //    newest date, so the selector and the rendered data agree.
    await loadDataByDate();
}
/**
 * Fetch the project list for one crawl date and render it.
 *
 * @param {string} [selectedDate=""] Date string as offered by the date
 *     selector (e.g. "2026.03.10"). When empty, the server picks the most
 *     recent crawl date.
 * @returns {Promise<void>} Resolves after rendering, or after the failure has
 *     been logged and reported to the user; it never rejects.
 */
async function loadDataByDate(selectedDate = "") {
    // A timestamp is always sent so that a re-crawl on the same day is not
    // hidden by a cached response; URLSearchParams also encodes the date.
    const params = new URLSearchParams({ t: String(Date.now()) });
    if (selectedDate) params.set('date', selectedDate);

    try {
        const response = await fetch(`/project-data?${params}`);
        if (!response.ok) throw new Error(`HTTP ${response.status}`);

        const data = await response.json();
        if (data.error) throw new Error(data.error);

        // Show the date the server actually used while the static
        // `#baseDate` element is still on the page (it is gone once the
        // date selector has replaced it).
        const baseDateEl = document.getElementById('baseDate');
        if (baseDateEl && data.last_updated) {
            baseDateEl.textContent = data.last_updated;
        }

        rawData = data.projects || [];
        renderDashboard(rawData);
    } catch (e) {
        console.error("데이터 로드 실패:", e);
        alert("데이터를 가져오는 데 실패했습니다.");
    }
}
function renderDashboard(data) {
const container = document.getElementById('projectAccordion');
container.innerHTML = ''; // 초기화
const groupedData = {};
rawData.forEach((item, index) => {
const projectName = item[0];
// DB에서 넘어온 대륙과 국가 정보 사용 (item[5], item[6])
let continent = item[5] || "기타";
data.forEach((item, index) => {
let continent = item[5] || "기기타";
let country = item[6] || "미분류";
if (!groupedData[continent]) groupedData[continent] = {};

43
migrate_db_history.py Normal file
View File

@@ -0,0 +1,43 @@
import pymysql
import os
def get_db():
    """Open a new PyMySQL connection for the migration.

    Connection settings are read from the ``DB_HOST``, ``DB_USER``,
    ``DB_PASSWORD`` and ``DB_NAME`` environment variables. When a variable is
    not set, the value that used to be hard-coded here is used, so existing
    setups keep working unchanged.

    Returns:
        A ``pymysql`` connection using the library's default cursor class.
        The caller is responsible for closing it.
    """
    # NOTE(review): the fallback password is kept only for backward
    # compatibility; set DB_PASSWORD and remove the literal from the repository.
    return pymysql.connect(
        host=os.environ.get('DB_HOST', 'localhost'),
        user=os.environ.get('DB_USER', 'root'),
        password=os.environ.get('DB_PASSWORD', '45278434'),
        database=os.environ.get('DB_NAME', 'crawling'),
        charset='utf8mb4',
    )
def migrate_to_timeseries():
    """Turn ``overseas_projects`` into a one-row-per-project-per-day table.

    Steps, in order:

    1. Drop the index named ``project_id`` so the same project may appear on
       several dates.
    2. Add a ``crawl_date`` DATE column after ``project_id`` if it is missing.
    3. Back-fill ``crawl_date`` with ``DATE(updated_at)`` on rows where it is
       still NULL.
    4. Add the composite unique index ``idx_project_date`` on
       ``(project_id, crawl_date)``.

    Steps 1 and 4 are best-effort so the script can be re-run: a database
    error there is reported on stdout and the migration continues. Errors in
    steps 2 and 3 propagate to the caller. The connection is always closed.
    """
    conn = get_db()
    try:
        with conn.cursor() as cursor:
            # 1. Remove the old unique constraint (allows duplicate project_id).
            try:
                cursor.execute("ALTER TABLE overseas_projects DROP INDEX project_id")
                print(">>> 기존 project_id 고유 제약 제거")
            except pymysql.MySQLError as exc:
                # Typically "index does not exist" on a re-run; report it
                # instead of hiding it so real failures stay visible.
                print(f">>> project_id 고유 제약 제거 건너뜀: {exc}")

            # 2. Add the crawl_date column used to tell daily snapshots apart.
            cursor.execute("DESCRIBE overseas_projects")
            cols = [row[0] for row in cursor.fetchall()]
            if 'crawl_date' not in cols:
                cursor.execute("ALTER TABLE overseas_projects ADD COLUMN crawl_date DATE AFTER project_id")
                print(">>> crawl_date 컬럼 추가")

            # 3. Back-fill crawl_date from each row's updated_at date.
            cursor.execute("UPDATE overseas_projects SET crawl_date = DATE(updated_at) WHERE crawl_date IS NULL")

            # 4. Composite unique key: a second crawl on the same day can
            #    overwrite the row, a crawl on another day creates a new one.
            try:
                cursor.execute("ALTER TABLE overseas_projects ADD UNIQUE INDEX idx_project_date (project_id, crawl_date)")
                print(">>> 복합 고유 제약(project_id + crawl_date) 추가 완료")
            except pymysql.MySQLError as exc:
                # Either the index already exists or existing rows violate it;
                # the message tells the operator which one.
                print(f">>> 복합 고유 제약 추가 건너뜀: {exc}")

        conn.commit()
        print(">>> DB 시계열 마이그레이션 성공!")
    finally:
        conn.close()


if __name__ == "__main__":
    migrate_to_timeseries()

67
migrate_normalized.py Normal file
View File

@@ -0,0 +1,67 @@
import pymysql
import os
def get_db():
    """Open a new PyMySQL connection whose cursors return rows as dicts.

    Connection settings are read from the ``DB_HOST``, ``DB_USER``,
    ``DB_PASSWORD`` and ``DB_NAME`` environment variables. When a variable is
    not set, the value that used to be hard-coded here is used, so existing
    setups keep working unchanged.

    Returns:
        A ``pymysql`` connection configured with ``DictCursor``. The caller
        is responsible for closing it.
    """
    # NOTE(review): the fallback password is kept only for backward
    # compatibility; set DB_PASSWORD and remove the literal from the repository.
    return pymysql.connect(
        host=os.environ.get('DB_HOST', 'localhost'),
        user=os.environ.get('DB_USER', 'root'),
        password=os.environ.get('DB_PASSWORD', '45278434'),
        database=os.environ.get('DB_NAME', 'crawling'),
        charset='utf8mb4',
        cursorclass=pymysql.cursors.DictCursor,
    )
def migrate_to_normalized_tables():
    """Split ``overseas_projects`` into ``projects_master`` and ``projects_history``.

    Steps, in order:

    1. Create ``projects_master`` (one row per project) if it does not exist.
    2. Create ``projects_history`` (one row per project per crawl date, with a
       foreign key to the master table) if it does not exist.
    3. Copy the existing rows of ``overseas_projects`` into both tables with
       ``INSERT IGNORE``, so rows that already exist are skipped and the
       script can be re-run.

    The legacy ``overseas_projects`` table is left in place; drop it manually
    once the copied data has been verified. Any database error propagates to
    the caller; the connection is always closed.
    """
    conn = get_db()
    try:
        with conn.cursor() as cursor:
            # 1. Master table: attributes that identify a project.
            cursor.execute("""
                CREATE TABLE IF NOT EXISTS projects_master (
                    project_id VARCHAR(100) PRIMARY KEY,
                    project_nm VARCHAR(255) NOT NULL,
                    short_nm VARCHAR(255),
                    department VARCHAR(255),
                    continent VARCHAR(100),
                    country VARCHAR(100),
                    master VARCHAR(100),
                    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP
                ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
            """)

            # 2. History table: values that change from one crawl to the next.
            cursor.execute("""
                CREATE TABLE IF NOT EXISTS projects_history (
                    id INT AUTO_INCREMENT PRIMARY KEY,
                    project_id VARCHAR(100) NOT NULL,
                    crawl_date DATE NOT NULL,
                    recent_log VARCHAR(255),
                    file_count INT DEFAULT 0,
                    recorded_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                    UNIQUE KEY idx_proj_date (project_id, crawl_date),
                    FOREIGN KEY (project_id) REFERENCES projects_master(project_id) ON DELETE CASCADE
                ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
            """)

            # 3-1. Copy master data. Must run before 3-2 because the history
            #      rows reference projects_master through the foreign key.
            cursor.execute("""
                INSERT IGNORE INTO projects_master (project_id, project_nm, short_nm, department, continent, country, master)
                SELECT project_id, project_nm, short_nm, department, continent, country, master
                FROM overseas_projects
            """)
            master_rows = cursor.rowcount

            # 3-2. Copy history data. crawl_date is NOT NULL in the new table;
            #      with INSERT IGNORE a NULL source value would be coerced to
            #      the column's implicit default instead of being rejected, so
            #      undated legacy rows are explicitly dated with today.
            cursor.execute("""
                INSERT IGNORE INTO projects_history (project_id, crawl_date, recent_log, file_count)
                SELECT project_id, COALESCE(crawl_date, CURRENT_DATE()), recent_log, file_count
                FROM overseas_projects
            """)
            history_rows = cursor.rowcount

        conn.commit()
        # INSERT IGNORE skips rows silently, so report how many were copied.
        print(f">>> 마스터 {master_rows}건, 히스토리 {history_rows}건 이전")
        print(">>> DB 정규화 마이그레이션 완료 (Master / History 분리)")
    finally:
        conn.close()


if __name__ == "__main__":
    migrate_to_normalized_tables()

View File

@@ -54,24 +54,57 @@ def get_db_connection():
cursorclass=pymysql.cursors.DictCursor
)
@app.get("/project-data")
async def get_project_data():
@app.get("/available-dates")
async def get_available_dates():
    """Return the distinct crawl dates stored in ``projects_history``.

    Returns:
        A list of date strings formatted ``YYYY.MM.DD``, newest first. Rows
        whose ``crawl_date`` is empty are skipped. If anything fails, a
        ``{"error": <message>}`` dict is returned instead of a list.
    """
    try:
        conn = get_db_connection()
        try:
            with conn.cursor() as cursor:
                cursor.execute("SELECT DISTINCT crawl_date FROM projects_history ORDER BY crawl_date DESC")
                rows = cursor.fetchall()
            return [row['crawl_date'].strftime("%Y.%m.%d") for row in rows if row['crawl_date']]
        finally:
            conn.close()
    except Exception as e:
        # Endpoint boundary: log the failure and report it as JSON, the same
        # way the /project-data handler does, rather than raising.
        print(f"Error fetching from DB: {e}")
        return {"error": str(e)}
@app.get("/project-data")
async def get_project_data(date: str = None):
"""
특정 날짜의 데이터를 JOIN하여 반환
"""
try:
conn = get_db_connection()
try:
with conn.cursor() as cursor:
if not date or date == "-":
cursor.execute("SELECT MAX(crawl_date) as last_date FROM projects_history")
target_date_row = cursor.fetchone()
target_date = target_date_row['last_date']
else:
target_date = date.replace(".", "-")
if not target_date:
return {"projects": [], "last_updated": "-"}
# 마스터 정보와 히스토리 정보를 JOIN
sql = """
SELECT m.project_nm, m.short_nm, m.department, m.master,
h.recent_log, h.file_count, m.continent, m.country
FROM projects_master m
JOIN projects_history h ON m.project_id = h.project_id
WHERE h.crawl_date = %s
ORDER BY m.project_id ASC
"""
cursor.execute(sql, (target_date,))
rows = cursor.fetchall()
# 프론트엔드 기대 형식에 맞춰 반환
# [표시될 프로젝트명(short_nm), 담당부서, 담당자, 최근활동로그, 파일수, 대륙, 국가]
projects = []
for row in rows:
# short_nm이 있으면 그것을 쓰고, 없으면 project_nm 사용
display_name = row['short_nm'] if row['short_nm'] and row['short_nm'].strip() else row['project_nm']
projects.append([
display_name,
@@ -82,11 +115,11 @@ async def get_project_data():
row['continent'],
row['country']
])
return projects
return {"projects": projects, "last_updated": target_date.strftime("%Y.%m.%d") if hasattr(target_date, 'strftime') else str(target_date).replace("-", ".")}
finally:
conn.close()
except Exception as e:
print(f"Error fetching from DB: {e}")
return {"error": str(e)}
@app.get("/")

View File

@@ -55,52 +55,45 @@
box-shadow: var(--box-shadow-lg);
}
.portal-card .icon {
font-size: 32px;
width: 64px;
height: 64px;
background: var(--bg-muted);
border-radius: 50%;
display: flex;
align-items: center;
justify-content: center;
transition: 0.3s;
}
.portal-card:hover .icon {
background: var(--primary-color);
color: #fff;
}
.portal-card h2 {
font-size: 20px;
font-weight: 700;
}
.portal-card p {
color: var(--text-sub);
font-size: 14px;
line-height: 1.5;
}
@media screen and (max-width: 600px) {
.button-grid {
grid-template-columns: 1fr;
}
}
/* Dashboard List & Console */
header {
position: fixed;
top: 36px;
left: 0;
right: 0;
z-index: 1000;
background: #fff;
display: flex;
justify-content: space-between;
align-items: center;
padding: var(--space-md) var(--space-lg);
border-bottom: 1px solid var(--border-color);
box-shadow: 0 2px 4px rgba(0,0,0,0.05);
}
.main-content {
margin-top: 100px;
padding: var(--space-lg);
max-width: 1400px;
margin-left: auto;
margin-right: auto;
}
.log-console {
position: sticky;
top: 100px;
z-index: 999;
background: #000;
color: #0f0;
font-family: monospace;
padding: 15px;
margin-bottom: 20px;
border-radius: 4px;
max-height: 200px;
max-height: 250px;
overflow-y: auto;
font-size: 12px;
line-height: 1.5;
box-shadow: 0 10px 20px rgba(0,0,0,0.2);
}
.log-console-header {
@@ -126,6 +119,10 @@
}
.accordion-list-header {
position: sticky;
top: 100px;
background: var(--bg-muted);
z-index: 10;
font-size: 11px;
font-weight: 700;
color: var(--text-sub);
@@ -186,7 +183,7 @@
font-weight: 700;
}
/* Accordion Multi-level (Continent/Country) */
/* Multi-level Groups */
.continent-group,
.country-group {
margin-bottom: 10px;
@@ -263,24 +260,7 @@
font-weight: 600;
}
/* Header & Sync Button */
header {
display: flex;
justify-content: space-between;
align-items: center;
margin-bottom: var(--space-lg);
padding-bottom: var(--space-md);
border-bottom: 1px solid var(--border-color);
}
.main-content {
margin-top: 36px;
padding: var(--space-lg);
max-width: 1400px;
margin-left: auto;
margin-right: auto;
}
/* Sync Button & Admin Info */
.sync-btn {
display: flex;
align-items: center;
@@ -291,6 +271,8 @@ header {
border-radius: var(--radius-lg);
font-size: 13px;
font-weight: 600;
cursor: pointer;
border: none;
box-shadow: var(--box-shadow);
}
@@ -316,3 +298,18 @@ header {
color: var(--primary-color);
font-weight: 700;
}
.base-date-info {
font-size: 13px;
color: var(--text-sub);
background: #f8f9fa;
padding: 6px 15px;
border-radius: 6px;
border: 1px solid var(--border-color);
}
.base-date-info strong {
color: #333;
font-weight: 700;
margin-left: 5px;
}

View File

@@ -31,8 +31,9 @@
<main class="main-content">
<header>
<h1>프로젝트 현황</h1>
<div class="header-actions" style="display: flex; align-items: center;">
<h2>프로젝트 현황</h2>
<div class="header-actions" style="display: flex; align-items: center; gap: 15px;">
<div class="base-date-info">기준날짜: <strong id="baseDate">-</strong></div>
<button id="syncBtn" class="sync-btn" onclick="syncData()">
<span class="spinner"></span>
데이터 동기화 (크롤링)