feat: MySQL DB 정규화(Master/History) 및 시계열 데이터 수집 시스템 통합

1. 마스터/히스토리 테이블 분리 및 마이그레이션 완료
2. 날짜별 데이터 축적 및 대시보드 필터링 기능 추가
3. Playwright 수집 로직(날짜필터, 좌표클릭, 정밀합산) 완전 복구
This commit is contained in:
2026-03-10 16:24:13 +09:00
parent 743cce543b
commit 4a995c11f4
9 changed files with 268 additions and 89 deletions

View File

@@ -96,27 +96,26 @@ def crawler_thread_worker(msg_queue, user_id, password):
await page.wait_for_selector("h4.list__contents_aria_group_body_list_item_label", timeout=60000)
await asyncio.sleep(3)
# [Phase 1] DB 기초 정보 동기화 (엄격한 매칭)
# [Phase 1] DB 기초 정보 동기화 (마스터 테이블)
if captured_data["project_list"]:
conn = get_db_connection()
try:
with conn.cursor() as cursor:
for p_info in captured_data["project_list"]:
p_nm = p_info.get("project_nm")
try:
sql = """
INSERT INTO overseas_projects (project_id, project_nm, short_nm, master, continent, country)
INSERT INTO projects_master (project_id, project_nm, short_nm, master, continent, country)
VALUES (%s, %s, %s, %s, %s, %s)
ON DUPLICATE KEY UPDATE
project_id = VALUES(project_id), project_nm = VALUES(project_nm),
short_nm = VALUES(short_nm), master = VALUES(master),
continent = VALUES(continent), country = VALUES(country)
project_nm = VALUES(project_nm), short_nm = VALUES(short_nm),
master = VALUES(master), continent = VALUES(continent), country = VALUES(country)
"""
cursor.execute(sql, (p_info.get("project_id"), p_nm, p_info.get("short_nm", "").strip(),
p_info.get("master"), p_info.get("large_class"), p_info.get("mid_class")))
cursor.execute(sql, (p_info.get("project_id"), p_info.get("project_nm"),
p_info.get("short_nm", "").strip(), p_info.get("master"),
p_info.get("large_class"), p_info.get("mid_class")))
except: continue
conn.commit()
msg_queue.put(json.dumps({'type': 'log', 'message': f'DB 기초 정보 동기화 완료 ({len(captured_data["project_list"])}개)'}))
msg_queue.put(json.dumps({'type': 'log', 'message': f'DB 마스터 정보 동기화 완료.'}))
finally: conn.close()
# [Phase 2] h4 태그 기반 수집 루프
@@ -207,15 +206,21 @@ def crawler_thread_worker(msg_queue, user_id, password):
msg_queue.put(json.dumps({'type': 'log', 'message': f' - [구성] 데이터 채택 성공: ...{captured_data.get("_tree_url", "")[-40:]}'}))
msg_queue.put(json.dumps({'type': 'log', 'message': f' - [구성] 최종 정밀 합산 성공 ({file_count}개)'}))
# 4. DB 실시간 저장 (ID 기반)
# 4. DB 실시간 저장 (히스토리 테이블)
if current_p_id:
conn = get_db_connection()
try:
with conn.cursor() as cursor:
sql = "UPDATE overseas_projects SET recent_log = %s, file_count = %s WHERE project_id = %s"
cursor.execute(sql, (recent_log, file_count, current_p_id))
# 오늘 날짜 히스토리 데이터 삽입 또는 업데이트
sql = """
INSERT INTO projects_history (project_id, crawl_date, recent_log, file_count)
VALUES (%s, CURRENT_DATE(), %s, %s)
ON DUPLICATE KEY UPDATE
recent_log = VALUES(recent_log), file_count = VALUES(file_count)
"""
cursor.execute(sql, (current_p_id, recent_log, file_count))
conn.commit()
msg_queue.put(json.dumps({'type': 'log', 'message': f' - [DB] 업데이트 완료 (ID: {current_p_id})'}))
msg_queue.put(json.dumps({'type': 'log', 'message': f' - [DB] 히스토리 업데이트 완료 (ID: {current_p_id})'}))
finally: conn.close()
await page.goto("https://overseas.projectmastercloud.com/dashboard", wait_until="domcontentloaded")