feat: 크롤러 부서 정보 수집 추가 및 대시보드 데이터 정확도 개선

- getData API 응답 가로채기를 통한 부서(department) 정보 자동 수집 구현
- 파일이 0개인 경우를 "데이터 없음"으로 분류하는 로직 최적화 (LEFT JOIN 적용)
- 관리자 권한 인증 모달 스타일 복구 및 UI 정밀 조정
- 중복 등록 프로젝트(sm-25-032-phlinfra) DB 정리 및 테스트 파일 삭제
This commit is contained in:
2026-03-11 17:52:12 +09:00
parent 9f06857bea
commit 600c54c1f0
7 changed files with 178 additions and 81 deletions

View File

@@ -58,7 +58,7 @@ def crawler_thread_worker(msg_queue, user_id, password):
browser = None
try:
msg_queue.put(json.dumps({'type': 'log', 'message': '브라우저 엔진 가동 (전 기능 복구 모드)...'}))
browser = await p.chromium.launch(headless=False, args=[
browser = await p.chromium.launch(headless=True, args=[
"--no-sandbox",
"--disable-dev-shm-usage",
"--disable-blink-features=AutomationControlled"
@@ -68,7 +68,7 @@ def crawler_thread_worker(msg_queue, user_id, password):
user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"
)
captured_data = {"tree": None, "_is_root_archive": False, "project_list": []}
captured_data = {"tree": None, "_is_root_archive": False, "project_list": [], "last_project_data": None}
async def global_interceptor(response):
url = response.url
@@ -84,6 +84,8 @@ def crawler_thread_worker(msg_queue, user_id, password):
if is_root:
captured_data["tree"] = await response.json()
captured_data["_is_root_archive"] = True
elif "getData" in url and "overview" in url:
captured_data["last_project_data"] = await response.json()
except: pass
context.on("response", global_interceptor)
@@ -144,6 +146,32 @@ def crawler_thread_worker(msg_queue, user_id, password):
else: await target_el.click(force=True)
await page.wait_for_selector("text=활동로그", timeout=30000)
# [부서 정보 수집] getData 응답 대기 및 DB 업데이트
for _ in range(10):
if captured_data.get("last_project_data"): break
await asyncio.sleep(0.5)
last_data = captured_data.get("last_project_data")
if last_data:
if isinstance(last_data, list) and len(last_data) > 0:
last_data = last_data[0]
if isinstance(last_data, dict):
proj_data = last_data.get("data", {})
if isinstance(proj_data, list) and len(proj_data) > 0:
proj_data = proj_data[0]
if isinstance(proj_data, dict):
dept = proj_data.get("department")
p_id = proj_data.get("project_id")
if dept and p_id:
with get_db_connection() as conn:
with conn.cursor() as cursor:
cursor.execute("UPDATE projects_master SET department = %s WHERE project_id = %s", (dept, p_id))
conn.commit()
captured_data["last_project_data"] = None # 초기화
await asyncio.sleep(2)
recent_log = "데이터 없음"; file_count = 0
@@ -183,12 +211,18 @@ def crawler_thread_worker(msg_queue, user_id, password):
await asyncio.sleep(0.5)
if captured_data["tree"]:
tree = captured_data["tree"].get('currentTreeObject', captured_data["tree"])
total = len(tree.get("file", {}))
folders = tree.get("folder", {})
if isinstance(folders, dict):
for f in folders.values(): total += int(f.get("filesCount", 0))
file_count = total
tree_data = captured_data["tree"]
if isinstance(tree_data, list) and len(tree_data) > 0:
tree_data = tree_data[0]
if isinstance(tree_data, dict):
tree = tree_data.get('currentTreeObject', tree_data)
if isinstance(tree, dict):
total = len(tree.get("file", {}))
folders = tree.get("folder", {})
if isinstance(folders, dict):
for f in folders.values(): total += int(f.get("filesCount", 0))
file_count = total
# 4. DB 실시간 저장
if current_p_id: