feat: 프로젝트 활성도 분석 시스템 및 크롤링 인증/중단 기능 구현 - DB 연결 최적화, 활성도 위젯 및 내비게이션, 관리자 인증 모달, 중단 기능, UI 레이아웃 최적화, 코드 리팩토링 및 파일 정리

This commit is contained in:
2026-03-11 14:03:26 +09:00
parent 4a995c11f4
commit 9f06857bea
26 changed files with 587 additions and 1323 deletions

257
server.py
View File

@@ -1,37 +1,33 @@
import os
import sys
# 한글 환경 및 Tesseract 경로 강제 설정
os.environ["PYTHONIOENCODING"] = "utf-8"
os.environ["TESSDATA_PREFIX"] = r"C:\Users\User\AppData\Local\Programs\Tesseract-OCR\tessdata"
from fastapi import FastAPI
import re
import asyncio
import pymysql
from datetime import datetime
from pydantic import BaseModel
from fastapi import FastAPI, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import StreamingResponse, FileResponse
from fastapi.staticfiles import StaticFiles
from fastapi.templating import Jinja2Templates
from analyze import analyze_file_content
from crawler_service import run_crawler_service
import asyncio
from fastapi import Request
from crawler_service import run_crawler_service, crawl_stop_event
# --- 환경 설정 ---
os.environ["PYTHONIOENCODING"] = "utf-8"
# Tesseract 경로는 환경에 따라 다를 수 있으므로 환경변수 우선 사용 권장
TESSDATA_PREFIX = os.getenv("TESSDATA_PREFIX", r"C:\Users\User\AppData\Local\Programs\Tesseract-OCR\tessdata")
os.environ["TESSDATA_PREFIX"] = TESSDATA_PREFIX
app = FastAPI(title="Project Master Overseas API")
templates = Jinja2Templates(directory="templates")
# --- Utility: run a synchronous function in the default thread pool ---
async def run_in_threadpool(func, *args):
    """Run blocking ``func(*args)`` in the default executor without blocking the loop.

    Uses ``asyncio.get_running_loop()``: ``get_event_loop()`` is deprecated when
    called from a coroutine and may create a brand-new loop when none is running.
    """
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(None, func, *args)
# Static files and middleware setup
# Static file mounts: serve CSS, JS and crawled sample attachments directly
app.mount("/style", StaticFiles(directory="style"), name="style")
app.mount("/js", StaticFiles(directory="js"), name="js")
app.mount("/sample_files", StaticFiles(directory="sample"), name="sample_files")
@app.get("/sample.png")
async def get_sample_img():
    """Serve the bundled sample.png image from the working directory."""
    image = FileResponse("sample.png")
    return image
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
@@ -40,88 +36,29 @@ app.add_middleware(
allow_headers=["*"],
)
# --- Data models ---
class AuthRequest(BaseModel):
    """Request body for the crawl-authentication endpoint (/auth/crawl)."""
    user_id: str   # operator id, checked against the PM_USER_ID env var
    password: str  # checked against the PM_PASSWORD env var
# --- HTML 라우팅 ---
import pymysql
# --- 유틸리티 함수 ---
def get_db_connection():
    """Return a MySQL connection configured from environment variables.

    Env vars (with development fallbacks): DB_HOST, DB_USER, DB_PASSWORD, DB_NAME.
    Fix: the merged diff left BOTH the old hardcoded connection kwargs and the
    new env-based ones in the call, producing duplicate keyword arguments
    (a SyntaxError); only the env-based set is kept.

    NOTE(review): the hardcoded DB_PASSWORD fallback should be removed before
    deployment — prefer failing fast when the variable is unset.
    """
    return pymysql.connect(
        host=os.getenv('DB_HOST', 'localhost'),
        user=os.getenv('DB_USER', 'root'),
        password=os.getenv('DB_PASSWORD', '45278434'),
        database=os.getenv('DB_NAME', 'PM_proto'),
        charset='utf8mb4',
        # DictCursor: rows come back as dicts keyed by column name
        cursorclass=pymysql.cursors.DictCursor
    )
@app.get("/available-dates")
async def get_available_dates():
    """Return the unique crawl dates from the history table, newest first."""
    try:
        connection = get_db_connection()
        try:
            with connection.cursor() as cur:
                cur.execute("SELECT DISTINCT crawl_date FROM projects_history ORDER BY crawl_date DESC")
                fetched = cur.fetchall()
            formatted = []
            for record in fetched:
                crawl_date = record['crawl_date']
                if crawl_date:
                    formatted.append(crawl_date.strftime("%Y.%m.%d"))
            return formatted
        finally:
            connection.close()
    except Exception as e:
        return {"error": str(e)}
@app.get("/project-data")
async def get_project_data(date: str = None):
    """Return joined master/history project rows for one crawl date.

    When `date` is missing or "-", the most recent crawl date is used.
    """
    try:
        connection = get_db_connection()
        try:
            with connection.cursor() as cur:
                if date and date != "-":
                    target_date = date.replace(".", "-")
                else:
                    cur.execute("SELECT MAX(crawl_date) as last_date FROM projects_history")
                    target_date = cur.fetchone()['last_date']
                if not target_date:
                    return {"projects": [], "last_updated": "-"}
                # Join master metadata with the per-date history snapshot
                sql = """
                SELECT m.project_nm, m.short_nm, m.department, m.master,
                       h.recent_log, h.file_count, m.continent, m.country
                FROM projects_master m
                JOIN projects_history h ON m.project_id = h.project_id
                WHERE h.crawl_date = %s
                ORDER BY m.project_id ASC
                """
                cur.execute(sql, (target_date,))
                records = cur.fetchall()
            project_rows = []
            for rec in records:
                short = rec['short_nm']
                label = short if short and short.strip() else rec['project_nm']
                project_rows.append([
                    label,
                    rec['department'],
                    rec['master'],
                    rec['recent_log'],
                    rec['file_count'],
                    rec['continent'],
                    rec['country'],
                ])
            if hasattr(target_date, 'strftime'):
                stamp = target_date.strftime("%Y.%m.%d")
            else:
                stamp = str(target_date).replace("-", ".")
            return {"projects": project_rows, "last_updated": stamp}
        finally:
            connection.close()
    except Exception as e:
        return {"error": str(e)}
async def run_in_threadpool(func, *args):
    """Run blocking ``func(*args)`` on the default executor.

    Fix: ``asyncio.get_event_loop()`` is deprecated inside coroutines; use
    ``get_running_loop()`` instead.
    NOTE(review): this re-defines run_in_threadpool already declared earlier in
    the module (the later definition wins) — one of the two should be removed.
    """
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(None, func, *args)
# --- HTML 라우팅 ---
@app.get("/")
async def root(request: Request):
    """Serve the landing page."""
    ctx = {"request": request}
    return templates.TemplateResponse("index.html", ctx)
@@ -131,37 +68,119 @@ async def get_dashboard(request: Request):
return templates.TemplateResponse("dashboard.html", {"request": request})
@app.get("/mailTest")
@app.get("/mailTest.html")
async def get_mail_test(request: Request):
    """Serve the mail-test page under both route spellings."""
    context = {"request": request}
    return templates.TemplateResponse("mailTest.html", context)
# --- 데이터 API ---
@app.get("/attachments")
async def get_attachments():
    """List files in the sample/ directory with human-readable KB sizes."""
    sample_path = "sample"
    if not os.path.exists(sample_path):
        os.makedirs(sample_path)
    entries = []
    for name in os.listdir(sample_path):
        full_path = os.path.join(sample_path, name)
        if not os.path.isfile(full_path):
            continue
        size_kb = os.path.getsize(full_path) / 1024
        entries.append({"name": name, "size": f"{size_kb:.1f} KB"})
    return entries
# --- 분석 및 수집 API ---
@app.get("/available-dates")
async def get_available_dates():
    """Return the distinct crawl dates recorded in history, newest first."""
    try:
        with get_db_connection() as conn:
            with conn.cursor() as cursor:
                cursor.execute("SELECT DISTINCT crawl_date FROM projects_history ORDER BY crawl_date DESC")
                dates = []
                for row in cursor.fetchall():
                    value = row['crawl_date']
                    if value:
                        dates.append(value.strftime("%Y.%m.%d"))
                return dates
    except Exception as e:
        return {"error": str(e)}
@app.get("/analyze-file")
async def analyze_file(filename: str):
    """Run the analysis service (analyze.py) off the event loop."""
    analysis_result = await run_in_threadpool(analyze_file_content, filename)
    return analysis_result
@app.get("/project-data")
async def get_project_data(date: str = None):
    """Return joined master/history rows for the given (or latest) crawl date."""
    try:
        if date and date != "-":
            target_date = date.replace(".", "-")
        else:
            target_date = None
        with get_db_connection() as conn:
            with conn.cursor() as cursor:
                if not target_date:
                    cursor.execute("SELECT MAX(crawl_date) as last_date FROM projects_history")
                    target_date = cursor.fetchone()['last_date']
                if not target_date:
                    return {"projects": []}
                sql = """
                SELECT m.project_nm, m.short_nm, m.department, m.master,
                       h.recent_log, h.file_count, m.continent, m.country
                FROM projects_master m
                JOIN projects_history h ON m.project_id = h.project_id
                WHERE h.crawl_date = %s ORDER BY m.project_id ASC
                """
                cursor.execute(sql, (target_date,))
                project_rows = []
                for row in cursor.fetchall():
                    short = row['short_nm']
                    display = short if short and short.strip() else row['project_nm']
                    project_rows.append([
                        display, row['department'], row['master'],
                        row['recent_log'], row['file_count'],
                        row['continent'], row['country'],
                    ])
                return {"projects": project_rows}
    except Exception as e:
        return {"error": str(e)}
@app.get("/project-activity")
async def get_project_activity(date: str = None):
    """Activity-analysis API: classify each project as active/warning/stale/unknown.

    `date` is an optional "YYYY.MM.DD" string; when absent or "-", the most
    recent crawl date is used (or today's date if the history table is empty).
    Returns {"summary": {...counts...}, "details": [{name, status, days_ago}]}.
    """
    try:
        with get_db_connection() as conn:
            with conn.cursor() as cursor:
                if not date or date == "-":
                    # No explicit date: fall back to the latest crawl date, else today.
                    cursor.execute("SELECT MAX(crawl_date) as last_date FROM projects_history")
                    res = cursor.fetchone()
                    target_date_val = res['last_date'] if res['last_date'] else datetime.now().date()
                else:
                    target_date_val = datetime.strptime(date.replace(".", "-"), "%Y-%m-%d").date()
                # Midnight datetime used for day-difference arithmetic below.
                target_date_dt = datetime.combine(target_date_val, datetime.min.time())
                # LEFT JOIN keeps projects that have no history row on the target date.
                sql = """
                SELECT m.project_id, m.project_nm, m.short_nm, h.recent_log, h.file_count
                FROM projects_master m
                LEFT JOIN projects_history h ON m.project_id = h.project_id AND h.crawl_date = %s
                """
                cursor.execute(sql, (target_date_val,))
                rows = cursor.fetchall()
                analysis = {"summary": {"active": 0, "warning": 0, "stale": 0, "unknown": 0}, "details": []}
                for r in rows:
                    log, files = r['recent_log'], r['file_count']
                    # Defaults when no usable log: "unknown" status, 999 = sentinel age.
                    status, days = "unknown", 999
                    # "데이터 없음" is the crawler's "no data" marker (runtime value, kept as-is).
                    if log and log != "데이터 없음" and files and files > 0:
                        # recent_log is expected to embed a YYYY.MM.DD date — TODO confirm format.
                        match = re.search(r'(\d{4})\.(\d{2})\.(\d{2})', log)
                        if match:
                            diff = (target_date_dt - datetime.strptime(match.group(0), "%Y.%m.%d")).days
                            # <= 7 days old: active; <= 14: warning; older: stale.
                            status = "active" if diff <= 7 else "warning" if diff <= 14 else "stale"
                            days = diff
                    analysis["summary"][status] += 1
                    analysis["details"].append({"name": r['short_nm'] or r['project_nm'], "status": status, "days_ago": days})
                return analysis
    except Exception as e:
        return {"error": str(e)}
@app.post("/auth/crawl")
async def auth_crawl(req: AuthRequest):
    """Authenticate a crawl request against the PM_USER_ID / PM_PASSWORD env vars.

    Returns {"success": True} on a match, otherwise a failure payload.
    Fix: use a constant-time comparison (hmac.compare_digest) instead of `==`
    so response timing does not leak credential prefixes, and explicitly fail
    when the expected credentials are not configured (os.getenv -> None).
    """
    import hmac  # local import keeps the fix self-contained

    expected_id = os.getenv("PM_USER_ID")
    expected_pw = os.getenv("PM_PASSWORD")
    if (expected_id is not None and expected_pw is not None
            and hmac.compare_digest(req.user_id, expected_id)
            and hmac.compare_digest(req.password, expected_pw)):
        return {"success": True}
    return {"success": False, "message": "크롤링을 할 수 없습니다."}
@app.get("/sync")
async def sync_data():
    """Stream crawler-service (crawler_service.py) progress to the client as SSE."""
    print(">>> /sync request received")
    # Fix: the server-sent-events MIME type is "text/event-stream"; the
    # previous "text_event-stream" value is invalid and breaks EventSource clients.
    return StreamingResponse(run_crawler_service(), media_type="text/event-stream")
@app.get("/stop-sync")
async def stop_sync():
    """Ask the running crawl to stop by setting the shared stop event.

    crawl_stop_event is the module-level Event imported from crawler_service;
    the crawler is expected to poll it — TODO confirm in crawler_service.py.
    """
    crawl_stop_event.set()
    return {"success": True}
@app.get("/attachments")
async def get_attachments():
    """List files under sample/ with their sizes formatted in KB."""
    directory = "sample"
    if not os.path.exists(directory):
        os.makedirs(directory)
    listing = []
    for entry in os.listdir(directory):
        entry_path = os.path.join(directory, entry)
        if os.path.isfile(entry_path):
            listing.append({"name": entry, "size": f"{os.path.getsize(entry_path)/1024:.1f} KB"})
    return listing
@app.get("/analyze-file")
async def analyze_file(filename: str):
    """Dispatch file analysis to the thread pool and await the result."""
    outcome = await run_in_threadpool(analyze_file_content, filename)
    return outcome
@app.get("/sample.png")
async def get_sample_img():
    """Return the sample.png file as a file-download response."""
    return FileResponse(path="sample.png")