"""
FastAPI backend for the Project Master Overseas dashboard.

Changelog:
1. Split the master/history tables and completed the data migration.
2. Added per-date data accumulation and dashboard date filtering.
3. Fully restored the Playwright collection logic (date filter,
   coordinate clicks, precise aggregation).
"""
import os
import sys

# Force UTF-8 console I/O (Korean environment) and point Tesseract OCR
# at its language data directory.
# NOTE(review): hard-coded per-user Windows path — confirm it matches the
# deployment machine, or move it to configuration.
os.environ["PYTHONIOENCODING"] = "utf-8"
os.environ["TESSDATA_PREFIX"] = r"C:\Users\User\AppData\Local\Programs\Tesseract-OCR\tessdata"

from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import StreamingResponse, FileResponse
from fastapi.staticfiles import StaticFiles
from fastapi.templating import Jinja2Templates
from analyze import analyze_file_content
from crawler_service import run_crawler_service
import asyncio
from fastapi import Request

# Application and template engine instances shared by all routes below.
app = FastAPI(title="Project Master Overseas API")
templates = Jinja2Templates(directory="templates")
|
# --- Utility: run a synchronous function on the default thread-pool
# executor so it does not block the event loop. ---
async def run_in_threadpool(func, *args):
    """Run ``func(*args)`` in the default executor and return its result.

    Used to keep blocking work (OCR, file analysis) off the event loop.
    """
    # get_running_loop() is the modern replacement for get_event_loop(),
    # which is deprecated when called from a coroutine (Python 3.10+).
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(None, func, *args)
# Static file mounts: stylesheets, scripts, and crawled sample attachments.
app.mount("/style", StaticFiles(directory="style"), name="style")
app.mount("/js", StaticFiles(directory="js"), name="js")
# Note: the URL prefix ("/sample_files") differs from the directory name ("sample").
app.mount("/sample_files", StaticFiles(directory="sample"), name="sample_files")
@app.get("/sample.png")
async def get_sample_img():
    """Serve the static sample image from the project root."""
    response = FileResponse("sample.png")
    return response
# CORS: wide-open origins for the dashboard front-end. Credentials are
# disabled, which is required when allow_origins is a wildcard.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
# --- Database access ---
import pymysql
|
|
|
|
def get_db_connection():
    """Open a new PyMySQL connection to the ``crawling`` database.

    Rows come back as dicts thanks to ``DictCursor``. Credentials can be
    overridden via ``DB_HOST`` / ``DB_USER`` / ``DB_PASSWORD`` / ``DB_NAME``
    environment variables; the previous hard-coded values remain the
    defaults, so existing deployments are unaffected.

    NOTE(review): keeping the password in source is a security risk —
    prefer setting DB_PASSWORD in the environment.
    """
    return pymysql.connect(
        host=os.environ.get("DB_HOST", "localhost"),
        user=os.environ.get("DB_USER", "root"),
        password=os.environ.get("DB_PASSWORD", "45278434"),
        database=os.environ.get("DB_NAME", "crawling"),
        charset="utf8mb4",
        cursorclass=pymysql.cursors.DictCursor,
    )
@app.get("/available-dates")
async def get_available_dates():
    """Return the distinct crawl dates in the history table, newest
    first, each formatted as a "YYYY.MM.DD" string."""
    try:
        conn = get_db_connection()
        try:
            with conn.cursor() as cursor:
                cursor.execute("SELECT DISTINCT crawl_date FROM projects_history ORDER BY crawl_date DESC")
                result = []
                for record in cursor.fetchall():
                    crawl_date = record['crawl_date']
                    # Skip NULL dates; format the rest for the UI.
                    if crawl_date:
                        result.append(crawl_date.strftime("%Y.%m.%d"))
                return result
        finally:
            conn.close()
    except Exception as e:
        # Report failures as a JSON payload rather than a raw 500.
        return {"error": str(e)}
@app.get("/project-data")
async def get_project_data(date: str = None):
    """Return the project rows for one crawl date.

    Joins the master table (static project info) with the history table
    (per-date crawl results). ``date`` is expected as "YYYY.MM.DD"; when
    omitted (or "-") the most recent crawl date in the history table is
    used. Returns ``{"projects": [...], "last_updated": "YYYY.MM.DD"}``,
    or ``{"error": ...}`` on failure.
    """
    try:
        conn = get_db_connection()
        try:
            with conn.cursor() as cursor:
                if not date or date == "-":
                    # No explicit date requested: fall back to the latest crawl.
                    cursor.execute("SELECT MAX(crawl_date) as last_date FROM projects_history")
                    target_date_row = cursor.fetchone()
                    # May be a date object, or None when the table is empty.
                    target_date = target_date_row['last_date']
                else:
                    # The UI sends "YYYY.MM.DD"; MySQL date literals use "-".
                    target_date = date.replace(".", "-")

                if not target_date:
                    # History table is empty — nothing to show yet.
                    return {"projects": [], "last_updated": "-"}

                # Join master info with the per-date history row.
                sql = """
                    SELECT m.project_nm, m.short_nm, m.department, m.master,
                           h.recent_log, h.file_count, m.continent, m.country
                    FROM projects_master m
                    JOIN projects_history h ON m.project_id = h.project_id
                    WHERE h.crawl_date = %s
                    ORDER BY m.project_id ASC
                """
                cursor.execute(sql, (target_date,))
                rows = cursor.fetchall()

                projects = []
                for row in rows:
                    # Prefer the short name when present and non-blank.
                    display_name = row['short_nm'] if row['short_nm'] and row['short_nm'].strip() else row['project_nm']
                    projects.append([
                        display_name,
                        row['department'],
                        row['master'],
                        row['recent_log'],
                        row['file_count'],
                        row['continent'],
                        row['country']
                    ])

                # target_date is a date object when it came from the DB,
                # or a "YYYY-MM-DD" string when built from the query param;
                # normalize both to "YYYY.MM.DD" for the response.
                return {"projects": projects, "last_updated": target_date.strftime("%Y.%m.%d") if hasattr(target_date, 'strftime') else str(target_date).replace("-", ".")}
        finally:
            conn.close()
    except Exception as e:
        return {"error": str(e)}
@app.get("/")
async def root(request: Request):
    """Render the landing page."""
    context = {"request": request}
    return templates.TemplateResponse("index.html", context)
@app.get("/dashboard")
async def get_dashboard(request: Request):
    """Render the dashboard page."""
    context = {"request": request}
    return templates.TemplateResponse("dashboard.html", context)
@app.get("/mailTest")
@app.get("/mailTest.html")
async def get_mail_test(request: Request):
    """Render the mail-test page (reachable with or without ".html")."""
    context = {"request": request}
    return templates.TemplateResponse("mailTest.html", context)
# --- Data APIs ---
@app.get("/attachments")
async def get_attachments():
    """List the files in the "sample" directory with human-readable sizes.

    Creates the directory if missing so the endpoint works on a fresh
    checkout. Returns a list of ``{"name": ..., "size": "<n.n> KB"}``
    dicts; subdirectories are skipped.
    """
    sample_path = "sample"
    # exist_ok avoids the race between the old exists() check and makedirs().
    os.makedirs(sample_path, exist_ok=True)
    files = []
    for entry in os.listdir(sample_path):
        entry_path = os.path.join(sample_path, entry)
        if os.path.isfile(entry_path):
            files.append({
                "name": entry,
                "size": f"{os.path.getsize(entry_path) / 1024:.1f} KB"
            })
    return files
@app.get("/analyze-file")
async def analyze_file(filename: str):
    """Call the analysis service (analyze.py) for one attachment.

    The synchronous analyze_file_content() call is dispatched to the
    thread pool so the event loop is not blocked.
    """
    result = await run_in_threadpool(analyze_file_content, filename)
    return result
@app.get("/sync")
async def sync_data():
    """Stream the crawler's progress to the client as Server-Sent Events.

    Delegates to run_crawler_service() (crawler_service.py), which
    yields event lines as the crawl progresses.
    """
    print(">>> /sync request received")
    # BUG FIX: media type was "text_event-stream"; the SSE MIME type is
    # "text/event-stream" — browsers' EventSource rejects anything else.
    return StreamingResponse(run_crawler_service(), media_type="text/event-stream")