import os
import re
import asyncio
import json
import logging

from playwright.async_api import async_playwright
from dotenv import load_dotenv

load_dotenv()

logger = logging.getLogger(__name__)

_BASE_URL = "https://overseas.projectmastercloud.com/"
_DASHBOARD_URL = "https://overseas.projectmastercloud.com/dashboard"

_PROJECT_LABEL_SEL = "h4.list__contents_aria_group_body_list_item_label"
_LOG_BUTTON_SEL = "body > div.footer > div.left > div.wrap.log-wrap > div.title.text"
_LOG_DATE_SEL = "article.archive-modal .log-body .date .text"
_LOG_USER_SEL = "article.archive-modal .log-body .user .text"
_LOG_ACTIVITY_SEL = "article.archive-modal .log-body .activity .text"
_LOG_CLOSE_SEL = "article.archive-modal div.close"
_SITEMAP_BUTTON_SEL = "body > div.footer > div.left > div.wrap.site-map-wrap"
_COMPOSITION_COUNT_SEL = "#composition-list h6:nth-child(3)"


def _sse(payload):
    """Serialize *payload* as one Server-Sent Events ``data:`` frame."""
    return f"data: {json.dumps(payload)}\n\n"


def _log_event(message):
    """Build an SSE frame of type ``log`` carrying *message*."""
    return _sse({'type': 'log', 'message': message})


async def _collect_recent_log(page):
    """Open the log modal and return "date, user, activity" for its first entry.

    Best effort: returns ``None`` when the log button is not visible, the modal
    has no entries, or any step fails. Once the modal has been opened, a close
    is always attempted so it does not stay on top of the page.
    """
    try:
        log_btn = page.locator(_LOG_BUTTON_SEL).first
        # NOTE(review): Playwright documents the ``timeout`` option of
        # ``is_visible`` as deprecated/ignored (the call returns immediately);
        # kept as-is to preserve the existing call - confirm before relying on it.
        if not await log_btn.is_visible(timeout=5000):
            return None
        await log_btn.click(force=True)
    except Exception:
        # ``Exception`` (not a bare ``except``) so that GeneratorExit and
        # asyncio.CancelledError still propagate to the caller.
        logger.warning("Could not open the log modal", exc_info=True)
        return None

    await asyncio.sleep(5)  # fixed wait for the modal content to render

    recent_log = None
    try:
        if await page.locator(_LOG_DATE_SEL).count() > 0:
            raw_date = (await page.locator(_LOG_DATE_SEL).first.inner_text()).strip()
            user_name = (await page.locator(_LOG_USER_SEL).first.inner_text()).strip()
            activity = (await page.locator(_LOG_ACTIVITY_SEL).first.inner_text()).strip()
            # Replace '-' and '/' separators with '.' and keep the first 10 chars.
            formatted_date = re.sub(r'[-/]', '.', raw_date)[:10]
            recent_log = f"{formatted_date}, {user_name}, {activity}"
    except Exception:
        logger.warning("Could not read the log modal", exc_info=True)

    try:
        await page.click(_LOG_CLOSE_SEL, timeout=3000)
        await asyncio.sleep(1.5)
    except Exception:
        logger.warning("Could not close the log modal", exc_info=True)

    return recent_log


async def _wait_for_composition_page(context, attempts=20, interval=0.5):
    """Poll ``context.pages`` for a page whose URL contains "composition".

    Returns the page, or ``None`` after *attempts* polls spaced *interval*
    seconds apart.
    """
    for _ in range(attempts):
        for candidate in context.pages:
            if "composition" in candidate.url:
                return candidate
        await asyncio.sleep(interval)
    return None


async def _collect_file_count(page, context):
    """Open the composition popup and return the summed file count.

    Sums the first integer found on the last text line of every element
    matching ``_COMPOSITION_COUNT_SEL``. Best effort: returns ``None`` when the
    button is not visible, no popup appears, or any step fails. The popup page
    is closed even when parsing fails.
    """
    try:
        sitemap_btn = page.locator(_SITEMAP_BUTTON_SEL).first
        # NOTE(review): see the ``is_visible`` timeout remark in
        # ``_collect_recent_log``.
        if not await sitemap_btn.is_visible(timeout=5000):
            return None
        await sitemap_btn.click(force=True)
        popup_page = await _wait_for_composition_page(context)
    except Exception:
        logger.warning("Could not open the composition popup", exc_info=True)
        return None

    if popup_page is None:
        return None

    try:
        await asyncio.sleep(5)  # wait for the popup to load
        headings = popup_page.locator(_COMPOSITION_COUNT_SEL)
        total = 0
        for idx in range(await headings.count()):
            text = (await headings.nth(idx).inner_text()).strip()
            nums = re.findall(r'\d+', text.split('\n')[-1])
            if nums:
                total += int(nums[0])
        return total
    except Exception:
        logger.warning("Could not read the composition popup", exc_info=True)
        return None
    finally:
        try:
            await popup_page.close()
        except Exception:
            logger.warning("Could not close the composition popup", exc_info=True)


async def run_crawler_service():
    """Crawl project data with Playwright and stream progress as SSE frames.

    This is an async generator. Each yielded item is a string of the form
    ``"data: <json>\\n\\n"`` where ``<json>`` is one of:

    * ``{"type": "log", "message": str}`` - a progress or error message;
    * ``{"type": "done", "data": [...]}`` - the final result, a list of
      ``{"projectName", "recentLog", "fileCount"}`` dicts, one per project
      that was processed successfully.

    Credentials are read from the ``PM_USER_ID`` and ``PM_PASSWORD``
    environment variables; when either is missing a single ``log`` frame is
    yielded and the generator ends.

    Per-project failures are logged and skipped. Any other error inside the
    crawl is reported as a ``log`` frame and ends the stream without a
    ``done`` frame. The browser is closed in all cases, including when the
    consumer closes the generator early.
    """
    user_id = os.getenv("PM_USER_ID")
    password = os.getenv("PM_PASSWORD")

    if not user_id or not password:
        yield _log_event('오류: .env 파일에 계정 정보가 없습니다.')
        return

    results = []

    async with async_playwright() as p:
        yield _log_event('브라우저 실행 중...')
        browser = await p.chromium.launch(headless=True, args=[
            "--no-sandbox",
            "--disable-dev-shm-usage",
            "--disable-blink-features=AutomationControlled",
        ])

        try:
            # Created inside the try so the browser is closed if this fails.
            context = await browser.new_context(
                viewport={'width': 1920, 'height': 1080},
                user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"
            )
            page = await context.new_page()

            yield _log_event('사이트 접속 및 로그인 중...')
            await page.goto(_BASE_URL, wait_until="domcontentloaded")
            await page.click("#login-by-id", timeout=10000)
            await page.fill("#user_id", user_id)
            await page.fill("#user_pw", password)
            await page.click("#login-btn")

            yield _log_event('대시보드 목록 대기 중...')
            await page.wait_for_selector(_PROJECT_LABEL_SEL, timeout=60000)

            count = await page.locator(_PROJECT_LABEL_SEL).count()
            yield _log_event(f'총 {count}개의 프로젝트 발견. 수집 시작.')

            for i in range(count):
                try:
                    # Re-resolve the locator on every pass: the page navigates
                    # away from and back to the list for each project.
                    proj = page.locator(_PROJECT_LABEL_SEL).nth(i)
                    project_name = (await proj.inner_text()).strip()
                    yield _log_event(f'[{i+1}/{count}] {project_name} - 시작')

                    await proj.scroll_into_view_if_needed()
                    await proj.click(force=True)
                    await asyncio.sleep(5)
                    await page.wait_for_selector("div.footer", state="visible", timeout=20000)

                    recent_log = "기존데이터유지"
                    file_count = 0

                    # Collect the most recent log entry (best effort).
                    collected_log = await _collect_recent_log(page)
                    if collected_log is not None:
                        recent_log = collected_log
                        yield _log_event(f' - [로그] 수집 완료')

                    # Collect the composition file count (best effort).
                    collected_count = await _collect_file_count(page, context)
                    if collected_count is not None:
                        file_count = collected_count
                        yield _log_event(f' - [구성] {file_count}개 확인')

                    results.append({"projectName": project_name, "recentLog": recent_log, "fileCount": file_count})

                    # Return to the project list.
                    await page.locator("div.header div.title div").first.click(force=True)
                    await page.wait_for_selector(_PROJECT_LABEL_SEL, timeout=20000)
                    await asyncio.sleep(2)
                except Exception:
                    # Recover by reloading the dashboard, then move on to the
                    # next project.
                    logger.warning(
                        "Project %d/%d failed; reloading the dashboard",
                        i + 1, count, exc_info=True,
                    )
                    await page.goto(_DASHBOARD_URL, wait_until="domcontentloaded")

            yield _sse({'type': 'done', 'data': results})

        except Exception as e:
            logger.exception("Crawler aborted")
            yield _log_event(f'치명적 오류: {str(e)}')
        finally:
            await browser.close()