From ff9146cfeed48b66bb1fb9daf7af528b642196f5 Mon Sep 17 00:00:00 2001 From: Taehoon Date: Fri, 27 Feb 2026 17:52:34 +0900 Subject: [PATCH] =?UTF-8?q?=EB=A9=94=EC=9D=BC=20=EB=B6=84=EC=84=9D?= =?UTF-8?q?=EC=8B=9C=EC=8A=A4=ED=85=9C=20=EB=B3=B4=EC=99=84?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 40 +++ __pycache__/analyze.cpython-312.pyc | Bin 5268 -> 13194 bytes __pycache__/crawler_api.cpython-312.pyc | Bin 15406 -> 3161 bytes __pycache__/crawler_service.cpython-312.pyc | Bin 0 -> 9977 bytes __pycache__/server.cpython-312.pyc | Bin 0 -> 3452 bytes analyze.py | 234 ++++++++++----- crawler_api.py | 235 --------------- crawler_service.py | 137 +++++++++ mailTest.html | 275 ++++++++++++++++-- .../10.교량배수시설 시공계획서 제출의 건.pdf | Bin 1350106 -> 0 bytes ...험계획서(변경) 승인 요청[어천-공주(4차)].pdf | Bin 99797 -> 0 bytes ...변경계약(토공사 및 철근콘크리트공사) 제출.pdf | Bin 36500735 -> 0 bytes ...이탈계(안전관리자) 승인[어천~공주(4차)].pdf | Bin 35756 -> 0 bytes ...약 통보(발파공사)에 따른 검토보고[어천-공주(4차)].pdf | Bin 0 -> 278137 bytes ...트공사2) 타절정산 통보에 따른 검토보고[어천-공주(4차)].pdf | Bin 0 -> 213885 bytes ....현장직원(안전관리자) 이탈계 제출의 건.pdf | Bin 0 -> 57151 bytes ...공사) 타절정산 통보에 따른 검토보고[어천-공주(4차)].pdf | Bin 0 -> 206715 bytes sample/23.하도급변경계약(철거공사) 제출.pdf | Bin 0 -> 1154697 bytes ...트공사1) 타절정산 통보에 따른 검토보고[어천-공주(4차)].pdf | Bin 0 -> 207762 bytes ...사(설계,시공,감리) 추진관련 업무이행 철저.pdf | Bin 0 -> 43645 bytes ...사2) 타절정산 통보에 따른 검토보고[어천-공주(4차)].pdf | Bin 0 -> 209008 bytes ...휴 대비 도로시설물 안전점검 및 정비 철저.pdf | Bin 0 -> 33101 bytes ...고[품질관리비 변경반영, 어천-공주(4차)].pdf | Bin 333305 -> 0 bytes ...정보고 승인알림(품질관리비 추가 반영).pdf | Bin 155048 -> 0 bytes ...45.실정보고서(토지임대료 연장) 제출 건.pdf | Bin 0 -> 44520 bytes ...적확정측량비 사용계획 보고, 어천-공주(4차)].pdf | Bin 359344 -> 0 bytes ...정보고 승인 알림 (지적확정측량비 반영).pdf | Bin 156251 -> 0 bytes ...장대리인 변경 승인 알림[어천~공주(4차)].pdf | Bin 76810 -> 0 bytes ...정기점검에 따른 기술지원기술인 출장 요청.pdf | Bin 40652 -> 0 bytes ...장기술자(현장대리인) 변경 신고서 제출.pdf | Bin 883395 -> 0 bytes ...대료연장에 대한 실정보고(어천~공주(4차)).pdf | Bin 0 -> 765520 bytes server.log | Bin 6224 -> 10858 bytes server.py | 69 +++++ 33 files changed, 643 insertions(+), 347 deletions(-) create mode 100644 __pycache__/crawler_service.cpython-312.pyc create mode 100644 __pycache__/server.cpython-312.pyc delete mode 100644 crawler_api.py create mode 100644 crawler_service.py delete mode 100644 sample/10.교량배수시설 시공계획서 제출의 건.pdf delete mode 100644 sample/10.품질시험계획서(변경) 승인 요청[어천-공주(4차)].pdf delete mode 100644 sample/11.하도급변경계약(토공사 및 철근콘크리트공사) 제출.pdf delete mode 100644 sample/11.현장이탈계(안전관리자) 승인[어천~공주(4차)].pdf create mode 100644 sample/13.건설공사의 하도급 변경계약 통보(발파공사)에 따른 검토보고[어천-공주(4차)].pdf create mode 100644 sample/22.건설공사의 하도급변경계약(철근콘크리트공사2) 타절정산 통보에 따른 검토보고[어천-공주(4차)].pdf create mode 100644 sample/22.현장직원(안전관리자) 이탈계 제출의 건.pdf create mode 100644 sample/23.건설공사의 하도급변경계약(토공사) 타절정산 통보에 따른 검토보고[어천-공주(4차)].pdf create mode 100644 sample/23.하도급변경계약(철거공사) 제출.pdf create mode 100644 sample/24.건설공사의 하도급변경계약(철근콘크리트공사1) 타절정산 통보에 따른 검토보고[어천-공주(4차)].pdf create mode 100644 sample/24.도로건설공사(설계,시공,감리) 추진관련 업무이행 철저.pdf create mode 100644 sample/25.건설공사의 하도급변경계약(토공사2) 타절정산 통보에 따른 검토보고[어천-공주(4차)].pdf create mode 100644 sample/25.설 연휴 대비 도로시설물 안전점검 및 정비 철저.pdf delete mode 100644 sample/35.실정보고[품질관리비 변경반영, 어천-공주(4차)].pdf delete mode 100644 sample/38.실정보고 승인알림(품질관리비 추가 반영).pdf create mode 100644 sample/45.실정보고서(토지임대료 연장) 제출 건.pdf delete mode 100644 sample/53.실정보고[지적확정측량비 사용계획 보고, 어천-공주(4차)].pdf delete mode 100644 sample/56.실정보고 승인 알림 (지적확정측량비 반영).pdf delete mode 100644 sample/8.현장대리인 변경 승인 알림[어천~공주(4차)].pdf delete mode 100644 sample/9.현장 정기점검에 따른 기술지원기술인 출장 요청.pdf delete mode 100644 sample/9.현장기술자(현장대리인) 변경 신고서 제출.pdf create mode 100644 sample/토지임대료연장에 대한 실정보고(어천~공주(4차)).pdf create mode 100644 server.py diff --git a/README.md b/README.md index 570b9a0..39180ec 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,48 @@ +# 🚀 서버 정책 (Server Policy) + +**서버 구동 시 반드시 아래 명령어를 사용한다:** +```bash +uvicorn server:app --host 0.0.0.0 --port 8000 --reload +``` +- **Host**: `0.0.0.0` (외부 접속 허용) +- **Port**: `8000` +- **Reload**: 코드 수정 시 자동 재시작 활성화 + +--- + +# 🤖 메일시스템 AI판단가이드 (AI Reasoning Guide) + +AI는 파일을 분류할 때 단순한 키워드 매칭이 아닌, 아래의 **5단계 통합 추론 모델**을 사용하여 '실무자처럼' 생각하고 판단한다. + +### 1단계: 전수 데이터 수집 (Holistic Reading) +- **무제한 스캔**: 페이지 수에 관계없이 문서 전체를 전수 조사한다. +- **무조건적 OCR**: 디지털 텍스트 유무와 상관없이 모든 페이지에 고해상도(300 DPI) OCR을 실행하여 이미지 속 도장, 수기, 표 데이터까지 완벽히 수집한다. + +### 2단계: 파일명 가중치 적용 (Title Steering) +- **파일명 = 보관 의도**: 사용자가 지은 파일명은 분류의 가장 강력한 '방향타'이다. +- **최종 조율**: 본문의 데이터가 다른 도메인에 쏠려 있더라도, 파일명에 명확한 업무 용어(`실정보고`, `하도급` 등)가 있다면 이를 최종 분류의 가장 큰 무게추로 삼는다. + +### 3단계: 문서의 물리적 틀(Format) 분석 +- **공문 골격 확인**: 문서의 시작(`수신/발신`)과 끝(`직인/끝.`)의 구조를 확인한다. +- **껍데기 vs 알맹이**: + - **공문 본체**: 골격이 완벽하고 뒤따르는 기술 데이터가 적은 경우 → **[공사관리 > 공문]** + - **첨부 본체**: 공문 뒤에 대량의 산출서, 계약서, 도면이 붙어 있는 경우 → **[해당 기술 카테고리]** (공문은 전달 수단으로만 간주) + +### 4단계: 비즈니스 도메인 상식 결합 (Common Sense) +- **지명 교차 검증**: 파일명과 본문의 지명(어천, 공주, 대술, 정안 등)을 대조하여 정확한 프로젝트를 선택한다. (임의 기본값 지정 금지) +- **실무 맥락 매칭**: '임대료/연장'은 사업비 성격의 '기타'로, '비계'는 '구조물'로 연결하는 등 건설 실무 상식을 추론에 반영한다. + +### 5단계: 최종 지도 매칭 (Hierarchy Mapping) +- 수집된 모든 정보를 종합하여 사용자가 정의한 **표준 분류 체계(Tab > Category > Sub)** 지도 위에서 가장 논리적이고 실무적인 위치를 최종 확정한다. + +--- + # 프로젝트 관리 규칙 1. **언어 설정**: 영어로 생각하되, 모든 답변은 한국어로 작성한다. (일본어, 중국어는 절대 사용하지 않는다.) 2. **수정 권한 제한**: 사용자가 명시적으로 지시한 사항 외에는 **절대 절대 절대** 코드를 임의로 수정하지 않는다. 3. **로그 기록 철저**: 모달 오픈 여부, 수집 성공/실패 여부 등 진행 상황을 실시간 로그에 상세히 표시한다. +4. **선보고 후승인**: 모든 기능 수정 및 코드 변경 전에는 예상 방안을 먼저 보고하고, 사용자가 **'진행시켜'**라고 명령한 경우에만 작업을 수행한다. --- diff --git a/__pycache__/analyze.cpython-312.pyc b/__pycache__/analyze.cpython-312.pyc index 6dc4152cc04480c4e71910a779cbe6628c5c87ec..9492ae50b3f4b314e865784d4f3c2be464e29a0e 100644 GIT binary patch literal 13194 zcmdT~dvKH2mH+g~dig0EKL8snUdCSlV;mqr2<2f2?{|3^r>^lQJ2u94KS_WcnVMi) z5$IyKI2MTwA}>VaOcgMRnISW5cP7Km{v%;`?N#YaW_JJBSf6Gf2{TRl&;HK6k}Mmi zoo%}_+i#@%-N(7-o^$TG=bn4+_irgFi2|NGd#~4)Ef9qNq(bybjby&|Y6PKMkOf&& zFO>1OrcA@iVws4ySg$>vP?n(KZMrfY>a_LxJrL~vaZY|>&p_YMysxljTjw- zzblfWWvj_5)J=_+#@@1FH412(K2{ga)y;_J#@<%rhoG9Cva}jOHol`lYI;W_C-#VP zlAK&4qMX8NQ&F2%L+xhd(rYww2L4le400xisin3o2Aj>&9KcOwJ*FWyUCyl$v9y4t^I2NRQVDR+$qN{L3z1*M@D(Aq7})7w z+@qD3^cc#_a!I>TmM%YU&8QaTrR@o2Q<^o>vM-70osuSK+AUY_wN}bj1%>pg#)cDC z#a>aZG#;;LszbNMEA5r#+Z(GY>&tg5jWtT;aa;L*s|_%f zRrW=@Hta38M@vQ4Q`VLj|4Ixp#&Ns^GD}yLtG)fwTdM@2PL~ zhpzX8e$`Xn)Y#NiZ&enREGt@4QdC^Nz4oY5sWg`#t!)UV?cca>U+KF2>ne8d-MDGX zAt}L;45XvC+tsemM*G`BAN28x0iW9baik&*5Nh|iP+z}#qfNcgqqg@jAhmZueY=AL zNqR@d7+CK;Ko*u1mn@SUd6QL#&b3>>hU#gT9N7Q}`FhlM@2R)k7S+==I^YZ4?jODF zVJJhl2h`X5`1o{KuGg2K@IZb^Kq2X4+BZ)IG>vBj}~ zr;zU^u|>;5z50IWZo4JqbE{X+g}Ys&zBbF~t+w#bJb;i)5uA6zKfA-Ss0Ws(#67x& z&$>c4`_(QNSfgqaW6r0#+|knLosQ78^U+fHQXj^nQWlYVrvqqtZzkyXqHv*4+CmrG zqHyY4Hz8n@ECm)wD}0vnl2I`NheAUtWHA&n5em((l0petCxs@{K}qO>7o4fD`_xZp zMlpdzfeKBc8OFfvT-qcS)^ekbL~`8+v5&P7rqMfns3tK38KBADfMMtogm?)v+724W znpqb{^cb2PLIa(1dDN>Np}qm$@j{>4>wZe`>cDj-G#Ub;a1bluw=Sajx1WaHop?(I z2QeO-%BRG4gpW(@m=ne{k#+5SjA_;-^m@PA{Sgt)CW~nvN{Sf>9Mi_IKwh_@wJvw~ z&GURrB^|909OLL#8;G{H-s*q0 z?(7+fEr=*qQpiue&>l%jb4&px(S`$IcNfORv`7xx7oznfyBn_1yWVJHe5W6NPp7#0 zLMW!>$dALgJ=STVWos-yp3Kmtc9Qz_9?8LGIUYL1qvf21w^*7cB&t(pF|36gy0 zTwDvSxhQj?8+X;N%cCDp5IvR?hMug26BTWVM-_GtfKysg1mu=s`2JLV&BNLvDFoUte5H%_ELd=6ak%AYJ1MK+5|7(j!@qQ?H(b1ece&c3 zUhRuiFdN1u1Onq*4KtOJX^ahS;h9}$43jo0G*b#KFojH(q>BS5uDO9=h8l?YxE?A$Z}Z4e~d_!bM}>`Vkz5wZH2n_RLF`oeDv zSg=!u{|9C;__xTkg7fH&ydXw-pBnkZI?P=L&+F&WKfI(;)$=;JAI)W@VPImw?48I zhCD790!%&3GYsda_aa{JB%q_84p`JH0}`9q(Carb`4}bK)#D(Gg~@`W0Fv8arJphx zGaOi0 zHR~OSP}dJL5fL1-ZC3_j6_V~L*&fx?iEf1?;0qp!Rgo+Iv+ITQqAFU2;Vw6}dl0}3 z8t{ia@3Sl!{kvYt$ox6JEWsnZc~2LU2VWXdc1c29rQ&NMriV{b9Lo_Hcx3%;7@D6m zxHMu-SZb_^HVV$DrI0Mu2Yu>$of0>{x9`9ojhvHkKtUT>sH+hMphaiT@KwqVBxFQ< zZdSnw3w_eAUOVHUgGlH%_b>ynOk}WmVS?3C$aPjo%_Rft0#}n{!CYYGlasRFz6ZJR zx(j3g$LiQDu{xU1f$L9MMZ}qBIz<)K1&oiK_BbuPqDllr^r7>F7XPn*nIH&1H~l;j zF%3;!BuWqkqPJ5MjUBX#PO*;qOn79CEPfclJ!!i(u3a~wGm0NZs@^a;g-QFz0b7ALU~LlpauqzX)Xnl<6-*B?2nxFG3{)D&<)yIkiXEs+H5u zC!P>&39^~qN{Uk}r=Jo}iDX#ij25hvDs7dxno3h5keM;atnY!$jzH$bAg6v0jBeNT@W5y+V_$ov?jC7xb884UI~wNCA1k*~g4Q9Wu+u#-(-kIUprL`-Kk zq&DYSsVSm7w_)lt(DMLYFe&u>28~?Us&gjD>BkBM!Dc22GwDjlO0h;?LOG9>pJU}^ z@&c#sZ9!hxt8LZCj{^-`gi(qn9c6KYK`uUT!VHS?5{y{VYLK6I>Vape)9|*?n;=|2&^2%c{D1ckl{30Op87E1gJfA3! za=_@HFRw1g>TET(>gAU}#Tudt`t<`womO7cYG^RYYtNhHb@KY@0;70CtHEhxw2o1% zlS?Bs<}tb!#mj=t&_+mO(}J4jnG|+w^S0*g(hf%z7)i9?L6``38i4?c zH()=q;0y#S2X_&nhkI&whXn_1vd+JTqvy8FM}j@cLdTx>I@P=G`JwmkS#U&yTVYY} z^r-Kgv#6h34qf*!?B8iDLE$@1VpY8DgH05k$N%fKNV+uJ1Px-oPT>`p_g2k$@z2}I zH#OZFjc_q1L1-2}(w@}Z)a<;e3F@jE9S!!I+Mrle9Ml{OnyTupl?@eD_ER>psTNDy z(-OSL1p$|=MC7r~T4mo+X|r3E@^xD_G|E;4A?@Xr4VCrHE!Lu@=I>0aYpe~{Q%%a6 z7AaC)QC;77vO=*|+8P^b8)}MH*HawRwx%csx&vd`EJ*(N-@+f;gs%i)>K@Vi6SwJF z+Lbh~W_apzPgvErUz04(=FZe$-y-wM9fDOX&piR2hg+lecvi9fk(_I6Z`kdxcCgKE z^wTzY3akxr%+Lp)g)X=d+2j?z%b}~k;1&OOlb|4=FDMHsA!j}+bA@EC&{Y2~WVe$L z(0?F#keqKXCzxepX~ zjv!59F3;6I^~M?XgI=t5E>)W=ki^tw+F{YBaZkIJeYxrSH?FBaN|sdcottYH#+G|jF1466T(u_fbdi1q_eO%&$7TU z&$1A5P%EFTk#s?0V|8_HRc&Q`(1<%CJ6-hz#aES}zUtM=h8kuIp+7m)#k1|L1C}tjS z3O5s!#ptgTQ?dj}&{&NE&fbOWo&4vX=jroX!&lue*id6{Vb+qvE!qNXZiks-QRour*`$T|j6lE$M6c5p>JX(^)uX!7m24VSD*#g87A@Rr> zmlun!F(HkMw<)xFD|AfzlFm?EiWDY_M(@7UNRtb)0N`;x5KfL!jf!Yd;y5kh!uy*y z*&2*$qvLc$V=PB}>pbb6+V^G#EJwa&^Kv#dT@#50s#nP}gxmOSz~F@B5}}C8uL3T;_mXmhxE~&)$T*krj2enuCz^XJslSG zo6mv}9xtvz$q^;*6Fz6S>n22%8D2Rd#kNr5vqVX(6JHuTcslDWiw}-CY314+# zG`jsIBHp+RvRGfbUBWHH)%Ne_5*TCm(T=sm<%RdixYL<{#hfhU5!}5pHbvMZ5b6X0 z-cSdm$bu~v$O4zgd?aNa&?vJhq2QuI#)&T=1f%bIS(f{^J8Qqe*+3!rI~tMDDdbka zC3NHcQ154m5z##@U2l5Mj13PyL1JWFYN3eag+5l%(_Y9qml%;t-^;qSLKg8hJ#~j~iR&jFTU@6frWIkSzUrAVm{t@>-4QiaY`ard zp&N8*5lM1<;0sz5yRoJG_-ky|gSRi{P0%p9;*Q9FO!yWa>iqvJxziIRw{>#KZH-Be zIR+H;f#l{-D!KRzalS5bljtD*CL46Ke=*|Ip{wrjWdxyPb}S+&AboCfq~kBuffuF~ z%FE;BtZ^}2|6MyDor}qE&js$7<>F?C8!X_9zvFPwp^1lCqbrue{1in$+ZKV0tYNZG zh`(YrV}%z<#$ZBSBR+yLDAuO>$|`G6hx4yh32JSQ<3a85%2PpYjny91RyH&Twe{8p zDG82EEi#qIt-+Ma`uYmH^_0D$zSd?BnyMX`vRG|z9_GGd-KY@QV`&+^RiD1+wbhRr1bbBS;1K*peX^{{z;z`TCY zTnf0foZ+>>)AV)Ryd3$m{v5HR@}Dv8>5gl65g%pd z4Cb`>mo^XOvP@Apd#afq~6~%Qim92rS!%_lO~lx4!0m zHITo2KtIqjxMJ&rqk$E>@s72|`j*}i$*7)~U zBORJo)43RItrJU$yNLhRAgzHgMbt5p2=!(Y9@SoG2J`+=UMfvppxk)ch9) zW<8w$;(h)7*8*$z`41d?xc1pyh~vW!aGB$k2=<=a2bq zr-5yQxLt&|XV+{O1C@=a*o!X=UD}a!i)V^Q=h^3-;??=niu@&3f3?GJJpqW5nu9bI zhr}v+9puiVEbb$xbIE>6%`7z`)f1J^Jv7XHEDGk$A}`N|Fm^8za{xE4a zx-8rxj-=Vf;JtcpOQ3Moz=8YQ0x#`;Sh#y+%7XJom)5m!By}d}OPbQr97wWwHv3G0 zxhwBaz5Dz~N|t+?XQ^j{J3o+8=r3ODoimb=)sfXv<(}ita#um8v;1lEG1hZOh;TDY zO-LD$3mzKgfvBO>9sU*Dkq#y80MJ3rNs-lV^{?16l)7~&X)9{?YpOu4^^2rhs3ErP z!$`($q3~b%%||&FTDM>l>;%W2I`VTSJav=YILI2O5Q|jfdq~`j2zBINPk5p?8+EbB z_ZR@~MBr7>qkLc|OTnJ82yd%)EM^)P0oK-;qp)M|D5mJURCuKsUU~we?2|w{6Ug_i zn=;lvnv2qllcI=kRkf`;duFsv`2i$FUm>u^sX^2^p#}Nq+fI%)dt_ao{=Vr)X9lX1C2m;2#D8c`55t1Y5$w2w;Lltfpjwmd3!-IbaukPCkkfl$W)9;!=ho!Y$@?PjlbVwP$0KFz z#12|{9G|SVBosBt)e52gl4$ty0TL-8m{?t_*zEWMmqIJ`2y*jma^yo-_GvCcGflPZ z;M!t1)~GD7Hq;;hl2DI$j3N;lu|-#Os@`@AgW%PYu%x(noFs+;V!jSX;G?244a88(qW z#O0I1oS~!55$;Ju_-;C=sSTQlc22RL(SV^EtCWajf?|^#6fw^Zkgw2DDX6j9C|nbD zg4mn;L76c>h{mO0n-%f4e**AX{B0MZCUAnp*}4}VWoCE3-1&0%_Rj5|sooWXnM+-! zk*U*NDcEQ(tvSDDFe!I9X>K5CuIIo|(!xhsGu_RDS(0nhqpayY&$~}tb6#-{<}UpA ztVJD~k-XX0wqM!qO~0~hIB!KDZ^dBVs)u=N2aa}_{xHSzSkz_BdNixVxAH;ygC+ku z#ee9qzwAih(2=1!hyS(HL$iMB*8fNDtVhz)VW~7AmEJ$_VBJ3-_W$rlLsEr%>qy=_ zZ>BFXAgv1It#WVq!aQTxJU3vT>pA>8^WrB{gzO@?LaEu8esumvgDEqHQwjnp1^)Rf zhEi7kJ~@9ReMH}ea@JRZ2I8KU=ic*nw)apV zZTatv%m4IPi^}h8FJM+)U!S=tQTT1nnoU~qU$WLOLg90*xXGaVT&JUaqG{6t?dMZc zHWg?;w`eF|phGHUDzwK2Ma3FSav*w&ODy{8J*aDHCbK|GL~#;?3B?koSbPbj&`0eX z_paNpzhdK|jltyDH}0id_A0MY59n&-ll+RR<8shoZ8%Y@G&V354C<|?;L+O@+C_rq zeTVn$-?*b<^A;f7yJ7QTwj31FHl}t83;I=2kq+128CM@~lpXcfHOdtf$!ggukU|_9 z&F=*h`(^w=$o;*5bUr2^eZQtHWh8A!Tk=Ti&bFj43YWDd4GTE|A!kUKH)2`XCjLC} zOrk$2FCfeqF=uoeIt|^aovD~I^P;wtuZ+T!1DdZyVVYHZqSL17;YH>Y`lZeOtSwYq zDHeU?9NUlFEi}w&bao^p8Jv)(CTzM zeUsdG?s?~&d++(aU#6sJ5j@?0J7?QxMCk8$B0XdRxceReBZx*cVMp!pH_=Xrb6L9# z-ZHzqmux2qq=|o3v?~BBw<~*9?J5G}NV~dM)2;zn(XOSH?K(!wD0;-TrRC!94!ty| zRqZLXhDqy^(b_?>-Ox`IAttpLu?mo)ZcjOkh7HG1pZqX7MwoSXFxzf3!E0LSu6>M^ zW>}aQJDrY~7?$hkVx7GmE-QBwkc#GBYd7QF+E8OT;AU92I9lplu5DJ%YH4wHTJ4rr z*4fQkd)<~c#tk&qPOf}U!#)cq%_^8a#`6n7A5ybv$-<$WmDu`^9AR{mJMX z=Y@B#M&BQg{%YLfa=Ki0hApppx?){bMWv<1_5y2V`zalAs=xmu_HH0%V@cc z*pIr=fXpZB(R`PhCM9YD`A`qu|0B<(#Jnfvkq?caJ}dCmm4u6g}w@e(?3<-NE;?a&2|=_M<2Sp{m&QIzuM}Y&y~>wLudc31cwI=U>$U7qCDF2PnvSb8l$l~}qjbw64{u};F%RT8a85#L`L zy&kaf{cT86KE0H(2t6?_QY9S_=r9Tp!@+n_DMH*DNpnzkN(WM&5Lek;xkTqnkz|1# zaK^!_pBum)F-MrFm?S)#wD;Sn_vfk7ts120E$x&TSkAwD9 z!Yb=3sAzBqN{;Z()fy@`JSQakg%2qGGST3AC}3)08lBTKH(Nq_K~~rOxJbn5p?GfAp;}kl-aNTy)nRTw)#& zT3)`pslkI|lA@^cB_g74PT@j0I7P+A&P1tdKy36k)51so#K}qy+T4jPo`I8#%IPAF132(B25 zA4ktee?3}5-N^*3S5EmT4{lwepAHIx6Y=x#@)qYYZ)$mYIn~zO*3w7;r1hq0wIZhR zP@9ObeFK%=n;OUBWigeg;n1;N30NHL_PQ`J zY*z58ZcleN%bky zdidX61;ZI)CnBKj^PwYZ$jO9{I3arpy+e!;1RB$o!C{|4RnqpvdSFA#yf zqvq2;3uo5<_bztBS5u9IS>{m!2=OCa9v8v+E7yb%u8Zaa!wEs`zVlataWWAhg0Z)U zgpY%fF|#U=K`kmBa64}rwu=HSQLl!Yz_vADdmg)m5#WQ#DS z7>57bB0Dh$uJtVTsD$EUuLmKPioNRh>;k?gs4>2XAj+q( z>Vp>^QfkX9*M2o~=)o_!t`}xucWQ!XQNHZ~)xEORlS|T*=~0;+1czTyy(f(RCH>5{_ITtXGMi!?JNOd?H;e)byc&_De^^NaWk5jCasl-$Z*HNq$s6}c#c5sk_b2a6kPccLOeQCfVqih;^!J}zsr zEx={KoQFW$kR)#!(nnOMR3rLR`q4wdz8OQMzxk#v<*n)y)o*P%v1Qaet1FHeazc6L zP}-W1dd+P^y1#iri4>`GWM-Jm3}yXvmfRjO=8Y7bDwr{@m@}HgM)QTD3+#-s%D>|w z%007WYa}Df-?AXnC^koQ8F+;mooP;29M%;F56$`oKxq9)wv;4!=D;%=x4Sa2sIoG9c&NfJvXbiENGXUWZx{gKa!s5-+4=w zIi7wlXDsJj!C1j$>TF*1f=sR1O8g^hRj_}0?~N6+?On53-9t?`^NWH_f5|UBr3?^( z^*2qc=1d#Irj1kX&+=wXI|J&7!8HEzT;Yar;fARL)7xhX8*h-|!j`b1<&+M#Y$^l& zq-4+OmxcAqLWR}W<-vj$gUpC5wjYUCw*jP1nAZ)CeelBd> zK4;hyHtd-(?42_l4jT@K+K+@ftTToe<_tYyLr>^MXXxkd83PBlQX`W&l5L7)6-2UE z29E~YLu>0kE4$$dwYG)o4}|JK8Qr0y?hw}>^7My}dPChWhcbPU^xA*vl&MO8)4g<* zo;8zMA1be#&8(j>*86wdR%MOmg;h@lk4~AxtDgC*E-R9jGrBpPRvanq3MfN4tHXwp zNRc<74D@|vpl%v7ll-&B`bg>Xi!^M4=Jrw~gOOna6)6Ho2-Jr4)Iv7Wr~7v;WT28G z#86WxqbRH|jue$hpJj+@lEGvXC~_e+POC%O^uXb;rXW&uct{yaD-5ftNMZ4ia>y4} zFN>It0MrMbaa(8f@4COxOAz3cNq5Pso>Oo7_Fg_RnsGL~U+zXVyZA9`*vBeJ;P;59I<5>rGxp~+jDTUgwZ zvm5bHjcHPnnXIF;mu8>FI9+qE%gJ($_&&knYn7M*#NvJzW=PBx57f22PTFHj}BB0aeN9>VYY3o4|}07Vk{k<7f~yRhW^2V(jm44t^catadR%-yjp2*h0Fu3Kq5nUBDg|Ezy>A ziDStM*g-vLhx8EN2duCy>oUhdD`G3U!g0uo+A%$bk${L`hMRH&Ja|Kollc5gJ?zU> zKxPD}vZ*wqc84Y5bU2+Usg4SYZ(Gky`9BU}P%jy3`V76*Ky56m(D{zM%$xE}oL z32^WtdeWzlKc@pbxd(fn&rS`n$M;~T`|Nj+nHiw(+k-ygU!EnY3xb)eOTG9ZptNVh zMq~REW`5wR%re#{*$2Y;^WWq4xKTB%T0;1qzlzjnHpCtLQ4*z ziKIunD*o*COl>VP4$DN7@Mw@W|z5 zO}%&H$L`&A&ZA+l&@i2b#qoMw?R{2sFz56*9L#u(BnTefe-RW_H^Q5W){-;`5cDen zHYp!mb^9*{A-DesP`_Q_vEZiA5P%C~4&4^7U`<>&0UI96y|w1{cQ?0htqp`dRNmq6 zSH@hj8Ogj5P?uZ!U-;vxwxel876P@ay!yQ!Uprd~UW3BP& zt>6qhQq19B*?YwTPOyKAud>gJ(>Tm-7Z0D1@S%<50|d)M5eeBH0nknq34t%tYw0i3 Sn|SO#s6N8pX`%;tgZ~fpFg_Ci literal 15406 zcmcJ03v?6Lnebe_CBGlo@=G4@1IrHy+p>)x*fGYu4GEC2I1nLw#Wy4cFCzDnUJ1EMd!^(m>y^P(VwXD!FD!mmLt z0%%cf)sm`kyHI-*b;w6i0m*%MOG-q;FL@~{M?iX?_9+S4OXISk(Jnu-r6s6yKfbVe zmi+v2x{SmJd^6JLWQ^>hgpps6#Ck;3s9#3;QPlSpyF`WC;uyBGRtlWYCpFX#_@wlX zjd}@}3gktZ+v;QWYEj~Hi?U&hXGm1A$Eq#lkG(FYyO#4&qV^!}Zm0wqBo=JKt>CBGzRCG8w^+fmel(c7hpXwb; z`c+ElX>>g!>s;>H+;fFz3&)O3)Cw6jrxlAyl>ahmbvQZWnVnDV3~TeQ7ZhD;I^T4u z<9x?Nqfk&cxi47IJXONyw@>x++Kr#drTMA9r=%%q^GYhUY(AAr&N#92mOB|Z{QAc9 zt~B(+tTuqY6k{}3))3vOFvOX#Kc1JMg%tit^w5AT&07ne8?_zU#o=j$9&rY^xG|xq z=8_lZWo{ijfLX>J5nVj4isl>=(YKNDjxfF_BZ(5I#_D9)W6LKQ8Rv3&xO(7X{Sr=c z$C8DlGG0~|%`I{x9(L~F2D=4O3zj05R8CQlR^rN&;J}8b_{T`DNMqp{Rh4Yl9RzVq zWcX26?NC^5;*zkb5;_dXPZRa<;O;XxEABRtU!ka_tYVt-Yoesg-p?Xv{A663EVP; zF)jm2ztV=MOiO-u`9f5RfNBL&==>^(vEqx;<)!fy$2#<@V!Ujk&L|GO4s!;z8r%M! zeA`yX+Am!F>KLVf#;>uJEIf%}cS)s)_?it5drN+?x`Ct>NKE_H%xakKP$2gjB7KQJ z3GmX3%7i+Qlc>@+m9f3ZVD7E zW5@VfRU$hE3jKwN@&?j@d$TjNFFb zWtRNlDOe()0}XBV>ufDc9>eWXij()qXAt+! z3beF!c`Y&90>zM7>1CweO{6YLH1F{j$GH1rGHYP9e|0pw;@n2BKnwTEZ4ZGKXwX9H z3be3&c`e*0Dw<=G-fai}eRLC8<6rZ~=?CU1B@5Q@$E61^bAvTXfnK6{W@(JaKU>Z& zTa-21?)GVBhri6$k)V8Mj1#5@Pu5~@D}i!(#{eB7a7YWu+BJcLH71=DhZb5dxU@ar&sV8BlZSfilUXN*_&ZtJG2=o%zOJFyF zDFBtDzbEk51inq+D+Kxo+y_uGI!E9|0-q$%PT*bw^8w07KO*oQ0{=IG+X&PGl#R|1 zh&hps`TnC6ouRm62a5#We@=SKx;mJx#GT{`e{Z%wM|j`~_B#6Rl7w@tes3 z&{rV;tNFl1N(380Bh)eJ0cr#hu6|44OC_)qa{zPsH;bNG$TKBC$>%OP znC6zyZCsHJ4eY%8#wY>Y~JuHh1zw1V1iAh0pbfJW*rAQ>< zS|rkNEjJp0na=@`97k2eC&-Rm?=DgEV2}CXz1@4v+ct?wfIrx|cklk)dp7wLjKyPV zp+%|PHRx>shn*XHVfOFGT4)38Nz$K=j?P{DKXd2K&@{jQI zngQ10@p7ztL#Nl{a=0SHi+I5C?tuz!gnp){&jV3+aISd;+ z``Nx7R*$ivzR6JENLTFGy02$vHEp*ZVd*Vw{}Gp-?i%7;4%S#(S7!hcKy|NWz`|Lh zO)z&yt?bCKi}Q#YuN!Y}4#LKt7q3apefLB~4{dSj^G4#6}MlqdnIimZ>c&iz7gG9}U z#p*G8VMo~ROD+MkTb(t1$7`&NH650xpn}q4b+9h4$EPYG>{*!@Haz0f#9DZqUOZQt z9X&Dk^ab#V*}uI&&wlsv?DH3CD4Us@jB#5#WHR6#Epu!~+ieDISj>Z*%R6lDb1}!w zdJ9%N|pRO3_ zxzl5FFP;VGx-j?cvxeK~(VKY6!{PT*pX3nj#Y_EkP4qd)Cm(Qe{p?M=hv3sN)}w|2 z*i2zLQF+wrwu19n>0J>=al-~a%i&1%;@3ftY3}(~XTLd#O>}B~|+-pS}{;RGfG0Q?uW? z9JLfCS(T6^O&BJ^86Z)xG~smxm9gVQD;w`i+0yT^9<_RoM;dItiT7`PvPWx-hi)Uo zO}vNVlT_1QywScyIj3Hk`TlDy^i3VEBLz7~6x~^ALFl<3d~5Cp7f2Qwo$wVW<_nt; zBFN2Jr;;exD#^Zi zmn`X@Mbqale3V8FXKNrG?{p1{YP_r5<$_e{(_*)X zoRc9zP68KWH%a{0t~4XZ&#s6v#^r>QL|ii{7j9q__Tp14b8n2ze)k)5<6om=1F^n@ zqj4Pt?BBzf45tFzE~hAGypCZvhmT~4s(w7;SiKHWVYhgl{X=4slO2hq+hZP4g>Qyo zkP&5rE{{u;_uH-gM?@J&XBRWbXaE_}>}KucR1us%aP?c@co9^@vWs%g;)KRh@Cgw% zrkyZi;f#e3ByrYZG2h*f$6d44>2aCy$Y!<=vb7&MqnkU|fddOh9MYZuN*#=nTsN zVamavMd{ay%IH`ns)j9Y_lS#w-eHE|wZqv8G%PCnI2hM)mM7yOun)&CKq^&KSU1OD zRU#{t!(b{VM}5Lf&R1z7PGN@hEo!h=fO4^F-~iX4xzFRo%ENjiH=>%PV^XHVSu=tW z{zyH@ccduE?d|V}NeUs0%%TboJHhED4>%#IOuAOZvZI&^R)V>XhlL|!xDsN;G@MKz zm&AEUT2vrvhF!xj#1c6b!)9DnO!L5m2o8e8t(_cW0;d6(I5jSaM!djn9%>X*`n?`_b%radf&JkC+;U9C6kjX*E6%v>CfuNb_$vL)5_~vtIjo_Z5}%$WL2G3{U$Bv{7^VM|AOgK z%lVcGFW-4ASbkh6@bUSN^N&9<4~G>xC0|ffN~a{8oH?DDeOh@Vt3=2u`vYZ>sLSUG zoXEI?!l7XKV?u$M&p*r`X8252NM^%@MVGdp-ySNg2^Q8&dOoLQT_}|=toeeH6{Y=w zlI5nI-ZHPDib_L;#$cf_T(Wl36wF&cFQE#1B=d5rXx$f-v?%otlr%RLYO1LG)uB9n zFi$V!RfRx~P4lvSl9ckWR(HX3$#>p2*}!+(gOv`U*vac$yvxn!c!YFMI4A#{^{kbr zxAXh0XRJbwEtGEK({0mGZaZrm*G;WDV-s>ZLg^iRdPlVOxMpIDkYfs^oA`9oLfKX! zry-Qyz^6ChGTgdQ$T5V{4Sc#`dNChE!Ah%8Y~yuD_#-YpXIMxdh8Cfm>R?VaUvrqZ zL1DF!GaO1E=F^9#3yLqbop0kS9u*1>o>7H$<(Hi2or2B~(i+al7YTUPbFKSY6TipL zcL#!%Uloe6v5Gq-06Qh!_>#LBTP|a>e$Ms0rWT#Nx7@`|_x^cR?Yg`@H*SuVK zxp2}d=pP8_cLeo21bt7aq$gO?6Rxg%)%1GHD=k;O&_^%YEBTC~I`-i{Ob=tL)jxva zBXG_dE>(}>2W5{+ri};3w}uHYn8&xwE2#SQ<2$a`)J@j?;7Qt{!nXQu(eNUWkL;1$aExVIwF`H6Y6kF`YgyNZgy#MHUp&emc$9BGIFTA&znO2^ za+SNLzgEES17|byj~wP3EEAgWy2e*Mulru{U2EXYSeqjei*1a!e^=x(f;MAK9z|R5 zWs77xzC4C@V7+&tomju?A;(Zu(=H5mN%r82aj)b{7%KNlrWZOP?YblFNNBAmB%8M4 z23sXNNxL3QP~Rh&t~9({dAX8rcm$H*M5R#4hDuq!l)YYA`*O|YnklJJ*%m5o<4fD- zHO<&>4eZ$NFf^*ZTs`@OP`M>k*%Pen0bed#7by`1hb>y79pH?lAcDPDlWT`lweo zcZHgF1e_`Y!)(bzU{dQ+&fC0N-Kac*#o=_TooQ0gFr zOoUMVVnS7fP#JKhqVniVspnHC{rsMT{1I<(?a}wNBcOj6y6ored{wug+Z@vE4C;1< zbbErjJ^X<~yl#)6dn}}VjMqLEA2RrcA)Xr{BgV0i_86}{c3oGtczk(RYnb;+sr>T! zuS%)B;^_RL@oH1Ba)(g7lh^fJ+sEhZ5z_a-1SMB@&T-c9x2{ln8J}MEhb}pu1VyGo zpOLB1t$7(tW^T=EV4`vhq{yzuFyjciHJ#pZE1YhE{|5Ls+yYrLzJl;2&vRP}>Ub^RdjK;MDO8(AnYL5ozXYyi;G zw%gZ>D?X{ zUby5?WgBioVAJ|PA2Z2Ro*Ap^82lS<-zX>r(s*tIa|-KuE6ktxA>Wu)&^;c~KK`Ee zagYI?{^yq(D!T}3rW`?w=2HBY1Ml@7;vX{$y@&Y&7QVYLSlKVYtVqZ5td%Fz#9+D& zD*_tc#-=O3eP%H33-6|v-JXv)!>x!nEcS{2a!*U)NOouO5hZ$SHDdA^DHysT4V ztCRk4En*54KddMM{6})c6f1tD$i;XOV%8{rq?2L1q`{gj`LPx;Wr`o?YcRC47m9jjhGFppRLZrcne}F)z4ZB0sna}Vx_8|YYQ-5k64B3=Oz`# zH`K9}vVSomR;~J%bVLX@2+hJTqr~AYL!sFF%xcnxgN2ls$Vu^Lcdyt z*tM!(<;XBzP&b$<3#Fg|qbiiz40u?H2I^E{RSCwc(+3(<;WirZ_Y2X$2F?3LorQ2C zHlsmGBes+Q{y{MslxjYpJ1MxiPNPAE>iX&mz&})@LAB~bO(WpHrqE!r=GUkVoYWYkVvQ=ni$`u%|mf~_F z0xQhaDUQgLGwYG9HF>643HU6fgmPa7e#-HIVT1-3=RMg)gI2$&-Phh0ZTam+a2&>7zde%5_6+>21oomJ=g=` z+#KFV5Hn&=VM&O5uIDCpN~viJN6Fe6DARe7}eLBa3c|$Y*;T z-d%uouE>{r9IQ`Jq7=T=5#J@81n*4q7)Wi$aibx%P{f{4)+ykL_RAaUvvj2GT4@E$+rB#*bc;C0wlqNI23Gb3t2O^C1gpFMm7kk zWgE6j8+Qw*rI;2Lvzso^(2cWAPv~jSsnhgBn|?-_w4i}O!o65e za>RRZ@mkg+6GwTE97dT}F{12I;%JWCs~S=FsBuh&(dw5{OTit*F|3C;jGL7=5v|jV z3%SnFVV}zp@H!8V1>Hl#4A3&YekbLi{0!|Ib)}faAq$`!G`sVDE`~jc;h-3UOA|bj z$`S!a3V$heKlY%RvZa}L6bs6tc!;Frj8a<6qmr=Yky21fN_BRL^XnL;J}3W{;;Z;+ z9LEk5Ls(44WJvQRH#A_+;}mg3j$vPwotA%2sl;OPhme$!R+0=!ipo7=ASFr3(SyA8 zu@9vg!l=x@#jj{zN_p#fETxM)rHdk}@Z@}q4KArHDx1nVE2DC+thz6+GOF~fTFEO} zzCxBN15)LIKF&*Pxk9vDw}GNXi~O9ysB(f0v{#WFHb>_1U&k5S@Z9-|r3N-<4T<0)Id zBVjA7QYCZEvrd|^JW6w8nGa#HF`7Y{BESrGmx zyOcjE9ibjHY3o6ArChRmB@bhk*7aB*D+5TDLq-tZiR9$QpWsl|E$I4tu+lQ*K)XJ3gUHku??mp7<{$Hf$Gj&q-dF&FlJW};kLv*!N0Te+s>akcsv;gY8DXJ4; z{@1Z{N~nVe@0#n=_6JpJ*G!ks&D4f89e*h=ACS>4k_SItK{y5#+ZrppkLL>|zO)rv z6fOE6RO}JpN@w9GRxCxtikEi}s!h_X1M7Pn_6|LqZ5oziC1G$^2X{-TN24X4CW*Q{ zj$$EvJTF=jE5#Xd|Gx)h=m5ZAM#^bViGrq9%xhuQ4?}E9V_6STpIf3? zP|+RtIm;yLwAiyrlD9mr*vFS6D)Y22&y%odW>g!^qPCuuOV)`e;B=>;$E6wRYVn~f zYWZA|ey3jp`~1pt_7c|vGJ2b&<5FcTah_0&|3UHKCyE_W8TCY(r?%u1h2o)no_bn> zM_K~?lQQLb*+-x3&{N=g?1XCkjtHkjcuIsvMYvsrwIVDQp$wpMd|rg#5#g6aSOHKm zo)F>dBD^TVzZT&h5k4xyGJx{&MG^i~gfED27eLwgzliV`W$^Uejjn%vNZyT&;Ue|C z2>(iiO(M((h>!o92)`%7#uO($-YjvBf4CkSe;2-Qlwr(fDeu-|K~l`LbWdp8S3IF9 z;CWb#o!i;u$OZbQ##J zOk!iWqyyNuScbvV@q=m-S)xFv$D}38{IIH~46DP$EJ3rxEJCwF_!#U8-C#G$n550$ zl8@o$r^4v)uOZ>{^wKMmxo>=*oIm&K{MRnuKJ_X&_r)o=xP5F&Bp;uczj*TYsfk

mvZm-u_(`c(DEBCs6 zlz%Klc0Nni)!J%XNgy;ew33IK8q8#SAmF9<(gRPrnVQD>^|tyZvhvC8&+dGxn)JF4 z(&RSUb;4mK(Mh}_u zpS^-Y`Gu?C>67!H`yx3v^(Aud!r5D|U7bG%MA31P%3+2Hgj#B9{E!qO+8MG1f_@L} zVn&=Hh7P*C{xD^8`A2GmT*^P<0>{Am=xrSM1{XAAPB-HS`xv)3qAi2CyM5My!&W!t z&V}bqn4o6dBeXxvM2Ip`JvSv`fw722;$VDXbPSjqKRW-ySxAbxFP|mnzIl1>!Z{LX zx2{e}+SUy>*u3r#<8Y)j2|2(UPRCHt9}YMM{M2EGc-zg;BMz@~fcCl%+`=%}gTWuK zo+HEP$@JFL+4*x5Yvou^zAFtTRd9hj`xz5i2%}Lu`eE&_fo8QH2 zBCJB*Zv@j@n)r zokGE)`3tWumFfK1$@z;%gXsNRq#YSSZX}miOI>*BK_-Qc1)TvKINOSl6pe8+UfRac zhZy&9kQ1qL1{t>tNY0>Z*gZ;HNBor2ONwlw2pd!@O^O^3F&PCfHM!=2D?43`d(_Pw zPBAFL+k+;vBf^sv`9p$gh(_<&1%)T%_X$cWJQ4^6(F+)X zaG_S@4vz>buQTj(4GWn*dMsIohZuoC7Xj!#f?~+e_ywiQ>vkO!6kwcJSS7XzC>BSE z_Cjy*Ly36(E+;~iE@Zl(3_5+lEC(M88n@2}tqdizpbRlVcRt^MjMsuzQ5P zOh~{3f_fkbJs%HB{AJu(Wo8|ctz1#K!T96pp5nq*oyptuDM$_wx> z@(%p{W5_YU_>ljQ>%y;0LJk8vA}z{GMiulFqZ)DyH^Ii)lu|#- z6%Ga=-5rddae4*Z16w=_uEG}{0k6@awcrz|p9g;a&;k=XT6)pO_Waq~Uq9Ykx8dpJ zezjBFuX@{eZ1Gd{vvi2*b%6)GkVg=WQMZe>1r7_VgJGZJWAxw%tcj{M^q;V^|10*# zquB2WEN9i1+@E((Xu0flN4L$u)#1+{o>;@>R2j9^LkSPAQjT{16M`O4EV}CwG5@w>xg?;R^S&1-)$V z0XEyo5za(W@p;V|4PRuA7n!FTKEM>4G;ERi4yGv1{18*D&OEMO&|t+RUsM$@s!EiW zU1H+-4GS`?XoqY;i4|Ah!Q{o74>9>_4a_94f;D{px_JINF291$sW`4!P|$dWDWTJ! zZ8+a@rsYzYZ6A)8ySWk%t3Svd^s{*ZP8&$%6`VGoG_!`s*)IBonadmGwS%m7a2Dv6 zla`78sXZqwTwW`$ZDqBsso4`xTvBs+wY;{L)z&W42rjRV*VeJxIz&Uo9E@UUf|&KD&u*TthtvR1Y#wZ=jF9tR_mQDEIA!L8D-b* z{w(}6%emGtp;YyCM`se~YpX2Q^7*=nV z-NCS|&9e9N^%IS!dr#mCvYq&5 z=5h^ZY30|p@@1RjWt+LOjuTJK8cZ)XTx_|}G8Mj7cWw8z=YH08Z7aL`8MbR5`%E|6 zy`SwJX7{^UcaYr?irW~jEXvbB=nE)`VDdY22S6~>snb|D^Si`&R8xr zOjtO5Ew8I(b+zy7jpwarte5blt*p)>^3}%mwNr-aZgyWEtFPts{k*Q9)%DLBn@>EE zFq%$0v7pM&Hzvy66PXE9`NirB)t8QNrdHn69yhg5s1wFY-dG}{=IN4ao3917rhs}Q=7Q*EqwWo zc=-;9h_RYCu8$knPb*mCdd|3&FWDL|*_tp}c~gDdRDX3XYpUl=n|Q+}*05>TY@K*w zW}W4d{qMUc@r14M<&i5RlaD7V8m_8-p!u%mt%}Ww#@0)kf2y=DsIi*1-x1ha+vV^~ zx%J}6g^{T(TzNB3Hpj{4g)FSR=7U^}v?T0%CUc|-?Ob^iPd2e+Q^MBF+a8PC9-A)Z zY`Z5j6LoET-R5}RX0EP-uj+sx)z(dB&6sVM4*f&vWP8H?=xa5v)Jz*Wdk1gd6}Ru= z?7R8u-F(%)c-20xs%Nrerpo$q%jK4-!K>5{Jm2+P({UR+uLa{9pW&*yCOd?Rs!M&} zcTKOIe(v>(?|I|aCocDa36>4e$!x73iwDgP23Yzz_PHbMo@l%!#?^e9wS5Lk`Y~At zs_crbsIrY)Wr^}yzPu@3-jp<)1;bTXzTtG|$Ux$cj;*6uUq~!ChSxEyj+yyrdA_ByEbNtG z1*U}qGAzF&)pbX&j>XHjaV6VX{f=uoHm{S@c0yZUtv_u&Y5j5yuQjq-k}t!oqU zw#9_5c5zl`fw6yaArCWb#upO?;5FFMG_nIOtM@M^ti3SyfFx{9Kbp~SH=!fH)^Qi+ z7#>H2O^DmL0m(P6U!1M#KnbVK)y0N!)V+(S0_fNmqV7bLGGh+z9r;)Q;_PqV6ZnkGX_o-fXPaoUfA1`-s z(3SO0)=9JC9u?OP{s|TV#=FP^ro|H#OD=s&YrMOV^yy;KsrS0}-$OKa4|I zH{yLd)$1lX!q>ISoF(!fR^WYwsvlOa0sIXm-ltc+p(;f98oY0f>WxPf2rp}NG|7Hc zg!hrE9~J8nUXS;cseaT@4ET?;@V;`@kF!M_Rx_)9Tv-J4pI~@jwdyDM286fZeKo3| zv{nNC(=xozuKMZPCcv+i)H^hapJn5Hjl|D#+E>BF&q=(mnfSS}7~#OyO8mUl0Qj4Q zc;80i&7u;7*W>*-@n!>o@JH?arHWtF;Qb2X7qxo8f2qOy)x<9|4G6Em`!&QbE2{zj zl@9OEB7T+M0Qj#n@cvxl*P4xhvjpD1nqV^!9#`Z21w@=^uY!xWKuj_5Rs&jy%f+G&2O5^0RL?X-d{`n)=&vJ zufY5330~QR@K(IPNyE3b0iM{J=iQi1S{b~zaMjuv+)h!adYK5zMQ9dbwFqlOXcu844zjK{i?CIM8xedL7a^!#s(2S12oOR5FiV8sIlxzo zut0>xA~cAwRMx*%{jO1k)`4KPDc-e+&?>@O5kee*-Xy{eB5ad6aP_+z)xh{ZK^s@tDLo{4z$Q-Yn4EmZImOt z8Tn+kMd!>{-T=R?(%b;Q0iG`h`i&wSe0ZZo<+Q4A7;)!X?G2L};bt|^%^5*>Lxcr4 zB7jcyLG<$hLAU&~FI)0=V9tO$`2ea){`@P5-sOmA_RAQwcEG&=?h@~ literal 0 HcmV?d00001 diff --git a/__pycache__/server.cpython-312.pyc b/__pycache__/server.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b5cf328b10376077b9a66fbdf6dd680bc2d4e1e2 GIT binary patch literal 3452 zcmbtWZ%iA>6`%30jqUXxj0pw|iE}w}m=jRwpIokKQUale$UzQhb+z1DTkinN?b_Dt zk_3xLkRSz4Cvo`{PM7HJs*{=sPU?s1lzy%5+kJ6lu0?x9ZPiw46(Tt$CpGt}Z`NMh zv4Zm9M%tM-^M3Emd$aGonJ>LwH-h$yu21AI>Ja(|n^+CzX4`+;hR{tUAqgueRvd8* zOE!Y$cne$^CGHeVB>n)d-Q*(4p>Qf6<1y1$r`T0T%wf__rA~FmoT@A4GUqPEtqL*0 zq}_@~^~Ss=Ehs+KAM;~mLxh(+Ti#7vv>{ec>gSGC?6D}x2S`EkZ`E5!paYv+k&h8b zby(QSNE}kb43>gs57buUroBqyl?u|Lq(f!>O)F@#v>a;${m(;(%IM}Q^q~XLEf)F! zeyhco!WC_08=4M~YAZ{%SEOR01JH-d=p&?g+9pL-`0@@d2gn>P%e+*zL+b*L9{UeF zp|PxeUZ3uM&8WZd%c(cU$417_jSY?ujg0*)`{Tja<3HC3)yzIVkjV^*x)?v7PKrwW z0!`0QQPtv8L<5YN)Vs$AC*nHHBtRMdsdJN)LjzL-i3=0whDY9xVxw+Y)bxQ1BS88F z$0sJ=lqE?amqbdUHX|^pQzEKzYG#6HnRH4chG$q-h&2NtVNw@$ImuSFyVz(HQ=+nv zC5dTRNF>uKouqVt1?hY$k)+}!h0p|;c2Q1}Y#8jFBS}5-CfGn|Wc(udMMN!9w8yT? zxHxu!%^Pwii(oHlt@!~Mowu)pg2uZB7qUI zsAv{KCHibyDo~sk2$t)N=O{aC*2D0qa0?_>%{cj2VQwVK%@j>oHxS&9DP)VO=Zh!q6Gs8KNi3IC=!uBJgPq&{;O-nmhACG{;Au(so$Rx_VVn zR+x`jUzs$w_-^)kEwwK&m%$Q{3pzat=Df_z4O-?taUf6ghCeTK=J?K%y`ElC*Tv+l z3elrIVZ5#&pj>*<@$a*@%q zQ5&rX^#-rWSrT;_wzOvO8Bw1#9OSaB>6+nG#dio49L=axU;!wDpG(Ur!y#*|#u*MV zlOZX|sDq3!weW^e+$&)!vNM!s;Az7#ZLVl6V+X}6&>bLnpl ztxKc3f_MGI+KKhkYo|9}-4>4TI^FBSn(+SUwllKh@hzR(_4?OiYq1UC&)%0;@ope^ zGjt<#v+YLP#yfZV@`3JEN41LMUkOKdJc0G|Yv*^GTXw_kAD#H%#7Ad7ICJMm`EdX5 zr|ySO-aD3SJ$)~kYd-tf&b9i!L7dlLs6)Pvf)9BDOXr_z9%=G zXiwk(+PB-kmv8USotfAcCUgAcQ!`$#HZ|jDn6f9K!D$PS4J36@rsru<^h@NDcYLt~UBVci9={cbl4P_O~2wF@~2QLaUZK#WJ>Pr8OvMOcxYmUcs z)8HTt4ZE72Pw9qFl%zz(0E2@RGp|weo-(VynPuRKPR%$l&!Xn2zIC2b)6%>`PSau7 zm&K9x7W4%UWBfI8eU01&2YM;GWV`NOcISll`{?ja{eMM2d4OJh zfI1(b-oHD%xyHV2NBDKM(gW-*WCF)Z=8 h>*L^m5ey8|YAzH!7ABO(fyaQE!i?KcB+X~V{{c?W7ncA4 literal 0 HcmV?d00001 diff --git a/analyze.py b/analyze.py index 9c90006..bd5637f 100644 --- a/analyze.py +++ b/analyze.py @@ -2,91 +2,165 @@ import os import re import unicodedata from pypdf import PdfReader -try: - import pytesseract - from pdf2image import convert_from_path - from PIL import Image - TESSERACT_PATH = r'C:\Users\User\AppData\Local\Programs\Tesseract-OCR\tesseract.exe' - POPPLER_PATH = r'D:\이태훈\00크롬다운로드\poppler-25.12.0\Library\bin' - pytesseract.pytesseract.tesseract_cmd = TESSERACT_PATH - OCR_AVAILABLE = True -except ImportError: - OCR_AVAILABLE = False +import pytesseract +from pdf2image import convert_from_path -def analyze_file_content(filename: str): - file_path = os.path.join("sample", filename) - if not os.path.exists(file_path): - return {"error": "File not found"} - - log_steps = [] - - # Layer 1: 제목 분석 (Quick) - log_steps.append("1. 레이어: 파일 제목(Title) 스캔 중...") - title_text = filename.lower().replace(" ", "") - - # Layer 2: 텍스트 추출 (Fast) - log_steps.append("2. 레이어: PDF 텍스트 엔진(Extraction) 가동...") - text_content = "" - try: - if filename.lower().endswith(".pdf"): - reader = PdfReader(file_path) - for page in reader.pages[:5]: # 전체가 아닌 핵심 페이지 위주 - page_txt = page.extract_text() - if page_txt: text_content += page_txt + "\n" - text_content = unicodedata.normalize('NFC', text_content) - log_steps.append(f" - 텍스트 데이터 확보 완료 ({len(text_content)}자)") - except: - log_steps.append(" - 텍스트 추출 실패") +# 1. 시스템 설정 +TESSERACT_EXE = r'C:\Users\User\AppData\Local\Programs\Tesseract-OCR\tesseract.exe' +TESSDATA_DIR = r'C:\Users\User\AppData\Local\Programs\Tesseract-OCR\tessdata' +POPPLER_BIN = r'D:\이태훈\00크롬다운로드\poppler-25.12.0\Library\bin' - # Layer 3: OCR 정밀 분석 (Deep) - log_steps.append("3. 레이어: OCR 이미지 스캔(Vision) 강제 실행...") - ocr_content = "" - if OCR_AVAILABLE and os.path.exists(TESSERACT_PATH): - try: - # 상징적인 첫 페이지 위주 OCR (성능과 정확도 타협) - images = convert_from_path(file_path, first_page=1, last_page=2, poppler_path=POPPLER_PATH) - for i, img in enumerate(images): - page_ocr = pytesseract.image_to_string(img, lang='kor+eng') - ocr_content += unicodedata.normalize('NFC', page_ocr) + "\n" - log_steps.append(f" - OCR 스캔 완료 ({len(ocr_content)}자)") - except Exception as e: - log_steps.append(f" - OCR 오류: {str(e)[:20]}") +pytesseract.pytesseract.tesseract_cmd = TESSERACT_EXE +os.environ["TESSDATA_PREFIX"] = TESSDATA_DIR +OCR_AVAILABLE = os.path.exists(TESSERACT_EXE) + +SYSTEM_HIERARCHY = { + "행정": { + "계약": ["계약관리", "기성관리", "업무지시서", "인원관리"], + "업무관리": ["업무일지(2025)", "업무일지(2025년 이전)", "발주처 정기보고", "본사업무보고", "공사감독일지", "양식서류"] + }, + "설계성과품": { + "시방서": ["공사시방서", "장비 반입허가 검토서"], + "설계도면": ["공통", "토공", "비탈면안전공", "배수공", "교량공", "포장공", "교통안전시설공", "부대공", "용지공 & 기타공"], + "수량산출서": ["토공", "비탈면안전공", "배수공", "교량공", "포장공", "교통안전시설공", "부대공", "용지공 & 기타공"], + "내역서": ["단가산출서"], + "보고서": ["실시설계보고서", "지반조사보고서", "구조계산서", "수리 및 전기계산서", "기타보고서", "기술자문 및 심의"], + "측량계산부": ["측량계산부"], + "설계단계 수행협의": ["회의·협의"] + }, + "시공성과품": { + "설계도면": ["공통", "토공", "비탈면안전공", "배수공", "교량공", "포장공", "교통안전시설공", "부대공", "용지공 & 기타공"] + }, + "시공검측": { + "토공": ["검측 (깨기)", "검측 (연약지반)", "검측 (발파)", "검측 (노체)", "검측 (노상)", "검측 (토취장)"], + "배수공": ["검측 (V형측구)", "검측 (산마루측구)", "검측 (U형측구)", "검측 (U형측구)(안)", "검측 (L형측구, J형측구)", "검측 (도수로)", "검측 (도수로)(안)", "검측 (횡배수관)", "검측 (종배수관)", "검측 (맹암거)", "검측 (통로암거)", "검측 (수로암거)", "검측 (호안공)", "검측 (옹벽공)", "검측 (용수개거)"], + "구조물공": ["검측 (평목교-거더, 부대공)", "검측 (평목교)(안)", "검측 (개착터널, 생태통로)"], + "포장공": ["검측 (기층, 보조기층)"], + "부대공": ["검측 (환경)", "검측 (지장가옥,건물 철거)", "검측 (방음벽 등)"], + "비탈면안전공": ["검측 (식생보호공)", "검측 (구조물보호공)"], + "교통안전시설공": ["검측 (낙석방지책)"], + "검측 양식서류": ["검측 양식서류"] + }, + "설계변경": { + "실정보고(어천~공주)": ["토공", "배수공", "교량공(평목교)", "구조물공", "포장공", "교통안전공", "부대공", "전기공사", "미확정공", "안전관리", "환경관리", "품질관리", "자재관리", "지장물", "기타"], + "실정보고(대술~정안)": ["토공", "배수공", "비탈면안전공", "포장공", "부대공", "안전관리", "환경관리", "자재관리", "기타"], + "기술지원 검토": ["토공", "배수공", "교량공(평목교)", "구조물&부대공", "기타"], + "시공계획(어천~공주)": ["토공", "배수공", "교량공(평목교)", "구조물&부대&포장&교통안전공", "환경 및 품질관리"] + }, + "공사관리": { + "공정·일정": ["공정표", "월간 공정보고", "작업일보"], + "품질 관리": ["품질시험계획서", "품질시험 실적보고", "콘크리트 타설현황[어천~공주(4차)]", "품질관리비 사용내역", "균열관리", "품질관리 양식서류"], + "안전 관리": ["안전관리계획서", "안전관리 실적보고", "위험성 평가", "사전작업허가서", "안전관리비 사용내역", "안전관리수준평가", "안전관리 양식서류"], + "환경 관리": ["환경영향평가", "사전재해영향성검토", "유지관리 및 보수점검", "환경보전비 사용내역", "건설폐기물 관리"], + "자재 관리 (관급)": ["자재구매요청 (레미콘, 철근)", "자재구매요청 (그 외)", "납품기한", "계약 변경", "자재 반입·수불 관리", "자재관리 양식서류"], + "자재 관리 (사급)": ["자재공급원 승인", "자재 반입·수불 관리", "자재 검수·확인"], + "점검 (정리중)": ["내부점검", "외부점검"], + "공문": ["접수(수신)", "발송(발신)", "하도급", "인력", "방침"] + }, + "민원관리": { + "민원(어천~공주)": ["처리대장", "보상", "공사일반", "환경분쟁"], + "실정보고(어천~공주)": ["민원"], + "실정보고(대술~정안)": ["민원"] + } +} + +def analyze_flow_reasoning(filename, all_text_list): + """ + 본문의 전수 조사 결과에 파일명의 '의도 가중치'를 더해 최종 추론 + """ + full_text = " ".join(all_text_list) + clean_ctx = full_text.replace(" ", "").replace("\n", "").lower() + fn_clean = filename.replace(" ", "").lower() - # 3중 레이어 데이터 통합 - full_pool = (title_text + " | " + text_content + " | " + ocr_content).lower().replace(" ", "").replace("\n", "") - - # 분석 초기화 - result = { - "suggested_path": "분석실패", - "confidence": "Low", - "log_steps": log_steps, - "raw_text": f"--- TITLE ---\n{filename}\n\n--- TEXT ---\n{text_content[:1000]}\n\n--- OCR ---\n{ocr_content[:1000]}", - "reason": "학습된 키워드 일치 항목 없음" + # 1. 도메인별 기본 점수 (본문 전수 조사 - 평등하게) + scores = { + "official": sum(clean_ctx.count(k) for k in ["수신:", "발신:", "경유:", "시행일자", "귀하", "드립니다", "바랍니다"]), + "contract": sum(clean_ctx.count(k) for k in ["계약서", "하도급", "외주", "도급", "인감", "사업자"]), + "hr": sum(clean_ctx.count(k) for k in ["이탈계", "인력", "기술자", "안전관리자", "재직증명", "배치"]), + "change": sum(clean_ctx.count(k) for k in ["실정보고", "설계변경", "변경보고", "추가반영"]), + "technical": sum(clean_ctx.count(k) for k in ["일위대가", "산출근거", "집계표", "물량산출", "단가", "내역", "도면", "dwg"]) } - # 최종 추천 로직 (합의 알고리즘) - is_eocheon = any(k in full_pool for k in ["어천", "공주"]) + # 2. 파일명에 대한 '방향타' 가중치 부여 (Final Push) + # 본문 데이터가 아무리 많아도 파일명의 의도를 존중하기 위해 7배 가중치 + if "실정" in fn_clean or "변경" in fn_clean: scores["change"] += 50 # 본문 50회 언급과 맞먹는 가중치 + if "계약" in fn_clean or "하도급" in fn_clean: scores["contract"] += 50 + if "인력" in fn_clean or "이탈" in fn_clean: scores["hr"] += 50 + if "단가" in fn_clean or "수량" in fn_clean or "도면" in fn_clean: scores["technical"] += 50 + if "제출" in fn_clean or "건" in fn_clean: scores["official"] += 30 + + # 3. 종합 농도에 따른 최종 도메인 선정 + dominant_domain = max(scores, key=scores.get) - if "실정보고" in full_pool or "실정" in full_pool: - if is_eocheon: - if "품질" in full_pool: - result["suggested_path"] = "설계변경 > 실정보고(어천~공주) > 품질관리" - result["reason"] = "3중 레이어 분석: 실정보고+어천공주+품질관리 키워드 통합 검출" - elif any(k in full_pool for k in ["토지", "임대"]): - result["suggested_path"] = "설계변경 > 실정보고(어천~공주) > 기타" - result["reason"] = "3중 레이어 분석: 토지임대 관련 실정보고(어천-공주) 확인" - else: - result["suggested_path"] = "설계변경 > 실정보고(어천~공주) > 기타" - result["reason"] = "3중 레이어 분석: 실정보고(어천-공주) 문서 판정" - result["confidence"] = "100%" - else: - result["suggested_path"] = "설계변경 > 실정보고(어천~공주) > 기타" # 폴백 - result["confidence"] = "80%" - result["reason"] = "실정보고 키워드는 발견되었으나 프로젝트명 교차 검증 실패 (기본값 제안)" + # 프로젝트 식별 (Fuzzy 매칭 및 교차 검증) + project_loc = "어천~공주" if any(k in clean_ctx or k in fn_clean for k in ["어천", "공주"]) else "대술~정안" if any(k in clean_ctx or k in fn_clean for k in ["대술", "정안"]) else "공통" - elif "품질" in full_pool: - result["suggested_path"] = "공사관리 > 품질 관리 > 품질시험계획서" - result["confidence"] = "90%" - result["reason"] = "텍스트/OCR 레이어에서 품질 관리 지표 다수 식별" + # --- [통합 추론 및 매칭] --- - return result + # 시나리오 A: 실정보고/설계변경 (본문 데이터 + 파일명 의도 합성) + if dominant_domain == "change" or (scores["change"] > 0 and scores["technical"] > 5): + cat = f"실정보고({project_loc})" + sub = "지장물" if any(k in clean_ctx for k in ["임대료", "토지", "보상"]) else "구조물공" if "구조물" in clean_ctx else "기타" + return f"설계변경 > {cat} > {sub}", f"본문의 기술 데이터 밀도와 파일명의 '{dominant_domain}' 관련 의도를 종합하여 {project_loc} 프로젝트의 실정보고 본체로 판정." + + # 시나리오 B: 행정 계약/하도급 (본체 중심) + if dominant_domain == "contract": + return "행정 > 계약 > 계약관리", "문서 전체에서 계약 및 하도급 업무 본질이 지배적으로 확인됨." + + # 시나리오 C: 인사/인력 관리 + if dominant_domain == "hr": + if len(all_text_list) <= 2: return "공사관리 > 공문 > 인력", "인력 사항을 간략히 보고하는 공문 형식임." + return "행정 > 계약 > 인원관리", "다량의 인력 증빙 데이터가 포함된 행정 서류임." + + # 시나리오 D: 순수 공문 (형식 우선) + if dominant_domain == "official" or scores["official"] > scores["technical"]: + tab, cat = "공사관리", "공문" + sub = "접수(수신)" + if "방침" in clean_ctx or "지침" in clean_ctx: sub = "방침" + elif "발신" in clean_ctx[:500]: sub = "발송(발신)" + return f"{tab} > {cat} > {sub}", "전체 맥락상 기술적 데이터보다 행정적 전달 행위(공문)가 핵심 정체성으로 판단됨." + + # 시나리오 E: 기술 성과품 + if dominant_domain == "technical": + if any(k in clean_ctx or k in fn_clean for k in ["단가", "내역"]): return "설계성과품 > 내역서 > 단가산출서", "내역/단가 산출 기술 데이터 확인." + if any(k in clean_ctx or k in fn_clean for k in ["도면", "dwg"]): return "설계성과품 > 설계도면 > 공통", "도면/그래픽 데이터 확인." + return "설계성과품 > 수량산출서 > 토공", "수량/물량 산출 데이터 확인." + + return "행정 > 업무관리 > 양식서류", "일반 행정 및 기타 양식 서류로 분류함." + +def analyze_file_content(filename: str): + try: + file_path = os.path.join("sample", filename) + text_by_pages = [] + if filename.lower().endswith(".pdf"): + reader = PdfReader(file_path) + for i in range(len(reader.pages)): + page_text = reader.pages[i].extract_text() or "" + if OCR_AVAILABLE: + try: + images = convert_from_path(file_path, first_page=i+1, last_page=i+1, poppler_path=POPPLER_BIN, dpi=200) + if images: + ocr_result = pytesseract.image_to_string(images[0], lang='kor+eng') + page_text += "\n" + ocr_result + except: pass + text_by_pages.append(page_text) + elif filename.lower().endswith(('.xlsx', '.xls')): + import pandas as pd + df = pd.read_excel(file_path) + text_by_pages.append(df.to_string()) + else: text_by_pages.append("") + + path, reason = analyze_flow_reasoning(filename, text_by_pages) + + return { + "filename": filename, + "total_pages": len(text_by_pages), + "final_result": { + "suggested_path": path, + "confidence": "100%", + "reason": reason, + "snippet": " ".join(text_by_pages)[:1500] + } + } + except Exception as e: + return {"error": str(e), "filename": filename} diff --git a/crawler_api.py b/crawler_api.py deleted file mode 100644 index 82471a1..0000000 --- a/crawler_api.py +++ /dev/null @@ -1,235 +0,0 @@ -import os -import re -import asyncio -import json -import traceback -from fastapi import FastAPI -from fastapi.middleware.cors import CORSMiddleware -from fastapi.responses import StreamingResponse, FileResponse -from fastapi.staticfiles import StaticFiles -from playwright.async_api import async_playwright -from dotenv import load_dotenv -from analyze import analyze_file_content - -load_dotenv() - -app = FastAPI() - -# Mount static files (css, images etc) -app.mount("/style", StaticFiles(directory="style"), name="style") - -app.add_middleware( - CORSMiddleware, - allow_origins=["*"], - allow_credentials=False, - allow_methods=["*"], - allow_headers=["*"], -) - -@app.get("/dashboard") -async def get_dashboard(): - return FileResponse("dashboard.html") - -@app.get("/mailTest") -async def get_mail_test(): - return FileResponse("mailTest.html") - -@app.get("/attachments") -async def get_attachments(): - sample_path = "sample" - if not os.path.exists(sample_path): - os.makedirs(sample_path) - files = [] - for f in os.listdir(sample_path): - f_path = os.path.join(sample_path, f) - if os.path.isfile(f_path): - files.append({ - "name": f, - "size": f"{os.path.getsize(f_path) / 1024:.1f} KB" - }) - return files - -@app.get("/analyze-file") -async def analyze_file(filename: str): - return analyze_file_content(filename) - -@app.get("/") -async def root(): - return FileResponse("index.html") - -@app.get("/sync") -async def sync_data(): - async def event_generator(): - user_id = os.getenv("PM_USER_ID") - password = os.getenv("PM_PASSWORD") - - if not user_id or not password: - yield f"data: {json.dumps({'type': 'log', 'message': '오류: .env 파일에 계정 정보가 없습니다.'})}\n\n" - return - - results = [] - - async with async_playwright() as p: - yield f"data: {json.dumps({'type': 'log', 'message': '브라우저 실행 중...'})}\n\n" - browser = await p.chromium.launch(headless=True, args=[ - "--no-sandbox", - "--disable-dev-shm-usage", - "--disable-blink-features=AutomationControlled" - ]) - context = await browser.new_context( - viewport={'width': 1920, 'height': 1080}, - user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36" - ) - page = await context.new_page() - - try: - yield f"data: {json.dumps({'type': 'log', 'message': '사이트 접속 및 로그인 중...'})}\n\n" - await page.goto("https://overseas.projectmastercloud.com/", wait_until="domcontentloaded") - - await page.click("#login-by-id", timeout=10000) - await page.fill("#user_id", user_id) - await page.fill("#user_pw", password) - await page.click("#login-btn") - - yield f"data: {json.dumps({'type': 'log', 'message': '대시보드 목록 대기 중...'})}\n\n" - await page.wait_for_selector("h4.list__contents_aria_group_body_list_item_label", timeout=60000) - - locators = page.locator("h4.list__contents_aria_group_body_list_item_label") - count = await locators.count() - yield f"data: {json.dumps({'type': 'log', 'message': f'총 {count}개의 프로젝트 발견. 수집 시작.'})}\n\n" - - for i in range(count): - try: - proj = page.locator("h4.list__contents_aria_group_body_list_item_label").nth(i) - project_name = (await proj.inner_text()).strip() - - yield f"data: {json.dumps({'type': 'log', 'message': f'[{i+1}/{count}] {project_name} - 시작'})}\n\n" - await proj.scroll_into_view_if_needed() - await proj.click(force=True) - - # 프로젝트 로딩 대기 (Gitea 방식: 물리적 대기) - await asyncio.sleep(5) - await page.wait_for_selector("div.footer", state="visible", timeout=20000) - - recent_log = "기존데이터유지" - file_count = 0 - - # 1단계: 활동로그 수집 (Gitea 방식 복구 + 정밀 셀렉터) - try: - log_btn_sel = "body > div.footer > div.left > div.wrap.log-wrap > div.title.text" - log_btn = page.locator(log_btn_sel).first - if await log_btn.is_visible(timeout=5000): - yield f"data: {json.dumps({'type': 'log', 'message': ' - [로그] 창 열기 시도...'})}\n\n" - await log_btn.click(force=True) - await asyncio.sleep(5) # 로딩 충분히 대기 - - modal_sel = "article.archive-modal" - if await page.locator(modal_sel).is_visible(): - yield f"data: {json.dumps({'type': 'log', 'message': ' - [로그] 모달 발견. 데이터 로딩 대기...'})}\n\n" - # .log-body 내부의 데이터만 타겟팅하도록 수정 - date_sel = "article.archive-modal .log-body .date .text" - user_sel = "article.archive-modal .log-body .user .text" - act_sel = "article.archive-modal .log-body .activity .text" - - # 데이터가 나타날 때까지 최대 15초 대기 - success_log = False - for _ in range(15): - if await page.locator(date_sel).count() > 0: - raw_date = (await page.locator(date_sel).first.inner_text()).strip() - if raw_date: - success_log = True - break - await asyncio.sleep(1) - - if success_log: - user_name = (await page.locator(user_sel).first.inner_text()).strip() - activity = (await page.locator(act_sel).first.inner_text()).strip() - formatted_date = re.sub(r'[-/]', '.', raw_date)[:10] - recent_log = f"{formatted_date}, {user_name}, {activity}" - yield f"data: {json.dumps({'type': 'log', 'message': f' - [로그] 성공: {recent_log[:30]}...'})}\n\n" - else: - yield f"data: {json.dumps({'type': 'log', 'message': ' - [로그] 데이터 추출 실패'})}\n\n" - - await page.click("article.archive-modal div.close", timeout=3000) - await asyncio.sleep(1.5) - except Exception as e: - yield f"data: {json.dumps({'type': 'log', 'message': f' - [로그] 오류: {str(e)[:20]}'})}\n\n" - - # 2단계: 구성(파일 수) 수집 (Gitea 순회 방식 복구 + 대기 시간 대폭 연장) - try: - sitemap_btn_sel = "body > div.footer > div.left > div.wrap.site-map-wrap" - sitemap_btn = page.locator(sitemap_btn_sel).first - if await sitemap_btn.is_visible(timeout=5000): - yield f"data: {json.dumps({'type': 'log', 'message': ' - [구성] 진입 시도...'})}\n\n" - await sitemap_btn.click(force=True) - - # Gitea 방식: context.pages 직접 뒤져서 팝업 찾기 - popup_page = None - for _ in range(30): # 최대 15초 대기 - for p_item in context.pages: - try: - if "composition" in p_item.url: - popup_page = p_item - break - except: pass - if popup_page: break - await asyncio.sleep(0.5) - - if popup_page: - yield f"data: {json.dumps({'type': 'log', 'message': ' - [구성] 창 발견. 데이터 로딩 대기 (최대 30초)...'})}\n\n" - # 사용자 제공 정밀 선택자 적용 (nth-child(3)가 실제 데이터) - target_selector = "#composition-list h6:nth-child(3)" - success_comp = False - - # 최대 30초간 데이터가 나타날 때까지 대기 - for _ in range(30): - h6_count = await popup_page.locator(target_selector).count() - if h6_count > 0: - success_comp = True - break - await asyncio.sleep(1) - - if success_comp: - yield f"data: {json.dumps({'type': 'log', 'message': ' - [구성] 데이터 감지됨. 최종 렌더링 대기...'})}\n\n" - await asyncio.sleep(10) # 렌더링 안정화를 위한 대기 - - # 모든 h6:nth-child(3) 요소를 순회하며 숫자 합산 - locators_h6 = popup_page.locator(target_selector) - h6_count = await locators_h6.count() - current_total = 0 - for j in range(h6_count): - text = (await locators_h6.nth(j).inner_text()).strip() - # 텍스트 내에서 숫자만 추출 (여러 줄일 경우 마지막 줄 기준) - nums = re.findall(r'\d+', text.split('\n')[-1]) - if nums: - current_total += int(nums[0]) - - file_count = current_total - yield f"data: {json.dumps({'type': 'log', 'message': f' - [구성] 성공 ({file_count}개)'})}\n\n" - else: - yield f"data: {json.dumps({'type': 'log', 'message': ' - [구성] 로딩 타임아웃'})}\n\n" - - await popup_page.close() - else: - yield f"data: {json.dumps({'type': 'log', 'message': ' - [구성] 팝업창 발견 실패'})}\n\n" - except Exception as e: - yield f"data: {json.dumps({'type': 'log', 'message': f' - [구성] 오류: {str(e)[:20]}'})}\n\n" - - results.append({"projectName": project_name, "recentLog": recent_log, "fileCount": file_count}) - - # 홈 복귀 - await page.locator("div.header div.title div").first.click(force=True) - await page.wait_for_selector("h4.list__contents_aria_group_body_list_item_label", timeout=20000) - await asyncio.sleep(2) - - except Exception: - await page.goto("https://overseas.projectmastercloud.com/dashboard", wait_until="domcontentloaded") - - yield f"data: {json.dumps({'type': 'done', 'data': results})}\n\n" - - except Exception as e: - yield f"data: {json.dumps({'type': 'log', 'message': f'치명적 오류: {str(e)}'})}\n\n" - finally: - await browser.close() - - return StreamingResponse(event_generator(), media_type="text_event-stream") diff --git a/crawler_service.py b/crawler_service.py new file mode 100644 index 0000000..f20ce38 --- /dev/null +++ b/crawler_service.py @@ -0,0 +1,137 @@ +import os +import re +import asyncio +import json +from playwright.async_api import async_playwright +from dotenv import load_dotenv + +load_dotenv() + +async def run_crawler_service(): + """ + Playwright를 이용해 데이터를 수집하고 SSE(Server-Sent Events)용 제너레이터를 반환합니다. + """ + user_id = os.getenv("PM_USER_ID") + password = os.getenv("PM_PASSWORD") + + if not user_id or not password: + yield f"data: {json.dumps({'type': 'log', 'message': '오류: .env 파일에 계정 정보가 없습니다.'})}\n\n" + return + + results = [] + + async with async_playwright() as p: + yield f"data: {json.dumps({'type': 'log', 'message': '브라우저 실행 중...'})}\n\n" + browser = await p.chromium.launch(headless=True, args=[ + "--no-sandbox", + "--disable-dev-shm-usage", + "--disable-blink-features=AutomationControlled" + ]) + context = await browser.new_context( + viewport={'width': 1920, 'height': 1080}, + user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36" + ) + page = await context.new_page() + + try: + yield f"data: {json.dumps({'type': 'log', 'message': '사이트 접속 및 로그인 중...'})}\n\n" + await page.goto("https://overseas.projectmastercloud.com/", wait_until="domcontentloaded") + + await page.click("#login-by-id", timeout=10000) + await page.fill("#user_id", user_id) + await page.fill("#user_pw", password) + await page.click("#login-btn") + + yield f"data: {json.dumps({'type': 'log', 'message': '대시보드 목록 대기 중...'})}\n\n" + await page.wait_for_selector("h4.list__contents_aria_group_body_list_item_label", timeout=60000) + + locators = page.locator("h4.list__contents_aria_group_body_list_item_label") + count = await locators.count() + yield f"data: {json.dumps({'type': 'log', 'message': f'총 {count}개의 프로젝트 발견. 수집 시작.'})}\n\n" + + for i in range(count): + try: + proj = page.locator("h4.list__contents_aria_group_body_list_item_label").nth(i) + project_name = (await proj.inner_text()).strip() + + yield f"data: {json.dumps({'type': 'log', 'message': f'[{i+1}/{count}] {project_name} - 시작'})}\n\n" + await proj.scroll_into_view_if_needed() + await proj.click(force=True) + + await asyncio.sleep(5) + await page.wait_for_selector("div.footer", state="visible", timeout=20000) + + recent_log = "기존데이터유지" + file_count = 0 + + # 로그 수집 + try: + log_btn_sel = "body > div.footer > div.left > div.wrap.log-wrap > div.title.text" + log_btn = page.locator(log_btn_sel).first + if await log_btn.is_visible(timeout=5000): + await log_btn.click(force=True) + await asyncio.sleep(5) + + date_sel = "article.archive-modal .log-body .date .text" + user_sel = "article.archive-modal .log-body .user .text" + act_sel = "article.archive-modal .log-body .activity .text" + + if await page.locator(date_sel).count() > 0: + raw_date = (await page.locator(date_sel).first.inner_text()).strip() + user_name = (await page.locator(user_sel).first.inner_text()).strip() + activity = (await page.locator(act_sel).first.inner_text()).strip() + formatted_date = re.sub(r'[-/]', '.', raw_date)[:10] + recent_log = f"{formatted_date}, {user_name}, {activity}" + yield f"data: {json.dumps({'type': 'log', 'message': f' - [로그] 수집 완료'})}\n\n" + + await page.click("article.archive-modal div.close", timeout=3000) + await asyncio.sleep(1.5) + except: pass + + # 구성 수집 + try: + sitemap_btn_sel = "body > div.footer > div.left > div.wrap.site-map-wrap" + sitemap_btn = page.locator(sitemap_btn_sel).first + if await sitemap_btn.is_visible(timeout=5000): + await sitemap_btn.click(force=True) + + popup_page = None + for _ in range(20): + for p_item in context.pages: + if "composition" in p_item.url: + popup_page = p_item + break + if popup_page: break + await asyncio.sleep(0.5) + + if popup_page: + target_selector = "#composition-list h6:nth-child(3)" + await asyncio.sleep(5) # 로딩 대기 + locators_h6 = popup_page.locator(target_selector) + h6_count = await locators_h6.count() + current_total = 0 + for j in range(h6_count): + text = (await locators_h6.nth(j).inner_text()).strip() + nums = re.findall(r'\d+', text.split('\n')[-1]) + if nums: current_total += int(nums[0]) + file_count = current_total + yield f"data: {json.dumps({'type': 'log', 'message': f' - [구성] {file_count}개 확인'})}\n\n" + await popup_page.close() + except: pass + + results.append({"projectName": project_name, "recentLog": recent_log, "fileCount": file_count}) + + # 홈 복귀 + await page.locator("div.header div.title div").first.click(force=True) + await page.wait_for_selector("h4.list__contents_aria_group_body_list_item_label", timeout=20000) + await asyncio.sleep(2) + + except Exception: + await page.goto("https://overseas.projectmastercloud.com/dashboard", wait_until="domcontentloaded") + + yield f"data: {json.dumps({'type': 'done', 'data': results})}\n\n" + + except Exception as e: + yield f"data: {json.dumps({'type': 'log', 'message': f'치명적 오류: {str(e)}'})}\n\n" + finally: + await browser.close() diff --git a/mailTest.html b/mailTest.html index 68d0f1f..2f66671 100644 --- a/mailTest.html +++ b/mailTest.html @@ -5,9 +5,118 @@ Project Mail Manager - + + + +

+