# -*- coding: utf-8 -*-
"""
router.py

Route an HTML document to a processing pipeline based on its text length.

- Long documents (>= 5000 characters): intended for the RAG pipeline
  (step3 -> 4 -> 5 -> 6 -> 7 -> 8 -> 9).
- Short documents (< 5000 characters): intended for direct generation
  (step7 -> 8 -> 9).

The step integration itself is not implemented in this module; both
pipelines currently pass the HTML through after rewriting image paths.
"""

import os
import re
from typing import Any, Dict, Optional

# python-dotenv is only a convenience for loading a local .env file. When it
# is not installed, fall back to the variables already in the process
# environment instead of failing at import time.
try:
    from dotenv import load_dotenv
except ImportError:
    load_dotenv = None

if load_dotenv is not None:
    load_dotenv()

# Documents whose visible text is at least this many characters are "long".
LONG_DOC_THRESHOLD = 5000

# Base directory for image assets, overridable through the environment.
ASSETS_BASE_PATH = os.environ.get("ASSETS_BASE_PATH", "/tmp/assets")

# Compiled once: anything that looks like an HTML tag.
_TAG_RE = re.compile(r'<[^>]+>')
# Compiled once: a double-quoted src="..." attribute with a non-empty value.
_SRC_RE = re.compile(r'src="([^"]+)"')


def count_characters(html_content: str) -> int:
    """Return the number of text characters in *html_content*.

    Tags are removed and every run of whitespace is collapsed to a single
    space (leading/trailing whitespace is dropped) before counting.
    """
    text_only = _TAG_RE.sub('', html_content)
    return len(' '.join(text_only.split()))


def is_long_document(html_content: str) -> bool:
    """Return True when the text length reaches ``LONG_DOC_THRESHOLD``."""
    return count_characters(html_content) >= LONG_DOC_THRESHOLD


def convert_image_paths(html_content: str) -> str:
    """Rewrite relative ``assets/...`` image sources to ``/assets/...``.

    Only double-quoted ``src="..."`` attributes whose value starts with
    ``assets/`` are changed. URLs, drive-letter paths, root-relative paths
    and every other value are returned untouched.
    """

    def replace_src(match):
        path = match.group(1)
        # A value starting with "assets/" can never be an absolute path or a
        # URL, so this single check is enough.
        if path.startswith('assets/'):
            return f'src="/{path}"'
        return match.group(0)

    return _SRC_RE.sub(replace_src, html_content)


def run_short_pipeline(html_content: str, options: dict) -> Dict[str, Any]:
    """Process a short document (below ``LONG_DOC_THRESHOLD`` characters).

    Args:
        html_content: Source HTML.
        options: Extra options; currently unused by this pipeline.

    Returns:
        On success a dict with ``success``, ``pipeline`` ('short'),
        ``char_count`` and ``html``; on failure a dict with ``success``
        False, ``error`` and ``pipeline``.
    """
    try:
        processed_html = convert_image_paths(html_content)

        # TODO: wire in step7, step8, step9
        return {
            'success': True,
            'pipeline': 'short',
            'char_count': count_characters(html_content),
            'html': processed_html
        }
    except Exception as e:
        # Callers expect a result dict rather than an exception.
        return {
            'success': False,
            'error': str(e),
            'pipeline': 'short'
        }


def inject_template_css(html_content: str, template_css: str) -> str:
    """Inject *template_css* into *html_content*.

    NOTE(review): this function was reconstructed from a damaged source in
    which the tag literals had been stripped; confirm the exact wrapper text
    against the original file.

    Placement, in order of preference:
    - before the first ``</style>`` when a style block already exists;
    - in a new ``<style>`` block right after ``<head>``;
    - in a new ``<style>`` block prepended to the document.

    An empty *template_css* leaves the HTML unchanged.
    """
    if not template_css:
        return html_content

    css_block = f'\n{template_css}\n'

    # Append to the existing style block.
    if '</style>' in html_content:
        return html_content.replace('</style>', f'{css_block}</style>', 1)

    # No style block: add one right after the opening head tag.
    if '<head>' in html_content:
        return html_content.replace(
            '<head>', f'<head>\n<style>{css_block}</style>', 1
        )

    # No head either: put the style block at the very beginning.
    return f'<style>{css_block}</style>\n{html_content}'


def run_long_pipeline(html_content: str, options: dict) -> Dict[str, Any]:
    """Process a long document (``LONG_DOC_THRESHOLD`` characters or more).

    The router only passes the HTML through. When ``options['folder_path']``
    is set, the result carries ``needs_pipeline: True`` so the front end can
    branch to /api/generate-toc -> /api/generate-report-from-toc.

    Args:
        html_content: Source HTML.
        options: Extra options; only ``folder_path`` is read here.

    Returns:
        On success a dict with ``success``, ``pipeline`` ('long'),
        ``char_count``, ``html`` and, when a folder is given,
        ``needs_pipeline``; on failure a dict with ``success`` False,
        ``error`` and ``pipeline``.
    """
    try:
        result: Dict[str, Any] = {
            'success': True,
            'pipeline': 'long',
            'char_count': count_characters(html_content),
            'html': convert_image_paths(html_content)
        }

        # Without a folder the document is handled from the HTML alone.
        if options.get('folder_path', ''):
            result['needs_pipeline'] = True

        return result
    except Exception as e:
        # Callers expect a result dict rather than an exception.
        return {'success': False, 'error': str(e), 'pipeline': 'long'}


def process_document(
    content: str, options: Optional[Dict[str, Any]] = None
) -> Dict[str, Any]:
    """Main router: dispatch *content* to the pipeline matching its length.

    Args:
        content: HTML string.
        options: Extra options. ``folder_path`` is read by the long pipeline
            and ``template_css`` is injected into successful results.

    Returns:
        The pipeline's result dict with ``char_count`` and ``threshold``
        added. Empty or whitespace-only content yields
        ``{'success': False, 'error': ...}`` without those two keys.
    """
    if options is None:
        options = {}

    if not content or not content.strip():
        return {
            'success': False,
            'error': '내용이 비어있습니다.'
        }

    # Count once and reuse the value for both routing and the result.
    char_count = count_characters(content)

    if char_count >= LONG_DOC_THRESHOLD:
        result = run_long_pipeline(content, options)
    else:
        result = run_short_pipeline(content, options)

    # Fields common to both pipelines.
    result['char_count'] = char_count
    result['threshold'] = LONG_DOC_THRESHOLD

    # Template CSS is injected only into successful, non-empty HTML results.
    template_css = options.get('template_css')
    if template_css and result.get('success') and result.get('html'):
        result['html'] = inject_template_css(result['html'], template_css)

    return result