# -*- coding: utf-8 -*-
"""domain_api.py — domain-prompt management and report-pipeline API routes.

Usage (app.py):
    from domain_api import register_domain_routes
    register_domain_routes(app)

NOTE(review): this module was recovered from an encoding-corrupted source
file. Every Korean UI/prompt string was destroyed by the corruption; the
English placeholders below are marked TODO(i18n) and must be restored from
version control before deployment. Statements that the corruption swallowed
outright have been reconstructed and are marked TODO(review).
"""
import os
import json
import hashlib
import threading
from pathlib import Path

from dotenv import load_dotenv
from flask import request, jsonify
import psycopg2

from db import get_conn  # was imported twice in the original; once suffices

load_dotenv()

# ===== Path configuration =====
BASE_DIR = Path(__file__).parent
DOMAIN_CONFIG_PATH = BASE_DIR / "domain_config.json"
DOMAIN_DIR = BASE_DIR / "domain"

# Output root shared by pipeline steps 3~9.
PIPELINE_OUTPUT_ROOT = Path(os.getenv("PIPELINE_OUTPUT_ROOT", "/tmp/pipeline_output"))
CONTEXT_DIR = PIPELINE_OUTPUT_ROOT / "context"

# In-process job registry: session_id -> {'status': ..., 'step': ...}.
# NOTE(review): process-local state — confirm the app runs as a single
# worker, otherwise this must move into the database.
pipeline_jobs = {}


def register_domain_routes(app):
    """Register all domain/pipeline routes on the given Flask app."""

    @app.route('/api/domain-config', methods=['GET'])
    def get_domain_config():
        """Return domain_config.json annotated with file_exists/file_size
        for each category and child entry."""
        try:
            if DOMAIN_CONFIG_PATH.exists():
                config = json.loads(DOMAIN_CONFIG_PATH.read_text(encoding='utf-8'))
                for cat in config.get('categories', []):
                    if cat.get('file'):
                        fpath = DOMAIN_DIR / cat['file']
                        cat['file_exists'] = fpath.exists()
                        cat['file_size'] = fpath.stat().st_size if fpath.exists() else 0
                    for child in cat.get('children', []):
                        if child.get('file'):
                            fpath = DOMAIN_DIR / child['file']
                            child['file_exists'] = fpath.exists()
                            child['file_size'] = fpath.stat().st_size if fpath.exists() else 0
                return jsonify(config)
            else:
                return jsonify({'error': 'domain_config.json not found',
                                'categories': []}), 404
        except Exception as e:
            return jsonify({'error': str(e), 'categories': []}), 500

    @app.route('/api/domain-combine', methods=['POST'])
    def combine_domains():
        """Combine the selected domain files into context/domain_prompt.txt.

        Request:  {"selected": ["civil_general", "survey", "bim"]}
        Response: {"success": true, "combined_length": 3200,
                   "selected_names": [...]}
        """
        try:
            data = request.get_json()
            selected_ids = data.get('selected', [])
            if not selected_ids:
                return jsonify({
                    'success': True,
                    'combined_length': 0,
                    'selected_names': [],
                    # TODO(i18n): original Korean "step3 will be skipped" message lost
                    'message': 'no domains selected - step3 will be skipped',
                })

            config = json.loads(DOMAIN_CONFIG_PATH.read_text(encoding='utf-8'))

            domain_parts = []
            guide_parts = []
            selected_names = []

            def _collect(node, target):
                """Append [label]\\ncontent of a selected node's file to target."""
                if node['id'] in selected_ids and node.get('file'):
                    fpath = DOMAIN_DIR / node['file']
                    if fpath.exists():
                        content = fpath.read_text(encoding='utf-8', errors='ignore').strip()
                        if content:
                            target.append(f"[{node['label']}]\n{content}")
                            # Fixed: original appended child labels twice.
                            selected_names.append(node['label'])

            for cat in config.get('categories', []):
                # The report-guide category is kept in its own bucket so it
                # can be emitted as a separate prompt section.
                is_guide = (cat['id'] == 'report_guide')
                target = guide_parts if is_guide else domain_parts
                _collect(cat, target)
                for child in cat.get('children', []):
                    _collect(child, target)

            if not domain_parts and not guide_parts:
                return jsonify({
                    'success': False,
                    # TODO(i18n): original Korean error message lost
                    'error': 'selected domain files are empty or missing.',
                })

            sep = "\n\n" + "=" * 50 + "\n\n"
            sections = []
            if domain_parts:
                # TODO(i18n): the guide-label names filtered out here were
                # Korean labels lost to corruption — restore from VCS.
                GUIDE_LABELS = {'TOC structure guide', 'report style guide'}
                domain_names = [n for n in selected_names if n not in GUIDE_LABELS]
                # TODO(i18n): the original Korean instruction preamble for the
                # domain-knowledge section was lost; skeleton kept.
                sections.append(
                    "[DOMAIN KNOWLEDGE] selected domains: "
                    + ", ".join(domain_names) + "\n" + sep.join(domain_parts)
                )
            if guide_parts:
                # TODO(i18n): original Korean report-guide preamble lost.
                sections.append("[REPORT GUIDE]\n" + sep.join(guide_parts))

            # TODO(review): final assembly reconstructed — the original
            # statement building final_text was destroyed by corruption.
            final_text = sep.join(sections)

            CONTEXT_DIR.mkdir(parents=True, exist_ok=True)
            output_path = CONTEXT_DIR / "domain_prompt.txt"
            output_path.write_text(final_text, encoding='utf-8')

            return jsonify({
                'success': True,
                'combined_length': len(final_text),
                'selected_names': selected_names,
                'selected_ids': selected_ids,
                'output_path': str(output_path),
            })
        except Exception as e:
            return jsonify({'success': False, 'error': str(e)}), 500

    @app.route('/api/domain-list', methods=['GET'])
    def list_domain_files():
        """List every *.txt under DOMAIN_DIR with a 200-char preview."""
        try:
            files = []
            if DOMAIN_DIR.exists():
                for f in sorted(DOMAIN_DIR.rglob('*.txt')):
                    rel = f.relative_to(DOMAIN_DIR)
                    files.append({
                        'path': str(rel),
                        'name': f.stem,
                        'size': f.stat().st_size,
                        'preview': f.read_text(encoding='utf-8', errors='ignore')[:200],
                    })
            return jsonify({
                'success': True,
                'files': files,
                'domains_dir': str(DOMAIN_DIR),
            })
        except Exception as e:
            return jsonify({'success': False, 'error': str(e)}), 500

    @app.route('/api/domain-save', methods=['POST'])
    def save_domain_file():
        """Save edited content for one domain id back to its file.

        Request: {"id": "survey", "content": "..."}
        """
        try:
            # TODO(review): request parsing reconstructed — the original
            # preamble of this handler was destroyed by corruption.
            data = request.get_json()
            domain_id = data.get('id', '')
            content = data.get('content')
            if not domain_id or content is None:
                # TODO(i18n): original Korean validation message lost
                return jsonify({'success': False,
                                'error': 'id and content are required.'})

            # Locate the file path for this id in the config tree.
            config = json.loads(DOMAIN_CONFIG_PATH.read_text(encoding='utf-8'))
            file_path = None
            for cat in config.get('categories', []):
                if cat['id'] == domain_id:
                    file_path = cat.get('file')
                    break
                for child in cat.get('children', []):
                    if child['id'] == domain_id:
                        file_path = child.get('file')
                        break
                if file_path:
                    break

            if not file_path:
                # TODO(i18n): original Korean "unknown domain" message lost
                return jsonify({'success': False,
                                'error': f'unknown domain id: {domain_id}'})

            full_path = BASE_DIR / file_path
            full_path.parent.mkdir(parents=True, exist_ok=True)
            full_path.write_text(content, encoding='utf-8')
            return jsonify({
                'success': True,
                'path': str(full_path),
                'size': len(content),
            })
        except Exception as e:
            return jsonify({'success': False, 'error': str(e)}), 500

    @app.route('/api/pipeline/status', methods=['GET'])
    def pipeline_status():
        """Report which pipeline step artifacts currently exist on disk."""
        try:
            rag_dir = PIPELINE_OUTPUT_ROOT / 'rag'
            status = {
                'step3_domain': (CONTEXT_DIR / 'domain_prompt.txt').exists(),
                'step4_chunks': (len(list(rag_dir.glob('*_chunks.json')))
                                 if rag_dir.exists() else 0),
                'step5_faiss': (rag_dir / 'faiss.index').exists(),
                'step6_corpus': (CONTEXT_DIR / 'corpus.txt').exists(),
                'step7_outline': (CONTEXT_DIR / 'outline_issue_report.txt').exists(),
                'step8_report': (PIPELINE_OUTPUT_ROOT / 'generated' / 'report_draft.md').exists(),
                'step9_html': (PIPELINE_OUTPUT_ROOT / 'generated' / 'report.html').exists(),
            }
            return jsonify({'success': True, 'status': status})
        except Exception as e:
            return jsonify({'success': False, 'error': str(e)}), 500

    def run_toc_pipeline(session_id, input_dir, output_dir,
                         doc_type='report', attach_pages=1):
        """Background worker: run pipeline steps 2~7 for one session.

        Progress is written to pipeline_jobs[session_id] so /api/toc-status
        can poll it. RAG artifacts are cached in Postgres keyed by the MD5
        of each input file; cache hits skip steps 2/4/5.
        """
        try:
            pipeline_jobs[session_id] = {'status': 'running', 'step': 2}
            from converters.pipeline import (
                step2_extract, step3_domain, step4_chunk,
                step5_rag, step6_corpus, step7_index,
            )

            rag_dir = Path(output_dir) / 'rag'
            input_files = (sorted(Path(input_dir).glob('*'))
                           if Path(input_dir).exists() else [])
            file_hashes = []
            new_files = []

            # RAG cache lookup — TODO(review): this loop was reconstructed;
            # the original lookup code was destroyed by corruption.
            for f in input_files:
                if not f.is_file():
                    continue
                h = hashlib.md5(f.read_bytes()).hexdigest()
                file_hashes.append(h)
                try:
                    with get_conn() as conn:
                        with conn.cursor() as cur:
                            cur.execute(
                                "SELECT 1 FROM rag_cache WHERE file_hash = %s", (h,))
                            hit = cur.fetchone() is not None
                    if hit:
                        print(f"[DB] RAG cache HIT: {f.name} ({h})", flush=True)
                    else:
                        new_files.append(f)
                        print(f"[DB] RAG cache MISS: {f.name} ({h})", flush=True)
                except Exception as de:
                    print(f"[DB] cache lookup failed, treating as new: {de}", flush=True)
                    new_files.append(f)

            if new_files:
                # At least one unseen file: run extraction + chunk + embed.
                step2_extract.process_all_pdfs(input_dir, output_dir)
                pipeline_jobs[session_id]['step'] = 3
                step3_domain.main(input_dir, output_dir)
                pipeline_jobs[session_id]['step'] = 4
                step4_chunk.main(output_dir, output_dir)
                pipeline_jobs[session_id]['step'] = 5
                step5_rag.main(output_dir, output_dir)

                # Persist the RAG artifacts so the next run is a cache hit.
                faiss_path = rag_dir / 'faiss.index'
                vectors_path = rag_dir / 'vectors.npy'
                meta_path = rag_dir / 'meta.json'
                chunks_files = list(rag_dir.glob('*_chunks.json'))
                faiss_bytes = faiss_path.read_bytes() if faiss_path.exists() else b''
                vectors_bytes = vectors_path.read_bytes() if vectors_path.exists() else b''
                meta_text = meta_path.read_text(encoding='utf-8') if meta_path.exists() else ''
                chunks_text = (chunks_files[0].read_text(encoding='utf-8')
                               if chunks_files else '')

                for f in new_files:
                    h = hashlib.md5(f.read_bytes()).hexdigest()
                    try:
                        with get_conn() as conn:
                            with conn.cursor() as cur:
                                cur.execute("""
                                    INSERT INTO files (file_hash, filename)
                                    VALUES (%s, %s)
                                    ON CONFLICT (file_hash) DO NOTHING
                                """, (h, f.name))
                                cur.execute("""
                                    INSERT INTO rag_cache
                                        (file_hash, chunks_json, faiss_index,
                                         vectors, meta_json)
                                    VALUES (%s, %s, %s, %s, %s)
                                    ON CONFLICT (file_hash) DO NOTHING
                                """, (h, chunks_text,
                                      psycopg2.Binary(faiss_bytes),
                                      psycopg2.Binary(vectors_bytes),
                                      meta_text))
                            conn.commit()
                        print(f"[DB] RAG cache stored: {f.name}", flush=True)
                    except Exception as de:
                        print(f"[DB] RAG cache store failed: {de}", flush=True)
            else:
                print("[DB] all files HIT - skipping step2/4/5; "
                      "running step3 and restoring cached RAG artifacts", flush=True)
                try:
                    step3_domain.main(input_dir, output_dir)
                    rag_dir.mkdir(parents=True, exist_ok=True)
                    # TODO(review): the original cache-restore code was lost
                    # to corruption. Restoring the row for the first hash —
                    # confirm semantics for multi-file sessions.
                    if not file_hashes:
                        raise RuntimeError('no cached hashes to restore')
                    with get_conn() as conn:
                        with conn.cursor() as cur:
                            cur.execute("""
                                SELECT chunks_json, faiss_index, vectors, meta_json
                                FROM rag_cache WHERE file_hash = %s
                            """, (file_hashes[0],))
                            row = cur.fetchone()
                    if not row:
                        raise RuntimeError('cached RAG row missing')
                    chunks_text, faiss_bytes, vectors_bytes, meta_text = row
                    (rag_dir / 'cached_chunks.json').write_text(
                        chunks_text or '', encoding='utf-8')
                    (rag_dir / 'faiss.index').write_bytes(bytes(faiss_bytes or b''))
                    (rag_dir / 'vectors.npy').write_bytes(bytes(vectors_bytes or b''))
                    (rag_dir / 'meta.json').write_text(
                        meta_text or '', encoding='utf-8')
                    print("[DB] cache restore complete", flush=True)
                except Exception as de:
                    print(f"[DB] cache restore failed, running step4~5: {de}",
                          flush=True)
                    step4_chunk.main(output_dir, output_dir)
                    step5_rag.main(output_dir, output_dir)
                pipeline_jobs[session_id]['step'] = 5

            # Steps 6~7 always run (corpus + outline).
            pipeline_jobs[session_id]['step'] = 6
            step6_corpus.main(output_dir, output_dir)
            pipeline_jobs[session_id]['step'] = 7
            step7_index.main(output_dir, output_dir, doc_type=doc_type)

            outline_txt = Path(output_dir) / 'context' / 'outline_issue_report.txt'
            outline_text = (outline_txt.read_text(encoding='utf-8')
                            if outline_txt.exists() else '')
            # Fixed: the original printed outline_txt.read_text() without an
            # existence check and could crash here.
            print("[DEBUG outline]", outline_text[:500], flush=True)

            # Upsert session + outline rows.
            try:
                with get_conn() as conn:
                    with conn.cursor() as cur:
                        cur.execute("""
                            INSERT INTO sessions (session_id, file_hashes, doc_type)
                            VALUES (%s, %s, %s)
                            ON CONFLICT (session_id)
                            DO UPDATE SET doc_type=EXCLUDED.doc_type
                        """, (session_id, file_hashes, doc_type))
                        cur.execute("""
                            INSERT INTO outlines (session_id, outline_text)
                            VALUES (%s, %s)
                            ON CONFLICT (session_id)
                            DO UPDATE SET outline_text=EXCLUDED.outline_text
                        """, (session_id, outline_text))
                    conn.commit()
                print(f"[DB] session/outline stored: {session_id}", flush=True)
            except Exception as de:
                print(f"[DB] session/outline store failed: {de}", flush=True)

            pipeline_jobs[session_id] = {'status': 'done', 'doc_type': doc_type}
        except Exception as e:
            import traceback
            print(f"[PIPELINE ERROR] {e}", flush=True)
            print(traceback.format_exc(), flush=True)
            pipeline_jobs[session_id] = {'status': 'error', 'error': str(e)}

    # ===== pipeline trigger APIs =====

    @app.route('/api/generate-toc', methods=['POST'])
    def generate_toc():
        """Kick off TOC generation (steps 2~7) in a background thread.

        Request: {"session_id": "...", "doc_type": "report"|"briefing",
                  "attach_pages": 1, ...}
        Response: {"success": true, "status": "processing", "session_id": ...}
        """
        try:
            data = request.get_json()
            session_id = data.get('session_id', '')
            # NOTE(review): these three are read but currently unused by the
            # pipeline call below — preserved for API compatibility.
            domain_selected = data.get('domain_selected', False)
            write_mode = data.get('write_mode', 'restructure')
            instruction = data.get('instruction', '')
            if not session_id:
                # TODO(i18n): original Korean "upload first" message lost
                return jsonify({'success': False,
                                'error': 'session_id is required. Upload files first.'})

            input_dir = f'/tmp/{session_id}/input'
            output_dir = f'/tmp/{session_id}/output'
            os.makedirs(output_dir, exist_ok=True)
            doc_type = data.get('doc_type', 'report')
            attach_pages = int(data.get('attach_pages', 1))

            t = threading.Thread(
                target=run_toc_pipeline,
                args=(session_id, input_dir, output_dir, doc_type, attach_pages))
            t.daemon = True
            t.start()
            return jsonify({'success': True, 'status': 'processing',
                            'session_id': session_id})
        except Exception as e:
            return jsonify({'success': False, 'error': str(e)}), 500

    # Fixed: the route was '/api/toc-status/' with no URL parameter even
    # though the view function takes session_id — Flask would raise a
    # TypeError on every request.
    @app.route('/api/toc-status/<session_id>', methods=['GET'])
    def toc_status(session_id):
        """Poll background-pipeline progress; on 'done' also return toc_items."""
        job = pipeline_jobs.get(session_id, {'status': 'unknown'})
        if job.get('status') == 'done':
            outline_path = Path(
                f'/tmp/{session_id}/output/context/outline_issue_report.txt')
            if outline_path.exists():
                doc_type = job.get('doc_type', 'report')
                if doc_type == 'briefing':
                    toc_items = parse_briefing_plan_for_frontend(outline_path)
                else:
                    toc_items = parse_outline_for_frontend(outline_path)
                return jsonify({'status': 'done', 'toc_items': toc_items})
        return jsonify(job)

    @app.route('/api/generate-report-from-toc', methods=['POST'])
    def generate_report_from_toc():
        """Generate the report HTML (steps 8~9) from the edited TOC.

        Request: {"session_id": "...", "toc_items": [...],
                  "write_mode": "restructure", "instruction": "..."}
        """
        try:
            data = request.get_json()
            session_id = data.get('session_id', '')
            # NOTE(review): read but currently unused — steps 8/9 consume
            # files on disk; preserved for API compatibility.
            toc_items = data.get('toc_items', [])
            write_mode = data.get('write_mode', 'restructure')
            instruction = data.get('instruction', '')
            if not session_id:
                # TODO(i18n): original Korean message lost
                return jsonify({'success': False,
                                'error': 'session_id is required.'})

            input_dir = f'/tmp/{session_id}/input'
            output_dir = f'/tmp/{session_id}/output'

            from converters.pipeline import step8_content, step9_html
            doc_type = data.get('doc_type', 'report')
            step8_content.main(output_dir, output_dir, doc_type=doc_type)
            step9_html.main(output_dir, output_dir, doc_type=doc_type)

            report_html_path = Path(output_dir) / 'generated' / 'report.html'

            # briefing_content is None unless step8 produced the JSON.
            briefing_json_path = Path(output_dir) / 'generated' / 'briefing_content.json'
            briefing_content = None
            if briefing_json_path.exists():
                briefing_content = json.loads(
                    briefing_json_path.read_text(encoding='utf-8'))

            if report_html_path.exists():
                html = report_html_path.read_text(encoding='utf-8')

                # Persist briefing result (best-effort).
                try:
                    with get_conn() as conn:
                        with conn.cursor() as cur:
                            cur.execute("""
                                INSERT INTO briefings (session_id, briefing_json, html)
                                VALUES (%s, %s, %s)
                                ON CONFLICT (session_id)
                                DO UPDATE SET briefing_json=EXCLUDED.briefing_json,
                                              html=EXCLUDED.html
                            """, (session_id,
                                  json.dumps(briefing_content, ensure_ascii=False)
                                  if briefing_content else '',
                                  html))
                        conn.commit()
                    print(f"[DB] briefing stored: {session_id}", flush=True)
                except Exception as de:
                    print(f"[DB] briefing store failed: {de}", flush=True)

                return jsonify({
                    'success': True,
                    'html': html,
                    'briefing_content': briefing_content,
                })
            else:
                # TODO(i18n): original Korean "generation failed" message lost
                return jsonify({'success': False,
                                'error': 'report generation failed.'})
        except Exception as e:
            return jsonify({'success': False, 'error': str(e)}), 500

    @app.route('/api/check-folder', methods=['POST'])
    def check_folder():
        """Scan a folder and report supported vs unsupported files."""
        try:
            # TODO(review): request parsing reconstructed — the original
            # preamble of this handler was destroyed by corruption.
            data = request.get_json()
            folder = Path(data.get('folder_path', ''))
            if not folder.exists():
                # TODO(i18n): original Korean "folder not found" message lost
                return jsonify({'success': False, 'error': 'folder not found.'})

            # Fixed: 'md', 'json', 'img', 'png', 'html' lacked the leading
            # dot, so Path.suffix (which includes the dot) never matched them.
            SUPPORTED = {'.hwpx', '.hwp', '.pdf', '.docx', '.xlsx', '.pptx',
                         '.txt', '.csv', '.md', '.json', '.img', '.png', '.html'}
            all_files = [f for f in folder.rglob('*') if f.is_file()]
            ok_files = [f for f in all_files if f.suffix.lower() in SUPPORTED]
            unknown_files = [f for f in all_files if f.suffix.lower() not in SUPPORTED]
            return jsonify({
                'success': True,
                'total': len(all_files),
                'ok': len(ok_files),
                'unknown': len(unknown_files),
                'ok_list': [{'name': f.name, 'size': f.stat().st_size}
                            for f in ok_files],
                'unknown_list': [f.name for f in unknown_files],
            })
        except Exception as e:
            return jsonify({'success': False, 'error': str(e)}), 500

    @app.route('/api/analyze-briefing', methods=['POST'])
    def analyze_briefing():
        """Analyze source text and propose an A4 briefing page plan as JSON.

        NOTE(review): the original Korean LLM prompt and parts of this handler
        were lost to encoding corruption; the structure was reconstructed and
        the prompt must be restored from VCS.
        """
        try:
            import openai
            data = request.get_json()
            session_id = data.get('session_id', '')
            content = data.get('content', '')

            source_text = content
            if session_id:
                input_dir = Path(f'/tmp/{session_id}/input')
                output_dir = Path(f'/tmp/{session_id}/output')
                if input_dir.exists():
                    try:
                        from converters.pipeline import step2_extract
                        output_dir.mkdir(parents=True, exist_ok=True)
                        step2_extract.process_all_pdfs(str(input_dir), str(output_dir))
                        # Prefer extracted text when available — presumably
                        # step2 emits .md files; TODO confirm.
                        extracted = sorted(output_dir.rglob('*.md'))
                        if extracted:
                            source_text = '\n\n'.join(
                                p.read_text(encoding='utf-8', errors='ignore')
                                for p in extracted)
                    except Exception as ex:
                        print(f"step2 extraction failed: {ex}", flush=True)

            if not source_text:
                # TODO(i18n): original Korean "no content" message lost
                return jsonify({'success': False,
                                'error': 'no source content available.'})

            client = openai.OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
            # TODO(i18n): original Korean prompt (A4 page plan with lead/body/
            # bottom sections, body page + attachment page rules) lost;
            # minimal skeleton kept.
            prompt = f"""Analyze the following document and plan A4 briefing pages.
Return JSON only, shaped as:
{{"pages": [{{"page": 1, "title": "...",
             "sections": [{{"type": "...", "content": "..."}}]}}]}}

Document:
{source_text[:4000]}
"""
            # TODO(review): the completion call was lost to corruption and
            # was reconstructed to match generate_briefing below.
            resp = client.chat.completions.create(
                model=os.getenv('OPENAI_MODEL', 'gpt-4o'),
                messages=[{'role': 'user', 'content': prompt}],
                temperature=0.3,
            )
            raw = resp.choices[0].message.content.strip()
            # Strip markdown code fences before parsing.
            raw = raw.replace('```json', '').replace('```', '').strip()
            plan = json.loads(raw)
            return jsonify({'success': True, 'plan': plan})
        except Exception as e:
            import traceback
            print(traceback.format_exc(), flush=True)
            return jsonify({'success': False, 'error': str(e)}), 500

    @app.route('/api/generate-briefing', methods=['POST'])
    def generate_briefing():
        """Render the briefing plan (+ user comment) into A4 Navy-style HTML."""
        try:
            import openai
            data = request.get_json()
            session_id = data.get('session_id', '')
            plan = data.get('plan', {})
            comment = data.get('comment', '')
            content = data.get('content', '')
            doc_type = data.get('doc_type', '')

            source_text = content
            if session_id:
                input_dir = Path(f'/tmp/{session_id}/input')
                output_dir = Path(f'/tmp/{session_id}/output')
                if input_dir.exists():
                    # Re-extract PDFs with step2 so source_text is fresh.
                    try:
                        from converters.pipeline import step2_extract
                        output_dir.mkdir(parents=True, exist_ok=True)
                        step2_extract.process_all_pdfs(str(input_dir), str(output_dir))
                        # TODO(review): original read of the extracted .md
                        # output was lost to corruption; reconstructed.
                        extracted = sorted(output_dir.rglob('*.md'))
                        if extracted:
                            source_text = '\n\n'.join(
                                p.read_text(encoding='utf-8', errors='ignore')
                                for p in extracted)
                    except Exception as ex:
                        print(f"step2 extraction failed: {ex}", flush=True)

            client = openai.OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
            plan_str = json.dumps(plan, ensure_ascii=False, indent=2)
            # TODO(i18n): original Korean "apply this feedback" wrapper lost.
            comment_str = (f"\nUser feedback (apply):\n{comment}"
                           if comment else "")
            # TODO(i18n): original Korean prompt (Navy palette #1a365d,
            # #2c5282, #f7fafc; .sheet overflow:hidden; inline-styled HTML
            # only) lost; skeleton kept.
            prompt = f"""Using the page plan and source document below, produce a
complete A4 HTML briefing.

Plan:
{plan_str}

Source document:
{source_text[:5000]}

[Output rules]
- Follow the plan's page structure exactly
- Navy color scheme with inline styles (#1a365d, #2c5282, #f7fafc)
- Each page is a .sheet with overflow: hidden
- Output raw HTML only, no code fences or commentary{comment_str}
"""
            resp = client.chat.completions.create(
                model=os.getenv('OPENAI_MODEL', 'gpt-4o'),
                messages=[{'role': 'user', 'content': prompt}],
                temperature=0.4,
                max_tokens=4000,
            )
            html = resp.choices[0].message.content.strip()
            # Strip a surrounding markdown code fence if the model added one.
            if html.startswith('```'):
                html = html.split('\n', 1)[1]
                html = html.rsplit('```', 1)[0]
            return jsonify({'success': True, 'html': html})
        except Exception as e:
            import traceback
            print(traceback.format_exc(), flush=True)
            return jsonify({'success': False, 'error': str(e)}), 500


def parse_outline_for_frontend(outline_path: Path) -> list:
    """Parse outline_issue_report.txt into the frontend TOC item shape.

    Returns a list of dicts:
        [{"num": "1.1.1", "title": "...", "report_title": "...",
          "guide": "...", "keywords": ["kw1", "kw2"]}]
    The first line of the file is the report title; level-3 headings
    ("1.2.3 Title") start items, and "- topic | #kw ... | [..] | guide"
    bullet lines attach keywords and guide text to the current item.
    """
    import re
    raw = outline_path.read_text(encoding='utf-8', errors='ignore').splitlines()
    if not raw:
        return []
    report_title = raw[0].strip()
    items = []

    re_l3_head = re.compile(r'^\s*(\d+\.\d+\.\d+)\s+(.+)$')
    re_l3_topic = re.compile(
        r'^\s*[\-\*]\s+(.+?)\s*\|\s*(.+?)\s*\|\s*(\[.+?\])\s*\|\s*(.+)$')
    re_keywords = re.compile(r'(#\S+)')

    current_l3 = None
    for ln in raw[1:]:
        line = ln.strip()
        if not line:
            continue
        m3h = re_l3_head.match(line)
        if m3h:
            current_l3 = {
                'num': m3h.group(1),
                'title': m3h.group(2),
                'report_title': report_title,
                'guide': '',
                'keywords': [],
            }
            items.append(current_l3)
            continue
        m3t = re_l3_topic.match(line)
        if m3t and current_l3:
            # Accumulate keywords (strip the leading '#').
            kws = [k.lstrip('#').strip()
                   for k in re_keywords.findall(m3t.group(2))]
            current_l3['keywords'].extend(kws)
            # Accumulate guide text, ' / '-joined.
            if current_l3['guide']:
                current_l3['guide'] += ' / '
            current_l3['guide'] += m3t.group(4)
    return items


# TODO(review): the page-marker glyphs below (likely emoji such as a page
# and a paperclip) were destroyed by the encoding corruption — the original
# condition degenerated to "'' in line" (always true). Restore the real
# markers from version control; these are placeholders.
_BODY_MARKER = '\U0001F4C4'    # placeholder body-page marker
_ATTACH_MARKER = '\U0001F4CE'  # placeholder attachment-page marker


def parse_briefing_plan_for_frontend(outline_path: Path) -> list:
    """Parse a briefing plan text file into frontend page items.

    Each page line starts with a marker glyph; subsequent '-' lines become
    sections ('label | comment | ... | fmt'), with special 'lead:' and
    'bottom:' lines handled separately.

    NOTE(review): marker strings and several Korean literals in this function
    were lost to corruption and reconstructed with placeholders.
    """
    import re as _re
    raw = outline_path.read_text(encoding='utf-8', errors='ignore').strip()
    raw_lines = raw.splitlines()
    if not raw_lines:
        return []

    # A marker sometimes sits alone on a line; merge it with the next line.
    merged = []
    idx = 0
    while idx < len(raw_lines):
        ln = raw_lines[idx].strip()
        if ln in (_BODY_MARKER, _ATTACH_MARKER) and idx + 1 < len(raw_lines):
            merged.append(ln + ' ' + raw_lines[idx + 1].strip())
            idx += 2
            continue
        merged.append(raw_lines[idx])
        idx += 1

    items = []
    current_page = None
    for line in merged:
        line = line.strip()
        if not line:
            continue
        if _BODY_MARKER in line or _ATTACH_MARKER in line:
            icon = _BODY_MARKER if _BODY_MARKER in line else _ATTACH_MARKER
            title = (line.replace(_BODY_MARKER, '')
                         .replace(_ATTACH_MARKER, '').strip())
            # TODO(i18n): the Korean normalization of "body N" / "attachment N"
            # titles and the 'title:' prefix handling were lost; placeholders.
            title = _re.sub(r'body\s*\d*', 'body', title).strip()
            title = _re.sub(r'attachment\s*(\d+)?', r'attachment \1', title).strip()
            if 'title:' in title:
                title = title.split('title:')[-1].strip()
            if not title:
                title = 'body' if icon == _BODY_MARKER else 'attachment'
            current_page = {
                'num': icon,
                'title': title,
                'guide': '',
                'keywords': [],
                'sections': [],
            }
            items.append(current_page)
        elif current_page is not None and line:
            content = line.lstrip('-').strip()
            if content.startswith(':'):
                continue
            if content.startswith('title:'):
                current_page['title'] = content.replace('title:', '').strip()
                continue
            # lead/bottom sections carry their text after the colon.
            # TODO(i18n): original Korean labels lost; placeholders kept.
            if _re.match(r'^lead\s*:', content):
                current_page['sections'].append(
                    {'label': 'lead', 'text': content.split(':', 1)[-1].strip()})
                continue
            if _re.match(r'^bottom\s*:', content):
                current_page['sections'].append(
                    {'label': 'bottom', 'text': content.split(':', 1)[-1].strip()})
                continue
            if '|' in content:
                parts = [p.strip() for p in content.split('|')]
                section_name = parts[0].split(':')[-1].strip()
                comment = parts[1] if len(parts) > 1 else ''
                fmt = (parts[3] if len(parts) > 3
                       else (parts[2] if len(parts) > 2 else ''))
                current_page['sections'].append({
                    'label': section_name,
                    'text': comment,
                    'fmt': fmt,
                })
            else:
                current_page['sections'].append({'label': content, 'text': ''})

    # The frontend renders sections directly; guide stays empty.
    for item in items:
        item['guide'] = ''
    return items