Cleanup: Deleting 03.Code/업로드용/domain_api.py
This commit is contained in:
@@ -1,962 +0,0 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv()
|
||||
|
||||
"""
|
||||
domain_api.py
|
||||
|
||||
|
||||
硫由 API +
|
||||
|
||||
ъ⑸ (app.py):
|
||||
from domain_api import register_domain_routes
|
||||
register_domain_routes(app)
|
||||
"""
|
||||
|
||||
import os
|
||||
import json
|
||||
from pathlib import Path
|
||||
from flask import request, jsonify
|
||||
import threading
|
||||
import hashlib
|
||||
import psycopg2
|
||||
from db import get_conn
|
||||
from db import get_conn
|
||||
|
||||
# ===== 寃쎈
|
||||
ㅼ =====
|
||||
BASE_DIR = Path(__file__).parent
|
||||
DOMAIN_CONFIG_PATH = BASE_DIR / "domain_config.json"
|
||||
DOMAIN_DIR = BASE_DIR / "domain"
|
||||
#
|
||||
寃쎈 (step3~9媛 ъ⑺ 寃쎈)
|
||||
PIPELINE_OUTPUT_ROOT = Path(os.getenv("PIPELINE_OUTPUT_ROOT", "/tmp/pipeline_output"))
|
||||
CONTEXT_DIR = PIPELINE_OUTPUT_ROOT / "context"
|
||||
pipeline_jobs = {}
|
||||
def register_domain_routes(app):
|
||||
"""Flask 깆
|
||||
硫 고 깅
|
||||
@app.route('/api/domain-config', methods=['GET'])
|
||||
def get_domain_config():
|
||||
"""
|
||||
硫
|
||||
ㅼ 諛
|
||||
硫 щ 泥댄
|
||||
for cat in config.get('categories', []):
|
||||
if cat.get('file'):
|
||||
fpath = DOMAIN_DIR / cat['file']
|
||||
cat['file_exists'] = fpath.exists()
|
||||
cat['file_size'] = fpath.stat().st_size if fpath.exists() else 0
|
||||
|
||||
for child in cat.get('children', []):
|
||||
if child.get('file'):
|
||||
fpath = DOMAIN_DIR / child['file']
|
||||
child['file_exists'] = fpath.exists()
|
||||
child['file_size'] = fpath.stat().st_size if fpath.exists() else 0
|
||||
|
||||
return jsonify(config)
|
||||
else:
|
||||
return jsonify({'error': 'domain_config.json not found', 'categories': []}), 404
|
||||
except Exception as e:
|
||||
return jsonify({'error': str(e), 'categories': []}), 500
|
||||
|
||||
|
||||
@app.route('/api/domain-combine', methods=['POST'])
|
||||
def combine_domains():
|
||||
"""
|
||||
|
||||
|
||||
硫㼼
|
||||
⑹
|
||||
domain_prompt.txt濡
|
||||
|
||||
泥:
|
||||
{ "selected": ["civil_general", "survey", "bim"] }
|
||||
|
||||
:
|
||||
{ "success": true, "combined_length": 3200, "selected_names": [...] }
|
||||
"""
|
||||
try:
|
||||
data = request.get_json()
|
||||
selected_ids = data.get('selected', [])
|
||||
|
||||
if not selected_ids:
|
||||
return jsonify({
|
||||
'success': True,
|
||||
'combined_length': 0,
|
||||
'selected_names': [],
|
||||
'message': '
|
||||
- step3 遺
|
||||
|
||||
'
|
||||
})
|
||||
|
||||
# config 濡
|
||||
config = json.loads(DOMAIN_CONFIG_PATH.read_text(encoding='utf-8'))
|
||||
|
||||
#
|
||||
ID +
|
||||
留ㅽ
|
||||
domain_parts = []
|
||||
guide_parts = []
|
||||
selected_names = []
|
||||
|
||||
for cat in config.get('categories', []):
|
||||
is_guide = (cat['id'] == 'report_guide')
|
||||
target = guide_parts if is_guide else domain_parts
|
||||
|
||||
if cat['id'] in selected_ids and cat.get('file'):
|
||||
fpath = DOMAIN_DIR / cat['file']
|
||||
if fpath.exists():
|
||||
content = fpath.read_text(encoding='utf-8', errors='ignore').strip()
|
||||
if content:
|
||||
target.append(f"[{cat['label']}]\n{content}")
|
||||
selected_names.append(cat['label'])
|
||||
|
||||
for child in cat.get('children', []):
|
||||
if child['id'] in selected_ids and child.get('file'):
|
||||
fpath = DOMAIN_DIR / child['file']
|
||||
if fpath.exists():
|
||||
content = fpath.read_text(encoding='utf-8', errors='ignore').strip()
|
||||
if content:
|
||||
target.append(f"[{child['label']}]\n{content}")
|
||||
selected_names.append(child['label'])
|
||||
selected_names.append(child['label'])
|
||||
|
||||
if not domain_parts and not guide_parts:
|
||||
return jsonify({
|
||||
'success': False,
|
||||
'error': '
|
||||
|
||||
硫
|
||||
댁듬.'
|
||||
})
|
||||
|
||||
sep = "\n\n" + "=" * 50 + "\n\n"
|
||||
sections = []
|
||||
|
||||
if domain_parts:
|
||||
domain_names = [n for n in selected_names if n not in ['紐⑹감 援ъ
|
||||
媛', '蹂닿
|
||||
臾몄껜 媛']]
|
||||
sections.append(
|
||||
f"
|
||||
ㅼ 遺
|
||||
쇱
|
||||
臾멸: {', '.join(domain_names)}.\n"
|
||||
f"ㅼ
|
||||
硫
|
||||
湲곕쇰, ъㅼ쇨굅
|
||||
臾몄 댁⑹
|
||||
|
||||
깊痢≪쎌 湲吏, 怨듬 洹쇨굅 몄
|
||||
理 蹂댁〈
|
||||
硫
|
||||
臾 吏
|
||||
|
||||
媛]\n"
|
||||
f"ㅼ 媛瑜 李멸 蹂닿
|
||||
⑹감 援ъ
|
||||
깃낵 臾몄껜瑜 寃곗 二쇱
|
||||
(ъ⑹
|
||||
|
||||
닿
|
||||
|
||||
媛]\n"
|
||||
"ㅼ 媛瑜 李멸 蹂닿
|
||||
⑹감 援ъ
|
||||
깃낵 臾몄껜瑜 寃곗
|
||||
CONTEXT_DIR.mkdir(parents=True, exist_ok=True)
|
||||
output_path = CONTEXT_DIR / "domain_prompt.txt"
|
||||
output_path.write_text(final_text, encoding='utf-8')
|
||||
|
||||
return jsonify({
|
||||
'success': True,
|
||||
'combined_length': len(final_text),
|
||||
'selected_names': selected_names,
|
||||
'selected_ids': selected_ids,
|
||||
'output_path': str(output_path)
|
||||
})
|
||||
|
||||
except Exception as e:
|
||||
return jsonify({'success': False, 'error': str(e)}), 500
|
||||
|
||||
|
||||
@app.route('/api/domain-list', methods=['GET'])
def list_domain_files():
    """List every .txt file under the domain directory.

    Returns a JSON payload with one entry per file: its path relative to
    DOMAIN_DIR, stem name, size in bytes, and the first 200 characters as
    a preview. A missing directory yields an empty list, not an error.
    """
    try:
        entries = []
        if DOMAIN_DIR.exists():
            for path in sorted(DOMAIN_DIR.rglob('*.txt')):
                # errors='ignore' keeps a single badly-encoded file from
                # breaking the whole listing.
                text = path.read_text(encoding='utf-8', errors='ignore')
                entries.append({
                    'path': str(path.relative_to(DOMAIN_DIR)),
                    'name': path.stem,
                    'size': path.stat().st_size,
                    'preview': text[:200],
                })
        return jsonify({
            'success': True,
            'files': entries,
            'domains_dir': str(DOMAIN_DIR)
        })
    except Exception as e:
        return jsonify({'success': False, 'error': str(e)}), 500
|
||||
|
||||
|
||||
@app.route('/api/domain-save', methods=['POST'])
|
||||
def save_domain_file():
|
||||
"""
|
||||
|
||||
硫/
|
||||
|
||||
泥:
|
||||
{ "id": "survey", "content": "痢〓 遺
|
||||
쇱
|
||||
臾 吏 content媛
|
||||
⑸.'})
|
||||
|
||||
# config
|
||||
李얘린
|
||||
config = json.loads(DOMAIN_CONFIG_PATH.read_text(encoding='utf-8'))
|
||||
file_path = None
|
||||
|
||||
for cat in config.get('categories', []):
|
||||
if cat['id'] == domain_id:
|
||||
file_path = cat.get('file')
|
||||
break
|
||||
for child in cat.get('children', []):
|
||||
if child['id'] == domain_id:
|
||||
file_path = child.get('file')
|
||||
break
|
||||
if file_path:
|
||||
break
|
||||
|
||||
if not file_path:
|
||||
return jsonify({'success': False, 'error': f'
|
||||
硫
|
||||
듬: {domain_id}'})
|
||||
|
||||
#
|
||||
full_path = BASE_DIR / file_path
|
||||
full_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
full_path.write_text(content, encoding='utf-8')
|
||||
|
||||
return jsonify({
|
||||
'success': True,
|
||||
'path': str(full_path),
|
||||
'size': len(content)
|
||||
})
|
||||
|
||||
except Exception as e:
|
||||
return jsonify({'success': False, 'error': str(e)}), 500
|
||||
|
||||
|
||||
@app.route('/api/pipeline/status', methods=['GET'])
def pipeline_status():
    """Report which pipeline step artifacts currently exist on disk.

    Each step maps to a boolean existence check of its output file, except
    step 4, which reports the number of ``*_chunks.json`` files produced.
    """
    try:
        rag_dir = PIPELINE_OUTPUT_ROOT / 'rag'
        generated_dir = PIPELINE_OUTPUT_ROOT / 'generated'
        # Count chunk files only when the rag directory exists; glob on a
        # missing directory would raise.
        chunk_count = len(list(rag_dir.glob('*_chunks.json'))) if rag_dir.exists() else 0
        status = {
            'step3_domain': (CONTEXT_DIR / 'domain_prompt.txt').exists(),
            'step4_chunks': chunk_count,
            'step5_faiss': (rag_dir / 'faiss.index').exists(),
            'step6_corpus': (CONTEXT_DIR / 'corpus.txt').exists(),
            'step7_outline': (CONTEXT_DIR / 'outline_issue_report.txt').exists(),
            'step8_report': (generated_dir / 'report_draft.md').exists(),
            'step9_html': (generated_dir / 'report.html').exists(),
        }
        return jsonify({'success': True, 'status': status})
    except Exception as e:
        return jsonify({'success': False, 'error': str(e)}), 500
|
||||
|
||||
def run_toc_pipeline(session_id, input_dir, output_dir, doc_type='report', attach_pages=1):
|
||||
try:
|
||||
pipeline_jobs[session_id] = {'status': 'running', 'step': 2}
|
||||
from converters.pipeline import step2_extract, step3_domain, step4_chunk, step5_rag, step6_corpus, step7_index
|
||||
|
||||
# 댁 怨
|
||||
|
||||
input_files = sorted(Path(input_dir).glob('*')) if Path(input_dir).exists() else []
|
||||
file_hashes = []
|
||||
new_files = [] # RAG 罹 HIT: {f.name} ({h})", flush=True)
|
||||
else:
|
||||
new_files.append(f)
|
||||
print(f"[DB] RAG 罹 MISS: {f.name} ({h})", flush=True)
|
||||
except Exception as de:
|
||||
print(f"[DB] 罹 議고 ㅽ, 洹 泥: {de}", flush=True)
|
||||
new_files.append(f)
|
||||
|
||||
# 洹 step2~5 ㅽ
|
||||
if new_files:
|
||||
step2_extract.process_all_pdfs(input_dir, output_dir)
|
||||
pipeline_jobs[session_id]['step'] = 3
|
||||
step3_domain.main(input_dir, output_dir)
|
||||
pipeline_jobs[session_id]['step'] = 4
|
||||
step4_chunk.main(output_dir, output_dir)
|
||||
pipeline_jobs[session_id]['step'] = 5
|
||||
step5_rag.main(output_dir, output_dir)
|
||||
|
||||
# RAG 寃곌낵臾 DB
|
||||
faiss_path = rag_dir / 'faiss.index'
|
||||
vectors_path = rag_dir / 'vectors.npy'
|
||||
meta_path = rag_dir / 'meta.json'
|
||||
chunks_files = list(rag_dir.glob('*_chunks.json'))
|
||||
|
||||
faiss_bytes = faiss_path.read_bytes() if faiss_path.exists() else b''
|
||||
vectors_bytes = vectors_path.read_bytes() if vectors_path.exists() else b''
|
||||
meta_text = meta_path.read_text(encoding='utf-8') if meta_path.exists() else ''
|
||||
chunks_text = chunks_files[0].read_text(encoding='utf-8') if chunks_files else ''
|
||||
|
||||
for f in new_files:
|
||||
h = hashlib.md5(f.read_bytes()).hexdigest()
|
||||
try:
|
||||
with get_conn() as conn:
|
||||
with conn.cursor() as cur:
|
||||
cur.execute("""
|
||||
INSERT INTO files (file_hash, filename)
|
||||
VALUES (%s, %s)
|
||||
ON CONFLICT (file_hash) DO NOTHING
|
||||
""", (h, f.name))
|
||||
cur.execute("""
|
||||
INSERT INTO rag_cache (file_hash, chunks_json, faiss_index, vectors, meta_json)
|
||||
VALUES (%s, %s, %s, %s, %s)
|
||||
ON CONFLICT (file_hash) DO NOTHING
|
||||
""", (h, chunks_text,
|
||||
psycopg2.Binary(faiss_bytes),
|
||||
psycopg2.Binary(vectors_bytes),
|
||||
meta_text))
|
||||
conn.commit()
|
||||
print(f"[DB] RAG 罹 : {f.name}", flush=True)
|
||||
except Exception as de:
|
||||
print(f"[DB] RAG ㅽ: {de}", flush=True)
|
||||
else:
|
||||
print("[DB] 紐⑤ HIT step2/4/5 ㅽ, step3 ㅽ + 罹 蹂듭
|
||||
RAG 寃곌낵臾 蹂듭 蹂듭
|
||||
猷", flush=True)
|
||||
except Exception as de:
|
||||
print(f"[DB] 罹 蹂듭ㅽ, step4~5 ㅽ: {de}", flush=True)
|
||||
step4_chunk.main(output_dir, output_dir)
|
||||
step5_rag.main(output_dir, output_dir)
|
||||
pipeline_jobs[session_id]['step'] = 5
|
||||
|
||||
# step6~7 ㅽ
|
||||
pipeline_jobs[session_id]['step'] = 6
|
||||
step6_corpus.main(output_dir, output_dir)
|
||||
pipeline_jobs[session_id]['step'] = 7
|
||||
step7_index.main(output_dir, output_dir, doc_type=doc_type)
|
||||
|
||||
outline_txt = Path(output_dir) / 'context' / 'outline_issue_report.txt'
|
||||
print("[DEBUG outline]", outline_txt.read_text(encoding='utf-8')[:500], flush=True)
|
||||
|
||||
# sessions / outlines DB
|
||||
outline_text = outline_txt.read_text(encoding='utf-8') if outline_txt.exists() else ''
|
||||
try:
|
||||
with get_conn() as conn:
|
||||
with conn.cursor() as cur:
|
||||
cur.execute("""
|
||||
INSERT INTO sessions (session_id, file_hashes, doc_type)
|
||||
VALUES (%s, %s, %s)
|
||||
ON CONFLICT (session_id) DO UPDATE SET doc_type=EXCLUDED.doc_type
|
||||
""", (session_id, file_hashes, doc_type))
|
||||
cur.execute("""
|
||||
INSERT INTO outlines (session_id, outline_text)
|
||||
VALUES (%s, %s)
|
||||
ON CONFLICT (session_id) DO UPDATE SET outline_text=EXCLUDED.outline_text
|
||||
""", (session_id, outline_text))
|
||||
conn.commit()
|
||||
print(f"[DB] session/outline
|
||||
猷: {session_id}", flush=True)
|
||||
except Exception as de:
|
||||
print(f"[DB] session/outline ㅽ: {de}", flush=True)
|
||||
|
||||
pipeline_jobs[session_id] = {'status': 'done', 'doc_type': doc_type}
|
||||
|
||||
except Exception as e:
|
||||
import traceback
|
||||
print(f"[PIPELINE ERROR] {e}", flush=True)
|
||||
print(traceback.format_exc(), flush=True)
|
||||
pipeline_jobs[session_id] = {'status': 'error', 'error': str(e)}
|
||||
|
||||
|
||||
# =====
|
||||
ㅽ API =====
|
||||
@app.route('/api/generate-toc', methods=['POST'])
|
||||
def generate_toc():
|
||||
"""
|
||||
紐⑹감
|
||||
API (step3 4 5 6 7)
|
||||
|
||||
|
||||
硫
|
||||
|
||||
寃쎌: step3 ㅽ ()
|
||||
|
||||
硫
|
||||
寃쎌: step3 ㅽ
|
||||
|
||||
泥:
|
||||
{
|
||||
"folder_path": "D:\\...",
|
||||
"domain_selected": true/false,
|
||||
"selected_domains": ["civil_general", "survey"]
|
||||
}
|
||||
|
||||
:
|
||||
{
|
||||
"success": true,
|
||||
"title": "蹂닿
|
||||
紐",
|
||||
"toc_items": [
|
||||
{ "num": "1.1.1", "title": "...", "guide": "...", "keywords": [...] }
|
||||
]
|
||||
}
|
||||
"""
|
||||
try:
|
||||
data = request.get_json()
|
||||
session_id = data.get('session_id', '')
|
||||
domain_selected = data.get('domain_selected', False)
|
||||
write_mode = data.get('write_mode', 'restructure')
|
||||
instruction = data.get('instruction', '')
|
||||
|
||||
if not session_id:
|
||||
return jsonify({'success': False, 'error': 'session_id媛 듬.
|
||||
癒쇱
|
||||
濡
|
||||
몄.'})
|
||||
|
||||
input_dir = f'/tmp/{session_id}/input'
|
||||
output_dir = f'/tmp/{session_id}/output'
|
||||
os.makedirs(output_dir, exist_ok=True)
|
||||
|
||||
doc_type = data.get('doc_type', 'report')
|
||||
attach_pages = int(data.get('attach_pages', 1))
|
||||
t = threading.Thread(target=run_toc_pipeline, args=(session_id, input_dir, output_dir, doc_type, attach_pages))
|
||||
t.daemon = True
|
||||
t.start()
|
||||
return jsonify({'success': True, 'status': 'processing', 'session_id': session_id})
|
||||
|
||||
except Exception as e:
|
||||
return jsonify({'success': False, 'error': str(e)}), 500
|
||||
|
||||
@app.route('/api/toc-status/<session_id>', methods=['GET'])
def toc_status(session_id):
    """Poll the background TOC pipeline job for *session_id*.

    While the job is running (or unknown) the raw job record is returned.
    Once the job is done and the outline file exists, the outline is parsed
    into frontend TOC items — using the briefing parser when the job was a
    briefing, the report outline parser otherwise.
    """
    job = pipeline_jobs.get(session_id, {'status': 'unknown'})
    if job.get('status') == 'done':
        outline_path = Path(f'/tmp/{session_id}/output/context/outline_issue_report.txt')
        if outline_path.exists():
            parse = (parse_briefing_plan_for_frontend
                     if job.get('doc_type', 'report') == 'briefing'
                     else parse_outline_for_frontend)
            return jsonify({'status': 'done', 'toc_items': parse(outline_path)})
    return jsonify(job)
|
||||
|
||||
|
||||
@app.route('/api/generate-report-from-toc', methods=['POST'])
|
||||
def generate_report_from_toc():
|
||||
"""
|
||||
몄 紐⑹감濡 蹂닿
|
||||
|
||||
(step8 step9)
|
||||
|
||||
泥:
|
||||
{
|
||||
"toc_items": [...], # 몄 紐⑹감
|
||||
"write_mode": "restructure",
|
||||
"instruction": "..."
|
||||
}
|
||||
"""
|
||||
try:
|
||||
data = request.get_json()
|
||||
session_id = data.get('session_id', '')
|
||||
toc_items = data.get('toc_items', [])
|
||||
write_mode = data.get('write_mode', 'restructure')
|
||||
instruction = data.get('instruction', '')
|
||||
|
||||
if not session_id:
|
||||
return jsonify({'success': False, 'error': 'session_id媛 듬.'})
|
||||
|
||||
input_dir = f'/tmp/{session_id}/input'
|
||||
output_dir = f'/tmp/{session_id}/output'
|
||||
|
||||
from converters.pipeline import step8_content, step9_html
|
||||
|
||||
doc_type = data.get('doc_type', 'report')
|
||||
|
||||
step8_content.main(output_dir, output_dir, doc_type=doc_type)
|
||||
step9_html.main(output_dir, output_dir, doc_type=doc_type)
|
||||
|
||||
report_html_path = Path(output_dir) / 'generated' / 'report.html'
|
||||
|
||||
# briefing_content 쇰㈃ None)
|
||||
briefing_json_path = Path(output_dir) / 'generated' / 'briefing_content.json'
|
||||
briefing_content = None
|
||||
if briefing_json_path.exists():
|
||||
briefing_content = json.loads(briefing_json_path.read_text(encoding='utf-8'))
|
||||
|
||||
# 湲곗〈 html 諛吏 吏 + briefing_content
|
||||
留 異媛
|
||||
if report_html_path.exists():
|
||||
html = report_html_path.read_text(encoding='utf-8')
|
||||
# briefing 寃곌낵臾 DB
|
||||
try:
|
||||
with get_conn() as conn:
|
||||
with conn.cursor() as cur:
|
||||
cur.execute("""
|
||||
INSERT INTO briefings (session_id, briefing_json, html)
|
||||
VALUES (%s, %s, %s)
|
||||
ON CONFLICT (session_id) DO UPDATE
|
||||
SET briefing_json=EXCLUDED.briefing_json, html=EXCLUDED.html
|
||||
""", (session_id,
|
||||
json.dumps(briefing_content, ensure_ascii=False) if briefing_content else '',
|
||||
html))
|
||||
conn.commit()
|
||||
print(f"[DB] briefing
|
||||
猷: {session_id}", flush=True)
|
||||
except Exception as de:
|
||||
print(f"[DB] briefing ㅽ: {de}", flush=True)
|
||||
return jsonify({
|
||||
'success': True,
|
||||
'html': html,
|
||||
'briefing_content': briefing_content
|
||||
})
|
||||
else:
|
||||
return jsonify({
|
||||
'success': False,
|
||||
'error': '蹂닿
|
||||
|
||||
깆ㅽ⑦듬.'
|
||||
})
|
||||
|
||||
except Exception as e:
|
||||
return jsonify({'success': False, 'error': str(e)}), 500
|
||||
|
||||
|
||||
|
||||
@app.route('/api/check-folder', methods=['POST'])
|
||||
def check_folder():
|
||||
"""대 寃쎈
|
||||
遺
|
||||
瑜대瑜 李얠
|
||||
듬.'})
|
||||
|
||||
SUPPORTED = {'.hwpx', '.hwp', '.pdf', '.docx', '.xlsx', '.pptx', '.txt', '.csv', 'md', 'json','img', 'png', 'html'}
|
||||
|
||||
all_files = [f for f in folder.rglob('*') if f.is_file()]
|
||||
ok_files = [f for f in all_files if f.suffix.lower() in SUPPORTED]
|
||||
unknown_files = [f for f in all_files if f.suffix.lower() not in SUPPORTED]
|
||||
|
||||
return jsonify({
|
||||
'success': True,
|
||||
'total': len(all_files),
|
||||
'ok': len(ok_files),
|
||||
'unknown': len(unknown_files),
|
||||
'ok_list': [{'name': f.name, 'size': f.stat().st_size} for f in ok_files],
|
||||
'unknown_list': [f.name for f in unknown_files]
|
||||
})
|
||||
except Exception as e:
|
||||
return jsonify({'success': False, 'error': str(e)}), 500
|
||||
|
||||
@app.route('/api/analyze-briefing', methods=['POST'])
|
||||
def analyze_briefing():
|
||||
"""
|
||||
|
||||
濡
|
||||
遺
|
||||
|
||||
댁
|
||||
湲고
|
||||
援ъ
|
||||
怨
|
||||
諛
|
||||
濡
|
||||
|
||||
ㅽ 異異
|
||||
source_text = content
|
||||
if session_id:
|
||||
input_dir = Path(f'/tmp/{session_id}/input')
|
||||
output_dir = Path(f'/tmp/{session_id}/output')
|
||||
if input_dir.exists():
|
||||
try:
|
||||
from converters.pipeline import step2_extract
|
||||
output_dir.mkdir(parents=True, exist_ok=True)
|
||||
step2_extract.process_all_pdfs(str(input_dir), str(output_dir))
|
||||
except Exception as ex:
|
||||
print(f"step2 異異
|
||||
|
||||
댁⑹듬.'})
|
||||
|
||||
client = openai.OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
|
||||
|
||||
prompt = f"""ㅼ 臾몄
|
||||
瑜 遺
|
||||
|
||||
A4 湲고
|
||||
濡 뎄
|
||||
깊 怨
|
||||
|
||||
JSON쇰 諛
|
||||
댁:
|
||||
{source_text[:4000]}
|
||||
|
||||
諛,
|
||||
ㅻ
|
||||
紐",
|
||||
"sections": [
|
||||
{{"type": "由щ諛", "content": "듭 硫吏 以
|
||||
"}},
|
||||
{{"type": "
|
||||
뱀
|
||||
|
||||
뱀
|
||||
紐 + 댁
|
||||
댁 "}},
|
||||
{{"type": "
|
||||
뱀
|
||||
|
||||
뱀
|
||||
紐 + 댁
|
||||
댁 "}},
|
||||
{{"type": "⑤", "content": "듭 寃곕 以
|
||||
"}}
|
||||
]
|
||||
}},
|
||||
{{
|
||||
"page": 2,
|
||||
"title": "[泥⑤] 紐",
|
||||
"sections": [
|
||||
{{"type": "
|
||||
뱀
|
||||
⑤ 댁 "}},
|
||||
{{"type": "⑤", "content": "듭 寃곕"}}
|
||||
]
|
||||
}}
|
||||
]
|
||||
}}
|
||||
|
||||
洹移:
|
||||
- 蹂몃Ц 1 +
|
||||
泥⑤ 1 (댁⑹
|
||||
留)
|
||||
- 媛
|
||||
뱀
|
||||
ㅼ 臾몄
|
||||
댁 湲곕쇰 援ъ껜쇰
|
||||
|
||||
- JSON留 諛щㅼ 肄釉濡釉濡嫄
|
||||
raw = raw.replace('```json', '').replace('```', '').strip()
|
||||
plan = json.loads(raw)
|
||||
|
||||
return jsonify({'success': True, 'plan': plan})
|
||||
|
||||
except Exception as e:
|
||||
import traceback
|
||||
print(traceback.format_exc(), flush=True)
|
||||
return jsonify({'success': False, 'error': str(e)}), 500
|
||||
|
||||
|
||||
@app.route('/api/generate-briefing', methods=['POST'])
|
||||
def generate_briefing():
|
||||
"""
|
||||
援ъ
|
||||
怨
|
||||
+ 肄硫 諛 ㅼ A4 Navy HTML 湲고
|
||||
|
||||
|
||||
"""
|
||||
try:
|
||||
import openai
|
||||
data = request.get_json()
|
||||
session_id = data.get('session_id', '')
|
||||
plan = data.get('plan', {})
|
||||
comment = data.get('comment', '')
|
||||
content = data.get('content', '')
|
||||
doc_type = data.get('doc_type', '')
|
||||
|
||||
#
|
||||
ㅽ
|
||||
source_text = content
|
||||
if session_id:
|
||||
input_dir = Path(f'/tmp/{session_id}/input')
|
||||
output_dir = Path(f'/tmp/{session_id}/output')
|
||||
|
||||
if input_dir.exists():
|
||||
#
|
||||
step2濡 PDF
|
||||
ㅽ 蹂쇱 ㅽ
|
||||
try:
|
||||
from converters.pipeline import step2_extract
|
||||
output_dir.mkdir(parents=True, exist_ok=True)
|
||||
step2_extract.process_all_pdfs(str(input_dir), str(output_dir))
|
||||
except Exception as ex:
|
||||
print(f"step2 異異
|
||||
異異 .md
|
||||
ъ⑹硫 (諛 諛):\n{comment}" if comment else ""
|
||||
|
||||
prompt = f"""ㅼ 援ъ
|
||||
怨
|
||||
怨 臾몄
|
||||
瑜 諛쇰 A4 HTML 湲고
|
||||
瑜
|
||||
깊
|
||||
怨
|
||||
:
|
||||
{plan_str}
|
||||
|
||||
臾몄
|
||||
:
|
||||
{source_text[:5000]}
|
||||
|
||||
[異 洹移]
|
||||
-
|
||||
|
||||
由 援ъ“瑜 곕 寃
|
||||
- Navy 而щ ㅽ 吏 (#1a365d, #2c5282, #f7fafc)
|
||||
- .sheet overflow: hidden 댁⑹쇰 硫
|
||||
- 媛 蹂
|
||||
|
||||
<div class="sheet">濡 援щ
|
||||
|
||||
- HTML
|
||||
泥 肄留 諛
|
||||
ㅻ
|
||||
|
||||
由]
|
||||
<!DOCTYPE html>
|
||||
<html lang="ko">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<style>
|
||||
@import url('https://fonts.googleapis.com/css2?family=Noto+Sans+KR:wght@300;400;500;700;900&display=swap');
|
||||
:root {{
|
||||
--primary-navy: #1a365d; --secondary-navy: #2c5282;
|
||||
--dark-gray: #2d3748; --medium-gray: #4a5568;
|
||||
--light-gray: #e2e8f0; --bg-light: #f7fafc;
|
||||
--text-black: #1a202c; --border-color: #cbd5e0;
|
||||
}}
|
||||
* {{ margin:0; padding:0; box-sizing:border-box; -webkit-print-color-adjust:exact; }}
|
||||
body {{ font-family:'Noto Sans KR',sans-serif; background:#f0f0f0; display:flex; flex-direction:column; align-items:center; padding:20px 0; gap:20px; word-break:keep-all; }}
|
||||
.sheet {{ background:white; width:210mm; height:297mm; padding:20mm; box-shadow:0 0 10px rgba(0,0,0,.1); position:relative; display:flex; flex-direction:column; overflow:hidden; }}
|
||||
@media print {{ body{{background:none;padding:0;gap:0;}} .sheet{{box-shadow:none;page-break-after:always;}} }}
|
||||
.page-header {{ display:flex; justify-content:space-between; font-size:9pt; color:var(--medium-gray); margin-bottom:20px; }}
|
||||
.header-title {{ font-size:22pt; font-weight:900; color:var(--primary-navy); letter-spacing:-1px; text-align:center; margin-bottom:8px; }}
|
||||
.title-divider {{ height:3px; background:linear-gradient(90deg,var(--primary-navy),var(--secondary-navy)); margin-bottom:20px; }}
|
||||
.lead-box {{ background:var(--bg-light); border-left:4px solid var(--primary-navy); padding:14px 16px; margin-bottom:16px; font-size:11pt; font-weight:500; color:var(--dark-gray); line-height:1.6; }}
|
||||
.section {{ margin-bottom:14px; }}
|
||||
.section-title {{ font-size:11.5pt; font-weight:700; color:var(--primary-navy); display:flex; align-items:center; margin-bottom:8px; }}
|
||||
.section-title::before {{ content:""; display:inline-block; width:8px; height:8px; background:var(--secondary-navy); margin-right:10px; }}
|
||||
ul {{ list-style:none; padding-left:10px; }}
|
||||
li {{ font-size:10pt; position:relative; padding-left:14px; margin-bottom:5px; color:var(--dark-gray); line-height:1.55; }}
|
||||
li::before {{ content:""; position:absolute; left:0; color:var(--secondary-navy); }}
|
||||
.data-table {{ width:100%; border-collapse:collapse; font-size:9.5pt; border-top:2px solid var(--primary-navy); margin-top:6px; }}
|
||||
.data-table th {{ background:var(--primary-navy); color:#fff; padding:8px 6px; border:1px solid var(--secondary-navy); text-align:center; font-size:9pt; }}
|
||||
.data-table td {{ border:1px solid var(--border-color); padding:6px 8px; color:var(--dark-gray); }}
|
||||
.data-table td:first-child {{ background:var(--bg-light); font-weight:600; text-align:center; }}
|
||||
.two-col {{ display:flex; gap:12px; margin-top:6px; }}
|
||||
.info-box {{ flex:1; background:var(--bg-light); border:1px solid var(--border-color); padding:10px 12px; }}
|
||||
.info-box-title {{ font-weight:700; color:var(--primary-navy); font-size:10pt; margin-bottom:4px; }}
|
||||
.info-box p {{ font-size:10pt; color:var(--dark-gray); line-height:1.5; }}
|
||||
.bottom-box {{ border:1.5px solid var(--border-color); display:flex; margin-top:auto; min-height:65px; margin-bottom:8px; }}
|
||||
.bottom-left {{ width:18%; background:var(--primary-navy); padding:12px; display:flex; align-items:center; justify-content:center; text-align:center; font-weight:700; font-size:10pt; color:#fff; line-height:1.4; }}
|
||||
.bottom-right {{ width:82%; background:var(--bg-light); padding:12px 18px; font-size:10pt; line-height:1.6; display:flex; flex-direction:column; justify-content:center; color:var(--dark-gray); }}
|
||||
.page-footer {{ position:absolute; bottom:10mm; left:20mm; right:20mm; padding-top:8px; text-align:center; font-size:8.5pt; color:var(--medium-gray); border-top:1px solid var(--light-gray); }}
|
||||
b {{ font-weight:700; color:var(--primary-navy); }}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<!-- ш린ㅼ
|
||||
댁⑹
|
||||
-->
|
||||
</body>
|
||||
</html>"""
|
||||
|
||||
resp = client.chat.completions.create(
|
||||
model=os.getenv('OPENAI_MODEL', 'gpt-4o'),
|
||||
messages=[{'role': 'user', 'content': prompt}],
|
||||
temperature=0.4,
|
||||
max_tokens=4000
|
||||
)
|
||||
|
||||
html = resp.choices[0].message.content.strip()
|
||||
# 肄釉濡嫄
|
||||
if html.startswith('```'):
|
||||
html = html.split('\n', 1)[1]
|
||||
html = html.rsplit('```', 1)[0]
|
||||
|
||||
return jsonify({'success': True, 'html': html})
|
||||
|
||||
except Exception as e:
|
||||
import traceback
|
||||
print(traceback.format_exc(), flush=True)
|
||||
return jsonify({'success': False, 'error': str(e)}), 500
|
||||
|
||||
|
||||
|
||||
def parse_outline_for_frontend(outline_path: Path) -> list:
    """Parse outline_issue_report.txt into TOC items for the frontend.

    The file's first line is the report title. Every following non-blank
    line is either a level-3 heading ("1.2.3 Some title") or a topic bullet
    of the form "- topic | #kw1 #kw2 | [format] | guide text". Topic lines
    fold their hashtag keywords and guide text into the most recent heading.

    Returns a list of dicts with keys: num, title, report_title, guide,
    keywords. An empty file yields an empty list.
    """
    import re

    lines = outline_path.read_text(encoding='utf-8', errors='ignore').splitlines()
    if not lines:
        return []

    title_of_report = lines[0].strip()

    heading_re = re.compile(r'^\s*(\d+\.\d+\.\d+)\s+(.+)$')
    topic_re = re.compile(r'^\s*[\-\*]\s+(.+?)\s*\|\s*(.+?)\s*\|\s*(\[.+?\])\s*\|\s*(.+)$')
    hashtag_re = re.compile(r'(#\S+)')

    results = []
    entry = None

    for raw_line in lines[1:]:
        text = raw_line.strip()
        if not text:
            continue

        heading = heading_re.match(text)
        if heading:
            entry = {
                'num': heading.group(1),
                'title': heading.group(2),
                'report_title': title_of_report,
                'guide': '',
                'keywords': [],
            }
            results.append(entry)
            continue

        topic = topic_re.match(text)
        if topic and entry:
            # Second pipe field carries #hashtag keywords; strip the marker.
            entry['keywords'].extend(
                tag.lstrip('#').strip() for tag in hashtag_re.findall(topic.group(2))
            )
            # Fourth pipe field is guide text; join multiple topics with ' / '.
            if entry['guide']:
                entry['guide'] += ' / '
            entry['guide'] += topic.group(4)

    return results
|
||||
|
||||
def parse_briefing_plan_for_frontend(outline_path: Path) -> list:
|
||||
raw = outline_path.read_text(encoding='utf-8', errors='ignore').strip()
|
||||
raw_lines = raw.splitlines()
|
||||
if not raw_lines:
|
||||
return []
|
||||
|
||||
#
|
||||
/ 媛 ⑤
|
||||
以
|
||||
濡 寃쎌 ㅼ 以
|
||||
怨 ⑹
|
||||
merged = []
|
||||
idx = 0
|
||||
while idx < len(raw_lines):
|
||||
ln = raw_lines[idx].strip()
|
||||
if ln in ['
|
||||
', ''] and idx + 1 < len(raw_lines):
|
||||
merged.append(ln + ' ' + raw_lines[idx + 1].strip())
|
||||
idx += 2
|
||||
continue
|
||||
merged.append(raw_lines[idx])
|
||||
idx += 1
|
||||
lines = merged
|
||||
|
||||
items = []
|
||||
current_page = None
|
||||
|
||||
for line in lines:
|
||||
line = line.strip()
|
||||
if not line:
|
||||
continue
|
||||
|
||||
if '
|
||||
' in line or '' in line:
|
||||
icon = '
|
||||
' if '
|
||||
' in line else ''
|
||||
title = line.replace('
|
||||
', '').replace('', '').strip()
|
||||
# "蹂몃Ц N" "蹂몃Ц", "泥⑤ N" "泥⑤ N"
|
||||
import re as _re
|
||||
title = _re.sub(r'蹂몃Ц\s*\d*?', '蹂몃Ц', title).strip()
|
||||
title = _re.sub(r'泥⑤\s*(\d+)?', r'泥⑤ \1', title).strip()
|
||||
if '紐:' in title:
|
||||
title = title.split('紐:')[-1].strip()
|
||||
if not title:
|
||||
title = '蹂몃Ц' if icon == '
|
||||
' else '泥⑤'
|
||||
current_page = {
|
||||
'num': icon,
|
||||
'title': title,
|
||||
'guide': '',
|
||||
'keywords': [],
|
||||
'sections': []
|
||||
}
|
||||
items.append(current_page)
|
||||
|
||||
elif current_page is not None and line.strip():
|
||||
content = line.lstrip('-').strip()
|
||||
|
||||
if content.startswith(':'):
|
||||
continue
|
||||
|
||||
if content.startswith('紐:'):
|
||||
current_page['title'] = content.replace('紐:', '').strip()
|
||||
continue
|
||||
|
||||
# 由щ諛/⑤ (": " щ諛/⑤ ы⑤ 蹂
|
||||
|
||||
泥)
|
||||
import re as _re
|
||||
if _re.match(r'^由щ諛\s*:', content):
|
||||
lead_text = content.split(':', 1)[-1].strip()
|
||||
current_page['sections'].append({'label': '由щ諛', 'text': lead_text})
|
||||
continue
|
||||
if _re.match(r'^⑤\s*:', content):
|
||||
bottom_text = content.split(':', 1)[-1].strip()
|
||||
current_page['sections'].append({'label': '⑤', 'text': bottom_text})
|
||||
continue
|
||||
|
||||
if '|' in content:
|
||||
parts = [p.strip() for p in content.split('|')]
|
||||
section_name = parts[0].split(':')[-1].strip()
|
||||
comment = parts[1] if len(parts) > 1 else ''
|
||||
fmt = parts[3] if len(parts) > 3 else (parts[2] if len(parts) > 2 else '')
|
||||
current_page['sections'].append({
|
||||
'label': section_name,
|
||||
'text': comment,
|
||||
'fmt': fmt
|
||||
})
|
||||
else:
|
||||
current_page['sections'].append({'label': content, 'text': ''})
|
||||
|
||||
# guide 嫄 - sections 由ъㅽ 洹몃濡
|
||||
濡몄
|
||||
|
||||
for item in items:
|
||||
item['guide'] = '' #
|
||||
濡몄
|
||||
sections 吏 ъ
|
||||
|
||||
return items
|
||||
Reference in New Issue
Block a user