Improve preserved slide loop for run-001

This commit is contained in:
2026-04-02 15:38:43 +09:00
parent 01a3ad51b9
commit 37ae72a63e
21 changed files with 329 additions and 229 deletions

View File

@@ -3,6 +3,7 @@
import argparse
import asyncio
import json
import re
import sys
from pathlib import Path
@@ -94,7 +95,9 @@ def _stage_1b(ctx: PipelineContext, stage1b: dict) -> PipelineContext:
for raw in ctx.topics:
merged = raw.model_dump()
if raw.id in refined_map:
merged.update(refined_map[raw.id])
refined = dict(refined_map[raw.id])
refined.pop('source_data', None)
merged.update(refined)
topics.append(Topic(**merged))
ctx.topics = topics
ctx.save_snapshot('stage_1b')
@@ -221,6 +224,71 @@ def _topic(ctx: PipelineContext, topic_id: int) -> Topic | None:
return next((t for t in ctx.topics if t.id == topic_id), None)
def compact_text(text: str, max_len: int) -> str:
    """Collapse whitespace in *text* and shorten it to roughly *max_len* chars.

    Runs of whitespace are replaced by a single space and the result is
    stripped. If the collapsed text already fits in *max_len* characters it
    is returned unchanged. Otherwise the text is cut at *max_len*, backed up
    to the previous word boundary when the cut lands in the middle of a word,
    stripped of trailing ``" ,.;:"`` characters, and suffixed with ``"..."``.

    Args:
        text: Arbitrary input text.
        max_len: Character budget for the kept text. Negative values are
            treated as ``0``.

    Returns:
        The collapsed text, possibly shortened. Because the ellipsis is
        appended after cutting, a shortened result can be up to three
        characters longer than *max_len*.
    """
    normalized = re.sub(r"\s+", " ", text).strip()
    # A negative budget would otherwise slice from the END of the string.
    limit = max(max_len, 0)
    if len(normalized) <= limit:
        return normalized

    window = normalized[:limit]
    # len(normalized) > limit here, so normalized[limit] always exists.
    # Only back up to the previous space when the cut splits a word; if the
    # next character is a space the window already ends on a whole word and
    # dropping it would lose text for no reason.
    if normalized[limit] != " ":
        head = window.rsplit(" ", 1)[0].strip()
        # No space inside the window -> keep the hard cut instead of nothing.
        window = head or window
    return window.rstrip(" ,.;:") + "..."
def preserve_80_percent(text: str, floor: int = 80, ceiling: int = 180) -> int:
    """Return a character budget of 80% of the collapsed length of *text*.

    Whitespace runs are collapsed to one space and the text is stripped
    before measuring. The 80% figure (truncated to an int) is capped at
    *ceiling* and then raised to at least *floor*; *floor* therefore wins if
    the two bounds conflict. Blank input yields *floor*.
    """
    collapsed_len = len(re.sub(r"\s+", " ", text).strip())
    if collapsed_len == 0:
        return floor

    budget = int(collapsed_len * 0.8)
    # Apply the cap first, then the floor, so the floor has the last word.
    capped = budget if budget < ceiling else ceiling
    return capped if capped > floor else floor
def _prefer_source_text(topic: Topic | None, fallback: str) -> str:
if not topic:
return fallback
source = re.sub(r"\s+", " ", (topic.source_data or "")).strip()
if source and len(source) >= max(80, len(fallback)):
return source
summary = re.sub(r"\s+", " ", (topic.summary or "")).strip()
if source and len(source) >= 40:
return source
if summary:
return summary
return fallback
def _trim_visible_copy(text: str, floor: int = 120, ceiling: int = 320) -> str:
normalized = re.sub(r"\s+", " ", text).strip()
if not normalized:
return ""
max_len = preserve_80_percent(normalized, floor=floor, ceiling=ceiling)
return compact_text(normalized, max_len)
def _extract_sentence(text: str, keyword: str, fallback: str) -> str:
normalized = re.sub(r"\s+", " ", text).strip()
if not normalized:
return fallback
parts = re.split(r"(?<=[.!?])\s+", normalized)
for part in parts:
if keyword in part:
return part.strip()
return fallback
def _extract_multiple_sentences(text: str, keywords: list[str], fallback: str, limit: int = 2) -> str:
normalized = re.sub(r"\s+", " ", text).strip()
if not normalized:
return fallback
parts = [p.strip() for p in re.split(r"(?<=[.!?])\s+", normalized) if p.strip()]
selected: list[str] = []
for keyword in keywords:
for part in parts:
if keyword in part and part not in selected:
selected.append(part)
break
if len(selected) >= limit:
break
if selected:
return " ".join(selected[:limit])
return fallback
def _build_stage2_retry_html(ctx: PipelineContext, retry_plan: dict) -> dict:
title = ctx.analysis.title
@@ -231,12 +299,18 @@ def _build_stage2_retry_html(ctx: PipelineContext, retry_plan: dict) -> dict:
conclusion_topic = _topic(ctx, 6)
dx_topic = _topic(ctx, 2)
problem_text = problem_topic.summary if problem_topic and problem_topic.summary else '건설산업 디지털 전환 논의에서 DX와 BIM이 혼용되며 BIM 도입을 DX 완성으로 오인하는 문제가 발생하고 있다.'
relation_text = relation_topic.summary if relation_topic and relation_topic.summary else 'DX와 GIS, BIM, Digital Twin의 관계를 시각적으로 드러낸다.'
evidence_text = evidence_topic.summary if evidence_topic and evidence_topic.summary else '정책 문서에서도 DX와 BIM이 혼용되며 이를 바로잡을 필요가 있다.'
dx_text = dx_topic.summary if dx_topic and dx_topic.summary else 'DX는 상위 개념이고 BIM은 이를 실행하는 핵심 기술이다.'
compare_text = comparison_topic.summary if comparison_topic and comparison_topic.summary else '범위·프로세스·성과품·확장성의 4개 비교축으로 DX와 BIM 차이를 짧고 직접적으로 보여준다.'
conclusion_text = conclusion_topic.summary if conclusion_topic and conclusion_topic.summary else '결론: BIM은 건설산업 DX를 수행하는 과정의 가장 기초가 되는 일부분이다.'
problem_text = _trim_visible_copy(_prefer_source_text(problem_topic, '건설산업 디지털 전환 논의에서 DX와 BIM이 혼용되며 BIM 도입을 DX 완성으로 오인하는 문제가 발생하고 있다.'), floor=120, ceiling=260)
relation_source = _prefer_source_text(relation_topic, 'DX와 GIS, BIM, Digital Twin의 관계를 시각적으로 드러낸다.')
relation_text = _trim_visible_copy(relation_source, floor=110, ceiling=180)
gis_line = _trim_visible_copy(_extract_sentence(relation_source, 'GIS', 'GIS는 공간 분석과 위치 기반 정보를 제공한다.'), floor=60, ceiling=140)
bim_line = _trim_visible_copy(_extract_sentence(relation_source, 'BIM', 'BIM은 형상정보와 내용정보를 함께 다루는 핵심 인프라 기술이다.'), floor=60, ceiling=160)
evidence_source = _prefer_source_text(evidence_topic, '정책 문서에서도 DX와 BIM이 혼용되며 이를 바로잡을 필요가 있다.')
evidence_text = _trim_visible_copy(evidence_source, floor=90, ceiling=170)
dx_source = _prefer_source_text(dx_topic, 'DX는 상위 개념이고 BIM은 이를 실행하는 핵심 기술이다.')
dx_text = _trim_visible_copy(dx_source, floor=110, ceiling=220)
compare_source = _prefer_source_text(comparison_topic, '범위·프로세스·성과품·확장성의 4개 비교축으로 DX와 BIM 차이를 짧고 직접적으로 보여준다.')
compare_text = _trim_visible_copy(compare_source, floor=90, ceiling=120)
conclusion_text = _trim_visible_copy(_prefer_source_text(conclusion_topic, '결론: BIM은 건설산업 DX를 수행하는 과정의 가장 기초가 되는 일부분이다.'), floor=70, ceiling=180)
problem_title = problem_topic.title if problem_topic and problem_topic.title else 'DX와 BIM의 혼용 문제'
dx_title = dx_topic.title if dx_topic and dx_topic.title else 'DX의 정의와 위치'
relation_title = relation_topic.title if relation_topic and relation_topic.title else 'BIM과 핵심기술의 관계'
@@ -244,13 +318,13 @@ def _build_stage2_retry_html(ctx: PipelineContext, retry_plan: dict) -> dict:
evidence_title = evidence_topic.title if evidence_topic and evidence_topic.title else '정책 혼용 사례'
body_html = f"""
<div style="width:100%; height:100%; box-sizing:border-box; font-family:'Segoe UI',sans-serif; color:#0f172a; display:flex; flex-direction:column; gap:10px;">
<div style="background:linear-gradient(135deg,#fff7ed 0%,#ffedd5 100%); border:1px solid #fdba74; border-radius:12px; padding:12px 14px;">
<div style="width:100%; height:100%; box-sizing:border-box; font-family:'Segoe UI',sans-serif; color:#0f172a; display:flex; flex-direction:column; gap:6px;">
<div style="background:linear-gradient(135deg,#fff7ed 0%,#ffedd5 100%); border:1px solid #fdba74; border-radius:12px; padding:8px 10px;">
<div style="font-size:11px; font-weight:800; color:#c2410c; margin-bottom:4px;">{problem_title}</div>
<div style="font-size:10px; line-height:1.55; color:#7c2d12;">{problem_text}</div>
</div>
<div class="relation-diagram-card" style="background:#ffffff; border:1px solid #cbd5e1; border-radius:14px; padding:14px 16px; box-sizing:border-box; display:flex; flex-direction:column; gap:10px;">
<div class="relation-diagram-card" style="background:#ffffff; border:1px solid #cbd5e1; border-radius:14px; padding:14px 16px; box-sizing:border-box; display:flex; flex-direction:column; gap:6px;">
<div style="display:flex; justify-content:space-between; align-items:flex-start; gap:12px;">
<div>
<div style="font-size:12px; font-weight:800; color:#1e40af; margin-bottom:4px;">{dx_title}</div>
@@ -259,55 +333,55 @@ def _build_stage2_retry_html(ctx: PipelineContext, retry_plan: dict) -> dict:
<div style="font-size:10px; color:#166534; background:#dcfce7; border:1px solid #86efac; border-radius:999px; padding:4px 8px; white-space:nowrap;">[그림 1] DX와 핵심기술간 상호관계</div>
</div>
<div style="display:grid; grid-template-columns:220px 1fr; gap:14px; align-items:start;">
<div style="background:#f8fafc; border:1px solid #dbeafe; border-radius:14px; padding:12px; box-sizing:border-box;">
<div style="display:grid; grid-template-columns:198px 1fr; gap:8px; align-items:start;">
<div style="background:#f8fafc; border:1px solid #dbeafe; border-radius:14px; padding:10px; box-sizing:border-box;">
<div style="display:flex; align-items:center; justify-content:center; gap:8px; margin-bottom:8px;">
<div style="min-width:72px; text-align:center; background:#1d4ed8; color:#ffffff; border-radius:999px; padding:8px 12px; font-size:14px; font-weight:800;">DX</div>
<div style="min-width:68px; text-align:center; background:#1d4ed8; color:#ffffff; border-radius:999px; padding:8px 12px; font-size:14px; font-weight:800;">DX</div>
<div style="font-size:14px; color:#94a3b8;">→</div>
</div>
<div style="display:grid; grid-template-columns:1fr 1fr; gap:8px;">
<div style="background:#ffffff; border:1px solid #cbd5e1; border-radius:10px; padding:10px; text-align:center; font-size:11px; font-weight:700;">GIS</div>
<div style="background:#dbeafe; border:2px solid #3b82f6; border-radius:10px; padding:10px; text-align:center; font-size:11px; font-weight:800; color:#1d4ed8;">BIM</div>
<div style="grid-column:1 / span 2; background:#ffffff; border:1px solid #cbd5e1; border-radius:10px; padding:10px; text-align:center; font-size:11px; font-weight:700;">Digital Twin</div>
<div style="background:#ffffff; border:1px solid #cbd5e1; border-radius:10px; padding:8px; text-align:center; font-size:10px; font-weight:700;">GIS</div>
<div style="background:#dbeafe; border:2px solid #3b82f6; border-radius:10px; padding:8px; text-align:center; font-size:10px; font-weight:800; color:#1d4ed8;">BIM</div>
<div style="grid-column:1 / span 2; background:#ffffff; border:1px solid #cbd5e1; border-radius:10px; padding:8px; text-align:center; font-size:10px; font-weight:700;">Digital Twin</div>
</div>
</div>
<div style="display:flex; flex-direction:column; gap:8px;">
<div style="background:#f8fafc; border:1px solid #e2e8f0; border-radius:12px; padding:10px 12px;">
<div style="display:flex; flex-direction:column; gap:6px;">
<div style="background:#f8fafc; border:1px solid #e2e8f0; border-radius:12px; padding:8px 10px;">
<div style="font-size:11px; font-weight:800; color:#0f172a; margin-bottom:4px;">{relation_title}</div>
<div style="font-size:10px; line-height:1.55; color:#334155;">{relation_text}</div>
</div>
<div style="display:grid; grid-template-columns:1fr 1fr; gap:8px;">
<div style="background:#ffffff; border:1px solid #cbd5e1; border-radius:10px; padding:9px 10px;">
<div style="background:#ffffff; border:1px solid #cbd5e1; border-radius:10px; padding:8px 9px;">
<div style="font-size:10px; font-weight:800; color:#0f172a; margin-bottom:3px;">GIS 역할</div>
<div style="font-size:9px; line-height:1.5; color:#475569;">공간 분석과 위치 기반 정보를 제공한다.</div>
<div style="font-size:8px; line-height:1.45; color:#475569;">{gis_line}</div>
</div>
<div style="background:#ffffff; border:1px solid #cbd5e1; border-radius:10px; padding:9px 10px;">
<div style="background:#ffffff; border:1px solid #cbd5e1; border-radius:10px; padding:8px 9px;">
<div style="font-size:10px; font-weight:800; color:#0f172a; margin-bottom:3px;">BIM 역할</div>
<div style="font-size:9px; line-height:1.5; color:#475569;">형상정보와 내용정보를 함께 다루는 핵심 인프라 기술이다.</div>
<div style="font-size:8px; line-height:1.45; color:#475569;">{bim_line}</div>
</div>
</div>
</div>
</div>
</div>
<div class="comparison-summary-card" style="background:#eff6ff; border:1px solid #bfdbfe; border-radius:12px; padding:10px 12px; box-sizing:border-box; display:grid; grid-template-columns:126px 1fr; gap:10px;">
<div class="comparison-summary-card" style="background:#eff6ff; border:1px solid #bfdbfe; border-radius:12px; padding:8px 9px; box-sizing:border-box; display:grid; grid-template-columns:96px 1fr; gap:7px;">
<div>
<div style="font-size:11px; font-weight:800; color:#1d4ed8; margin-bottom:4px;">{comparison_title}</div>
<div style="font-size:9px; line-height:1.5; color:#475569;">{compare_text}</div>
<div style="font-size:8px; line-height:1.45; color:#475569;">{compare_text}</div>
</div>
<div style="display:grid; grid-template-columns:1fr 1fr; gap:8px; font-size:9px; line-height:1.45; color:#334155;">
<div style="background:#ffffff; border:1px solid #cbd5e1; border-radius:10px; padding:8px 10px;"><span style="font-weight:800; color:#0f172a;">범위</span><br>DX는 BIM을 포함하는 상위 개념</div>
<div style="background:#ffffff; border:1px solid #cbd5e1; border-radius:10px; padding:8px 10px;"><span style="font-weight:800; color:#0f172a;">프로세스</span><br>DX는 근본적 개선, BIM은 기존 2D 연장</div>
<div style="background:#ffffff; border:1px solid #cbd5e1; border-radius:10px; padding:8px 10px;"><span style="font-weight:800; color:#0f172a;">성과품</span><br>DX는 공학 정보 연계, BIM은 3D 모델 중심</div>
<div style="background:#ffffff; border:1px solid #cbd5e1; border-radius:10px; padding:8px 10px;"><span style="font-weight:800; color:#0f172a;">확장성</span><br>DX는 전 생애주기, BIM은 분야별 단절 위험</div>
<div style="display:grid; grid-template-columns:1fr 1fr; gap:8px; font-size:8px; line-height:1.35; color:#334155;">
<div style="background:#ffffff; border:1px solid #cbd5e1; border-radius:10px; padding:7px 8px;"><span style="font-weight:800; color:#0f172a;">범위</span><br>DX는 BIM을 포함하는 상위 개념</div>
<div style="background:#ffffff; border:1px solid #cbd5e1; border-radius:10px; padding:7px 8px;"><span style="font-weight:800; color:#0f172a;">프로세스</span><br>DX는 근본적 개선, BIM은 기존 2D 연장</div>
<div style="background:#ffffff; border:1px solid #cbd5e1; border-radius:10px; padding:7px 8px;"><span style="font-weight:800; color:#0f172a;">성과품</span><br>DX는 공학 정보 연계, BIM은 3D 모델 중심</div>
<div style="background:#ffffff; border:1px solid #cbd5e1; border-radius:10px; padding:7px 8px;"><span style="font-weight:800; color:#0f172a;">확장성</span><br>DX는 전 생애주기, BIM은 분야별 단절 위험</div>
</div>
</div>
</div>
""".strip()
sidebar_html = f"""
<div style="width:100%; height:100%; box-sizing:border-box; font-family:'Segoe UI',sans-serif; display:flex; flex-direction:column; gap:8px;">
<div style="background:#ffffff; border:1px solid #cbd5e1; border-radius:12px; padding:12px 14px;">
<div style="width:100%; height:100%; box-sizing:border-box; font-family:'Segoe UI',sans-serif; display:flex; flex-direction:column; gap:6px;">
<div style="background:#ffffff; border:1px solid #cbd5e1; border-radius:12px; padding:8px 10px;">
<div style="font-size:11px; font-weight:800; color:#1e293b; margin-bottom:8px;">용어 정의</div>
<div style="display:grid; grid-template-columns:72px 1fr; row-gap:8px; column-gap:10px; align-items:start; font-size:9px; line-height:1.5; color:#334155;">
<div style="font-weight:800; color:#0f172a;">건설산업</div>
@@ -318,7 +392,7 @@ def _build_stage2_retry_html(ctx: PipelineContext, retry_plan: dict) -> dict:
<div>디지털 기술 기반으로 업무방식과 가치구조를 전환하는 상위 개념</div>
</div>
</div>
<div style="background:#fff7ed; border:1px solid #fdba74; border-radius:12px; padding:12px 14px; box-sizing:border-box;">
<div style="background:#fff7ed; border:1px solid #fdba74; border-radius:12px; padding:8px 10px; box-sizing:border-box;">
<div style="font-size:11px; font-weight:800; color:#c2410c; margin-bottom:5px;">{evidence_title}</div>
<div style="font-size:10px; line-height:1.55; color:#7c2d12;">{evidence_text}</div>
</div>
@@ -326,8 +400,8 @@ def _build_stage2_retry_html(ctx: PipelineContext, retry_plan: dict) -> dict:
""".strip()
footer_html = f"""
<div style="background:linear-gradient(135deg, #006aff 0%, #00aaff 100%); border-radius:10px; padding:14px 24px; text-align:center; color:#ffffff; width:100%; height:60px; display:flex; align-items:center; justify-content:center; box-sizing:border-box;">
<div style="font-size:13px; font-weight:800; line-height:1.35;">{conclusion_text}</div>
<div style="background:linear-gradient(135deg, #006aff 0%, #00aaff 100%); border-radius:10px; padding:10px 20px; text-align:center; color:#ffffff; width:100%; height:52px; display:flex; align-items:center; justify-content:center; box-sizing:border-box;">
<div style="font-size:12px; font-weight:800; line-height:1.35;">{conclusion_text}</div>
</div>
""".strip()