📦 Initialize Geulbeot structure and merge Prompts & test projects

2026-03-05 11:32:29 +09:00
commit 555a954458
687 changed files with 205247 additions and 0 deletions
--- a/Prompts/문서생성/codedomain/국토일보_한건신문_Python_v01.py
+++ b/Prompts/문서생성/codedomain/국토일보_한건신문_Python_v01.py
@@ -0,0 +1,13 @@
+def format_date(date_str, source):
+    try:
+        if source in ["국토일보", "한건신문"]:
+            # 기자 이름과 함께 있는 날짜 형식 처리
+            date_obj = re.search(r'\d{4}-\d{2}-\d{2}', date_str)
+            if date_obj:
+                return date_obj.group(0)
+        elif source in ["엔지니어링데일리", "건설이코노미뉴스", "공학저널"]:
+            # 기자 이름과 함께 있는 날짜 형식 처리
+            date_obj = re.search(r'\d{4}-\d{2}-\d{2}', date_str)
+            if date_obj:
+                return date_obj.group(0)
+        elif source == "연합
--- a/Prompts/문서생성/codedomain/날짜_형식을_Python_v01.py
+++ b/Prompts/문서생성/codedomain/날짜_형식을_Python_v01.py
@@ -0,0 +1,11 @@
+def format_date(date_str: str, source: str, default_year: int = 2024) -> str:
+    """Normalize a scraped date string to ``YYYY-MM-DD``.
+
+    Args:
+        date_str: Raw date text taken from an article listing.
+        source: Name of the news site the text came from.
+        default_year: Year assigned to '연합뉴스' timestamps, which carry
+            only ``MM-DD HH:MM``. Defaults to 2024, the value that was
+            previously hard-coded.
+
+    Returns:
+        The first ``YYYY-MM-DD`` substring of ``date_str`` if there is one;
+        otherwise, for '연합뉴스', the parsed month/day combined with
+        ``default_year``; otherwise ``date_str`` unchanged. Input that
+        cannot be parsed is also returned unchanged.
+    """
+    try:
+        match = re.search(r'\d{4}-\d{2}-\d{2}', date_str)
+        if match:
+            return match.group(0)
+        if source == '연합뉴스':
+            # Parse with the year included: a year-less strptime falls back
+            # to 1900, which is not a leap year, so '02-29 ...' was rejected.
+            parsed = datetime.strptime(
+                f'{default_year}-{date_str}', '%Y-%m-%d %H:%M'
+            )
+            return parsed.strftime('%Y-%m-%d')
+        return date_str
+    except (TypeError, ValueError):
+        # Best effort: hand the raw value back instead of raising.
+        return date_str
--- a/Prompts/문서생성/codedomain/다음_로우데이터를_Python_v01.py
+++ b/Prompts/문서생성/codedomain/다음_로우데이터를_Python_v01.py
@@ -0,0 +1,13 @@
+def summarize_data_for(section: str):
+    """Ask Claude for a short summary of the raw data for one section.
+
+    Every regular file directly inside ``DATA_DIR`` is read in sorted name
+    order as UTF-8 (undecodable bytes are ignored). The texts are appended
+    to a fixed Korean prompt that names ``section``.
+
+    Args:
+        section: Section title inserted into the prompt.
+
+    Returns:
+        Whatever ``call_claude`` returns for the assembled prompt.
+    """
+    texts = []
+    for name in sorted(os.listdir(DATA_DIR)):
+        # os.listdir() yields bare names, so join them with DATA_DIR;
+        # opening the bare name only worked when cwd happened to be DATA_DIR.
+        path = os.path.join(DATA_DIR, name)
+        if not os.path.isfile(path):
+            continue  # sub-directories cannot be read as text
+        with open(path, encoding="utf-8", errors="ignore") as f:
+            texts.append(f.read())
+    prompt = (
+        f"다음 로우데이터를 바탕으로 ‘{section}’ 섹션에 들어갈 핵심 사실과 수치를 200~300자로 요약해주세요.\n\n"
+        + "\n\n".join(texts)
+    )
+    return call_claude(prompt)
+
+
+# ─── 4) 이미지 자동 매핑 ─────────────────────────
--- a/Prompts/문서생성/codedomain/단위일_가능성_Python_v01.py
+++ b/Prompts/문서생성/codedomain/단위일_가능성_Python_v01.py
@@ -0,0 +1,20 @@
+def is_likely_unit(cell_val):
+    """단위일 가능성 판별 (사용자 제안 로직)"""
+    if not cell_val:
+        return False
+    val = str(cell_val).strip()
+    
+    # 1. 빈 값 또는 너무 긴 텍스트 (단위는 보통 6자 이내)
+    if not val or len(val) > 6:
+        return False
+    
+    # 2. 순수 숫자는 제외
+    cleaned = val.replace('.', '').replace(',', '').replace('-', '').replace(' ', '')
+    if cleaned.isdigit():
+        return False
+    
+    # 3. 수식은 제외
+    if val.startswith('='):
+        return False
+    
+    # 4. 일반적인 계산 기호 및 정크 기호 제외
--- a/Prompts/문서생성/codedomain/단일_기사_Python_v01.py
+++ b/Prompts/문서생성/codedomain/단일_기사_Python_v01.py
@@ -0,0 +1,12 @@
+def fetch_article_content(url: str, source: str) -> str:
+    """단일 기사 본문 추출"""
+    try:
+        resp = requests.get(url, verify=False, timeout=10)
+        resp.encoding = 'utf-8'
+        resp.raise_for_status()
+        soup = BeautifulSoup(resp.text, 'html.parser')
+        paragraphs = soup.find_all('p')
+        content = ' '.join(clean_text(p.get_text()) for p in paragraphs)
+        content = content.replace('\n', ' ')
+        if not content.strip():
+            logging.warning(f'No content for
--- a/Prompts/문서생성/codedomain/당신은_보고서_Python_v01.py
+++ b/Prompts/문서생성/codedomain/당신은_보고서_Python_v01.py
@@ -0,0 +1,8 @@
+def analyze_references():
+    """Ask GPT to infer a report style and outline from reference file names.
+
+    Only the sorted entry names of ``REF_DIR`` are sent; file contents are
+    not read.
+
+    Returns:
+        Whatever ``call_gpt`` returns for the system/user prompt pair.
+    """
+    file_listing = "\n".join(sorted(os.listdir(REF_DIR)))
+    system_prompt = "당신은 보고서 전문가입니다. 아래 파일명들을 보고, 이 프로젝트에 어울리는 보고서 스타일과 목차 구조를 요약해 주세요."
+    user_prompt = "파일 목록:\n" + file_listing
+    return call_gpt(system_prompt, user_prompt)
+
+
+# ─── 2) 가이드라인에서 필수 섹션 추출 ───────────
--- a/Prompts/문서생성/codedomain/로그_전체_Python_v01.py
+++ b/Prompts/문서생성/codedomain/로그_전체_Python_v01.py
@@ -0,0 +1,13 @@
+def run_global_reconstruction(input_file):
+    print("로그: 전체 시트 통합 데이터를 분석 중입니다...")
+    df = pd.read_excel(input_file)
+    
+    # 1. 전역 주소록 생성: (시트명, 셀위치) -> 값
+    # 예: { ('A1', 'G105'): 30.901, ('철근집계', 'C47'): 159.263 }
+    global_map = {}
+    for _, row in df.iterrows():
+        global_map[(str(row['시트명']), str(row['셀위치']))] = row['현재값']
+
+    def trace_logic(formula, current_sheet):
+        if not isinstance(formula, str) or not formula.startswith("'="):
+            return formula
--- a/Prompts/문서생성/codedomain/로그_파일을_Python_v01.py
+++ b/Prompts/문서생성/codedomain/로그_파일을_Python_v01.py
@@ -0,0 +1,17 @@
+def extract_all_contents(file_path):
+    print(f"로그: 파일을 읽는 중입니다 (전체 내용 모드)...")
+    # 수식과 값을 동시에 비교하기 위해 data_only=False로 로드
+    wb = openpyxl.load_workbook(file_path, data_only=False)
+    
+    all_content_data = []
+
+    for sheet_name in wb.sheetnames:
+        ws = wb[sheet_name]
+        print(f"\n" + "="*60)
+        print(f"▶ 시트 탐색 중: [ {sheet_name} ]")
+        print("="*60)
+
+        # 시트의 모든 셀을 하나하나 검사
+        for row in ws.iter_rows():
+            for cell in row:
+                value = ce
--- a/Prompts/문서생성/codedomain/리스트_페이지_Python_v01.py
+++ b/Prompts/문서생성/codedomain/리스트_페이지_Python_v01.py
@@ -0,0 +1,18 @@
+def fetch_articles(
+    base_url: str,
+    article_sel: str,
+    title_sel: str,
+    date_sel: str,
+    start_page: int,
+    end_page: int,
+    source: str,
+    url_prefix: str = '',
+    date_fmt_func=None
+) -> list:
+    """리스트 페이지 순회하며 메타데이터 및 본문 수집"""
+    results = []
+    for page in range(start_page, end_page + 1):
+        try:
+            page_url = f"{base_url}{page}"
+            resp = requests.get(page_url, verify=False, timeout=10)
+            soup = BeautifulSoup(resp.text, 'html.parser
--- a/Prompts/문서생성/codedomain/멀티라인_대응_Python_v01.py
+++ b/Prompts/문서생성/codedomain/멀티라인_대응_Python_v01.py
@@ -0,0 +1,11 @@
+def get_item_id_with_lookback(ws, row, col, section_start_row):
+    """Search upward from ``row`` for an item-ID marker (multi-line items).
+
+    Rows are scanned from ``row`` up to ``section_start_row`` inclusive,
+    reading column 6 (F) of ``ws``. The scan stops at the first row above
+    ``row`` whose F value is entirely wrapped in ``(...)`` or ``[...]``,
+    which is treated as the start of another section.
+
+    Returns:
+        The first ``ID_MARKER_PATTERN`` match found in column F. In the
+        lines visible here, nothing is returned explicitly when no row
+        matches (implicit ``None``).
+
+    NOTE(review): ``col`` is unused in the visible lines and the snippet may
+    be cut short - confirm against the full source. ``ws`` is presumably an
+    openpyxl worksheet; verify against the caller.
+    """
+    for r in range(row, section_start_row - 1, -1):
+        # Stop searching once a new section header is reached
+        f_val_check = str(ws.cell(row=r, column=6).value or "").strip()
+        if r != row and re.match(r'^\(.*\)$|^\[.*\]$', f_val_check):
+            break
+        
+        # Look for the ID marker in column F
+        if re.search(ID_MARKER_PATTERN, f_val_check):
+            return re.search(ID_MARKER_PATTERN, f_val_check).group()
--- a/Prompts/문서생성/codedomain/미분류_과업_Python_v01.py
+++ b/Prompts/문서생성/codedomain/미분류_과업_Python_v01.py
@@ -0,0 +1,14 @@
+def collect_app_usage(days_back):
+    server = 'localhost'
+    log_type = 'Security'
+    hand = win32evtlog.OpenEventLog(server, log_type)
+    flags = win32evtlog.EVENTLOG_BACKWARDS_READ | win32evtlog.EVENTLOG_SEQUENTIAL_READ
+
+    usage_records = []
+    cutoff_date = datetime.datetime.now() - datetime.timedelta(days=days_back)
+
+    events = True
+    while events:
+        events = win32evtlog.ReadEventLog(hand, flags, 0)
+        for ev_obj in events:
+            event_time = ev_obj.TimeGenerated
--- a/Prompts/문서생성/codedomain/법령_지침_Python_v01.py
+++ b/Prompts/문서생성/codedomain/법령_지침_Python_v01.py
@@ -0,0 +1,11 @@
+def extract_must_have_sections():
+    """Ask GPT which sections the guideline documents make mandatory.
+
+    Every regular file directly inside ``GUIDELINE_DIR`` is read in sorted
+    name order as UTF-8 (undecodable bytes are ignored). The texts are
+    joined with a ``---`` separator line and sent as the user message.
+
+    Returns:
+        The ``call_gpt`` reply split into lines.
+    """
+    texts = []
+    for name in sorted(os.listdir(GUIDELINE_DIR)):
+        # os.listdir() yields bare names, so join them with GUIDELINE_DIR;
+        # opening the bare name only worked when cwd was that directory.
+        path = os.path.join(GUIDELINE_DIR, name)
+        if not os.path.isfile(path):
+            continue  # sub-directories cannot be read as text
+        with open(path, encoding="utf-8", errors="ignore") as f:
+            texts.append(f.read())
+    # Named system_prompt rather than `sys` to avoid shadowing the stdlib
+    # module name.
+    system_prompt = "법령·지침 문서를 바탕으로, 보고서에 반드시 들어가야 할 섹션(목차)을 순서대로 나열해 주세요."
+    user_prompt = "\n\n---\n\n".join(texts)
+    return call_gpt(system_prompt, user_prompt).splitlines()
+
+
+# ─── 3) 로우데이터에서 섹션별 내용 뽑기 ───────────
--- a/Prompts/문서생성/codedomain/보고서_섹션에_Python_v01.py
+++ b/Prompts/문서생성/codedomain/보고서_섹션에_Python_v01.py
@@ -0,0 +1,16 @@
+def pick_images_for(section: str):
+    """Ask GPT to recommend images for a report section.
+
+    The sorted entry names of ``IMAGE_DIR`` are listed in the prompt. Each
+    line of the reply is stripped and kept only if it exactly equals one of
+    those names.
+
+    Args:
+        section: Section title inserted into the prompt.
+
+    Returns:
+        The accepted file names, in the order they appear in the reply.
+    """
+    candidates = sorted(os.listdir(IMAGE_DIR))
+    request = (
+        f"보고서 ‘{section}’ 섹션에 적합한 이미지를 아래 목록에서 1~2개 추천해 파일명만 리턴하세요:\n"
+        + "\n".join(candidates)
+    )
+    reply = call_gpt("당신은 디자인 어시스턴트입니다.", request)
+    stripped_lines = (raw.strip() for raw in reply.splitlines())
+    return [fn for fn in stripped_lines if fn in candidates]
+
+
+# ─── 5) 디자인 템플릿 선택 ───────────────────────
--- a/Prompts/문서생성/codedomain/사이트별_함수_Python_v01.py
+++ b/Prompts/문서생성/codedomain/사이트별_함수_Python_v01.py
@@ -0,0 +1,13 @@
+class SslAdapter(HTTPAdapter):
+    """HTTP adapter whose connection pool uses a custom TLS context.
+
+    The context is the standard default one with its cipher string set to
+    ``DEFAULT:@SECLEVEL=1``.
+    NOTE(review): presumably needed for servers with legacy TLS settings -
+    confirm before reusing elsewhere.
+    """
+
+    def init_poolmanager(self, *args, **kwargs):
+        """Build the pool manager, forwarding all arguments plus the context."""
+        tls_context = ssl.create_default_context()
+        tls_context.set_ciphers('DEFAULT:@SECLEVEL=1')
+        manager = PoolManager(*args, ssl_context=tls_context, **kwargs)
+        self.poolmanager = manager
+
+# Shared session: every https:// URL is routed through SslAdapter.
+session = requests.Session()
+session.mount('https://', SslAdapter())
+
+# Header set defined next to the session; it is not attached to the session
+# here.
+headers = {
+    'User-Agent': 'Mozilla/5.0',
+    'Accept-Language': 'ko-KR,ko;q=0.9',
+}
+
+# -------------------------------------------------
+# 사이트별 함수 (대한경제 제외)
+# -----------------------------------
--- a/Prompts/문서생성/codedomain/설명이_없습니다_Python_v01.py
+++ b/Prompts/문서생성/codedomain/설명이_없습니다_Python_v01.py
@@ -0,0 +1,7 @@
+def get_detail_content(detail_url):
+    """Fetch a detail page and return the text of its description heading.
+
+    Args:
+        detail_url: URL of the detail page.
+
+    Returns:
+        The stripped text of the first ``<h2>`` inside the first ``<div>``
+        that carries a ``data-v-5cb2d9fe`` attribute, or the placeholder
+        "설명이 없습니다." when no such element exists.
+
+    Raises:
+        requests.RequestException: On network failure or timeout; nothing
+            is caught here.
+    """
+    # A timeout keeps one stalled server from blocking the caller forever;
+    # 10 s matches the other fetch helpers in this file.
+    res = requests.get(detail_url, timeout=10)
+    soup = BeautifulSoup(res.text, 'html.parser')
+    div = soup.find('div', {'data-v-5cb2d9fe': True})
+    # Look the heading up once instead of searching the div twice.
+    heading = div.find('h2') if div is not None else None
+    if heading is not None:
+        return heading.text.strip()
+    return "설명이 없습니다."
--- a/Prompts/문서생성/codedomain/설정_브라우저가_Python_v01.py
+++ b/Prompts/문서생성/codedomain/설정_브라우저가_Python_v01.py
@@ -0,0 +1,11 @@
+def fetch_dnews_articles(base_url, start_page, end_page):
+    # Selenium WebDriver 설정
+    options = webdriver.ChromeOptions()
+    options.add_argument('--headless')  # 브라우저가 뜨지 않게 설정
+    options.add_argument('--no-sandbox')
+    options.add_argument('--disable-dev-shm-usage')
+
+    # ChromeDriver 경로 설정
+    chromedriver_path = 'D:/python_for crawling/webdriver/chromedriver-win64/chromedriver.exe'  # ChromeDriver 경로 설정
+    service = ChromeService(executable_path=chromedriver_path)
+    driver = webdr
--- a/Prompts/문서생성/codedomain/수식_자체가_Python_v01.py
+++ b/Prompts/문서생성/codedomain/수식_자체가_Python_v01.py
@@ -0,0 +1,17 @@
+def extract_raw_constants(file_path):
+    # 수식 자체가 아닌 입력된 값을 확인하기 위해 로드
+    print(f"로그: 파일을 읽는 중입니다...")
+    wb = openpyxl.load_workbook(file_path, data_only=False)
+    
+    raw_data = []
+
+    for sheet_name in wb.sheetnames:
+        ws = wb[sheet_name]
+        print(f"\n" + "="*50)
+        print(f"▶ [ {sheet_name} ] 시트의 원천 데이터(상수) 추출 시작")
+        print("="*50)
+
+        for row in ws.iter_rows():
+            for cell in row:
+                value = cell.value
+                coord = cell.coordin
--- a/Prompts/문서생성/codedomain/수식_주소를_Python_v01.py
+++ b/Prompts/문서생성/codedomain/수식_주소를_Python_v01.py
@@ -0,0 +1,11 @@
+def reconstruct_formula(formula, wb_v, sheet_name):
+    """수식 내 셀 주소를 실제 값으로 치환 및 기호 가독화"""
+    if not formula or not str(formula).startswith('='): return str(formula)
+    ref_pattern = r"(?:'([^']+)'|([a-zA-Z0-9가-힣]+))?!([A-Z]+\d+)|([A-Z]+\d+)"
+    
+    def replace_with_value(match):
+        s_name = match.group(1) or match.group(2) or sheet_name
+        coord = match.group(3) or match.group(4)
+        try:
+            val = wb_v[s_name][coord].value
+            if val is None: return "0"
--- a/Prompts/문서생성/codedomain/수식을_가져오기_Python_v01.py
+++ b/Prompts/문서생성/codedomain/수식을_가져오기_Python_v01.py
@@ -0,0 +1,14 @@
+def extract_excel_logic(file_path):
+    # 1. 수식을 가져오기 위한 로드 (data_only=False)
+    print(f"로그: 파일을 읽는 중입니다 (수식 모드)...")
+    wb_formula = openpyxl.load_workbook(file_path, data_only=False)
+    
+    # 2. 결과값을 가져오기 위한 로드 (data_only=True)
+    print(f"로그: 파일을 읽는 중입니다 (데이터 모드)...")
+    wb_value = openpyxl.load_workbook(file_path, data_only=True)
+    
+    extraction_data = []
+
+    for sheet_name in wb_formula.sheetnames:
+        ws_f = wb_formula[sheet_name]
+        ws_v = wb_value[sheet_name]
--- a/Prompts/문서생성/codedomain/아래_디자인_Python_v01.py
+++ b/Prompts/문서생성/codedomain/아래_디자인_Python_v01.py
@@ -0,0 +1,11 @@
+def choose_design_template():
+    """Ask GPT to rank the design samples and return up to three of them.
+
+    The sorted entry names of ``DESIGN_DIR`` are listed in the prompt. Reply
+    lines are stripped and kept only if they exactly equal one of those
+    names.
+
+    Returns:
+        At most the first three accepted file names, in reply order.
+    """
+    samples = sorted(os.listdir(DESIGN_DIR))
+    instruction = (
+        "아래 디자인 샘플 파일들 중 이 보고서에 어울리는 상위 3안(1안,2안,3안)을 "
+        "순서대로 파일명만으로 알려주세요:\n"
+    )
+    reply = call_gpt("디자인 전문가입니다.", instruction + "\n".join(samples))
+    chosen = []
+    for raw in reply.splitlines():
+        candidate = raw.strip()
+        if candidate in samples:
+            chosen.append(candidate)
+    return chosen[:3]
+
+
+# ─── PPT 생성 ────────────────────────────────────
--- a/Prompts/문서생성/codedomain/엔지니어링데일리_기자_Python_v01.py
+++ b/Prompts/문서생성/codedomain/엔지니어링데일리_기자_Python_v01.py
@@ -0,0 +1,13 @@
+def clean_text(text):
+    replacements = {
+        '&nbsp;': ' ', '&lsquo;': "'", '&rsquo;': "'", '&ldquo;': '"', '&rdquo;': '"',
+        '&amp;': '&', '&lt;': '<', '&gt;': '>', '&#39;': "'",
+        '&quot;' : "'", '&middot;': "'"
+    }
+    
+    for entity, replacement in replacements.items():
+        text = text.replace(entity, replacement)
+    
+    text = re.sub(r'<[^>]+>', '', text)
+    text = re.sub(r'\(엔지니어링데일리\).*?기자=', '', text)  # (엔지니어링데일리) *** 기자= 패턴 삭제
+    text = re.sub(r'\[국토일보\s.*?
--- a/Prompts/문서생성/codedomain/엔티티_불필요한_Python_v01.py
+++ b/Prompts/문서생성/codedomain/엔티티_불필요한_Python_v01.py
@@ -0,0 +1,9 @@
+def clean_text(text: str) -> str:
+    """Replace a fixed set of HTML entities, then drop tags and trim.
+
+    The entities are substituted one after another in the order listed
+    below. Afterwards anything shaped like ``<...>`` is removed and
+    surrounding whitespace is stripped.
+
+    Args:
+        text: Raw HTML fragment.
+
+    Returns:
+        The cleaned plain text.
+    """
+    entity_pairs = (
+        ('&nbsp;', ' '),
+        ('&lsquo;', "'"),
+        ('&rsquo;', "'"),
+        ('&ldquo;', '"'),
+        ('&rdquo;', '"'),
+        ('&amp;', '&'),
+        ('&lt;', '<'),
+        ('&gt;', '>'),
+        ('&#39;', "'"),
+        ('&quot;', "'"),
+        ('&middot;', "'"),
+    )
+    cleaned = text
+    for entity, plain in entity_pairs:
+        cleaned = cleaned.replace(entity, plain)
+    without_tags = re.sub(r'<[^>]+>', '', cleaned)
+    return without_tags.strip()
--- a/Prompts/문서생성/codedomain/인증서_검증_Python_v01.py
+++ b/Prompts/문서생성/codedomain/인증서_검증_Python_v01.py
@@ -0,0 +1,11 @@
+def fetch_article_content(article_url, source):
+    try:
+        response = requests.get(article_url, verify=False, timeout=10)  # SSL 인증서 검증 비활성화 및 타임아웃 설정
+        response.encoding = 'utf-8'  # 인코딩 설정
+        response.raise_for_status()
+        soup = BeautifulSoup(response.text, 'html.parser')
+        paragraphs = soup.find_all('p')
+        content = ' '.join([clean_text(p.get_text()) for p in paragraphs])
+        
+        # 텍스트 내의 엔터키를 스페이스로 대체
+        content = content.replace('\n', ' ')
--- a/Prompts/문서생성/codedomain/카테고리_내용_Python_v01.py
+++ b/Prompts/문서생성/codedomain/카테고리_내용_Python_v01.py
@@ -0,0 +1,14 @@
+def get_category_and_content(detail_url):
+    res = requests.get(detail_url)
+    soup = BeautifulSoup(res.text, 'html.parser')
+
+    # 카테고리
+    category_tags = soup.select('ul.flex.flex-row.flex-wrap.gap-2 li a')
+    categories = [tag['href'].split('/')[-2] for tag in category_tags]
+
+    # 내용 
+    content_div = soup.select_one('div.content-base.workflow-description.text-md')
+    if content_div:
+        content_text = content_div.get_text(separator=' ', strip=True)
+    else:
+        content_text =
--- a/Prompts/문서생성/codedomain/커버_슬라이드_Python_v01.py
+++ b/Prompts/문서생성/codedomain/커버_슬라이드_Python_v01.py
@@ -0,0 +1,14 @@
+def build_ppt(sections, images_map, templates):
+    prs = Presentation()
+    prs.slide_width, prs.slide_height = Inches(8.27), Inches(11.69)  # A4
+
+    # 커버 슬라이드
+    slide = prs.slides.add_slide(prs.slide_layouts[6])
+    tb = slide.shapes.add_textbox(Inches(1), Inches(2), Inches(6.27), Inches(2))
+    p = tb.text_frame.paragraphs[0]
+    p.text = "🚀 자동 보고서"
+    p.font.size = Pt(26); p.font.bold = True
+
+    # 본문 슬라이드
+    for sec in sections:
+        slide = prs.slides.add_slide(prs.slide_layouts[6]
--- a/Prompts/문서생성/codedomain/합계_기준_Python_v01.py
+++ b/Prompts/문서생성/codedomain/합계_기준_Python_v01.py
@@ -0,0 +1,15 @@
+def find_unit_from_sum_cell(ws, sum_row, max_col):
+    """Locate a unit label by scanning from a total cell.
+
+    Columns are visited from ``max_col`` down to 1; within each column,
+    rows are visited from ``sum_row`` up to 1. The first cell value accepted
+    by ``is_likely_unit`` wins. Major-category boundaries are ignored: only
+    the position of the total row drives the search.
+
+    Returns:
+        The accepted value as a stripped string, or "" if no cell qualifies.
+    """
+    # Right-most column first, moving left
+    for col_idx in reversed(range(1, max_col + 1)):
+        # From the total row upward
+        for row_idx in reversed(range(1, sum_row + 1)):
+            candidate = ws.cell(row=row_idx, column=col_idx).value
+            if not is_likely_unit(candidate):
+                continue
+            return str(candidate).strip()
+
+    return ""