v8:문서유형 분석등록 및 추출_20260206

This commit is contained in:
2026-02-20 11:46:52 +09:00
parent db6532b33c
commit c3e9e29205
57 changed files with 22138 additions and 1421 deletions

View File

@@ -776,8 +776,8 @@ class SurveyingFileConverter:
if __name__ == "__main__":
# 경로 설정
-SOURCE_DIR = r"D:\for python\테스트 중(측량)\측량_GIS_드론 관련 자료들"
-OUTPUT_DIR = r"D:\for python\테스트 중(측량)\추출"
+SOURCE_DIR = r"D:\for python\geulbeot-light\geulbeot-light\00.test\hwpx\in"
+OUTPUT_DIR = r"D:\for python\geulbeot-light\geulbeot-light\00.test\hwpx\out"
# 변환기 실행
converter = SurveyingFileConverter(SOURCE_DIR, OUTPUT_DIR)

View File

@@ -27,8 +27,8 @@ except ImportError:
print("[INFO] pytesseract 미설치 - 텍스트 잘림 필터 비활성화")
# ===== 경로 설정 =====
-BASE_DIR = Path(r"D:\for python\survey_test\extract") # PDF 원본 위치
-OUTPUT_BASE = Path(r"D:\for python\survey_test\process") # 출력 위치
+BASE_DIR = Path(r"D:\for python\geulbeot-light\geulbeot-light\00.test\hwpx\out") # PDF 원본 위치
+OUTPUT_BASE = Path(r"D:\for python\geulbeot-light\geulbeot-light\00.test\hwpx\out\out") # 출력 위치
CAPTION_PATTERN = re.compile(
r'^\s*(?:[<\[\(\{]\s*)?(그림|figure|fig)\s*\.?\s*(?:[<\[\(\{]\s*)?0*\d+(?:\s*[-]\s*\d+)?',

View File

@@ -29,8 +29,8 @@ from api_config import API_KEYS
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
# ===== 경로 설정 =====
-DATA_ROOT = Path(r"D:\for python\survey_test\extract")
-OUTPUT_ROOT = Path(r"D:\for python\survey_test\output")
+DATA_ROOT = Path(r"D:\for python\geulbeot-light\geulbeot-light\00.test\hwpx\out")
+OUTPUT_ROOT = Path(r"D:\for python\geulbeot-light\geulbeot-light\00.test\hwpx\out\out") # 출력 위치
CONTEXT_DIR = OUTPUT_ROOT / "context"
LOG_DIR = OUTPUT_ROOT / "logs"

View File

@@ -26,8 +26,8 @@ from openai import OpenAI
from api_config import API_KEYS
# ===== 경로 =====
-DATA_ROOT = Path(r"D:\for python\survey_test\process")
-OUTPUT_ROOT = Path(r"D:\for python\survey_test\output")
+DATA_ROOT = Path(r"D:\for python\geulbeot-light\geulbeot-light\00.test\hwpx\out")
+OUTPUT_ROOT = Path(r"D:\for python\geulbeot-light\geulbeot-light\00.test\hwpx\out\out") # 출력 위치
TEXT_DIR = OUTPUT_ROOT / "text"
JSON_DIR = OUTPUT_ROOT / "json"

View File

@@ -20,8 +20,8 @@ from openai import OpenAI
from api_config import API_KEYS
# ===== 경로 설정 =====
-DATA_ROOT = Path(r"D:\for python\survey_test\process")
-OUTPUT_ROOT = Path(r"D:\for python\survey_test\output")
+DATA_ROOT = Path(r"D:\for python\geulbeot-light\geulbeot-light\00.test\hwpx\out")
+OUTPUT_ROOT = Path(r"D:\for python\geulbeot-light\geulbeot-light\00.test\hwpx\out\out") # 출력 위치
RAG_DIR = OUTPUT_ROOT / "rag"
LOG_DIR = OUTPUT_ROOT / "logs"

View File

@@ -23,8 +23,8 @@ from openai import OpenAI
from api_config import API_KEYS
# ===== 경로 설정 =====
-DATA_ROOT = Path(r"D:\for python\survey_test\process")
-OUTPUT_ROOT = Path(r"D:\for python\survey_test\output")
+DATA_ROOT = Path(r"D:\for python\geulbeot-light\geulbeot-light\00.test\hwpx\out")
+OUTPUT_ROOT = Path(r"D:\for python\geulbeot-light\geulbeot-light\00.test\hwpx\out\out") # 출력 위치
RAG_DIR = OUTPUT_ROOT / "rag"
CONTEXT_DIR = OUTPUT_ROOT / "context"
LOG_DIR = OUTPUT_ROOT / "logs"

View File

@@ -22,8 +22,8 @@ from openai import OpenAI
from api_config import API_KEYS
# ===== 경로 설정 =====
-DATA_ROOT = Path(r"D:\for python\survey_test\process")
-OUTPUT_ROOT = Path(r"D:\for python\survey_test\output")
+DATA_ROOT = Path(r"D:\for python\geulbeot-light\geulbeot-light\00.test\hwpx\out")
+OUTPUT_ROOT = Path(r"D:\for python\geulbeot-light\geulbeot-light\00.test\hwpx\out\out") # 출력 위치
CONTEXT_DIR = OUTPUT_ROOT / "context"
LOG_DIR = OUTPUT_ROOT / "logs"

View File

@@ -55,8 +55,8 @@ GEMINI_MODEL = "gemini-3-pro-preview"
gemini_client = genai.Client(api_key=GEMINI_API_KEY)
# ===== 경로 설정 =====
-DATA_ROOT = Path(r"D:\for python\survey_test\process")
-OUTPUT_ROOT = Path(r"D:\for python\survey_test\output")
+DATA_ROOT = Path(r"D:\for python\geulbeot-light\geulbeot-light\00.test\hwpx\out")
+OUTPUT_ROOT = Path(r"D:\for python\geulbeot-light\geulbeot-light\00.test\hwpx\out\out") # 출력 위치
CONTEXT_DIR = OUTPUT_ROOT / "context"
LOG_DIR = OUTPUT_ROOT / "logs"
RAG_DIR = OUTPUT_ROOT / "rag"

View File

@@ -25,7 +25,7 @@ from typing import List, Dict, Any, Tuple, Optional
from dataclasses import dataclass, field
# ===== 경로 설정 =====
-OUTPUT_ROOT = Path(r"D:\for python\survey_test\output")
+OUTPUT_ROOT = Path(r"D:\for python\geulbeot-light\geulbeot-light\00.test\hwpx\out\out") # 출력 위치
GEN_DIR = OUTPUT_ROOT / "generated"
ASSETS_DIR = GEN_DIR / "assets"
LOG_DIR = OUTPUT_ROOT / "logs"