v8:문서유형 분석등록 및 추출_20260206
This commit is contained in:
@@ -776,8 +776,8 @@ class SurveyingFileConverter:
|
||||
|
||||
if __name__ == "__main__":
|
||||
# 경로 설정
|
||||
SOURCE_DIR = r"D:\for python\테스트 중(측량)\측량_GIS_드론 관련 자료들"
|
||||
OUTPUT_DIR = r"D:\for python\테스트 중(측량)\추출"
|
||||
SOURCE_DIR = r"D:\for python\geulbeot-light\geulbeot-light\00.test\hwpx\in"
|
||||
OUTPUT_DIR = r"D:\for python\geulbeot-light\geulbeot-light\00.test\hwpx\out"
|
||||
|
||||
# 변환기 실행
|
||||
converter = SurveyingFileConverter(SOURCE_DIR, OUTPUT_DIR)
|
||||
|
||||
@@ -27,8 +27,8 @@ except ImportError:
|
||||
print("[INFO] pytesseract 미설치 - 텍스트 잘림 필터 비활성화")
|
||||
|
||||
# ===== 경로 설정 =====
|
||||
BASE_DIR = Path(r"D:\for python\survey_test\extract") # PDF 원본 위치
|
||||
OUTPUT_BASE = Path(r"D:\for python\survey_test\process") # 출력 위치
|
||||
BASE_DIR = Path(r"D:\for python\geulbeot-light\geulbeot-light\00.test\hwpx\out") # PDF 원본 위치
|
||||
OUTPUT_BASE = Path(r"D:\for python\geulbeot-light\geulbeot-light\00.test\hwpx\out\out") # 출력 위치
|
||||
|
||||
CAPTION_PATTERN = re.compile(
|
||||
r'^\s*(?:[<\[\(\{]\s*)?(그림|figure|fig)\s*\.?\s*(?:[<\[\(\{]\s*)?0*\d+(?:\s*[-–]\s*\d+)?',
|
||||
|
||||
@@ -29,8 +29,8 @@ from api_config import API_KEYS
|
||||
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
|
||||
|
||||
# ===== 경로 설정 =====
|
||||
DATA_ROOT = Path(r"D:\for python\survey_test\extract")
|
||||
OUTPUT_ROOT = Path(r"D:\for python\survey_test\output")
|
||||
DATA_ROOT = Path(r"D:\for python\geulbeot-light\geulbeot-light\00.test\hwpx\out")
|
||||
OUTPUT_ROOT = Path(r"D:\for python\geulbeot-light\geulbeot-light\00.test\hwpx\out\out") # 출력 위치
|
||||
CONTEXT_DIR = OUTPUT_ROOT / "context"
|
||||
LOG_DIR = OUTPUT_ROOT / "logs"
|
||||
|
||||
|
||||
@@ -26,8 +26,8 @@ from openai import OpenAI
|
||||
from api_config import API_KEYS
|
||||
|
||||
# ===== 경로 =====
|
||||
DATA_ROOT = Path(r"D:\for python\survey_test\process")
|
||||
OUTPUT_ROOT = Path(r"D:\for python\survey_test\output")
|
||||
DATA_ROOT = Path(r"D:\for python\geulbeot-light\geulbeot-light\00.test\hwpx\out")
|
||||
OUTPUT_ROOT = Path(r"D:\for python\geulbeot-light\geulbeot-light\00.test\hwpx\out\out") # 출력 위치
|
||||
|
||||
TEXT_DIR = OUTPUT_ROOT / "text"
|
||||
JSON_DIR = OUTPUT_ROOT / "json"
|
||||
|
||||
@@ -20,8 +20,8 @@ from openai import OpenAI
|
||||
from api_config import API_KEYS
|
||||
|
||||
# ===== 경로 설정 =====
|
||||
DATA_ROOT = Path(r"D:\for python\survey_test\process")
|
||||
OUTPUT_ROOT = Path(r"D:\for python\survey_test\output")
|
||||
DATA_ROOT = Path(r"D:\for python\geulbeot-light\geulbeot-light\00.test\hwpx\out")
|
||||
OUTPUT_ROOT = Path(r"D:\for python\geulbeot-light\geulbeot-light\00.test\hwpx\out\out") # 출력 위치
|
||||
RAG_DIR = OUTPUT_ROOT / "rag"
|
||||
LOG_DIR = OUTPUT_ROOT / "logs"
|
||||
|
||||
|
||||
@@ -23,8 +23,8 @@ from openai import OpenAI
|
||||
from api_config import API_KEYS
|
||||
|
||||
# ===== 경로 설정 =====
|
||||
DATA_ROOT = Path(r"D:\for python\survey_test\process")
|
||||
OUTPUT_ROOT = Path(r"D:\for python\survey_test\output")
|
||||
DATA_ROOT = Path(r"D:\for python\geulbeot-light\geulbeot-light\00.test\hwpx\out")
|
||||
OUTPUT_ROOT = Path(r"D:\for python\geulbeot-light\geulbeot-light\00.test\hwpx\out\out") # 출력 위치
|
||||
RAG_DIR = OUTPUT_ROOT / "rag"
|
||||
CONTEXT_DIR = OUTPUT_ROOT / "context"
|
||||
LOG_DIR = OUTPUT_ROOT / "logs"
|
||||
|
||||
@@ -22,8 +22,8 @@ from openai import OpenAI
|
||||
from api_config import API_KEYS
|
||||
|
||||
# ===== 경로 설정 =====
|
||||
DATA_ROOT = Path(r"D:\for python\survey_test\process")
|
||||
OUTPUT_ROOT = Path(r"D:\for python\survey_test\output")
|
||||
DATA_ROOT = Path(r"D:\for python\geulbeot-light\geulbeot-light\00.test\hwpx\out")
|
||||
OUTPUT_ROOT = Path(r"D:\for python\geulbeot-light\geulbeot-light\00.test\hwpx\out\out") # 출력 위치
|
||||
CONTEXT_DIR = OUTPUT_ROOT / "context"
|
||||
LOG_DIR = OUTPUT_ROOT / "logs"
|
||||
|
||||
|
||||
@@ -55,8 +55,8 @@ GEMINI_MODEL = "gemini-3-pro-preview"
|
||||
gemini_client = genai.Client(api_key=GEMINI_API_KEY)
|
||||
|
||||
# ===== 경로 설정 =====
|
||||
DATA_ROOT = Path(r"D:\for python\survey_test\process")
|
||||
OUTPUT_ROOT = Path(r"D:\for python\survey_test\output")
|
||||
DATA_ROOT = Path(r"D:\for python\geulbeot-light\geulbeot-light\00.test\hwpx\out")
|
||||
OUTPUT_ROOT = Path(r"D:\for python\geulbeot-light\geulbeot-light\00.test\hwpx\out\out") # 출력 위치
|
||||
CONTEXT_DIR = OUTPUT_ROOT / "context"
|
||||
LOG_DIR = OUTPUT_ROOT / "logs"
|
||||
RAG_DIR = OUTPUT_ROOT / "rag"
|
||||
|
||||
@@ -25,7 +25,7 @@ from typing import List, Dict, Any, Tuple, Optional
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
# ===== 경로 설정 =====
|
||||
OUTPUT_ROOT = Path(r"D:\for python\survey_test\output")
|
||||
OUTPUT_ROOT = Path(r"D:\for python\geulbeot-light\geulbeot-light\00.test\hwpx\out\out") # 출력 위치
|
||||
GEN_DIR = OUTPUT_ROOT / "generated"
|
||||
ASSETS_DIR = GEN_DIR / "assets"
|
||||
LOG_DIR = OUTPUT_ROOT / "logs"
|
||||
|
||||
Reference in New Issue
Block a user