paddle ocr 위치 기반 텍스트 정렬

This commit is contained in:
kyy
2025-09-09 16:21:59 +09:00
parent b0049229d8
commit 7ebd979521

View File

@@ -149,8 +149,8 @@ def extract_text_paddle_ocr(images):
use_doc_orientation_classify=False, use_doc_unwarping=False, lang="korean"
)
full_response = []
coord_response = []
all_text_boxes = [] # (y_center, x_center, text) 저장용
for page_idx, img in enumerate(images):
print(f"[PaddleOCR] 페이지 {page_idx + 1} OCR로 텍스트 추출 중...")
@@ -183,13 +183,50 @@ def extract_text_paddle_ocr(images):
texts = res_dic.get("rec_texts", [])
boxes = res_dic.get("rec_boxes", [])
full_response.extend(texts)
for text, box in zip(texts, boxes):
if isinstance(box, np.ndarray):
box = box.tolist()
# ✅ box 정규화
if all(isinstance(p, (int, float)) for p in box):
if len(box) % 2 == 0:
box = [[box[i], box[i + 1]] for i in range(0, len(box), 2)]
else:
print(f"[PaddleOCR] 잘못된 box 형식: {box}")
continue
# ndarray → list 변환
clean_boxes = [
box.tolist() if isinstance(box, np.ndarray) else box for box in boxes
]
coord_response.extend(clean_boxes)
coord_response.append(box)
# 중심 좌표 계산 (y → 줄 순서, x → 단어 순서)
x_coords = [p[0] for p in box]
y_coords = [p[1] for p in box]
x_center = sum(x_coords) / len(x_coords)
y_center = sum(y_coords) / len(y_coords)
all_text_boxes.append((y_center, x_center, text))
# ✅ 위치 기반 정렬
all_text_boxes.sort(key=lambda x: (x[0], x[1])) # y 먼저, 그 다음 x 정렬
# ✅ 줄 단위 그룹핑
lines = []
current_line = []
prev_y = None
line_threshold = 15 # 줄 묶음 y 오차 허용값
for y, x, text in all_text_boxes:
if prev_y is None or abs(y - prev_y) < line_threshold:
current_line.append((x, text))
else:
current_line.sort(key=lambda xx: xx[0])
lines.append(" ".join(t for _, t in current_line))
current_line = [(x, text)]
prev_y = y
if current_line:
current_line.sort(key=lambda xx: xx[0])
lines.append(" ".join(t for _, t in current_line))
parsed_text = "\n".join(lines)
print("[PaddleOCR] 전체 페이지 텍스트 및 좌표 추출 완료")
return " ".join(full_response), coord_response
return parsed_text, coord_response