# ocr_eval_app.py
"""Streamlit app for evaluating Paddle OCR and Upstage OCR output against ground-truth text."""

import base64
import difflib
import json
from pathlib import Path

import pandas as pd
import streamlit as st

from ocr_eval_engine import OCREvaluator

# --- Constants ---
SESSION_BASE_PATH = Path(__file__).parent / "shared_sessions"
EDIT_KEY = "parsed"


# --- Helper functions ---
def get_evaluable_sessions():
    """Return the sessions under "shared_sessions" that contain a 'groundtruth' folder."""
    if not SESSION_BASE_PATH.exists():
        return []
    evaluable = []
    for d in SESSION_BASE_PATH.iterdir():
        if d.is_dir() and (d / "groundtruth").is_dir():
            evaluable.append(d.name)
    return sorted(evaluable)


def get_session_path(seed):
    return SESSION_BASE_PATH / seed


def display_pdf(file_path):
    """Render a PDF file in an inline iframe."""
    bytes_data = file_path.read_bytes()
    base64_pdf = base64.b64encode(bytes_data).decode("utf-8")
    st.markdown(
        f'<iframe src="data:application/pdf;base64,{base64_pdf}" '
        'width="100%" height="800" type="application/pdf"></iframe>',
        unsafe_allow_html=True,
    )


def generate_hyp_html(ref: str, hyp: str) -> str:
    """Build HTML that highlights errors in the hypothesis (hyp) text using difflib.SequenceMatcher."""
    matcher = difflib.SequenceMatcher(None, ref, hyp)
    html_out = ""
    for tag, i1, i2, j1, j2 in matcher.get_opcodes():
        hyp_chunk = hyp[j1:j2]
        # Escape HTML special characters, then preserve line breaks and spaces in the rendered HTML.
        hyp_chunk_display = (
            hyp_chunk.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
        )
        hyp_chunk_display = hyp_chunk_display.replace("\n", "<br>").replace(
            " ", "&nbsp;"
        )
        if tag == "equal":
            html_out += f"<span>{hyp_chunk_display}</span>"
        elif tag == "replace":
            html_out += (
                f'<span style="background-color: #fff3b0;">{hyp_chunk_display}</span>'
            )
        elif tag == "insert":
            html_out += (
                f'<span style="background-color: #ffc9c9;">{hyp_chunk_display}</span>'
            )
    return (
        '<div style="font-family: monospace; border: 1px solid #d0d0d0; '
        'border-radius: 4px; padding: 8px;">'
        f"{html_out}</div>"
    )


def match_evaluation_files(seed):
    """Find and match the files required for evaluation inside the session folder."""
    session_path = get_session_path(seed)
    doc_path = session_path / "docs"
    gt_path = session_path / "groundtruth"
    paddle_path = session_path / "jsons_paddle_ocr"
    upstage_path = session_path / "jsons_upstage"

    if not all([p.is_dir() for p in [doc_path, gt_path, paddle_path, upstage_path]]):
        return None

    gt_files = {f.stem for f in gt_path.glob("*.json")}
    doc_map = {f.stem: f for f in doc_path.iterdir()}
    paddle_map = {f.stem: f for f in paddle_path.glob("*.json")}
    upstage_map = {f.stem: f for f in upstage_path.glob("*.json")}

    matched = {}
    for stem in sorted(list(gt_files)):
        if stem in doc_map and stem in paddle_map and stem in upstage_map:
            matched[stem] = {
                "doc_file": doc_map[stem],
                "gt_file": gt_path / f"{stem}.json",
                "paddle_file": paddle_map[stem],
                "upstage_file": upstage_map[stem],
            }
    return matched


def display_evaluation_for_file(files):
    """Display the evaluation results for the selected file."""
    st.header("📊 성능 평가 결과")
    try:
        with open(files["gt_file"], "r", encoding="utf-8") as f:
            gt_data = json.load(f)
        with open(files["paddle_file"], "r", encoding="utf-8") as f:
            paddle_data = json.load(f)
        with open(files["upstage_file"], "r", encoding="utf-8") as f:
            upstage_data = json.load(f)

        gt_text = (gt_data[0] if isinstance(gt_data, list) else gt_data).get(
            EDIT_KEY, ""
        )
        paddle_text = (
            paddle_data[0] if isinstance(paddle_data, list) else paddle_data
        ).get(EDIT_KEY, "")
        upstage_text = (
            upstage_data[0] if isinstance(upstage_data, list) else upstage_data
        ).get(EDIT_KEY, "")

        if not gt_text:
            st.warning("정답 텍스트가 비어있어 평가할 수 없습니다.")
            return

        evaluator = OCREvaluator(gt_text)
        paddle_results = evaluator.evaluate(paddle_text)
        upstage_results = evaluator.evaluate(upstage_text)

        col1, col2 = st.columns(2)
        with col1:
            st.markdown("#### Model 1: Paddle OCR")
            m_col1, m_col2 = st.columns(2)
            m_col1.metric("엄격한 WER", f"{paddle_results['strict_wer']:.2%}")
            m_col2.metric("엄격한 CER", f"{paddle_results['strict_cer']:.2%}")
            m_col1.metric("유연한 WER", f"{paddle_results['flexible_wer']:.2%}")
            m_col2.metric("유연한 CER", f"{paddle_results['flexible_cer']:.2%}")
        with col2:
            st.markdown("#### Model 2: Upstage OCR")
            m_col1, m_col2 = st.columns(2)
            m_col1.metric("엄격한 WER", f"{upstage_results['strict_wer']:.2%}")
            m_col2.metric("엄격한 CER", f"{upstage_results['strict_cer']:.2%}")
            m_col1.metric("유연한 WER", f"{upstage_results['flexible_wer']:.2%}")
            m_col2.metric("유연한 CER", f"{upstage_results['flexible_cer']:.2%}")

        with st.expander("상세 텍스트 비교", expanded=True):
            st.markdown(
                """
                <b>범례 (Legend)</b><br>
                <span>일치하는 텍스트</span><br>
                <span style="background-color: #ffc9c9;">삽입된 텍스트 (정답에 없음)</span><br>
                <span style="background-color: #fff3b0;">치환된 텍스트 (정답과 다름)</span><br>
                """,
                unsafe_allow_html=True,
            )
            st.markdown("---")
            text_col1, text_col2, text_col3 = st.columns(3)
            with text_col1:
                st.text("정답 (Ground Truth)")
                st.code(gt_text, language=None)
            with text_col2:
                st.text("Paddle OCR")
                html = generate_hyp_html(gt_text, paddle_text)
                st.markdown(html, unsafe_allow_html=True)
            with text_col3:
                st.text("Upstage OCR")
                html = generate_hyp_html(gt_text, upstage_text)
                st.markdown(html, unsafe_allow_html=True)
    except Exception as e:
        st.error(f"성능 평가 중 오류 발생: {e}")


@st.cache_data
def generate_all_results_df(_matched_files):
    """Aggregate the evaluation results for every file in the session into a DataFrame."""
    all_results = []
    for basename, files in _matched_files.items():
        try:
            with open(files["gt_file"], "r", encoding="utf-8") as f:
                gt_data = json.load(f)
            with open(files["paddle_file"], "r", encoding="utf-8") as f:
                paddle_data = json.load(f)
            with open(files["upstage_file"], "r", encoding="utf-8") as f:
                upstage_data = json.load(f)

            gt_text = (gt_data[0] if isinstance(gt_data, list) else gt_data).get(
                EDIT_KEY, ""
            )
            if not gt_text:
                continue

            evaluator = OCREvaluator(gt_text)

            # Evaluate the Paddle model
            paddle_text = (
                paddle_data[0] if isinstance(paddle_data, list) else paddle_data
            ).get(EDIT_KEY, "")
            paddle_results = evaluator.evaluate(paddle_text)
            paddle_results["model"] = "paddle_ocr"
            paddle_results["file"] = basename
            all_results.append(paddle_results)

            # Evaluate the Upstage model
            upstage_text = (
                upstage_data[0] if isinstance(upstage_data, list) else upstage_data
            ).get(EDIT_KEY, "")
            upstage_results = evaluator.evaluate(upstage_text)
            upstage_results["model"] = "upstage_ocr"
            upstage_results["file"] = basename
            all_results.append(upstage_results)
        except Exception:
            # Skip files that fail to load or evaluate
            continue

    df = pd.DataFrame(all_results)
    if df.empty:
        # Nothing could be evaluated; return the empty frame so callers can check .empty safely.
        return df

    # Reorder the columns
    ordered_cols = [
        "file",
        "model",
        "strict_wer",
        "strict_cer",
        "flexible_wer",
        "flexible_cer",
        "word_hits",
        "word_substitutions",
        "word_deletions",
        "word_insertions",
        "char_hits",
        "char_substitutions",
        "char_deletions",
        "char_insertions",
    ]
    return df[ordered_cols]


# --- Callbacks ---
def handle_nav_button(direction, total_files):
    if direction == "prev" and st.session_state.eval_current_index > 0:
        st.session_state.eval_current_index -= 1
    elif direction == "next" and st.session_state.eval_current_index < total_files - 1:
        st.session_state.eval_current_index += 1


def handle_selectbox_change():
    st.session_state.eval_current_index = st.session_state.eval_selectbox_key


# --- Main UI logic ---
def main():
    st.set_page_config(layout="wide", page_title="OCR 성능 평가 도구")
    st.title("OCR 성능 평가 도구")

    if "eval_current_index" not in st.session_state:
        st.session_state.eval_current_index = 0

    st.sidebar.header("세션 선택")
    sessions = get_evaluable_sessions()
    if not sessions:
        st.info(
            "평가 가능한 세션이 없습니다. 먼저 '정답셋 생성 도구'에서 정답셋을 생성해주세요."
        )
        return

    seed = st.sidebar.selectbox("평가할 세션을 선택하세요.", sessions)
    if not seed:
        st.info("사이드바에서 평가할 세션을 선택하세요.")
        return

    matched_files = match_evaluation_files(seed)
    if matched_files is None:
        st.error(
            f"'{seed}'에 해당하는 세션을 찾을 수 없거나, 필요한 폴더(docs, groundtruth 등)가 없습니다."
        )
        return
    if not matched_files:
        st.warning("해당 세션에 평가할 파일(정답셋이 생성된 파일)이 없습니다.")
        return

    sorted_basenames = sorted(list(matched_files.keys()))
    if st.session_state.eval_current_index >= len(sorted_basenames):
        st.session_state.eval_current_index = 0

    st.sidebar.header("파일 선택")
    st.sidebar.selectbox(
        "평가할 파일을 선택하세요.",
        options=range(len(sorted_basenames)),
        format_func=lambda x: f"{x+1}. {sorted_basenames[x]}",
        index=st.session_state.eval_current_index,
        key="eval_selectbox_key",
        on_change=handle_selectbox_change,
    )

    st.sidebar.header("보기 옵션")
    hide_document = st.sidebar.checkbox("원본 문서 숨기기", value=False)

    st.sidebar.header("내보내기")
    results_df = generate_all_results_df(matched_files)
    if not results_df.empty:
        csv = results_df.to_csv(index=False).encode("utf-8")
        st.sidebar.download_button(
            label="전체 결과 CSV 다운로드",
            data=csv,
            file_name=f"evaluation_results_{seed}.csv",
            mime="text/csv",
        )
    else:
        st.sidebar.write("다운로드할 결과가 없습니다.")

    current_basename = sorted_basenames[st.session_state.eval_current_index]

    nav_cols = st.columns([1, 5, 1])
    nav_cols[0].button(
        "◀ 이전",
        on_click=handle_nav_button,
        args=("prev", len(sorted_basenames)),
        use_container_width=True,
    )
    nav_cols[1].markdown(
        f"<div style='text-align: center;'><b>{current_basename}</b> "
        f"({st.session_state.eval_current_index + 1}/{len(sorted_basenames)})</div>",
        unsafe_allow_html=True,
    )
    nav_cols[2].button(
        "다음 ▶",
        on_click=handle_nav_button,
        args=("next", len(sorted_basenames)),
        use_container_width=True,
    )
    st.markdown("---")

    files_to_evaluate = matched_files[current_basename]
    if hide_document:
        display_evaluation_for_file(files_to_evaluate)
    else:
        col1, col2 = st.columns([1, 1])
        with col1:
            st.header("📄 원본 문서")
            doc_file = files_to_evaluate["doc_file"]
            if doc_file.suffix.lower() == ".pdf":
                display_pdf(doc_file)
            else:
                st.image(str(doc_file), use_container_width=True)
        with col2:
            display_evaluation_for_file(files_to_evaluate)


if __name__ == "__main__":
    main()