파일분류, csv output, 세션유지 기능 추가

2025-08-07 11:15:51 +09:00
parent 47b7ecf34e
commit 1b96840c83
5 changed files with 328 additions and 81 deletions
--- a/workspace/show_summary.py
+++ b/workspace/show_summary.py
@@ -0,0 +1,92 @@
+# workspace/show_summary.py
+import os
+import json
+import argparse
+import pandas as pd
+
def _extract_row(item):
    """Build one summary row (a flat dict) from a single JSON record."""
    # 'time' can be missing, null, or not a mapping at all. dict.get's default
    # only covers the "missing" case, so check the type explicitly instead of
    # chaining .get() and crashing on None.
    time_info = item.get('time')
    duration_sec = time_info.get('duration_sec') if isinstance(time_info, dict) else None

    row_data = {
        'filename': item.get('filename'),
        'duration_sec': duration_sec,
    }

    # Every key of 'processed' becomes its own column. Because this is a plain
    # dict.update, a 'processed' key named 'filename' or 'duration_sec' replaces
    # the value set above.
    processed_info = item.get('processed', {})
    if isinstance(processed_info, dict):
        row_data.update(processed_info)

    return row_data


def generate_summary(directory_path):
    """
    Read every ``*.json`` file in a directory and collect a summary table.

    Each file is expected to hold a non-empty JSON list; only its first element
    is used. Files with any other top-level shape are skipped silently. A file
    that cannot be read or parsed is reported on stdout and skipped, so one bad
    file never aborts the whole run.

    Args:
        directory_path: Path of the directory to scan (not recursive).

    Returns:
        A pandas DataFrame with one row per usable file (columns ``filename``,
        ``duration_sec`` plus every key found under ``processed``), or ``None``
        when the directory does not exist or yields no rows.
    """
    if not os.path.isdir(directory_path):
        print(f"오류: 디렉터리를 찾을 수 없습니다 - {directory_path}")
        return None

    summary_data = []

    # Sorted so the row order is deterministic regardless of the filesystem.
    for filename in sorted(os.listdir(directory_path)):
        if not filename.endswith('.json'):
            continue

        file_path = os.path.join(directory_path, filename)

        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                data = json.load(f)

            # Only a non-empty list is a supported layout; anything else is skipped.
            if not (isinstance(data, list) and data):
                continue

            summary_data.append(_extract_row(data[0]))

        except (OSError, UnicodeDecodeError, json.JSONDecodeError) as e:
            # Unreadable file, wrong encoding, or malformed JSON.
            print(f"파일 처리 중 오류 발생 ({filename}): {e}")
        except Exception as e:
            # Deliberate per-file catch-all (e.g. first element is not a dict):
            # report it and keep processing the remaining files.
            print(f"알 수 없는 오류 발생 ({filename}): {e}")

    if not summary_data:
        print("처리할 JSON 파일이 없습니다.")
        return None

    return pd.DataFrame(summary_data)
+
def main():
    """
    Command-line entry point.

    Parses the input directory and the optional ``-o/--output`` path, prints the
    summary table built by ``generate_summary``, and, when an output path was
    given, writes the table to a CSV file (appending ``.csv`` if it is missing).
    """
    cli = argparse.ArgumentParser(description="JSON 파일들을 읽어 요약 테이블을 생성하고 CSV로 저장하는 스크립트")
    cli.add_argument("input_dir", help="JSON 파일들이 포함된 입력 디렉터리 경로")
    cli.add_argument("-o", "--output", help="요약 결과를 저장할 CSV 파일 경로")
    options = cli.parse_args()

    # Widen pandas' console limits so the printed table is not truncated.
    display_settings = (
        ('display.max_rows', 500),
        ('display.max_columns', 50),
        ('display.width', 200),
    )
    for option_name, option_value in display_settings:
        pd.set_option(option_name, option_value)

    table = generate_summary(options.input_dir)
    if table is None:
        # generate_summary has already printed the reason.
        return

    print("\n--- JSON 처리 결과 요약 ---")
    print(table)
    print("\n")

    if not options.output:
        return

    # Append the .csv extension when the caller left it off (case-insensitive check).
    csv_path = options.output
    if not csv_path.lower().endswith('.csv'):
        csv_path = csv_path + '.csv'

    try:
        # 'utf-8-sig' writes a BOM so that Excel displays Korean text correctly.
        table.to_csv(csv_path, index=False, encoding='utf-8-sig')
        print(f"요약 결과가 '{csv_path}' 파일로 성공적으로 저장되었습니다.")
    except Exception as e:
        print(f"CSV 파일 저장 중 오류가 발생했습니다: {e}")
    print("\n")
+
+if __name__ == "__main__":
+    main()