first commit

2025-03-27 16:11:09 +09:00
parent bd308ea2df
commit 1d1d4e62b2
9 changed files with 300 additions and 5 deletions
--- a/kcs_insert.py
+++ b/kcs_insert.py
@@ -0,0 +1,40 @@
+import csv
+import os
+
+from elasticsearch import Elasticsearch
+from elasticsearch.helpers import bulk
+
+# Create the Elasticsearch client (points at a local node on port 9200)
+es = Elasticsearch("http://localhost:9200")
+
+# Path of the folder to scan for CSV files
+csv_folder = r"split_KCS"  # folder that holds the CSV files
+
+
+# Bulk-index the rows of every CSV file in a folder into Elasticsearch
+def index_csv_files(folder_path, index_name):
+    """Index each row of every ``.csv`` file in *folder_path* into *index_name*."""
+    docs = []  # bulk actions collected across all files
+
+    # Visit every entry in the folder, skipping anything that is not a CSV
+    for filename in os.listdir(folder_path):
+        if filename.endswith(".csv"):
+            file_path = os.path.join(folder_path, filename)
+            print(f"📂 {filename} 처리 중...")
+
+            # newline="" leaves line endings to csv, so quoted newlines stay intact
+            with open(file_path, mode="r", encoding="utf-8", newline="") as file:
+                # One bulk action per row; DictReader keys each row by the header
+                for row in csv.DictReader(file):
+                    docs.append({"_index": index_name, "_source": row})
+
+            print(f"✅ {filename} 처리 완료")
+
+    # Hand all collected actions to the bulk helper; skip it when no rows were read
+    if docs:
+        bulk(es, docs)
+        print(f"🚀 총 {len(docs)}개의 문서 삽입 완료!")
+
+
+# Run: index every CSV in csv_folder into "my-user-index" (executes on import too)
+index_csv_files(csv_folder, "my-user-index")