From c6a71467e8191f51016c9726fa737fc759e9ef63 Mon Sep 17 00:00:00 2001 From: chan Date: Fri, 14 Feb 2025 12:13:05 +0900 Subject: [PATCH] =?UTF-8?q?=ED=99=95=EC=9E=A5=EC=9E=90,=20=ED=8F=AC?= =?UTF-8?q?=ED=8A=B8=20=EC=88=98=EC=A0=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Dockerfile | 2 +- docker-compose.yml | 34 ++++++++++++++++++++++++++++------ workspace/config.py | 1 + workspace/convert_obj_to_md.py | 8 +++----- workspace/main.py | 10 +++++++++- workspace/worker.py | 2 +- 6 files changed, 43 insertions(+), 14 deletions(-) diff --git a/Dockerfile b/Dockerfile index 91ce622..f7746a9 100644 --- a/Dockerfile +++ b/Dockerfile @@ -14,6 +14,6 @@ RUN uv self update COPY requirements.txt . RUN uv pip install --no-cache-dir -r requirements.txt --system -CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"] +#CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"] diff --git a/docker-compose.yml b/docker-compose.yml index a214911..ca59cec 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -2,26 +2,48 @@ version: "3.9" services: api: build: . - command: uvicorn main:app --host 0.0.0.0 --port 8000 + command: uvicorn main:app --host 0.0.0.0 --port 8010 + volumes: + - ./workspace:/opt/workspace + - ./cache:/root/.cache/ ports: - - "8000:8000" + - "8010:8010" depends_on: - redis networks: - - app_network + - rag_data_network worker: build: . + volumes: + - ./workspace:/opt/workspace + - ./cache:/root/.cache/ command: python3 worker.py depends_on: - redis networks: - - app_network + - rag_data_network redis: + volumes: + - ./workspace:/opt/workspace + - ./cache:/root/.cache/ image: redis:6 ports: - "6379:6379" networks: - - app_network + - rag_data_network + rq-dashboard: + volumes: + - ./workspace:/opt/workspace + - ./cache:/root/.cache/ + image: eoranged/rq-dashboard + ports: + - "9181:9181" + environment: + - RQ_DASHBOARD_REDIS_URL=redis://redis:6379 + depends_on: + - redis + networks: + - rag_data_network networks: - app_network: + rag_data_network: driver: bridge \ No newline at end of file diff --git a/workspace/config.py b/workspace/config.py index 1843f0d..0fb4e59 100644 --- a/workspace/config.py +++ b/workspace/config.py @@ -5,5 +5,6 @@ UPLOAD_DIR = os.getenv("UPLOAD_DIR", "data") OUTPUT_DIR = os.getenv("OUTPUT_DIR", "converted") # Redis 연결 정보 +# local 연결시 redis -> localhost로 변경 REDIS_HOST = os.getenv("REDIS_HOST", "redis") REDIS_PORT = int(os.getenv("REDIS_PORT", "6379")) diff --git a/workspace/convert_obj_to_md.py b/workspace/convert_obj_to_md.py index 910cb98..2f790c8 100644 --- a/workspace/convert_obj_to_md.py +++ b/workspace/convert_obj_to_md.py @@ -13,21 +13,19 @@ def convert_hwp_to_md(input_path: str, output_path: str): for doc in docs: try: text = doc.page_content if hasattr(doc, "page_content") else str(doc) - text = text.encode("utf-8", "ignore").decode( - "utf-8" - ) # UTF-8로 변환하면서 깨진 문자 제거 + text = text.encode("utf-8", "ignore").decode("utf-8") docs_as_text.append(text) except Exception as e: print(f"인코딩 변환 중 오류 발생: {e}") with open(output_path, "w", encoding="utf-8") as f: - f.write("\n".join(docs_as_text)) # ✅ 변환된 리스트를 파일에 저장 + f.write("\n".join(docs_as_text)) return None def convert_to_md(input_path: str, output_path: str): - md = MarkItDown(docintel_endpoint="") + md = MarkItDown() result = md.convert(input_path) with open(output_path, "w", encoding="utf-8") as f: f.write(result.text_content) diff --git a/workspace/main.py b/workspace/main.py index aedcce8..9d48f82 100644 --- a/workspace/main.py +++ b/workspace/main.py @@ -49,7 +49,15 @@ async def convert_files(): for file in files: file_ext = file.split(".")[-1].lower() - if file_ext in ["txt", "html", "docx", "pdf", "hwp"]: + if file_ext in [ + "txt", + "html", + "pdf", + "hwp", + "pptx", + "xlsx", + "docx", + ]: task = { "filename": file, "extension": file_ext, diff --git a/workspace/worker.py b/workspace/worker.py index abdf3d0..fd59f86 100644 --- a/workspace/worker.py +++ b/workspace/worker.py @@ -34,5 +34,5 @@ def process_task(task): if __name__ == "__main__": listen = ["task_queue1"] - worker = Worker(listen, connection=redis_client) # 최신 방식 + worker = Worker(listen, connection=redis_client) worker.work()