확장자, 포트 수정

This commit is contained in:
2025-02-14 12:13:05 +09:00
parent 0d34c20882
commit c6a71467e8
6 changed files with 43 additions and 14 deletions

View File

@@ -14,6 +14,6 @@ RUN uv self update
COPY requirements.txt .
RUN uv pip install --no-cache-dir -r requirements.txt --system
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
#CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]

View File

@@ -2,26 +2,48 @@ version: "3.9"
services:
api:
build: .
command: uvicorn main:app --host 0.0.0.0 --port 8000
command: uvicorn main:app --host 0.0.0.0 --port 8010
volumes:
- ./workspace:/opt/workspace
- ./cache:/root/.cache/
ports:
- "8000:8000"
- "8010:8010"
depends_on:
- redis
networks:
- app_network
- rag_data_network
worker:
build: .
volumes:
- ./workspace:/opt/workspace
- ./cache:/root/.cache/
command: python3 worker.py
depends_on:
- redis
networks:
- app_network
- rag_data_network
redis:
volumes:
- ./workspace:/opt/workspace
- ./cache:/root/.cache/
image: redis:6
ports:
- "6379:6379"
networks:
- app_network
- rag_data_network
rq-dashboard:
volumes:
- ./workspace:/opt/workspace
- ./cache:/root/.cache/
image: eoranged/rq-dashboard
ports:
- "9181:9181"
environment:
- RQ_DASHBOARD_REDIS_URL=redis://redis:6379
depends_on:
- redis
networks:
- rag_data_network
networks:
app_network:
rag_data_network:
driver: bridge

View File

@@ -5,5 +5,6 @@ UPLOAD_DIR = os.getenv("UPLOAD_DIR", "data")
# Output directory name; overridable through the OUTPUT_DIR environment
# variable (defaults to "converted").
OUTPUT_DIR = os.getenv("OUTPUT_DIR", "converted")
# Redis connection settings.
# When connecting from a local run, change the default host from "redis"
# to "localhost" (or set the REDIS_HOST environment variable instead).
REDIS_HOST = os.getenv("REDIS_HOST", "redis")
# Environment values are strings, so the port is converted to int here.
REDIS_PORT = int(os.getenv("REDIS_PORT", "6379"))

View File

@@ -13,21 +13,19 @@ def convert_hwp_to_md(input_path: str, output_path: str):
for doc in docs:
try:
text = doc.page_content if hasattr(doc, "page_content") else str(doc)
text = text.encode("utf-8", "ignore").decode(
"utf-8"
) # UTF-8로 변환하면서 깨진 문자 제거
text = text.encode("utf-8", "ignore").decode("utf-8")
docs_as_text.append(text)
except Exception as e:
print(f"인코딩 변환 중 오류 발생: {e}")
with open(output_path, "w", encoding="utf-8") as f:
f.write("\n".join(docs_as_text)) # ✅ 변환된 리스트를 파일에 저장
f.write("\n".join(docs_as_text))
return None
def convert_to_md(input_path: str, output_path: str):
    """Convert a document with MarkItDown and save the resulting text.

    Args:
        input_path: Path of the source document passed to
            ``MarkItDown.convert``.
        output_path: Path of the file that receives the converted text.
            It is opened in write mode with UTF-8 encoding, so an existing
            file is overwritten.
    """
    # A single converter with default settings; no Document Intelligence
    # endpoint is configured.
    md = MarkItDown()
    result = md.convert(input_path)
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(result.text_content)

View File

@@ -49,7 +49,15 @@ async def convert_files():
for file in files:
file_ext = file.split(".")[-1].lower()
if file_ext in ["txt", "html", "docx", "pdf", "hwp"]:
if file_ext in [
"txt",
"html",
"pdf",
"hwp",
"pptx",
"xlsx",
"docx",
]:
task = {
"filename": file,
"extension": file_ext,

View File

@@ -34,5 +34,5 @@ def process_task(task):
if __name__ == "__main__":
    # Names of the queues this worker listens on.
    listen = ["task_queue1"]
    # Build the worker once, bound to the module-level Redis connection
    # (presumably rq's Worker -- confirm against the file's imports), then
    # start its work loop.
    worker = Worker(listen, connection=redis_client)
    worker.work()