확장자, 포트 수정
This commit is contained in:
@@ -14,6 +14,6 @@ RUN uv self update
|
|||||||
COPY requirements.txt .
|
COPY requirements.txt .
|
||||||
RUN uv pip install --no-cache-dir -r requirements.txt --system
|
RUN uv pip install --no-cache-dir -r requirements.txt --system
|
||||||
|
|
||||||
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
|
#CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -2,26 +2,48 @@ version: "3.9"
|
|||||||
services:
|
services:
|
||||||
api:
|
api:
|
||||||
build: .
|
build: .
|
||||||
command: uvicorn main:app --host 0.0.0.0 --port 8000
|
command: uvicorn main:app --host 0.0.0.0 --port 8010
|
||||||
|
volumes:
|
||||||
|
- ./workspace:/opt/workspace
|
||||||
|
- ./cache:/root/.cache/
|
||||||
ports:
|
ports:
|
||||||
- "8000:8000"
|
- "8010:8010"
|
||||||
depends_on:
|
depends_on:
|
||||||
- redis
|
- redis
|
||||||
networks:
|
networks:
|
||||||
- app_network
|
- rag_data_network
|
||||||
worker:
|
worker:
|
||||||
build: .
|
build: .
|
||||||
|
volumes:
|
||||||
|
- ./workspace:/opt/workspace
|
||||||
|
- ./cache:/root/.cache/
|
||||||
command: python3 worker.py
|
command: python3 worker.py
|
||||||
depends_on:
|
depends_on:
|
||||||
- redis
|
- redis
|
||||||
networks:
|
networks:
|
||||||
- app_network
|
- rag_data_network
|
||||||
redis:
|
redis:
|
||||||
|
volumes:
|
||||||
|
- ./workspace:/opt/workspace
|
||||||
|
- ./cache:/root/.cache/
|
||||||
image: redis:6
|
image: redis:6
|
||||||
ports:
|
ports:
|
||||||
- "6379:6379"
|
- "6379:6379"
|
||||||
networks:
|
networks:
|
||||||
- app_network
|
- rag_data_network
|
||||||
|
rq-dashboard:
|
||||||
|
volumes:
|
||||||
|
- ./workspace:/opt/workspace
|
||||||
|
- ./cache:/root/.cache/
|
||||||
|
image: eoranged/rq-dashboard
|
||||||
|
ports:
|
||||||
|
- "9181:9181"
|
||||||
|
environment:
|
||||||
|
- RQ_DASHBOARD_REDIS_URL=redis://redis:6379
|
||||||
|
depends_on:
|
||||||
|
- redis
|
||||||
|
networks:
|
||||||
|
- rag_data_network
|
||||||
networks:
|
networks:
|
||||||
app_network:
|
rag_data_network:
|
||||||
driver: bridge
|
driver: bridge
|
||||||
@@ -5,5 +5,6 @@ UPLOAD_DIR = os.getenv("UPLOAD_DIR", "data")
|
|||||||
OUTPUT_DIR = os.getenv("OUTPUT_DIR", "converted")
|
OUTPUT_DIR = os.getenv("OUTPUT_DIR", "converted")
|
||||||
|
|
||||||
# Redis 연결 정보
|
# Redis 연결 정보
|
||||||
|
# local 연결시 redis -> localhost로 변경
|
||||||
REDIS_HOST = os.getenv("REDIS_HOST", "redis")
|
REDIS_HOST = os.getenv("REDIS_HOST", "redis")
|
||||||
REDIS_PORT = int(os.getenv("REDIS_PORT", "6379"))
|
REDIS_PORT = int(os.getenv("REDIS_PORT", "6379"))
|
||||||
|
|||||||
@@ -13,21 +13,19 @@ def convert_hwp_to_md(input_path: str, output_path: str):
|
|||||||
for doc in docs:
|
for doc in docs:
|
||||||
try:
|
try:
|
||||||
text = doc.page_content if hasattr(doc, "page_content") else str(doc)
|
text = doc.page_content if hasattr(doc, "page_content") else str(doc)
|
||||||
text = text.encode("utf-8", "ignore").decode(
|
text = text.encode("utf-8", "ignore").decode("utf-8")
|
||||||
"utf-8"
|
|
||||||
) # UTF-8로 변환하면서 깨진 문자 제거
|
|
||||||
docs_as_text.append(text)
|
docs_as_text.append(text)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"인코딩 변환 중 오류 발생: {e}")
|
print(f"인코딩 변환 중 오류 발생: {e}")
|
||||||
|
|
||||||
with open(output_path, "w", encoding="utf-8") as f:
|
with open(output_path, "w", encoding="utf-8") as f:
|
||||||
f.write("\n".join(docs_as_text)) # ✅ 변환된 리스트를 파일에 저장
|
f.write("\n".join(docs_as_text))
|
||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
def convert_to_md(input_path: str, output_path: str):
|
def convert_to_md(input_path: str, output_path: str):
|
||||||
md = MarkItDown(docintel_endpoint="<document_intelligence_endpoint>")
|
md = MarkItDown()
|
||||||
result = md.convert(input_path)
|
result = md.convert(input_path)
|
||||||
with open(output_path, "w", encoding="utf-8") as f:
|
with open(output_path, "w", encoding="utf-8") as f:
|
||||||
f.write(result.text_content)
|
f.write(result.text_content)
|
||||||
|
|||||||
@@ -49,7 +49,15 @@ async def convert_files():
|
|||||||
|
|
||||||
for file in files:
|
for file in files:
|
||||||
file_ext = file.split(".")[-1].lower()
|
file_ext = file.split(".")[-1].lower()
|
||||||
if file_ext in ["txt", "html", "docx", "pdf", "hwp"]:
|
if file_ext in [
|
||||||
|
"txt",
|
||||||
|
"html",
|
||||||
|
"pdf",
|
||||||
|
"hwp",
|
||||||
|
"pptx",
|
||||||
|
"xlsx",
|
||||||
|
"docx",
|
||||||
|
]:
|
||||||
task = {
|
task = {
|
||||||
"filename": file,
|
"filename": file,
|
||||||
"extension": file_ext,
|
"extension": file_ext,
|
||||||
|
|||||||
@@ -34,5 +34,5 @@ def process_task(task):
|
|||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
listen = ["task_queue1"]
|
listen = ["task_queue1"]
|
||||||
|
|
||||||
worker = Worker(listen, connection=redis_client) # 최신 방식
|
worker = Worker(listen, connection=redis_client)
|
||||||
worker.work()
|
worker.work()
|
||||||
|
|||||||
Reference in New Issue
Block a user