---
# Docker Compose stack: FastAPI LLM service (llm-asyncio), a Redis broker,
# and a pool of GPU worker processes, all on a private bridge network.
# NOTE: the `version` key is ignored by modern Docker Compose but kept for
# backward compatibility with older engines.
version: "3.8"

services:
  llm-asyncio:
    build:
      context: .
      dockerfile: Dockerfile
    # Very large /dev/shm for inter-process tensor sharing.
    # NOTE(review): "1000gb" looks like an effectively-unlimited sentinel;
    # confirm the host actually has this much memory or lower it.
    shm_size: "1000gb"
    volumes:
      - ./workspace:/opt/workspace/
      - ./cache:/root/.cache/
      - ../model:/opt/model/
    environment:
      # Quoted: the value contains a ':' and a boolean-looking token, both of
      # which are unsafe as a plain YAML scalar.
      PYTORCH_CUDA_ALLOC_CONF: "expandable_segments:True"
      PYTHONPATH: /opt/workspace/
    deploy:
      resources:
        reservations:
          devices:
            # Expose all host NVIDIA GPUs to the container.
            - driver: nvidia
              count: "all"
              capabilities: [gpu]
    container_name: llm-asyncio
    ports:
      - "8000:8000"
    networks:
      - llm-network
    # --reload enables auto-restart on code changes (development setting;
    # remove for production deployments).
    entrypoint: >
      /bin/bash -c "
      uvicorn main:app --reload --host 0.0.0.0 --port 8000
      "
    tty: true

  redis:
    image: redis:latest
    container_name: redis-server
    ports:
      # The redis image listens on 6379 inside the container; the original
      # mapping "6380:6380" pointed at a port nothing listens on.
      # Host port 6380 is preserved.
      - "6380:6379"
    restart: always
    networks:
      - llm-network

  worker:
    build:
      context: .
      dockerfile: Dockerfile
    # See note on llm-asyncio.shm_size above.
    shm_size: "1000gb"
    volumes:
      - ./workspace:/opt/workspace/
      - ./cache:/root/.cache/
      - ../model:/opt/model/
    environment:
      PYTORCH_CUDA_ALLOC_CONF: "expandable_segments:True"
      PYTHONPATH: /opt/workspace/
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: "all"
              capabilities: [gpu]
    networks:
      - llm-network
    entrypoint: >
      /bin/bash -c "
      python /opt/workspace/worker.py
      "
    restart: always
    tty: true
    # Two worker replicas; no container_name is set (correct — a fixed name
    # would conflict with scaling). `scale` is honored by `docker compose`;
    # Swarm deployments would use deploy.replicas instead.
    scale: 2

networks:
  llm-network:
    driver: bridge