version: "3.8"

services:
  # FastAPI front end: serves the LLM over HTTP via uvicorn on port 8000.
  llm-asyncio:
    build:
      context: .
      dockerfile: Dockerfile
    shm_size: "1000gb"
    volumes:
      - ./workspace:/opt/workspace/
      - ./cache:/root/.cache/
      - ../model:/opt/model/
    environment:
      # Quoted so the embedded colon cannot be misread as a YAML mapping.
      PYTORCH_CUDA_ALLOC_CONF: "expandable_segments:True"
      PYTHONPATH: /opt/workspace/
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: "all"
              capabilities: [gpu]
    container_name: llm-asyncio
    ports:
      - "8000:8000"
    networks:
      - llm-network
    entrypoint: >
      /bin/bash -c "
      uvicorn main:app --reload --host 0.0.0.0 --port 8000
      "
    tty: true

  # Message broker shared by the API service and the workers.
  redis:
    image: redis:latest
    container_name: redis-server
    ports:
      # redis:latest listens on 6379 inside the container; expose it on host port 6380.
      - "6380:6379"
    restart: always
    networks:
      - llm-network

  # GPU inference workers; `scale: 2` starts two replicas of this service.
  worker:
    build:
      context: .
      dockerfile: Dockerfile
    shm_size: "1000gb"
    volumes:
      - ./workspace:/opt/workspace/
      - ./cache:/root/.cache/
      - ../model:/opt/model/
    environment:
      PYTORCH_CUDA_ALLOC_CONF: "expandable_segments:True"
      PYTHONPATH: /opt/workspace/
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: "all"
              capabilities: [gpu]
    networks:
      - llm-network
    entrypoint: >
      /bin/bash -c "
      python /opt/workspace/worker.py
      "
    restart: always
    tty: true
    scale: 2

networks:
  llm-network:
    driver: bridge
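# Usage: a sketch, assuming the Docker Compose v2 CLI and the NVIDIA
# Container Toolkit installed on the host (required for the `driver: nvidia`
# device reservations above); service names are the ones defined in this file.
#
#   docker compose up --build              # build images and start the stack
#   docker compose up -d --scale worker=4  # override the worker replica count
#   docker compose logs -f worker          # follow the workers' output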