version: '3.8'

services:
  call-center-ai:
    build: .
    container_name: call_center_engine
    restart: always
    ports:
      - "8000:8000"
    environment:
      # Team will paste the token here or in a .env file
      - HF_TOKEN=${HF_TOKEN}
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    volumes:
      # Optional: Persist models so they don't re-download on restart
      - huggingface_cache:/root/.cache/huggingface

volumes:
  huggingface_cache:
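
# Minimal usage sketch, assuming Docker Compose v2 and the NVIDIA Container
# Toolkit are installed on the host. The token value below is a placeholder,
# not a real credential:
#
#   echo "HF_TOKEN=<your Hugging Face token>" > .env   # or export HF_TOKEN in the shell
#   docker compose up -d --build                       # build the image and start the service
#   docker compose logs -f call-center-ai              # follow the engine logs
#
# The named volume keeps the Hugging Face cache across container restarts,
# so models are only downloaded once.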