services:
  ollama-demand:
    image: ollama/ollama:latest
    container_name: ollama-demand
    ports:
      - "11435:11434"          # port for chat / RAG
    environment:
      - OLLAMA_HOST=0.0.0.0    # 👈 important so the container accepts external connections
    runtime: nvidia
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    # Mount the models into the container's home directory so Ollama detects them
    volumes:
      # 📂 Ollama models (shared between the two Ollama containers)
      - /usr/share/ollama/.ollama/models:/root/.ollama/models:ro
    command: serve

  ollama-batch:
    image: ollama/ollama:latest
    container_name: ollama-batch
    ports:
      - "11436:11434"          # host port 11436 → container port 11434
    environment:
      - OLLAMA_HOST=0.0.0.0    # 👈 important so the container accepts external connections
    runtime: nvidia
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    volumes:
      # 📂 Ollama models (shared between the two Ollama containers)
      - /usr/share/ollama/.ollama/models:/root/.ollama/models:ro
    command: serve

  fooocus-api:
    image: konieshadow/fooocus-api:latest
    container_name: fooocus-api
    ports:
      - "8888:8888"
    environment:
      # GPU config
      NVIDIA_VISIBLE_DEVICES: all
      NVIDIA_DRIVER_CAPABILITIES: compute,utility
    runtime: nvidia
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    volumes:
      # 📂 fooocus-api models
      - ./models/checkpoints:/app/models/checkpoints
      - ./models/vae:/app/models/vae
      - ./models/upscale_models:/app/models/upscale_models
      - ./models/loras:/app/models/loras
      # 📂 fooocus-api outputs
      - ./outputs:/app/outputs
    command: python3 main.py --host 0.0.0.0 --port 8888 --skip-pip
    restart: unless-stopped
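Once the stack is up (docker compose up -d), a quick sanity check is to hit each service on its mapped host port: the on-demand Ollama instance on 11435, the batch instance on 11436, and fooocus-api on 8888. Below is a minimal Python sketch of that check; the model name "llama3" is an assumption (use any model already present under /usr/share/ollama/.ollama/models on the host), and the /docs check assumes fooocus-api's usual FastAPI documentation page.

import requests

# On-demand instance (chat / RAG), mapped to host port 11435.
# /api/generate with "stream": False returns a single JSON object.
resp = requests.post(
    "http://localhost:11435/api/generate",
    json={"model": "llama3", "prompt": "Say hello in one word.", "stream": False},
    timeout=120,
)
print(resp.json()["response"])

# Batch instance, same API, separate container on host port 11436.
resp = requests.post(
    "http://localhost:11436/api/generate",
    json={"model": "llama3", "prompt": "Summarize: Docker Compose runs multi-container apps.", "stream": False},
    timeout=300,
)
print(resp.json()["response"])

# fooocus-api: its documentation page should answer once the service is ready.
print(requests.get("http://localhost:8888/docs", timeout=30).status_code)

Because both Ollama containers mount the host model directory read-only, the same models are visible to each instance without duplicating the files; only the GPU reservation and the host port differ between the two.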