AI: Add "open-webui" service to compose.yaml (development only)
Signed-off-by: Michael Mayer <michael@photoprism.app>
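
Note: the services touched below are gated behind Compose profiles. As a rough usage sketch (profile names, hostnames, and the port mapping are taken from the configuration in this diff; adjust for your local setup):

    # start Ollama together with the new Open WebUI service (both are part of the "ollama" profile)
    docker compose --profile ollama up -d
    # Open WebUI is then reachable at https://chat.localssl.dev via Traefik,
    # or directly at http://127.0.0.1:8080 on the host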
@@ -153,7 +153,7 @@ services:
               capabilities: [gpu]
               count: "all"
 
-  ## Ollama Large-Language Model Runner (optional)
+  ## Ollama Large-Language Model Runner
   ## Run "ollama pull [name]:[version]" to download a vision model
   ## listed at <https://ollama.com/search?c=vision>, for example:
   ## docker compose exec ollama ollama pull gemma3:latest
@@ -161,13 +161,20 @@ services:
     image: ollama/ollama:latest
     restart: unless-stopped
     stop_grace_period: 15s
-    ## Only starts this service if the "vision" or "all" profile is specified::
-    ## docker compose --profile vision up -d
-    profiles: [ "all", "vision" ]
+    ## Only starts this service if the "all", "ollama", or "vision" profile is specified::
+    ## docker compose --profile ollama up -d
+    profiles: ["all", "ollama", "vision"]
     ## Insecurely exposes the Ollama service on port 11434
     ## without authentication (for private networks only):
     # ports:
     #   - "11434:11434"
+    labels:
+      - "traefik.enable=true"
+      - "traefik.docker.network=photoprism"
+      - "traefik.http.services.ollama.loadbalancer.server.port=11434"
+      - "traefik.http.routers.ollama.rule=Host(`ollama.localssl.dev`)"
+      - "traefik.http.routers.ollama.entrypoints=websecure"
+      - "traefik.http.routers.ollama.tls=true"
     environment:
       ## Ollama Configuration Options:
       OLLAMA_HOST: "0.0.0.0:11434"
@@ -191,7 +198,7 @@ services:
       NVIDIA_VISIBLE_DEVICES: "all"
       NVIDIA_DRIVER_CAPABILITIES: "compute,utility"
     volumes:
-      - "./storage/ollama:/root/.ollama"
+      - "./storage/services/ollama:/root/.ollama"
     ## NVIDIA GPU Hardware Acceleration (see https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html):
     deploy:
       resources:
@@ -209,6 +216,10 @@ services:
     extends:
       file: ./compose.yaml
       service: qdrant
+  open-webui:
+    extends:
+      file: ./compose.yaml
+      service: open-webui
   photoprism-vision:
     extends:
       file: ./compose.yaml
compose.yaml (138 changed lines)
@@ -199,15 +199,99 @@ services:
       - 6334
       - 6335
     volumes:
-      - ./.qdrant.yaml:/qdrant/config/production.yaml
-      - ./storage/qdrant:/qdrant/storage
+      - "./.qdrant.yaml:/qdrant/config/production.yaml"
+      - "./storage/services/qdrant:/qdrant/storage"
 
+  ## Ollama Large-Language Model Runner
+  ## Run "ollama pull [name]:[version]" to download a vision model
+  ## listed at <https://ollama.com/search?c=vision>, for example:
+  ## docker compose exec ollama ollama pull gemma3:latest
+  ollama:
+    image: ollama/ollama:latest
+    restart: unless-stopped
+    stop_grace_period: 10s
+    ## Only starts this service if the "all", "ollama", or "vision" profile is specified::
+    ## docker compose --profile ollama up -d
+    profiles: ["all", "ollama", "vision"]
+    ## Insecurely exposes the Ollama service on port 11434
+    ## without authentication (for private networks only):
+    # ports:
+    #   - "11434:11434"
+    labels:
+      - "traefik.enable=true"
+      - "traefik.docker.network=photoprism"
+      - "traefik.http.services.ollama.loadbalancer.server.port=11434"
+      - "traefik.http.routers.ollama.rule=Host(`ollama.localssl.dev`)"
+      - "traefik.http.routers.ollama.entrypoints=websecure"
+      - "traefik.http.routers.ollama.tls=true"
+    environment:
+      ## Ollama Configuration Options:
+      OLLAMA_HOST: "0.0.0.0:11434"
+      OLLAMA_MODELS: "/root/.ollama" # model storage path (see volumes section below)
+      OLLAMA_MAX_QUEUE: "100" # maximum number of queued requests
+      OLLAMA_NUM_PARALLEL: "1" # maximum number of parallel requests
+      OLLAMA_MAX_LOADED_MODELS: "1" # maximum number of loaded models per GPU
+      OLLAMA_LOAD_TIMEOUT: "5m" # maximum time for loading models (default "5m")
+      OLLAMA_KEEP_ALIVE: "5m" # duration that models stay loaded in memory (default "5m")
+      OLLAMA_CONTEXT_LENGTH: "4096" # maximum input context length
+      OLLAMA_MULTIUSER_CACHE: "false" # optimize prompt caching for multi-user scenarios
+      OLLAMA_NOPRUNE: "false" # disables pruning of model blobs at startup
+      OLLAMA_NOHISTORY: "true" # disables readline history
+      OLLAMA_FLASH_ATTENTION: "false" # enables the experimental flash attention feature
+      OLLAMA_KV_CACHE_TYPE: "f16" # cache quantization (f16, q8_0, or q4_0)
+      OLLAMA_SCHED_SPREAD: "false" # allows scheduling models across all GPUs.
+      OLLAMA_NEW_ENGINE: "true" # enables the new Ollama engine
+      # OLLAMA_DEBUG: "true" # shows additional debug information
+      # OLLAMA_INTEL_GPU: "true" # enables experimental Intel GPU detection
+      ## NVIDIA GPU Hardware Acceleration (optional):
+      # NVIDIA_VISIBLE_DEVICES: "all"
+      # NVIDIA_DRIVER_CAPABILITIES: "compute,utility"
+    volumes:
+      - "./storage/services/ollama:/root/.ollama"
+    ## NVIDIA GPU Hardware Acceleration (optional):
+    # deploy:
+    #   resources:
+    #     reservations:
+    #       devices:
+    #         - driver: "nvidia"
+    #           capabilities: [ gpu ]
+    #           count: "all"
+
+  ## Open WebUI, an extensible and user-friendly AI platform:
+  ## https://github.com/open-webui/open-webui
+  open-webui:
+    image: ghcr.io/open-webui/open-webui:main
+    restart: unless-stopped
+    stop_grace_period: 10s
+    ## Only starts this service if the "all", "ollama", "open-webui", or "vision" profile is specified::
+    ## docker compose --profile ollama up -d
+    profiles: [ "all", "ollama", "open-webui", "vision" ]
+    ports:
+      - "127.0.0.1:8080:8080"
+    labels:
+      - "traefik.enable=true"
+      - "traefik.docker.network=photoprism"
+      - "traefik.http.services.open-webui.loadbalancer.server.port=8080"
+      - "traefik.http.routers.open-webui.rule=Host(`chat.localssl.dev`) || Host(`open-webui.localssl.dev`) || Host(`ollama-ui.localssl.dev`)"
+      - "traefik.http.routers.open-webui.entrypoints=websecure"
+      - "traefik.http.routers.open-webui.tls=true"
+    environment:
+      WEBUI_URL: "https://chat.localssl.dev"
+      # WEBUI_SECRET_KEY: "AiBo5eeY3aeJami3ro7ahtohh6Xoh4fed8aid4feighaiYoa"
+      OLLAMA_BASE_URL: "http://ollama:11434"
+      ANONYMIZED_TELEMETRY: "false" # disable Chroma telemetry
+      HF_HUB_DISABLE_TELEMETRY: "1" # disable Hugging Face telemetry
+      # HUGGING_FACE_HUB_TOKEN: "" # see https://huggingface.co/docs/hub/en/security-tokens
+    volumes:
+      - "./storage/services/open-webui:/app/backend/data"
+
   ## PhotoPrism® Computer Vision API
   ## See: https://github.com/photoprism/photoprism-vision
   photoprism-vision:
     image: photoprism/vision:latest
     entrypoint: [ "/app/venv/bin/flask" ]
     command: [ "--app", "app", "run", "--debug", "--host", "0.0.0.0" ]
     ## Only starts this service if the "all" or "vision" profile is specified::
     ## docker compose --profile vision up -d
     profiles: ["all", "vision"]
     stop_grace_period: 15s
     working_dir: "/app"
@@ -233,54 +317,6 @@ services:
       OLLAMA_ENABLED: "true"
       OLLAMA_HOST: "http://ollama:11434"
 
-  ## Ollama Large-Language Model Runner (optional)
-  ## Run "ollama pull [name]:[version]" to download a vision model
-  ## listed at <https://ollama.com/search?c=vision>, for example:
-  ## docker compose exec ollama ollama pull gemma3:latest
-  ollama:
-    image: ollama/ollama:latest
-    restart: unless-stopped
-    stop_grace_period: 15s
-    ## Only starts this service if the "vision" or "all" profile is specified::
-    ## docker compose --profile vision up -d
-    profiles: ["all", "vision"]
-    ## Insecurely exposes the Ollama service on port 11434
-    ## without authentication (for private networks only):
-    # ports:
-    #   - "11434:11434"
-    environment:
-      ## Ollama Configuration Options:
-      OLLAMA_HOST: "0.0.0.0:11434"
-      OLLAMA_MODELS: "/root/.ollama" # model storage path (see volumes section below)
-      OLLAMA_MAX_QUEUE: "100" # maximum number of queued requests
-      OLLAMA_NUM_PARALLEL: "1" # maximum number of parallel requests
-      OLLAMA_MAX_LOADED_MODELS: "1" # maximum number of loaded models per GPU
-      OLLAMA_LOAD_TIMEOUT: "5m" # maximum time for loading models (default "5m")
-      OLLAMA_KEEP_ALIVE: "5m" # duration that models stay loaded in memory (default "5m")
-      OLLAMA_CONTEXT_LENGTH: "4096" # maximum input context length
-      OLLAMA_MULTIUSER_CACHE: "false" # optimize prompt caching for multi-user scenarios
-      OLLAMA_NOPRUNE: "false" # disables pruning of model blobs at startup
-      OLLAMA_NOHISTORY: "true" # disables readline history
-      OLLAMA_FLASH_ATTENTION: "false" # enables the experimental flash attention feature
-      OLLAMA_KV_CACHE_TYPE: "f16" # cache quantization (f16, q8_0, or q4_0)
-      OLLAMA_SCHED_SPREAD: "false" # allows scheduling models across all GPUs.
-      OLLAMA_NEW_ENGINE: "true" # enables the new Ollama engine
-      # OLLAMA_DEBUG: "true" # shows additional debug information
-      # OLLAMA_INTEL_GPU: "true" # enables experimental Intel GPU detection
-      ## NVIDIA GPU Hardware Acceleration (optional):
-      # NVIDIA_VISIBLE_DEVICES: "all"
-      # NVIDIA_DRIVER_CAPABILITIES: "compute,utility"
-    volumes:
-      - "./storage/ollama:/root/.ollama"
-    ## NVIDIA GPU Hardware Acceleration (optional):
-    # deploy:
-    #   resources:
-    #     reservations:
-    #       devices:
-    #         - driver: "nvidia"
-    #           capabilities: [ gpu ]
-    #           count: "all"
-
   ## Traefik v3 (Reverse Proxy)
   ## includes "*.localssl.dev" SSL certificate for test environments
   ## Docs: https://doc.traefik.io/traefik/
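
Once the stack is running, a vision model still has to be pulled into the Ollama service before PhotoPrism or Open WebUI can use it. A minimal example based on the command referenced in the comments above (gemma3 is just the model suggested there; any vision model listed at https://ollama.com/search?c=vision works):

    docker compose exec ollama ollama pull gemma3:latest
    docker compose exec ollama ollama list   # confirm the model is available; files land in ./storage/services/ollama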