Setup: Update Ollama service examples in compose.yaml files #5123

Signed-off-by: Michael Mayer <michael@photoprism.app>
This commit is contained in:
Michael Mayer
2025-09-01 16:03:27 +02:00
parent c3537b10e5
commit a0b44b2ca2
5 changed files with 5 additions and 5 deletions

View File

@@ -179,7 +179,7 @@ services:
OLLAMA_NOPRUNE: "false" # disables pruning of model blobs at startup
OLLAMA_NOHISTORY: "true" # disables readline history
OLLAMA_FLASH_ATTENTION: "false" # enables the experimental flash attention feature
-OLLAMA_KV_CACHE_TYPE: "f16" # see https://mitjamartini.com/blog/kv-cache-quantization-in-ollama/
+OLLAMA_KV_CACHE_TYPE: "f16" # cache quantization (f16, q8_0, or q4_0)
OLLAMA_SCHED_SPREAD: "false" # allows scheduling models across all GPUs.
OLLAMA_NEW_ENGINE: "true" # enables the new Ollama engine
# OLLAMA_DEBUG: "true" # shows additional debug information

View File

@@ -259,7 +259,7 @@ services:
OLLAMA_NOPRUNE: "false" # disables pruning of model blobs at startup
OLLAMA_NOHISTORY: "true" # disables readline history
OLLAMA_FLASH_ATTENTION: "false" # enables the experimental flash attention feature
-OLLAMA_KV_CACHE_TYPE: "f16" # see https://mitjamartini.com/blog/kv-cache-quantization-in-ollama/
+OLLAMA_KV_CACHE_TYPE: "f16" # cache quantization (f16, q8_0, or q4_0)
OLLAMA_SCHED_SPREAD: "false" # allows scheduling models across all GPUs.
OLLAMA_NEW_ENGINE: "true" # enables the new Ollama engine
# OLLAMA_DEBUG: "true" # shows additional debug information

View File

@@ -181,7 +181,7 @@ services:
OLLAMA_NOPRUNE: "false" # disables pruning of model blobs at startup
OLLAMA_NOHISTORY: "true" # disables readline history
OLLAMA_FLASH_ATTENTION: "false" # enables the experimental flash attention feature
-OLLAMA_KV_CACHE_TYPE: "f16" # see https://mitjamartini.com/blog/kv-cache-quantization-in-ollama/
+OLLAMA_KV_CACHE_TYPE: "f16" # cache quantization (f16, q8_0, or q4_0)
OLLAMA_SCHED_SPREAD: "false" # allows scheduling models across all GPUs.
OLLAMA_NEW_ENGINE: "true" # enables the new Ollama engine
# OLLAMA_DEBUG: "true" # shows additional debug information

View File

@@ -186,7 +186,7 @@ services:
OLLAMA_NOPRUNE: "false" # disables pruning of model blobs at startup
OLLAMA_NOHISTORY: "true" # disables readline history
OLLAMA_FLASH_ATTENTION: "false" # enables the experimental flash attention feature
-OLLAMA_KV_CACHE_TYPE: "f16" # see https://mitjamartini.com/blog/kv-cache-quantization-in-ollama/
+OLLAMA_KV_CACHE_TYPE: "f16" # cache quantization (f16, q8_0, or q4_0)
OLLAMA_SCHED_SPREAD: "false" # allows scheduling models across all GPUs.
OLLAMA_NEW_ENGINE: "true" # enables the new Ollama engine
# OLLAMA_DEBUG: "true" # shows additional debug information

View File

@@ -186,7 +186,7 @@ services:
OLLAMA_NOPRUNE: "false" # disables pruning of model blobs at startup
OLLAMA_NOHISTORY: "true" # disables readline history
OLLAMA_FLASH_ATTENTION: "false" # enables the experimental flash attention feature
-OLLAMA_KV_CACHE_TYPE: "f16" # see https://mitjamartini.com/blog/kv-cache-quantization-in-ollama/
+OLLAMA_KV_CACHE_TYPE: "f16" # cache quantization (f16, q8_0, or q4_0)
OLLAMA_SCHED_SPREAD: "false" # allows scheduling models across all GPUs.
OLLAMA_NEW_ENGINE: "true" # enables the new Ollama engine
# OLLAMA_DEBUG: "true" # shows additional debug information