Mirror of https://github.com/photoprism/photoprism.git
Setup: Update Ollama service examples in compose.yaml files #5123
Signed-off-by: Michael Mayer <michael@photoprism.app>
@@ -179,7 +179,7 @@ services:
 OLLAMA_NOPRUNE: "false" # disables pruning of model blobs at startup
 OLLAMA_NOHISTORY: "true" # disables readline history
 OLLAMA_FLASH_ATTENTION: "false" # enables the experimental flash attention feature
-OLLAMA_KV_CACHE_TYPE: "f16" # see https://mitjamartini.com/blog/kv-cache-quantization-in-ollama/
+OLLAMA_KV_CACHE_TYPE: "f16" # cache quantization (f16, q8_0, or q4_0)
 OLLAMA_SCHED_SPREAD: "false" # allows scheduling models across all GPUs.
 OLLAMA_NEW_ENGINE: "true" # enables the new Ollama engine
 # OLLAMA_DEBUG: "true" # shows additional debug information
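In each file, the only change is the inline comment on OLLAMA_KV_CACHE_TYPE, which now lists the accepted values instead of linking to an external blog post. Since the side-by-side view loses the surrounding structure, here is a minimal sketch of a complete Ollama service block using these variables; the image name, port mapping, and volume are illustrative assumptions, not part of this commit:

services:
  ollama:
    image: ollama/ollama:latest        # assumption: stock Ollama image
    restart: unless-stopped
    ports:
      - "11434:11434"                  # assumption: default Ollama API port
    volumes:
      - "ollama:/root/.ollama"         # assumption: named volume for model blobs
    environment:
      OLLAMA_NOPRUNE: "false"          # disables pruning of model blobs at startup
      OLLAMA_NOHISTORY: "true"         # disables readline history
      OLLAMA_FLASH_ATTENTION: "false"  # enables the experimental flash attention feature
      OLLAMA_KV_CACHE_TYPE: "f16"      # cache quantization (f16, q8_0, or q4_0)
      OLLAMA_SCHED_SPREAD: "false"     # allows scheduling models across all GPUs
      OLLAMA_NEW_ENGINE: "true"        # enables the new Ollama engine
      # OLLAMA_DEBUG: "true"           # shows additional debug information

volumes:
  ollama: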
@@ -259,7 +259,7 @@ services:
 OLLAMA_NOPRUNE: "false" # disables pruning of model blobs at startup
 OLLAMA_NOHISTORY: "true" # disables readline history
 OLLAMA_FLASH_ATTENTION: "false" # enables the experimental flash attention feature
-OLLAMA_KV_CACHE_TYPE: "f16" # see https://mitjamartini.com/blog/kv-cache-quantization-in-ollama/
+OLLAMA_KV_CACHE_TYPE: "f16" # cache quantization (f16, q8_0, or q4_0)
 OLLAMA_SCHED_SPREAD: "false" # allows scheduling models across all GPUs.
 OLLAMA_NEW_ENGINE: "true" # enables the new Ollama engine
 # OLLAMA_DEBUG: "true" # shows additional debug information
@@ -181,7 +181,7 @@ services:
 OLLAMA_NOPRUNE: "false" # disables pruning of model blobs at startup
 OLLAMA_NOHISTORY: "true" # disables readline history
 OLLAMA_FLASH_ATTENTION: "false" # enables the experimental flash attention feature
-OLLAMA_KV_CACHE_TYPE: "f16" # see https://mitjamartini.com/blog/kv-cache-quantization-in-ollama/
+OLLAMA_KV_CACHE_TYPE: "f16" # cache quantization (f16, q8_0, or q4_0)
 OLLAMA_SCHED_SPREAD: "false" # allows scheduling models across all GPUs.
 OLLAMA_NEW_ENGINE: "true" # enables the new Ollama engine
 # OLLAMA_DEBUG: "true" # shows additional debug information
@@ -186,7 +186,7 @@ services:
 OLLAMA_NOPRUNE: "false" # disables pruning of model blobs at startup
 OLLAMA_NOHISTORY: "true" # disables readline history
 OLLAMA_FLASH_ATTENTION: "false" # enables the experimental flash attention feature
-OLLAMA_KV_CACHE_TYPE: "f16" # see https://mitjamartini.com/blog/kv-cache-quantization-in-ollama/
+OLLAMA_KV_CACHE_TYPE: "f16" # cache quantization (f16, q8_0, or q4_0)
 OLLAMA_SCHED_SPREAD: "false" # allows scheduling models across all GPUs.
 OLLAMA_NEW_ENGINE: "true" # enables the new Ollama engine
 # OLLAMA_DEBUG: "true" # shows additional debug information
@@ -186,7 +186,7 @@ services:
 OLLAMA_NOPRUNE: "false" # disables pruning of model blobs at startup
 OLLAMA_NOHISTORY: "true" # disables readline history
 OLLAMA_FLASH_ATTENTION: "false" # enables the experimental flash attention feature
-OLLAMA_KV_CACHE_TYPE: "f16" # see https://mitjamartini.com/blog/kv-cache-quantization-in-ollama/
+OLLAMA_KV_CACHE_TYPE: "f16" # cache quantization (f16, q8_0, or q4_0)
 OLLAMA_SCHED_SPREAD: "false" # allows scheduling models across all GPUs.
 OLLAMA_NEW_ENGINE: "true" # enables the new Ollama engine
 # OLLAMA_DEBUG: "true" # shows additional debug information
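With the accepted values now listed in the comment itself, switching to a quantized cache is a one-line edit. A sketch, not part of this commit; note that Ollama generally applies a quantized K/V cache type only when flash attention is enabled:

      OLLAMA_FLASH_ATTENTION: "true"   # quantized K/V cache types require flash attention
      OLLAMA_KV_CACHE_TYPE: "q8_0"     # 8-bit cache, roughly half the memory of f16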