diff --git a/.qdrant.yaml b/.qdrant.yaml
new file mode 100644
index 000000000..695eec9db
--- /dev/null
+++ b/.qdrant.yaml
@@ -0,0 +1,335 @@
+log_level: INFO
+
+# Logging configuration
+# Qdrant logs to stdout. You can also configure it to write logs to a file on disk.
+# Be aware that this file may grow indefinitely.
+# logger:
+#   # Logging format, supports `text` and `json`
+#   format: text
+#   on_disk:
+#     enabled: true
+#     log_file: path/to/log/file.log
+#     log_level: INFO
+#     # Logging format, supports `text` and `json`
+#     format: text
+
+storage:
+  # Where to store all the data
+  storage_path: ./storage
+
+  # Where to store snapshots
+  snapshots_path: ./snapshots
+
+  snapshots_config:
+    # "local" or "s3" - where to store snapshots
+    snapshots_storage: local
+    # s3_config:
+    #   bucket: ""
+    #   region: ""
+    #   access_key: ""
+    #   secret_key: ""
+
+  # Where to store temporary files
+  # If null, temporary snapshots are stored in: storage/snapshots_temp/
+  temp_path: null
+
+  # If true - point payloads will not be stored in memory.
+  # They will be read from disk every time they are requested.
+  # This setting saves RAM at the cost of (slightly) increased response time.
+  # Note: payload values that are involved in filtering and are indexed remain in RAM.
+  #
+  # Default: true
+  on_disk_payload: true
+
+  # Maximum number of concurrent updates to shard replicas
+  # If `null` - maximum concurrency is used.
+  update_concurrency: null
+
+  # Write-ahead-log related configuration
+  wal:
+    # Size of a single WAL segment
+    wal_capacity_mb: 32
+
+    # Number of WAL segments to create ahead of the actual data requirement
+    wal_segments_ahead: 0
+
+  # Normal node - receives all updates and answers all queries
+  node_type: "Normal"
+
+  # Listener node - receives all updates, but does not answer search/read queries
+  # Useful for setting up a dedicated backup node
+  # node_type: "Listener"
+
+  performance:
+    # Number of parallel threads used for search operations. If 0 - auto selection.
+    max_search_threads: 0
+
+    # Max number of threads (jobs) for running optimizations across all collections; each thread runs one job.
+    # If 0 - no limit, chosen dynamically to saturate the CPU.
+    # Note: each optimization job will also use `max_indexing_threads` threads by itself for index building.
+    max_optimization_threads: 0
+
+    # CPU budget: how many CPUs (threads) to allocate for an optimization job.
+    # If 0 - auto selection, keeping 1 or more CPUs unallocated depending on the number of CPUs.
+    # If negative - subtract this number of CPUs from the available CPUs.
+    # If positive - use this exact number of CPUs.
+    optimizer_cpu_budget: 0
+
+    # Prevent overload from too many concurrent updates in distributed mode.
+    # One external update usually triggers multiple internal updates, which breaks internal
+    # timings. For example, the health check timing and consensus timing.
+    # If null - auto selection.
+    update_rate_limit: null
+
+    # Limit on the number of incoming automatic shard transfers per collection on this node; does not affect user-requested transfers.
+    # The same value should be used on all nodes in a cluster.
+    # Default is to allow 1 transfer.
+    # If null - allow unlimited transfers.
+    #incoming_shard_transfers_limit: 1
+
+    # Limit on the number of outgoing automatic shard transfers per collection on this node; does not affect user-requested transfers.
+    # The same value should be used on all nodes in a cluster.
+    # Default is to allow 1 transfer.
+    # If null - allow unlimited transfers.
+    #outgoing_shard_transfers_limit: 1
+
+    # Enable async scorer, which uses io_uring when rescoring.
+    # Only supported on Linux; must be enabled in your kernel.
+    # See:
+    #async_scorer: false
+
+  optimizers:
+    # The minimal fraction of deleted vectors in a segment required to perform segment optimization
+    deleted_threshold: 0.2
+
+    # The minimal number of vectors in a segment required to perform segment optimization
+    vacuum_min_vector_number: 1000
+
+    # Target number of segments the optimizer will try to keep.
+    # The actual number of segments may vary depending on multiple parameters:
+    #  - Number of stored points
+    #  - Current write RPS
+    #
+    # It is recommended to select the default number of segments as a factor of the number of search threads,
+    # so that each segment is handled evenly by one of the threads.
+    # If `default_segment_number = 0`, it will be selected automatically based on the number of available CPUs.
+    default_segment_number: 0
+
+    # Do not create segments larger than this size (in KiloBytes).
+    # Large segments might require disproportionately long indexing times,
+    # so it makes sense to limit the size of segments.
+    #
+    # If indexing speed is a higher priority for you - make this parameter lower.
+    # If search speed is more important - make this parameter higher.
+    # Note: 1Kb = 1 vector of size 256
+    # If not set, it will be selected automatically considering the number of available CPUs.
+    max_segment_size_kb: null
+
+    # Maximum size (in KiloBytes) of vectors to store in-memory per segment.
+    # Segments larger than this threshold will be stored as a read-only memmapped file.
+    # To enable memmap storage, lower the threshold.
+    # Note: 1Kb = 1 vector of size 256
+    # To explicitly disable mmap optimization, set to `0`.
+    # If not set, it is disabled by default.
+    memmap_threshold_kb: null
+
+    # Maximum size (in KiloBytes) of vectors allowed for the plain index.
+    # Default value based on https://github.com/google-research/google-research/blob/master/scann/docs/algorithms.md
+    # Note: 1Kb = 1 vector of size 256
+    # To explicitly disable vector indexing, set to `0`.
+    # If not set, the default value will be used.
+    indexing_threshold_kb: 20000
+
+    # Interval between forced flushes.
+    flush_interval_sec: 5
+
+    # Max number of threads (jobs) for running optimizations per shard.
+    # Note: each optimization job will also use `max_indexing_threads` threads by itself for index building.
+    # If null - no limit, chosen dynamically to saturate the CPU.
+    # If 0 - no optimization threads; optimizations will be disabled.
+    max_optimization_threads: null
+
+  # This section has the same options as 'optimizers' above. All values specified here will overwrite the collections'
+  # optimizer configs, regardless of the config above and the options specified at collection creation.
+  #optimizers_overwrite:
+  #  deleted_threshold: 0.2
+  #  vacuum_min_vector_number: 1000
+  #  default_segment_number: 0
+  #  max_segment_size_kb: null
+  #  memmap_threshold_kb: null
+  #  indexing_threshold_kb: 20000
+  #  flush_interval_sec: 5
+  #  max_optimization_threads: null
+
+  # Default parameters of the HNSW index. Can be overridden for each collection or named vector individually.
+  hnsw_index:
+    # Number of edges per node in the index graph. The larger the value - the more accurate the search, the more space required.
+    m: 16
+
+    # Number of neighbours to consider during index building. The larger the value - the more accurate the search, the more time required to build the index.
+    ef_construct: 100
+
+    # Minimal size (in KiloBytes) of vectors for additional payload-based indexing.
+    # If the payload chunk is smaller than `full_scan_threshold_kb`, additional indexing won't be used -
+    # in this case, full-scan search should be preferred by the query planner and additional indexing is not required.
+    # Note: 1Kb = 1 vector of size 256
+    full_scan_threshold_kb: 10000
+
+    # Number of parallel threads used for background index building.
+    # If 0 - automatically select.
+    # Best kept between 8 and 16 to reduce the likelihood of building broken/inefficient HNSW graphs.
+    # On small CPUs, fewer threads are used.
+    max_indexing_threads: 0
+
+    # Store the HNSW index on disk. If set to false, the index will be stored in RAM. Default: false
+    on_disk: false
+
+    # Custom M param for the HNSW graph built for the payload index. If not set, the default M will be used.
+    payload_m: null
+
+  # Default shard transfer method to use if none is defined.
+  # If null - don't have a shard transfer preference, choose automatically.
+  # If stream_records, snapshot or wal_delta - prefer this specific method.
+  # More info: https://qdrant.tech/documentation/guides/distributed_deployment/#shard-transfer-method
+  shard_transfer_method: null
+
+  # Default parameters for collections
+  collection:
+    # Number of replicas of each shard that the network tries to maintain
+    replication_factor: 1
+
+    # How many replicas should apply the operation for us to consider it successful
+    write_consistency_factor: 1
+
+    # Default parameters for vectors.
+    vectors:
+      # Whether vectors should be stored in memory or on disk.
+      on_disk: null
+
+    # shard_number_per_node: 1
+
+    # Default quantization configuration.
+    # More info: https://qdrant.tech/documentation/guides/quantization
+    quantization: null
+
+    # Default strict mode parameters for newly created collections.
+    strict_mode:
+      # Whether strict mode is enabled for a collection or not.
+      enabled: false
+
+      # Max allowed `limit` parameter for all APIs that don't have their own max limit.
+      max_query_limit: null
+
+      # Max allowed `timeout` parameter.
+      max_timeout: null
+
+      # Allow usage of unindexed fields in retrieval-based (e.g. search) filters.
+      unindexed_filtering_retrieve: null
+
+      # Allow usage of unindexed fields in filtered updates (e.g. delete by payload).
+      unindexed_filtering_update: null
+
+      # Max HNSW `ef` value allowed in search parameters.
+      search_max_hnsw_ef: null
+
+      # Whether exact search is allowed or not.
+      search_allow_exact: null
+
+      # Max oversampling value allowed in search.
+      search_max_oversampling: null
+
+service:
+  # Maximum size of POST data in a single request in megabytes
+  max_request_size_mb: 32
+
+  # Number of parallel workers used for serving the API. If 0 - equal to the number of available cores.
+  # If missing - same as storage.max_search_threads
+  max_workers: 0
+
+  # Host to bind the service on
+  host: 0.0.0.0
+
+  # HTTP(S) port to bind the service on
+  http_port: 6333
+
+  # gRPC port to bind the service on.
+  # If `null` - gRPC is disabled. Default: null
+  # Comment out to disable gRPC:
+  grpc_port: 6334
+
+  # Enable CORS headers in the REST API.
+  # If enabled, browsers are allowed to query REST endpoints regardless of the query origin.
+  # More info: https://developer.mozilla.org/en-US/docs/Web/HTTP/CORS
+  # Default: true
+  enable_cors: true
+
+  # Enable HTTPS for the REST and gRPC API
+  enable_tls: false
+
+  # Check the user's HTTPS client certificate against the CA file specified in the tls config
+  verify_https_client_certificate: false
+
+  # Set an api-key.
+  # If set, all requests must include a header with the api-key.
+  # example header: `api-key: <API-KEY>`
+  #
+  # If you enable this, you should also enable TLS.
+  # (Either above or via an external service like nginx.)
+  # Sending an api-key over an unencrypted channel is insecure.
+  #
+  # Uncomment to enable.
+  # api_key: your_secret_api_key_here
+
+  # Set an api-key for read-only operations.
+  # If set, all requests must include a header with the api-key.
+  # example header: `api-key: <API-KEY>`
+  #
+  # If you enable this, you should also enable TLS.
+  # (Either above or via an external service like nginx.)
+  # Sending an api-key over an unencrypted channel is insecure.
+  #
+  # Uncomment to enable.
+  # read_only_api_key: your_secret_read_only_api_key_here
+
+  # Uncomment to enable JWT Role Based Access Control (RBAC).
+  # If enabled, you can generate JWT tokens with fine-grained rules for access control.
+  # Use the generated token instead of an API key.
+  #
+  # jwt_rbac: true
+
+  # Hardware reporting adds information to API responses indicating
+  # how many resources were used to execute the request.
+  #
+  # Uncomment to enable.
+  # hardware_reporting: true
+
+cluster:
+  # Use `enabled: true` to run Qdrant in distributed deployment mode
+  enabled: false
+
+  # Configuration of the inter-cluster communication
+  p2p:
+    # Port for internal communication between peers
+    port: 6335
+
+    # Use TLS for communication between peers
+    enable_tls: false
+
+  # Configuration related to the distributed consensus algorithm
+  consensus:
+    # How frequently peers should ping each other.
+    # Setting this parameter to a lower value allows consensus
+    # to detect disconnected nodes earlier, but an overly frequent
+    # tick period may create significant network and CPU overhead.
+    # We encourage you NOT to change this parameter unless you know what you are doing.
+    tick_period_ms: 100
+
+    # Compact consensus operations once we have this number of applied
+    # operations. Allows peers to join quickly with a consensus snapshot without
+    # replaying a huge number of operations.
+    # If 0 - disable compaction
+    compact_wal_entries: 128
+
+# Set to true to prevent the service from sending usage statistics to the developers.
+# Read more: https://qdrant.tech/documentation/guides/telemetry
+telemetry_disabled: true
diff --git a/compose.intel.yaml b/compose.intel.yaml
index 38bf47fe4..17f101b52 100644
--- a/compose.intel.yaml
+++ b/compose.intel.yaml
@@ -129,6 +129,10 @@ services:
     extends:
       file: ./compose.yaml
       service: mariadb
+  qdrant:
+    extends:
+      file: ./compose.yaml
+      service: qdrant
   traefik:
     extends:
       file: ./compose.yaml
diff --git a/compose.nvidia.yaml b/compose.nvidia.yaml
index 0b8eeefbb..1a62442ca 100644
--- a/compose.nvidia.yaml
+++ b/compose.nvidia.yaml
@@ -144,6 +144,10 @@ services:
     extends:
       file: ./compose.yaml
       service: mariadb
+  qdrant:
+    extends:
+      file: ./compose.yaml
+      service: qdrant
   traefik:
     extends:
       file: ./compose.yaml
diff --git a/compose.yaml b/compose.yaml
index 80f812c7c..d54d50ff7 100644
--- a/compose.yaml
+++ b/compose.yaml
@@ -164,6 +164,30 @@ services:
       MARIADB_PASSWORD: "photoprism"
       MARIADB_ROOT_PASSWORD: "photoprism"
 
+  ## Qdrant (Vector Database)
+  ## Docs: https://qdrant.tech/documentation/guides/installation/#docker-compose
+  ## Release Notes: https://github.com/qdrant/qdrant/releases
+  ## Web UI: https://qdrant.localssl.dev/dashboard
+  qdrant:
+    image: qdrant/qdrant:latest
+    labels:
+      - "traefik.enable=true"
+      - "traefik.http.services.qdrant.loadbalancer.server.port=6333"
+      - "traefik.http.services.qdrant.loadbalancer.server.scheme=http"
+      - "traefik.http.routers.qdrant.entrypoints=websecure"
+      - "traefik.http.routers.qdrant.rule=Host(`qdrant.localssl.dev`)"
+      - "traefik.http.routers.qdrant.priority=3"
+      - "traefik.http.routers.qdrant.tls.domains[0].main=localssl.dev"
+      - "traefik.http.routers.qdrant.tls.domains[0].sans=*.localssl.dev"
+      - "traefik.http.routers.qdrant.tls=true"
+    expose:
+      - 6333
+      - 6334
+      - 6335
+    volumes:
+      - ./.qdrant.yaml:/qdrant/config/production.yaml
+      - ./storage/qdrant:/qdrant/storage
+
   ## Traefik v3 (Reverse Proxy)
   ## includes "*.localssl.dev" SSL certificate for test environments
   ## Docs: https://doc.traefik.io/traefik/
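
Note that the new `qdrant` service defines no healthcheck, so other services cannot reliably wait for it to become ready. Below is a minimal sketch of one way to add this, not part of the diff above. It assumes the `qdrant/qdrant` image is Debian-based and ships `bash` but not `curl`/`wget`, so it probes the REST port via bash's `/dev/tcp` redirection; the interval and `start_period` values are illustrative choices, not recommendations from upstream:

```yaml
  qdrant:
    image: qdrant/qdrant:latest  # consider pinning a release tag from the Release Notes link above
    healthcheck:
      # Succeeds once the REST port (6333) accepts TCP connections.
      # Assumes bash is present in the image; adjust if the base image changes.
      test: ["CMD", "bash", "-c", "exec 3<>/dev/tcp/127.0.0.1/6333"]
      interval: 10s
      timeout: 5s
      retries: 5
      start_period: 20s
```

With the healthcheck in place, a dependent service can gate its startup on it via `depends_on: { qdrant: { condition: service_healthy } }`.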
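Also worth noting: the new `.qdrant.yaml` vendors the entire upstream config file. If the intent is only to change a few defaults (here, telemetry and on-disk payloads), a trimmed file should behave the same, assuming Qdrant layers the mounted `config/production.yaml` over the `config/config.yaml` defaults shipped in the image, which is how its config loading is documented to work. A hypothetical minimal version, easier to diff against future upstream changes:

```yaml
# Minimal .qdrant.yaml sketch: assumes Qdrant merges this file over the
# defaults shipped in the image, so only values that differ from those
# defaults need to be listed here.
log_level: INFO
storage:
  on_disk_payload: true
service:
  enable_cors: true
telemetry_disabled: true
```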