AI: Rename vision.ApiRequestOptions to vision.ModelOptions

Signed-off-by: Michael Mayer <michael@photoprism.app>
Michael Mayer
2025-12-02 17:05:22 +01:00
parent 068d5dbfe5
commit d4aef5cf49
12 changed files with 86 additions and 74 deletions

View File

@@ -1,6 +1,6 @@
## PhotoPrism — Vision Package
**Last Updated:** November 25, 2025
**Last Updated:** December 2, 2025
### Overview
@@ -51,20 +51,29 @@ The `vision.yml` file is usually kept in the `storage/config` directory (overrid
#### Model Options
| Option | Default | Description |
|-------------------|-----------------------------------------------------------------------------------------|------------------------------------------------------------------------------------|
| `Temperature` | engine default (`0.1` for Ollama; unset for OpenAI) | Controls randomness; clamped to `[0,2]`. `gpt-5*` OpenAI models are forced to `0`. |
| `TopP` | engine default (`0.9` for some Ollama label defaults; unset for OpenAI) | Nucleus sampling parameter. |
| `MaxOutputTokens` | engine default (OpenAI caption 512, labels 1024; Ollama label default 256) | Upper bound on generated tokens; adapters raise low values to defaults. |
| `ForceJson` | engine-specific (`true` for OpenAI labels; `false` for Ollama labels; captions `false`) | Forces structured output when enabled. |
| `SchemaVersion` | derived from schema name | Override when coordinating schema migrations. |
| `Stop` | engine default | Array of stop sequences (e.g., `["\\n\\n"]`). |
| `NumThread` | runtime auto | Caps CPU threads for local engines. |
| `NumCtx` | engine default | Context window length (tokens). |
The model `Options` adjust parameters such as temperature, top-p, and schema constraints when using [Ollama](ollama/README.md) or [OpenAI](openai/README.md); see the serialization sketch after the table:
| Option | Default | Description |
|-------------------|-----------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------|
| `Temperature` | engine default (`0.1` for Ollama) | Controls randomness with a value between `0.01` and `2.0`; not used for OpenAI's GPT-5. |
| `TopK` | engine default (model-specific) | Limits sampling to the top K tokens to reduce rare or noisy outputs. |
| `TopP` | engine default (`0.9` for some Ollama label defaults; unset for OpenAI) | Nucleus sampling; keeps the smallest token set whose cumulative probability ≥ `p`. |
| `MinP` | engine default (unset unless provided) | Drops tokens whose probability mass is below `p`, trimming the long tail. |
| `TypicalP`        | engine default (unset unless provided)                                                    | Keeps tokens whose typicality is below the threshold; can be combined with TopP/MinP.    |
| `Seed`            | random per run (unless set)                                                               | Set a fixed value for reproducible outputs; leave unset for more variety between runs.   |
| `RepeatLastN` | engine default (model-specific) | Number of recent tokens considered for repetition penalties. |
| `RepeatPenalty` | engine default (model-specific) | Multiplier >1 discourages repeating the same tokens or phrases. |
| `NumPredict`      | engine default (Ollama only)                                                              | Ollama-specific cap on generated tokens; serves the same purpose as `MaxOutputTokens`.   |
| `MaxOutputTokens` | engine default (OpenAI caption 512, labels 1024) | Upper bound on generated tokens; adapters raise low values to defaults. |
| `ForceJson` | engine-specific (`true` for OpenAI labels; `false` for Ollama labels; captions `false`) | Forces structured output when enabled. |
| `SchemaVersion` | derived from schema name | Override when coordinating schema migrations. |
| `Stop` | engine default | Array of stop sequences (e.g., `["\\n\\n"]`). |
| `NumThread` | runtime auto | Caps CPU threads for local engines. |
| `NumCtx` | engine default | Context window length (tokens). |
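
For reference, a minimal sketch of how a configured `Options` block lands in an engine request payload, using a reduced copy of the `ModelOptions` struct introduced later in this commit (tags copied from it; the standalone program is illustrative only):

```go
package main

import (
	"encoding/json"
	"fmt"
)

// Reduced copy of vision.ModelOptions for illustration; the full struct
// appears later in this commit with identical tags on these fields.
type ModelOptions struct {
	Temperature float64  `yaml:"Temperature,omitempty" json:"temperature,omitempty"`
	TopP        float64  `yaml:"TopP,omitempty" json:"top_p,omitempty"`
	Seed        int      `yaml:"Seed,omitempty" json:"seed,omitempty"`
	Stop        []string `yaml:"Stop,omitempty" json:"stop,omitempty"`
}

func main() {
	opts := ModelOptions{Temperature: 0.1, TopP: 0.9, Stop: []string{"\n\n"}}

	// omitempty drops unset fields, so the zero-valued Seed never reaches the engine.
	out, _ := json.Marshal(opts)
	fmt.Println(string(out)) // {"temperature":0.1,"top_p":0.9,"stop":["\n\n"]}
}
```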
#### Model Service
Used for Ollama/OpenAI (and any future HTTP engines). All credentials and identifiers support `${ENV_VAR}` expansion.
Configures the endpoint URL, method, format, and authentication for [Ollama](ollama/README.md), [OpenAI](openai/README.md), and other engines that perform remote HTTP requests:
| Field | Default | Notes |
|------------------------------------|------------------------------------------|------------------------------------------------------|
@@ -78,6 +87,8 @@ Used for Ollama/OpenAI (and any future HTTP engines). All credentials and identi
| `FileScheme` | set by engine alias (`data` or `base64`) | Controls image transport. |
| `Disabled` | `false` | Disable the endpoint without removing the model. |
> **Authentication:** All credentials and identifiers support `${ENV_VAR}` expansion. `Service.Key` sets `Authorization: Bearer <token>`; `Username`/`Password` injects HTTP basic authentication into the service URI when it is not already present.
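
A sketch of that behavior under stated assumptions (`withBasicAuth` is a hypothetical helper named for this example; the real injection happens inside the vision package, which also expands `${ENV_VAR}` placeholders itself):

```go
package main

import (
	"fmt"
	"net/http"
	"net/url"
	"os"
)

// withBasicAuth adds Username/Password to the service URI only when it does
// not already carry user info, mirroring the documented behavior.
func withBasicAuth(rawURL, user, pass string) (string, error) {
	u, err := url.Parse(rawURL)
	if err != nil {
		return "", err
	}
	if u.User == nil && user != "" {
		u.User = url.UserPassword(user, pass)
	}
	return u.String(), nil
}

func main() {
	uri, _ := withBasicAuth("https://api.example.com/v1/chat", "alice", "secret")

	req, _ := http.NewRequest(http.MethodPost, uri, nil)
	// Service.Key becomes a bearer token header.
	req.Header.Set("Authorization", "Bearer "+os.Getenv("OPENAI_API_KEY"))

	fmt.Println(req.URL.String()) // https://alice:secret@api.example.com/v1/chat
}
```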
### Field Behavior & Precedence
- Model identifier resolution order: `Service.Model` → `Model` → `Name`. `Model.GetModel()` returns `(id, name, version)`, where Ollama receives `name:version` and other engines receive `name` plus a separate `Version`.
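
A simplified sketch of that precedence (`resolveModelID` is a hypothetical free function for illustration; the real logic is the `GetModel()` method on `vision.Model`):

```go
package main

import "fmt"

// resolveModelID mirrors the documented precedence: Service.Model, then
// Model, then Name, with Ollama receiving a combined "name:version" id.
func resolveModelID(serviceModel, model, name, version, engine string) string {
	id := serviceModel
	if id == "" {
		id = model
	}
	if id == "" {
		id = name
	}
	// Ollama expects "name:version"; other engines get the name plus a
	// separate Version field.
	if engine == "ollama" && version != "" {
		return id + ":" + version
	}
	return id
}

func main() {
	fmt.Println(resolveModelID("", "gemma3", "caption", "latest", "ollama")) // gemma3:latest
}
```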

View File

@@ -32,43 +32,6 @@ const (
logDataTruncatedSuffix = "... (truncated)"
)
// ApiRequestOptions represents additional model parameters listed in the documentation.
type ApiRequestOptions struct {
NumKeep int `yaml:"NumKeep,omitempty" json:"num_keep,omitempty"`
Seed int `yaml:"Seed,omitempty" json:"seed,omitempty"`
NumPredict int `yaml:"NumPredict,omitempty" json:"num_predict,omitempty"`
TopK int `yaml:"TopK,omitempty" json:"top_k,omitempty"`
TopP float64 `yaml:"TopP,omitempty" json:"top_p,omitempty"`
MinP float64 `yaml:"MinP,omitempty" json:"min_p,omitempty"`
TfsZ float64 `yaml:"TfsZ,omitempty" json:"tfs_z,omitempty"`
TypicalP float64 `yaml:"TypicalP,omitempty" json:"typical_p,omitempty"`
RepeatLastN int `yaml:"RepeatLastN,omitempty" json:"repeat_last_n,omitempty"`
Temperature float64 `yaml:"Temperature,omitempty" json:"temperature,omitempty"`
RepeatPenalty float64 `yaml:"RepeatPenalty,omitempty" json:"repeat_penalty,omitempty"`
PresencePenalty float64 `yaml:"PresencePenalty,omitempty" json:"presence_penalty,omitempty"`
FrequencyPenalty float64 `yaml:"FrequencyPenalty,omitempty" json:"frequency_penalty,omitempty"`
Mirostat int `yaml:"Mirostat,omitempty" json:"mirostat,omitempty"`
MirostatTau float64 `yaml:"MirostatTau,omitempty" json:"mirostat_tau,omitempty"`
MirostatEta float64 `yaml:"MirostatEta,omitempty" json:"mirostat_eta,omitempty"`
PenalizeNewline bool `yaml:"PenalizeNewline,omitempty" json:"penalize_newline,omitempty"`
Stop []string `yaml:"Stop,omitempty" json:"stop,omitempty"`
Numa bool `yaml:"Numa,omitempty" json:"numa,omitempty"`
NumCtx int `yaml:"NumCtx,omitempty" json:"num_ctx,omitempty"`
NumBatch int `yaml:"NumBatch,omitempty" json:"num_batch,omitempty"`
NumGpu int `yaml:"NumGpu,omitempty" json:"num_gpu,omitempty"`
MainGpu int `yaml:"MainGpu,omitempty" json:"main_gpu,omitempty"`
LowVram bool `yaml:"LowVram,omitempty" json:"low_vram,omitempty"`
VocabOnly bool `yaml:"VocabOnly,omitempty" json:"vocab_only,omitempty"`
UseMmap bool `yaml:"UseMmap,omitempty" json:"use_mmap,omitempty"`
UseMlock bool `yaml:"UseMlock,omitempty" json:"use_mlock,omitempty"`
NumThread int `yaml:"NumThread,omitempty" json:"num_thread,omitempty"`
MaxOutputTokens int `yaml:"MaxOutputTokens,omitempty" json:"max_output_tokens,omitempty"`
Detail string `yaml:"Detail,omitempty" json:"detail,omitempty"`
ForceJson bool `yaml:"ForceJson,omitempty" json:"force_json,omitempty"`
SchemaVersion string `yaml:"SchemaVersion,omitempty" json:"schema_version,omitempty"`
CombineOutputs string `yaml:"CombineOutputs,omitempty" json:"combine_outputs,omitempty"`
}
// ApiRequestContext represents a context parameter returned from a previous request.
type ApiRequestContext = []int
@@ -84,7 +47,7 @@ type ApiRequest struct {
Url string `form:"url" yaml:"Url,omitempty" json:"url,omitempty"`
Org string `form:"org" yaml:"Org,omitempty" json:"org,omitempty"`
Project string `form:"project" yaml:"Project,omitempty" json:"project,omitempty"`
Options *ApiRequestOptions `form:"options" yaml:"Options,omitempty" json:"options,omitempty"`
Options *ModelOptions `form:"options" yaml:"Options,omitempty" json:"options,omitempty"`
Context *ApiRequestContext `form:"context" yaml:"Context,omitempty" json:"context,omitempty"`
Stream bool `form:"stream" yaml:"Stream,omitempty" json:"stream"`
Images Files `form:"images" yaml:"Images,omitempty" json:"images,omitempty"`

View File

@@ -36,7 +36,7 @@ type EngineDefaults interface {
SystemPrompt(model *Model) string
UserPrompt(model *Model) string
SchemaTemplate(model *Model) string
Options(model *Model) *ApiRequestOptions
Options(model *Model) *ModelOptions
}
// Engine groups the callbacks required to integrate a third-party vision service.

View File

@@ -78,20 +78,20 @@ func (ollamaDefaults) SchemaTemplate(model *Model) string {
}
// Options returns the Ollama service request options.
func (ollamaDefaults) Options(model *Model) *ApiRequestOptions {
func (ollamaDefaults) Options(model *Model) *ModelOptions {
if model == nil {
return nil
}
switch model.Type {
case ModelTypeLabels:
return &ApiRequestOptions{
return &ModelOptions{
Temperature: DefaultTemperature,
TopP: 0.9,
Stop: []string{"\n\n"},
}
case ModelTypeCaption:
return &ApiRequestOptions{
return &ModelOptions{
Temperature: DefaultTemperature,
}
default:

View File

@@ -80,19 +80,19 @@ func (openaiDefaults) SchemaTemplate(model *Model) string {
}
// Options returns default OpenAI request options for the model.
func (openaiDefaults) Options(model *Model) *ApiRequestOptions {
func (openaiDefaults) Options(model *Model) *ModelOptions {
if model == nil {
return nil
}
switch model.Type {
case ModelTypeCaption:
return &ApiRequestOptions{
return &ModelOptions{
Detail: openai.DefaultDetail,
MaxOutputTokens: openai.CaptionMaxTokens,
}
case ModelTypeLabels:
return &ApiRequestOptions{
return &ModelOptions{
Detail: openai.DefaultDetail,
MaxOutputTokens: openai.LabelsMaxTokens,
ForceJson: true,

View File

@@ -40,7 +40,7 @@ func TestOpenAIBuilderBuildCaptionDisablesForceJSON(t *testing.T) {
Type: ModelTypeCaption,
Name: openai.DefaultModel,
Engine: openai.EngineName,
Options: &ApiRequestOptions{ForceJson: true},
Options: &ModelOptions{ForceJson: true},
}
model.ApplyEngineDefaults()
@@ -59,7 +59,7 @@ func TestApiRequestJSONForOpenAI(t *testing.T) {
Prompt: "describe the scene",
Images: []string{"data:image/jpeg;base64,AA=="},
ResponseFormat: ApiFormatOpenAI,
Options: &ApiRequestOptions{
Options: &ModelOptions{
Detail: openai.DefaultDetail,
MaxOutputTokens: 128,
Temperature: 0.2,
@@ -111,7 +111,7 @@ func TestApiRequestJSONForOpenAIDefaultSchemaName(t *testing.T) {
Model: "gpt-5-mini",
Images: []string{"data:image/jpeg;base64,AA=="},
ResponseFormat: ApiFormatOpenAI,
Options: &ApiRequestOptions{
Options: &ModelOptions{
Detail: openai.DefaultDetail,
MaxOutputTokens: 64,
ForceJson: true,
@@ -254,7 +254,7 @@ func TestPerformApiRequestOpenAISuccess(t *testing.T) {
Model: "gpt-5-mini",
Images: []string{"data:image/jpeg;base64,AA=="},
ResponseFormat: ApiFormatOpenAI,
Options: &ApiRequestOptions{
Options: &ModelOptions{
Detail: openai.DefaultDetail,
},
Schema: json.RawMessage(`{"type":"object"}`),
@@ -299,7 +299,7 @@ func TestPerformApiRequestOpenAITextFallback(t *testing.T) {
Model: "gpt-5-mini",
Images: []string{"data:image/jpeg;base64,AA=="},
ResponseFormat: ApiFormatOpenAI,
Options: &ApiRequestOptions{
Options: &ModelOptions{
Detail: openai.DefaultDetail,
},
Schema: nil,

View File

@@ -46,7 +46,7 @@ type Model struct {
SchemaFile string `yaml:"SchemaFile,omitempty" json:"schemaFile,omitempty"`
Resolution int `yaml:"Resolution,omitempty" json:"resolution,omitempty"`
TensorFlow *tensorflow.ModelInfo `yaml:"TensorFlow,omitempty" json:"tensorflow,omitempty"`
Options *ApiRequestOptions `yaml:"Options,omitempty" json:"options,omitempty"`
Options *ModelOptions `yaml:"Options,omitempty" json:"options,omitempty"`
Service Service `yaml:"Service,omitempty" json:"service,omitempty"`
Path string `yaml:"Path,omitempty" json:"-"`
Disabled bool `yaml:"Disabled,omitempty" json:"disabled,omitempty"`
@@ -334,12 +334,12 @@ func (m *Model) GetSource() string {
// GetOptions returns the API request options, applying engine defaults on
// demand. Nil receivers return nil.
func (m *Model) GetOptions() *ApiRequestOptions {
func (m *Model) GetOptions() *ModelOptions {
if m == nil {
return nil
}
var engineDefaults *ApiRequestOptions
var engineDefaults *ModelOptions
if defaults := m.engineDefaults(); defaults != nil {
engineDefaults = cloneOptions(defaults.Options(m))
}
@@ -348,7 +348,7 @@ func (m *Model) GetOptions() *ApiRequestOptions {
switch m.Type {
case ModelTypeLabels, ModelTypeCaption, ModelTypeGenerate:
if engineDefaults == nil {
engineDefaults = &ApiRequestOptions{}
engineDefaults = &ModelOptions{}
}
normalizeOptions(engineDefaults)
m.Options = engineDefaults
@@ -364,7 +364,7 @@ func (m *Model) GetOptions() *ApiRequestOptions {
return m.Options
}
func mergeOptionDefaults(target, defaults *ApiRequestOptions) {
func mergeOptionDefaults(target, defaults *ModelOptions) {
if target == nil || defaults == nil {
return
}
@@ -402,7 +402,7 @@ func mergeOptionDefaults(target, defaults *ApiRequestOptions) {
}
}
func normalizeOptions(opts *ApiRequestOptions) {
func normalizeOptions(opts *ModelOptions) {
if opts == nil {
return
}
@@ -412,7 +412,7 @@ func normalizeOptions(opts *ApiRequestOptions) {
}
}
func cloneOptions(opts *ApiRequestOptions) *ApiRequestOptions {
func cloneOptions(opts *ModelOptions) *ModelOptions {
if opts == nil {
return nil
}

View File

@@ -0,0 +1,38 @@
package vision
// ModelOptions represents additional model parameters listed in the documentation.
type ModelOptions struct {
NumKeep int `yaml:"NumKeep,omitempty" json:"num_keep,omitempty"` // Ollama ↓
Seed int `yaml:"Seed,omitempty" json:"seed,omitempty"`
NumPredict int `yaml:"NumPredict,omitempty" json:"num_predict,omitempty"`
Temperature float64 `yaml:"Temperature,omitempty" json:"temperature,omitempty"`
TopK int `yaml:"TopK,omitempty" json:"top_k,omitempty"`
TopP float64 `yaml:"TopP,omitempty" json:"top_p,omitempty"`
MinP float64 `yaml:"MinP,omitempty" json:"min_p,omitempty"`
TypicalP float64 `yaml:"TypicalP,omitempty" json:"typical_p,omitempty"`
TfsZ float64 `yaml:"TfsZ,omitempty" json:"tfs_z,omitempty"`
RepeatLastN int `yaml:"RepeatLastN,omitempty" json:"repeat_last_n,omitempty"`
RepeatPenalty float64 `yaml:"RepeatPenalty,omitempty" json:"repeat_penalty,omitempty"`
PresencePenalty float64 `yaml:"PresencePenalty,omitempty" json:"presence_penalty,omitempty"`
FrequencyPenalty float64 `yaml:"FrequencyPenalty,omitempty" json:"frequency_penalty,omitempty"`
Mirostat int `yaml:"Mirostat,omitempty" json:"mirostat,omitempty"`
MirostatTau float64 `yaml:"MirostatTau,omitempty" json:"mirostat_tau,omitempty"`
MirostatEta float64 `yaml:"MirostatEta,omitempty" json:"mirostat_eta,omitempty"`
PenalizeNewline bool `yaml:"PenalizeNewline,omitempty" json:"penalize_newline,omitempty"`
Stop []string `yaml:"Stop,omitempty" json:"stop,omitempty"`
Numa bool `yaml:"Numa,omitempty" json:"numa,omitempty"`
NumCtx int `yaml:"NumCtx,omitempty" json:"num_ctx,omitempty"`
NumBatch int `yaml:"NumBatch,omitempty" json:"num_batch,omitempty"`
NumGpu int `yaml:"NumGpu,omitempty" json:"num_gpu,omitempty"`
MainGpu int `yaml:"MainGpu,omitempty" json:"main_gpu,omitempty"`
LowVram bool `yaml:"LowVram,omitempty" json:"low_vram,omitempty"`
VocabOnly bool `yaml:"VocabOnly,omitempty" json:"vocab_only,omitempty"`
UseMmap bool `yaml:"UseMmap,omitempty" json:"use_mmap,omitempty"`
UseMlock bool `yaml:"UseMlock,omitempty" json:"use_mlock,omitempty"`
NumThread int `yaml:"NumThread,omitempty" json:"num_thread,omitempty"`
MaxOutputTokens int `yaml:"MaxOutputTokens,omitempty" json:"max_output_tokens,omitempty"` // OpenAI ↓
Detail string `yaml:"Detail,omitempty" json:"detail,omitempty"`
ForceJson bool `yaml:"ForceJson,omitempty" json:"force_json,omitempty"`
SchemaVersion string `yaml:"SchemaVersion,omitempty" json:"schema_version,omitempty"`
CombineOutputs string `yaml:"CombineOutputs,omitempty" json:"combine_outputs,omitempty"`
}
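
To see how a `vision.yml` `Options` block maps onto these YAML tags, a minimal sketch with a reduced copy of the fields above, assuming `gopkg.in/yaml.v3` (the YAML library PhotoPrism actually uses may differ):

```go
package main

import (
	"fmt"

	"gopkg.in/yaml.v3"
)

// Reduced copy of ModelOptions; tags match the struct above.
type ModelOptions struct {
	Temperature float64  `yaml:"Temperature,omitempty"`
	NumPredict  int      `yaml:"NumPredict,omitempty"`
	Stop        []string `yaml:"Stop,omitempty"`
}

func main() {
	doc := []byte("Temperature: 0.1\nNumPredict: 256\nStop: [\"\\n\\n\"]\n")

	var opts ModelOptions
	if err := yaml.Unmarshal(doc, &opts); err != nil {
		panic(err)
	}
	fmt.Printf("temp=%v predict=%v stop=%q\n", opts.Temperature, opts.NumPredict, opts.Stop)
	// temp=0.1 predict=256 stop=["\n\n"]
}
```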

View File

@@ -158,7 +158,7 @@ func TestModelGetOptionsRespectsCustomValues(t *testing.T) {
model := &Model{
Type: ModelTypeLabels,
Engine: ollama.EngineName,
Options: &ApiRequestOptions{
Options: &ModelOptions{
Temperature: 5,
TopP: 0.95,
Stop: []string{"CUSTOM"},
@@ -183,7 +183,7 @@ func TestModelGetOptionsFillsMissingFields(t *testing.T) {
model := &Model{
Type: ModelTypeLabels,
Engine: ollama.EngineName,
Options: &ApiRequestOptions{},
Options: &ModelOptions{},
}
model.ApplyEngineDefaults()

View File

@@ -89,7 +89,7 @@ var (
}
CaptionModel = &Model{
Type: ModelTypeCaption,
Name: ollama.CaptionModel,
Model: ollama.CaptionModel,
Version: VersionLatest,
Engine: ollama.EngineName,
Resolution: 720, // Original aspect ratio, with a max size of 720 x 720 pixels.

View File

@@ -4,7 +4,7 @@ const (
// CaptionPrompt instructs Ollama caption models to emit a single, active-voice sentence.
CaptionPrompt = "Create a caption with exactly one sentence in the active voice that describes the main visual content. Begin with the main subject and clear action. Avoid text formatting, meta-language, and filler words."
// CaptionModel names the default caption model bundled with our adapter defaults.
CaptionModel = "gemma3"
CaptionModel = "gemma3:latest"
// LabelConfidenceDefault is used when the model omits the confidence field.
LabelConfidenceDefault = 0.5
// LabelSystem defines the system prompt shared by Ollama label models. It aims to ensure that single-word nouns are returned.

View File

@@ -65,7 +65,7 @@ Models:
Name: embeddings
Outputs: 512
- Type: caption
Name: gemma3
Model: gemma3:latest
Version: latest
Engine: ollama
Resolution: 720