mirror of
https://github.com/photoprism/photoprism.git
synced 2025-12-12 00:34:13 +01:00
AI: Generate Captions & Labels using the OpenAI Responses API #5322
Signed-off-by: Michael Mayer <michael@photoprism.app>
This commit is contained in:
@@ -11,6 +11,8 @@ import (
|
||||
|
||||
"github.com/sirupsen/logrus"
|
||||
|
||||
"github.com/photoprism/photoprism/internal/ai/vision/openai"
|
||||
"github.com/photoprism/photoprism/internal/ai/vision/schema"
|
||||
"github.com/photoprism/photoprism/internal/api/download"
|
||||
"github.com/photoprism/photoprism/pkg/clean"
|
||||
"github.com/photoprism/photoprism/pkg/fs"
|
||||
@@ -58,6 +60,11 @@ type ApiRequestOptions struct {
|
||||
UseMmap bool `yaml:"UseMmap,omitempty" json:"use_mmap,omitempty"`
|
||||
UseMlock bool `yaml:"UseMlock,omitempty" json:"use_mlock,omitempty"`
|
||||
NumThread int `yaml:"NumThread,omitempty" json:"num_thread,omitempty"`
|
||||
MaxOutputTokens int `yaml:"MaxOutputTokens,omitempty" json:"max_output_tokens,omitempty"`
|
||||
Detail string `yaml:"Detail,omitempty" json:"detail,omitempty"`
|
||||
ForceJson bool `yaml:"ForceJson,omitempty" json:"force_json,omitempty"`
|
||||
SchemaVersion string `yaml:"SchemaVersion,omitempty" json:"schema_version,omitempty"`
|
||||
CombineOutputs string `yaml:"CombineOutputs,omitempty" json:"combine_outputs,omitempty"`
|
||||
}
|
||||
|
||||
// ApiRequestContext represents a context parameter returned from a previous request.
|
||||
@@ -77,6 +84,7 @@ type ApiRequest struct {
|
||||
Context *ApiRequestContext `form:"context" yaml:"Context,omitempty" json:"context,omitempty"`
|
||||
Stream bool `form:"stream" yaml:"Stream,omitempty" json:"stream"`
|
||||
Images Files `form:"images" yaml:"Images,omitempty" json:"images,omitempty"`
|
||||
Schema json.RawMessage `form:"schema" yaml:"Schema,omitempty" json:"schema,omitempty"`
|
||||
ResponseFormat ApiFormat `form:"-" yaml:"-" json:"-"`
|
||||
}
|
||||
|
||||
@@ -195,6 +203,14 @@ func (r *ApiRequest) GetResponseFormat() ApiFormat {
|
||||
|
||||
// JSON returns the request data as JSON-encoded bytes.
|
||||
func (r *ApiRequest) JSON() ([]byte, error) {
|
||||
if r == nil {
|
||||
return nil, errors.New("api request is nil")
|
||||
}
|
||||
|
||||
if r.ResponseFormat == ApiFormatOpenAI {
|
||||
return r.openAIJSON()
|
||||
}
|
||||
|
||||
return json.Marshal(*r)
|
||||
}
|
||||
|
||||
@@ -229,6 +245,8 @@ func (r *ApiRequest) sanitizedForLog() ApiRequest {
|
||||
|
||||
sanitized.Url = sanitizeLogPayload(r.Url)
|
||||
|
||||
sanitized.Schema = r.Schema
|
||||
|
||||
return sanitized
|
||||
}
|
||||
|
||||
@@ -287,3 +305,134 @@ func isLikelyBase64(value string) bool {
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
// openAIJSON converts the request data into an OpenAI Responses API payload.
|
||||
func (r *ApiRequest) openAIJSON() ([]byte, error) {
|
||||
detail := openai.DefaultDetail
|
||||
|
||||
if opts := r.Options; opts != nil && strings.TrimSpace(opts.Detail) != "" {
|
||||
detail = strings.TrimSpace(opts.Detail)
|
||||
}
|
||||
|
||||
messages := make([]openai.InputMessage, 0, 2)
|
||||
|
||||
if system := strings.TrimSpace(r.System); system != "" {
|
||||
messages = append(messages, openai.InputMessage{
|
||||
Role: "system",
|
||||
Type: "message",
|
||||
Content: []openai.ContentItem{
|
||||
{
|
||||
Type: openai.ContentTypeText,
|
||||
Text: system,
|
||||
},
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
userContent := make([]openai.ContentItem, 0, len(r.Images)+1)
|
||||
|
||||
if prompt := strings.TrimSpace(r.Prompt); prompt != "" {
|
||||
userContent = append(userContent, openai.ContentItem{
|
||||
Type: openai.ContentTypeText,
|
||||
Text: prompt,
|
||||
})
|
||||
}
|
||||
|
||||
for _, img := range r.Images {
|
||||
if img == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
userContent = append(userContent, openai.ContentItem{
|
||||
Type: openai.ContentTypeImage,
|
||||
ImageURL: img,
|
||||
Detail: detail,
|
||||
})
|
||||
}
|
||||
|
||||
if len(userContent) > 0 {
|
||||
messages = append(messages, openai.InputMessage{
|
||||
Role: "user",
|
||||
Type: "message",
|
||||
Content: userContent,
|
||||
})
|
||||
}
|
||||
|
||||
if len(messages) == 0 {
|
||||
return nil, errors.New("openai request requires at least one message")
|
||||
}
|
||||
|
||||
payload := openai.HTTPRequest{
|
||||
Model: strings.TrimSpace(r.Model),
|
||||
Input: messages,
|
||||
}
|
||||
|
||||
if payload.Model == "" {
|
||||
payload.Model = openai.DefaultModel
|
||||
}
|
||||
|
||||
if strings.HasPrefix(strings.ToLower(payload.Model), "gpt-5") {
|
||||
payload.Reasoning = &openai.Reasoning{Effort: "low"}
|
||||
}
|
||||
|
||||
if opts := r.Options; opts != nil {
|
||||
if opts.MaxOutputTokens > 0 {
|
||||
payload.MaxOutputTokens = opts.MaxOutputTokens
|
||||
}
|
||||
|
||||
if opts.Temperature > 0 {
|
||||
payload.Temperature = opts.Temperature
|
||||
}
|
||||
|
||||
if opts.TopP > 0 {
|
||||
payload.TopP = opts.TopP
|
||||
}
|
||||
|
||||
if opts.PresencePenalty != 0 {
|
||||
payload.PresencePenalty = opts.PresencePenalty
|
||||
}
|
||||
|
||||
if opts.FrequencyPenalty != 0 {
|
||||
payload.FrequencyPenalty = opts.FrequencyPenalty
|
||||
}
|
||||
}
|
||||
|
||||
if format := buildOpenAIResponseFormat(r); format != nil {
|
||||
payload.Text = &openai.TextOptions{
|
||||
Format: format,
|
||||
}
|
||||
}
|
||||
|
||||
return json.Marshal(payload)
|
||||
}
|
||||
|
||||
// buildOpenAIResponseFormat determines which response_format to send to OpenAI.
|
||||
func buildOpenAIResponseFormat(r *ApiRequest) *openai.ResponseFormat {
|
||||
if r == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
opts := r.Options
|
||||
hasSchema := len(r.Schema) > 0
|
||||
|
||||
if !hasSchema && (opts == nil || !opts.ForceJson) {
|
||||
return nil
|
||||
}
|
||||
|
||||
result := &openai.ResponseFormat{}
|
||||
|
||||
if hasSchema {
|
||||
result.Type = openai.ResponseFormatJSONSchema
|
||||
result.Schema = r.Schema
|
||||
|
||||
if opts != nil && strings.TrimSpace(opts.SchemaVersion) != "" {
|
||||
result.Name = strings.TrimSpace(opts.SchemaVersion)
|
||||
} else {
|
||||
result.Name = schema.JsonSchemaName(r.Schema, openai.DefaultSchemaVersion)
|
||||
}
|
||||
} else {
|
||||
result.Type = openai.ResponseFormatJSONObject
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user