Removed parameter Channels

The channel count seems to be standardized, so it is now used as an additional check for input signatures.
2025-12-12 00:34:13 +01:00 · 2025-04-16 08:19:58 +00:00
parent eca0bc5205
commit d082929dee
7 changed files with 37 additions and 33 deletions
--- a/internal/ai/classify/model.go
+++ b/internal/ai/classify/model.go
@@ -53,10 +53,9 @@ func NewNasnet(assetsPath string, disabled bool) *Model {
 		TFVersion: "1.12.0",
 		Tags:      []string{"photoprism"},
 		Input: &tensorflow.PhotoInput{
-			Name:     "input_1",
-			Height:   224,
-			Width:    224,
-			Channels: 3,
+			Name:   "input_1",
+			Height: 224,
+			Width:  224,
 			Interval: &tensorflow.Interval{
 				Start: -1,
 				End:   1,
--- a/internal/ai/classify/model_external_test.go
+++ b/internal/ai/classify/model_external_test.go
@@ -35,9 +35,8 @@ var modelsInfo = map[string]*tensorflow.ModelInfo{
 	},
 	"efficientnet-v2-tensorflow2-imagenet1k-m-classification-v2.tar.gz": &tensorflow.ModelInfo{
 		Input: &tensorflow.PhotoInput{
-			Height:   480,
-			Width:    480,
-			Channels: 3,
+			Height: 480,
+			Width:  480,
 		},
 		Output: &tensorflow.ModelOutput{
 			OutputsLogits: true,
@@ -50,9 +49,8 @@ var modelsInfo = map[string]*tensorflow.ModelInfo{
 	},
 	"inception-v3-tensorflow2-classification-v2.tar.gz": &tensorflow.ModelInfo{
 		Input: &tensorflow.PhotoInput{
-			Height:   299,
-			Width:    299,
-			Channels: 3,
+			Height: 299,
+			Width:  299,
 		},
 		Output: &tensorflow.ModelOutput{
 			OutputsLogits: true,
--- a/internal/ai/tensorflow/image_test.go
+++ b/internal/ai/tensorflow/image_test.go
@@ -11,9 +11,8 @@ import (
 )

 var defaultImageInput = &PhotoInput{
-	Height:   224,
-	Width:    224,
-	Channels: 3,
+	Height: 224,
+	Width:  224,
 }

 func TestConvertValue(t *testing.T) {
--- a/internal/ai/tensorflow/info.go
+++ b/internal/ai/tensorflow/info.go
@@ -11,6 +11,11 @@ import (
 	"google.golang.org/protobuf/proto"
 )

+// ExpectedChannels is the number of channels expected. This is a fixed value
+// because there seems to be a standard for input images defined as "what
+// decodeImage returns".
+const ExpectedChannels = 3
+
 // Interval of allowed values
 type Interval struct {
 	Start float32 `yaml:"Start,omitempty" json:"start,omitempty"`
@@ -37,7 +42,6 @@ type PhotoInput struct {
 	OutputIndex int       `yaml:"Index,omitempty" json:"index,omitempty"`
 	Height      int64     `yaml:"Height,omitempty" json:"height,omitempty"`
 	Width       int64     `yaml:"Width,omitempty" json:"width,omitempty"`
-	Channels    int64     `yaml:"Channels,omitempty" json:"channels,omitempty"`
 }

 // When dimensions are not defined, it means the model accepts any size of
@@ -87,10 +91,6 @@ func (p *PhotoInput) Merge(other *PhotoInput) {
 	if p.Width == 0 {
 		p.Width = other.Width
 	}
-
-	if p.Channels == 0 {
-		p.Channels = other.Channels
-	}
 }

 // The output expected for a model
@@ -166,7 +166,7 @@ func GetInputAndOutputFromMetaSignature(meta *pb.MetaGraphDef) (*PhotoInput, *Mo
 	}

 	sig := meta.GetSignatureDef()
-	for _, v := range sig {
+	for k, v := range sig {
 		inputs := v.GetInputs()
 		outputs := v.GetOutputs()

@@ -179,7 +179,14 @@ func GetInputAndOutputFromMetaSignature(meta *pb.MetaGraphDef) (*PhotoInput, *Mo
 				inputDims := (*inputTensor).GetTensorShape().Dim
 				outputDims := (*outputTensor).GetTensorShape().Dim

-				if len(inputDims) == 4 && len(outputDims) == 2 {
+				if inputDims[3].GetSize() != ExpectedChannels {
+					log.Warnf("tensorflow: skipping signature %v because channels are expected to be %d, have %d",
+						k, ExpectedChannels, inputDims[3].GetSize())
+				}
+
+				if len(inputDims) == 4 &&
+					inputDims[3].GetSize() == ExpectedChannels &&
+					len(outputDims) == 2 {
 					var err error
 					var inputIdx, outputIdx = 0, 0

@@ -206,7 +213,6 @@ func GetInputAndOutputFromMetaSignature(meta *pb.MetaGraphDef) (*PhotoInput, *Mo
 							OutputIndex: inputIdx,
 							Height:      inputDims[1].GetSize(),
 							Width:       inputDims[2].GetSize(),
-							Channels:    inputDims[3].GetSize(),
 						}, &ModelOutput{
 							Name:          outputName,
 							OutputIndex:   outputIdx,
--- a/internal/ai/tensorflow/model.go
+++ b/internal/ai/tensorflow/model.go
@@ -28,13 +28,15 @@ func GuessInputAndOutput(model *tf.SavedModel) (input *PhotoInput, output *Model
 	modelOps := model.Graph.Operations()

 	for i := range modelOps {
-		if strings.HasPrefix(modelOps[i].Type(), "Placeholder") && modelOps[i].NumOutputs() == 1 && modelOps[i].Output(0).Shape().NumDimensions() == 4 {
+		if strings.HasPrefix(modelOps[i].Type(), "Placeholder") &&
+			modelOps[i].NumOutputs() == 1 &&
+			modelOps[i].Output(0).Shape().NumDimensions() == 4 &&
+			modelOps[i].Output(0).Shape().Size(3) == ExpectedChannels { // check the channels are 3
 			shape := modelOps[i].Output(0).Shape()
 			input = &PhotoInput{
-				Name:     modelOps[i].Name(),
-				Height:   shape.Size(1),
-				Width:    shape.Size(2),
-				Channels: shape.Size(3),
+				Name:   modelOps[i].Name(),
+				Height: shape.Size(1),
+				Width:  shape.Size(2),
 			}
 		} else if (modelOps[i].Type() == "Softmax" || strings.HasPrefix(modelOps[i].Type(), "StatefulPartitionedCall")) &&
 			modelOps[i].NumOutputs() == 1 && modelOps[i].Output(0).Shape().NumDimensions() == 2 {
@@ -57,7 +59,7 @@ func GetInputAndOutputFromSavedModel(model *tf.SavedModel) (*PhotoInput, *ModelO
 		return nil, nil, fmt.Errorf("GetInputAndOutputFromSavedModel: nil input")
 	}

-	for _, v := range model.Signatures {
+	for k, v := range model.Signatures {
 		inputs := v.Inputs
 		outputs := v.Outputs

@@ -66,7 +68,13 @@ func GetInputAndOutputFromSavedModel(model *tf.SavedModel) (*PhotoInput, *ModelO
 			outputVarName, outputTensor := GetOne(outputs)

 			if inputTensor != nil && outputTensor != nil {
+				if inputTensor.Shape.Size(3) != ExpectedChannels {
+					log.Warnf("tensorflow: skipping signature %v because channels are expected to be %d, have %d",
+						k, ExpectedChannels, inputTensor.Shape.Size(3))
+				}
+
 				if inputTensor.Shape.NumDimensions() == 4 &&
+					inputTensor.Shape.Size(3) == ExpectedChannels &&
 					outputTensor.Shape.NumDimensions() == 2 {
 					var inputIdx, outputIdx = 0, 0
 					var err error
@@ -92,7 +100,6 @@ func GetInputAndOutputFromSavedModel(model *tf.SavedModel) (*PhotoInput, *ModelO
 							OutputIndex: inputIdx,
 							Height:      inputTensor.Shape.Size(1),
 							Width:       inputTensor.Shape.Size(2),
-							Channels:    inputTensor.Shape.Size(3),
 						}, &ModelOutput{
 							Name:          outputName,
 							OutputIndex:   outputIdx,
--- a/internal/ai/vision/model.go
+++ b/internal/ai/vision/model.go
@@ -128,7 +128,6 @@ func (m *Model) ClassifyModel() *classify.Model {
 		}

 		m.Meta.Input.SetResolution(m.Resolution)
-		m.Meta.Input.Channels = 3

 		// Try to load custom model based on the configuration values.
 		defaultPath := filepath.Join(AssetsPath, "nasnet")
@@ -247,7 +246,6 @@ func (m *Model) NsfwModel() *nsfw.Model {
 		}

 		m.Meta.Input.SetResolution(m.Resolution)
-		m.Meta.Input.Channels = 3

 		if m.Meta == nil {
 			m.Meta = &tensorflow.ModelInfo{}
--- a/internal/ai/vision/models.go
+++ b/internal/ai/vision/models.go
@@ -23,7 +23,6 @@ var (
 					Start: -1.0,
 					End:   1.0,
 				},
-				Channels:    3,
 				OutputIndex: 0,
 			},
 			Output: &tensorflow.ModelOutput{
@@ -46,7 +45,6 @@ var (
 				Name:        "input_tensor",
 				Height:      224,
 				Width:       224,
-				Channels:    3,
 				OutputIndex: 0,
 			},
 			Output: &tensorflow.ModelOutput{
@@ -69,7 +67,6 @@ var (
 				Name:        "input",
 				Height:      160,
 				Width:       160,
-				Channels:    3,
 				OutputIndex: 0,
 			},
 			Output: &tensorflow.ModelOutput{