Removed parameter Channels

It seems to be standardized, so it is now used as an additional check for
input signatures.
This commit is contained in:
raystlin
2025-04-16 08:19:58 +00:00
parent eca0bc5205
commit d082929dee
7 changed files with 37 additions and 33 deletions

View File

@@ -53,10 +53,9 @@ func NewNasnet(assetsPath string, disabled bool) *Model {
TFVersion: "1.12.0",
Tags: []string{"photoprism"},
Input: &tensorflow.PhotoInput{
Name: "input_1",
Height: 224,
Width: 224,
Channels: 3,
Name: "input_1",
Height: 224,
Width: 224,
Interval: &tensorflow.Interval{
Start: -1,
End: 1,

View File

@@ -35,9 +35,8 @@ var modelsInfo = map[string]*tensorflow.ModelInfo{
},
"efficientnet-v2-tensorflow2-imagenet1k-m-classification-v2.tar.gz": &tensorflow.ModelInfo{
Input: &tensorflow.PhotoInput{
Height: 480,
Width: 480,
Channels: 3,
Height: 480,
Width: 480,
},
Output: &tensorflow.ModelOutput{
OutputsLogits: true,
@@ -50,9 +49,8 @@ var modelsInfo = map[string]*tensorflow.ModelInfo{
},
"inception-v3-tensorflow2-classification-v2.tar.gz": &tensorflow.ModelInfo{
Input: &tensorflow.PhotoInput{
Height: 299,
Width: 299,
Channels: 3,
Height: 299,
Width: 299,
},
Output: &tensorflow.ModelOutput{
OutputsLogits: true,

View File

@@ -11,9 +11,8 @@ import (
)
var defaultImageInput = &PhotoInput{
Height: 224,
Width: 224,
Channels: 3,
Height: 224,
Width: 224,
}
func TestConvertValue(t *testing.T) {

View File

@@ -11,6 +11,11 @@ import (
"google.golang.org/protobuf/proto"
)
// ExpectedChannels is the number of channels expected. This is a fixed
// value because there seems to be a standard for input images defined as
// "What decodeImage returns".
const ExpectedChannels = 3
// Interval of allowed values
type Interval struct {
Start float32 `yaml:"Start,omitempty" json:"start,omitempty"`
@@ -37,7 +42,6 @@ type PhotoInput struct {
OutputIndex int `yaml:"Index,omitempty" json:"index,omitempty"`
Height int64 `yaml:"Height,omitempty" json:"height,omitempty"`
Width int64 `yaml:"Width,omitempty" json:"width,omitempty"`
Channels int64 `yaml:"Channels,omitempty" json:"channels,omitempty"`
}
// When dimensions are not defined, it means the model accepts any size of
@@ -87,10 +91,6 @@ func (p *PhotoInput) Merge(other *PhotoInput) {
if p.Width == 0 {
p.Width = other.Width
}
if p.Channels == 0 {
p.Channels = other.Channels
}
}
// The output expected for a model
@@ -166,7 +166,7 @@ func GetInputAndOutputFromMetaSignature(meta *pb.MetaGraphDef) (*PhotoInput, *Mo
}
sig := meta.GetSignatureDef()
for _, v := range sig {
for k, v := range sig {
inputs := v.GetInputs()
outputs := v.GetOutputs()
@@ -179,7 +179,14 @@ func GetInputAndOutputFromMetaSignature(meta *pb.MetaGraphDef) (*PhotoInput, *Mo
inputDims := (*inputTensor).GetTensorShape().Dim
outputDims := (*outputTensor).GetTensorShape().Dim
if len(inputDims) == 4 && len(outputDims) == 2 {
if inputDims[3].GetSize() != ExpectedChannels {
log.Warnf("tensorflow: skipping signature %v because channels are expected to be %d, have %d",
k, ExpectedChannels, inputDims[3].GetSize())
}
if len(inputDims) == 4 &&
inputDims[3].GetSize() == ExpectedChannels &&
len(outputDims) == 2 {
var err error
var inputIdx, outputIdx = 0, 0
@@ -206,7 +213,6 @@ func GetInputAndOutputFromMetaSignature(meta *pb.MetaGraphDef) (*PhotoInput, *Mo
OutputIndex: inputIdx,
Height: inputDims[1].GetSize(),
Width: inputDims[2].GetSize(),
Channels: inputDims[3].GetSize(),
}, &ModelOutput{
Name: outputName,
OutputIndex: outputIdx,

View File

@@ -28,13 +28,15 @@ func GuessInputAndOutput(model *tf.SavedModel) (input *PhotoInput, output *Model
modelOps := model.Graph.Operations()
for i := range modelOps {
if strings.HasPrefix(modelOps[i].Type(), "Placeholder") && modelOps[i].NumOutputs() == 1 && modelOps[i].Output(0).Shape().NumDimensions() == 4 {
if strings.HasPrefix(modelOps[i].Type(), "Placeholder") &&
modelOps[i].NumOutputs() == 1 &&
modelOps[i].Output(0).Shape().NumDimensions() == 4 &&
modelOps[i].Output(0).Shape().Size(3) == ExpectedChannels { // check the channels are 3
shape := modelOps[i].Output(0).Shape()
input = &PhotoInput{
Name: modelOps[i].Name(),
Height: shape.Size(1),
Width: shape.Size(2),
Channels: shape.Size(3),
Name: modelOps[i].Name(),
Height: shape.Size(1),
Width: shape.Size(2),
}
} else if (modelOps[i].Type() == "Softmax" || strings.HasPrefix(modelOps[i].Type(), "StatefulPartitionedCall")) &&
modelOps[i].NumOutputs() == 1 && modelOps[i].Output(0).Shape().NumDimensions() == 2 {
@@ -57,7 +59,7 @@ func GetInputAndOutputFromSavedModel(model *tf.SavedModel) (*PhotoInput, *ModelO
return nil, nil, fmt.Errorf("GetInputAndOutputFromSavedModel: nil input")
}
for _, v := range model.Signatures {
for k, v := range model.Signatures {
inputs := v.Inputs
outputs := v.Outputs
@@ -66,7 +68,13 @@ func GetInputAndOutputFromSavedModel(model *tf.SavedModel) (*PhotoInput, *ModelO
outputVarName, outputTensor := GetOne(outputs)
if inputTensor != nil && outputTensor != nil {
if inputTensor.Shape.Size(3) != ExpectedChannels {
log.Warnf("tensorflow: skipping signature %v because channels are expected to be %d, have %d",
k, ExpectedChannels, inputTensor.Shape.Size(3))
}
if inputTensor.Shape.NumDimensions() == 4 &&
inputTensor.Shape.Size(3) == ExpectedChannels &&
outputTensor.Shape.NumDimensions() == 2 {
var inputIdx, outputIdx = 0, 0
var err error
@@ -92,7 +100,6 @@ func GetInputAndOutputFromSavedModel(model *tf.SavedModel) (*PhotoInput, *ModelO
OutputIndex: inputIdx,
Height: inputTensor.Shape.Size(1),
Width: inputTensor.Shape.Size(2),
Channels: inputTensor.Shape.Size(3),
}, &ModelOutput{
Name: outputName,
OutputIndex: outputIdx,

View File

@@ -128,7 +128,6 @@ func (m *Model) ClassifyModel() *classify.Model {
}
m.Meta.Input.SetResolution(m.Resolution)
m.Meta.Input.Channels = 3
// Try to load custom model based on the configuration values.
defaultPath := filepath.Join(AssetsPath, "nasnet")
@@ -247,7 +246,6 @@ func (m *Model) NsfwModel() *nsfw.Model {
}
m.Meta.Input.SetResolution(m.Resolution)
m.Meta.Input.Channels = 3
if m.Meta == nil {
m.Meta = &tensorflow.ModelInfo{}

View File

@@ -23,7 +23,6 @@ var (
Start: -1.0,
End: 1.0,
},
Channels: 3,
OutputIndex: 0,
},
Output: &tensorflow.ModelOutput{
@@ -46,7 +45,6 @@ var (
Name: "input_tensor",
Height: 224,
Width: 224,
Channels: 3,
OutputIndex: 0,
},
Output: &tensorflow.ModelOutput{
@@ -69,7 +67,6 @@ var (
Name: "input",
Height: 160,
Width: 160,
Channels: 3,
OutputIndex: 0,
},
Output: &tensorflow.ModelOutput{