Custom CoreML output layer that sums MultiArray output

Please bear with me. I’m new to CoreML and machine learning. I have a CoreML model that I was able to convert from a research paper implementation that used Caffe. It’s a CSRNet, the objective being crowd counting. After much wrangling, I’m able to load the .mlmodel into Python using coremltools, pre-process an image using Pillow, and predict an output. The result is a MultiArray (a density map), which I’ve then processed further to derive the actual numerical prediction.

How do I add a custom layer as an output to the model that takes the current output and performs the following functionality? (Essentially, it sums all of the values in the MultiArray.) I’ve read numerous articles and am still at a loss. I’d like to be able to save the model/layer and import it into Xcode so that the MLModel result is a single numerical value, not a MultiArray.

This is the code I’m currently using to convert the output from the model into a number (in Python):

# predict output

output = model.predict({'data': img})
summed_output = sum(output.values())
prediction = np.sum(summed_output)
print("prediction: ", prediction)

Full (abbreviated) code:

import coremltools as ct
from PIL import Image
import numpy as np

# instantiate model (CSRNet)
model = ct.models.MLModel('shanghai_b.mlmodel')

# function to resize image
def load_image(path, resize_to=None):
    img = Image.open(path)
    if resize_to is not None:
        img = img.resize(resize_to, Image.ANTIALIAS)
    img_np = np.array(img).astype(np.float32)
    return img_np, img

# select image 
image = 'IMG_173.jpg'

#resize image
_, img = load_image(image, resize_to=(900, 675)) 

# predict output
output = model.predict({'data': img})
summed_output = sum(output.values())
prediction = np.sum(summed_output)
print("prediction: ", prediction)

Xcode shows the output of the MLModel as “MultiArray (Double 1 x 168 x 225)”. The spec description for the same model, as it currently stands when I load it into Python with coremltools, is as follows:

<bound method MLModel.predict of input {
  name: "data"
  type {
    imageType {
      width: 224
      height: 224
      colorSpace: RGB
    }
  }
}
output {
  name: "estdmap"
  type {
    multiArrayType {
      dataType: DOUBLE
    }
  }
}
>

Thanks for any help! I’m happy to post any other code in the process if it’s useful.

P.S. I’m adding the code I have inside my Xcode project as a reference as well.

private func detectImage(_ image: CIImage) {

        guard let model = try? VNCoreMLModel(for: HundredsPredictor().model) else {
            fatalError("Loading to CoreML failed")
        }
        
        let modelRequest = VNCoreMLRequest(model: model) { (request, error) in
            if error != nil {
                print(error?.localizedDescription ?? "Error")
            } else {
                guard let result = request.results as? [VNObservation] else {fatalError("Error")}
                
                if #available(iOS 14.0, *) {
                    print(result)
                    
                    // output: [<VNCoreMLFeatureValueObservation: 0x282069da0> 344A87BC-B13E-4195-922E-7381694C91FF requestRevision=1 confidence=1.000000 timeRange={{0/1 = 0.000}, {0/1 = 0.000}} "density_map" - "MultiArray: Double 1 × 168 × 225 array" (1.000000)]
                    
                } else {
                    // Fallback on earlier versions
                }
                if let firstResult = result.first {
                    print(firstResult)

                    // output: [<VNCoreMLFeatureValueObservation: 0x282069da0> 344A87BC-B13E-4195-922E-7381694C91FF requestRevision=1 confidence=1.000000 timeRange={{0/1 = 0.000}, {0/1 = 0.000}} "density_map" - "MultiArray : Double 1 × 168 × 225 array" (1.000000)]

                }
            }
        }

        let handler = VNImageRequestHandler(ciImage: image)
        do {
            try handler.perform([modelRequest])
            print(handler)
        }
        catch let error as NSError {
            print(error)
        }
    }

Update: Solution

In Python:

import coremltools as ct
from helpers import get_nn
# helper file sourced from Matthijs Hollemans' GitHub
# url: https://github.com/hollance/coreml-survival-guide/blob/master/Scripts/helpers.py

# load original model
spec = ct.utils.load_spec("HundredsPredictor.mlmodel")

nn = get_nn(spec)

# construct the new layer
new_layer = nn.layers.add()
new_layer.name = "summingLayer"

# configure it as a reduce layer that sums over channels, height and width
params = ct.proto.NeuralNetwork_pb2.ReduceLayerParams
new_layer.reduce.mode = params.SUM
new_layer.reduce.axis = params.CHW

# splice the new layer in after the (previously) final layer: the new layer
# takes over the model's declared output name, the old layer's output blob is
# renamed, and that renamed blob is wired into the new layer's input
new_layer.output.append(nn.layers[-2].output[0])
nn.layers[-2].output[0] = nn.layers[-2].name + "_output"
new_layer.input.append(nn.layers[-2].output[0])

# update the declared output shape to reflect the single summed value
spec.description.output[0].type.multiArrayType.shape[0] = 1

# save new model
ct.models.utils.save_spec(spec, "HundredPredictorSummed.mlmodel")

In Swift, after importing the new updated model:

private func detectImage(_ image: CIImage) {
        
        guard let model = try? VNCoreMLModel(for: HundredPredictorSummed().model) else {
            fatalError("Loading to CoreML failed")
        }
        
        let request = VNCoreMLRequest(model: model) { [weak self] request, error in
            guard let results = request.results as? [VNCoreMLFeatureValueObservation],
                  let topResult = results.first else {
                fatalError("Unexpected result type from VNCoreMLRequest")
            }
            
            DispatchQueue.main.async {

                guard let data = topResult.featureValue.multiArrayValue else { return }

                let ptr = data.dataPointer.assumingMemoryBound(to: Double.self)
                let sum = ptr[0]
                print("SUM: ", sum)
                
                self?.detectLabel.text = "\(String(Int(round(sum)))) ppl"

            }
            
        }
        
        let handler = VNImageRequestHandler(ciImage: image)
        
        DispatchQueue.global(qos: .userInteractive).async {
            do {
                try handler.perform([request])
            } catch {
                print(error)
            }
        }
        
    }

Answer

You can add a layer with ReduceSumLayerParams to the end of the model. You’ll need to do this by hand in Python. If you set its reduceAll parameter to true, it will compute the sum over the entire tensor.
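
For reference, here’s a minimal sketch of that approach, in the same raw-proto style as the solution above. The file and layer names mirror the ones used there; the spec-version bump is my assumption, since the newer reduce layers were introduced with specification version 4 (iOS 13), so check it against your deployment target.

import coremltools as ct
from helpers import get_nn  # same helper as in the solution above

spec = ct.utils.load_spec("HundredsPredictor.mlmodel")
nn = get_nn(spec)

# rename the current final output blob so the new layer can consume it
last_layer = nn.layers[-1]
original_output = last_layer.output[0]          # e.g. "estdmap"
last_layer.output[0] = original_output + "_raw"

# append a reduce_sum layer that collapses the whole tensor to a single value
new_layer = nn.layers.add()
new_layer.name = "summingLayer"
new_layer.input.append(original_output + "_raw")
new_layer.output.append(original_output)   # keep the model's declared output name
new_layer.reduceSum.reduceAll = True       # sum over the entire tensor

# the reduce_sum layer requires spec version 4 (iOS 13) or later
spec.specificationVersion = max(spec.specificationVersion, 4)

# (you may also want to update spec.description.output[0] to declare the
#  new single-value shape, as in the solution above)
ct.utils.save_spec(spec, "HundredPredictorSummed.mlmodel")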

However, in my opinion, it’s just as easy to use the model as-is and, in your Swift code, grab a pointer to the MLMultiArray’s data and use vDSP.sum to compute the sum.
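
For that second approach, here’s a rough sketch, assuming the output stays a Double MultiArray as the spec above indicates; the helper name sumOfDensityMap is just illustrative:

import Accelerate
import CoreML

// sums every element of a Double-typed MLMultiArray (e.g. the density map);
// vDSP.sum requires iOS 13 / macOS 10.15 or later
func sumOfDensityMap(_ array: MLMultiArray) -> Double {
    let pointer = array.dataPointer.assumingMemoryBound(to: Double.self)
    let buffer = UnsafeBufferPointer(start: pointer, count: array.count)
    return vDSP.sum(buffer)
}

Inside the VNCoreMLRequest completion handler you would then call this on topResult.featureValue.multiArrayValue and round the result for display, much like the Swift code above does after the summing layer.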