Tensorflow Interpreter throwing error for "data count" iOS - ios

I am using TensorFlowLiteSwift and the model I'm working with is responsible for flattening an image when the image is cropped in a trapezoidal shape.
Now, Tensorflow does not provide much of a documentation. So, I have been trying to implement things from their example projects.
But here is the catch, it throws error saying "Provided data count must match the required count" and the required count is 4. I backtracked the byteCount in Interpreter.swift but could not find the actual setter.
So, is the .tflite model responsible for the "required count?" And if no, then how does this get set?
Here is a chunk of code I think would help understanding my problem:
/// Performs image preprocessing, invokes the `Interpreter`, and processes the inference results.
func runModel(on item: ImageProcessInfo) -> UIImage? {
let rgbData = item.resizedImage.scaledData(with: CGSize(width: 1000, height: 900),
byteCount: inputWidth * inputHeight
* batchSize,
isQuantized: false)
var corner = item.corners.map { $0.map { p -> (Float, Float) in
return (Float(p.x), Float(p.y))
} }
var item = item
guard let height = NSMutableData(capacity: 0) else { return nil }
height.append(&item.originalHeight, length: 4)
guard let width = NSMutableData(capacity: 0) else { return nil }
width.append(&item.originalWidth, length: 4)
guard let corners = NSMutableData(capacity: 0) else { return nil }
corners.append(&corner, length: 4)
do {
try interpreter.copy(rgbData!, toInputAt: 0)
try interpreter.copy(height as Data, toInputAt: 1)
try interpreter.copy(width as Data, toInputAt: 2)
try interpreter.copy(corners as Data, toInputAt: 3)
try interpreter.invoke()
let outputTensor1 = try self.interpreter.output(at: 0)
guard let cgImage = postprocessImageData(data: outputTensor1.data, size: CGSize(width: 1000, height: 900)) else {
return nil
}
let outputImage = UIImage(cgImage: cgImage)
return outputImage
} catch {
dump(error)
return nil
}
}
extension UIImage {
func scaledData(with size: CGSize, byteCount: Int, isQuantized: Bool) -> Data? {
guard let cgImage = self.cgImage, cgImage.width > 0, cgImage.height > 0 else { return nil }
guard let imageData = imageData(from: cgImage, with: size) else { return nil }
var scaledBytes = [UInt8](repeating: 0, count: byteCount)
var index = 0
for component in imageData.enumerated() {
let offset = component.offset
let isAlphaComponent = (offset % 4)
== 3
guard !isAlphaComponent else { continue }
scaledBytes[index] = component.element
index += 1
}
if isQuantized { return Data(scaledBytes) }
let scaledFloats = scaledBytes.map { (Float32($0) - 127.5) / 127.5 }
return Data(copyingBufferOf: scaledFloats)
}
private func imageData(from cgImage: CGImage, with size: CGSize) -> Data? {
let bitmapInfo = CGBitmapInfo(
rawValue: CGBitmapInfo.byteOrder32Big.rawValue | CGImageAlphaInfo.premultipliedLast.rawValue
)
let width = Int(size.width)
let scaledBytesPerRow = (cgImage.bytesPerRow / cgImage.width) * width
guard let context = CGContext(
data: nil,
width: width,
height: Int(size.height),
bitsPerComponent: cgImage.bitsPerComponent,
bytesPerRow: scaledBytesPerRow,
space: CGColorSpaceCreateDeviceRGB(),
bitmapInfo: bitmapInfo.rawValue)
else {
return nil
}
context.draw(cgImage, in: CGRect(origin: .zero, size: size))
return context.makeImage()?.dataProvider?.data as Data?
}
}
#discardableResult
public func copy(_ data: Data, toInputAt index: Int) throws -> Tensor {
let maxIndex = inputTensorCount - 1
guard case 0...maxIndex = index else {
throw InterpreterError.invalidTensorIndex(index: index, maxIndex: maxIndex)
}
guard let cTensor = TfLiteInterpreterGetInputTensor(cInterpreter, Int32(index)) else {
throw InterpreterError.allocateTensorsRequired
}
/* Error here */
let byteCount = TfLiteTensorByteSize(cTensor)
guard data.count == byteCount else {
throw InterpreterError.invalidTensorDataCount(provided: data.count, required: byteCount)
}
#if swift(>=5.0)
let status = data.withUnsafeBytes {
TfLiteTensorCopyFromBuffer(cTensor, $0.baseAddress, data.count)
}
#else
let status = data.withUnsafeBytes { TfLiteTensorCopyFromBuffer(cTensor, $0, data.count) }
#endif // swift(>=5.0)
guard status == kTfLiteOk else { throw InterpreterError.failedToCopyDataToInputTensor }
return try input(at: index)
}

What are the input shapes? Can you identify which one is complaining about the size?
At the first glance, corners.append(&corner, length: 4) seems weird - does corners contain only 1 Float (byte size 4)?
The byteCount for a tensor is filled by underlying C API, and simply returns tensor->bytes for underlying TfLiteTensor struct that is filled in the model loading stage.

Related

Having trouble with input image with iOS Swift TensorFlowLite Image Classification Model?

I've been trying to add a plant recognition classifier to my app through a Firebase cloud-hosted ML model, and I've gotten close - problem is, I'm pretty sure I'm messing up the input for the image data somewhere along the way. My classifier is churning out nonsense probabilities/results based on this classifier's output, and I've been testing the same classifier through a python script which is giving me accurate results.
The input for the model requires a 224x224 image with 3 channels scaled to 0,1. I've done all this but can't seem to figure out the CGImage through the Camera/ImagePicker. Here is the bit of the code that processes the input for the image:
if let imageData = info[.originalImage] as? UIImage {
DispatchQueue.main.async {
let resizedImage = imageData.scaledImage(with: CGSize(width:224, height:224))
let ciImage = CIImage(image: resizedImage!)
let CGcontext = CIContext(options: nil)
let image : CGImage = CGcontext.createCGImage(ciImage!, from: ciImage!.extent)!
guard let context = CGContext(
data: nil,
width: image.width, height: image.height,
bitsPerComponent: 8, bytesPerRow: image.width * 4,
space: CGColorSpaceCreateDeviceRGB(),
bitmapInfo: CGImageAlphaInfo.noneSkipFirst.rawValue
) else {
return
}
context.draw(image, in: CGRect(x: 0, y: 0, width: image.width, height: image.height))
guard let imageData = context.data else { return }
print("Image data showing as: \(imageData)")
var inputData = Data()
do {
for row in 0 ..< 224 {
for col in 0 ..< 224 {
let offset = 4 * (row * context.width + col)
// (Ignore offset 0, the unused alpha channel)
let red = imageData.load(fromByteOffset: offset+1, as: UInt8.self)
let green = imageData.load(fromByteOffset: offset+2, as: UInt8.self)
let blue = imageData.load(fromByteOffset: offset+3, as: UInt8.self)
// Normalize channel values to [0.0, 1.0].
var normalizedRed = Float32(red) / 255.0
var normalizedGreen = Float32(green) / 255.0
var normalizedBlue = Float32(blue) / 255.0
// Append normalized values to Data object in RGB order.
let elementSize = MemoryLayout.size(ofValue: normalizedRed)
var bytes = [UInt8](repeating: 0, count: elementSize)
memcpy(&bytes, &normalizedRed, elementSize)
inputData.append(&bytes, count: elementSize)
memcpy(&bytes, &normalizedGreen, elementSize)
inputData.append(&bytes, count: elementSize)
memcpy(&bytes, &normalizedBlue, elementSize)
inputData.append(&bytes, count: elementSize)
}
}
print("Successfully added inputData")
self.parent.invokeInterpreter(inputData: inputData)
} catch let error {
print("Failed to add input: \(error)")
}
}
}
Afterwards, I process the inputData with the following:
func invokeInterpreter(inputData: Data) {
do {
var interpreter = try Interpreter(modelPath: ProfileUserData.sharedUserData.modelPath)
var labels: [String] = []
try interpreter.allocateTensors()
try interpreter.copy(inputData, toInputAt: 0)
try interpreter.invoke()
let output = try interpreter.output(at: 0)
switch output.dataType {
case .uInt8:
guard let quantization = output.quantizationParameters else {
print("No results returned because the quantization values for the output tensor are nil.")
return
}
let quantizedResults = [UInt8](output.data)
let results = quantizedResults.map {
quantization.scale * Float(Int($0) - quantization.zeroPoint)
}
let sum = results.reduce(0, +)
print("Sum of all dequantized results is: \(sum)")
print("Count of dequantized results is: \(results.indices.count)")
let filename = "plantLabels"
let fileExtension = "csv"
guard let labelPath = Bundle.main.url(forResource: filename, withExtension: fileExtension) else {
print("Labels file not found in bundle. Please check labels file.")
return
}
do {
let contents = try String(contentsOf: labelPath, encoding: .utf8)
labels = contents.components(separatedBy: .newlines)
print("Count of label rows is: \(labels.indices.count)")
} catch {
fatalError("Labels file named \(filename).\(fileExtension) cannot be read. Please add a " +
"valid labels file and try again.")
}
let zippedResults = zip(labels.indices, results)
// Sort the zipped results by confidence value in descending order.
let sortedResults = zippedResults.sorted { $0.1 > $1.1 }.prefix(3)
print("Printing sortedResults: \(sortedResults)")
case .float32:
print("Output tensor data type [Float32] is unsupported for this model.")
default:
print("Output tensor data type \(output.dataType) is unsupported for this model.")
return
}
} catch {
//Error with interpreter
print("Error with running interpreter: \(error.localizedDescription)")
}
}

Issue applying a Shader after a MPSImageLanczosScale on Apple Metal

I'm having weird result when I apply a shader on a MTLTexture after applying a MPSImageLanczosScale.
Even if the transform as scale = 1 and translationX = 0 and translationY = 0.
It's working well if I don't apply the MPSImageLanczosScale. Below you can see the result without and with applying the MPSImageLanczosScale.
My render method look like this:
func filter(pixelBuffer: CVPixelBuffer) -> CVPixelBuffer? {
guard let commandQueue = commandQueue, var commandBuffer = commandQueue.makeCommandBuffer() else {
print("Failed to create Metal command queue")
CVMetalTextureCacheFlush(textureCache!, 0)
return nil
}
var newPixelBuffer: CVPixelBuffer?
CVPixelBufferPoolCreatePixelBuffer(kCFAllocatorDefault, outputPixelBufferPool!, &newPixelBuffer)
guard var outputPixelBuffer = newPixelBuffer else {
print("Allocation failure: Could not get pixel buffer from pool (\(self.description))")
return nil
}
guard let inputTexture = makeTextureFromCVPixelBuffer(pixelBuffer: pixelBuffer, textureFormat: .bgra8Unorm) else {
return nil
}
guard var intermediateTexture = makeTextureFromCVPixelBuffer(pixelBuffer: outputPixelBuffer, textureFormat: .bgra8Unorm) else {
return nil
}
let imageLanczosScale = MPSImageLanczosScale(device: metalDevice)
let transform = MPSScaleTransform(scaleX: Double(scale), scaleY: Double(scale), translateX: Double(translationX), translateY: Double(translationY))
withUnsafePointer(to: &transform) { (transformPtr: UnsafePointer<MPSScaleTransform>) -> () in
imageLanczosScale.scaleTransform = transformPtr
}
imageLanczosScale.encode(commandBuffer: commandBuffer, sourceTexture: inputTexture, destinationTexture: outputTexture)
guard let commandEncoder = commandBuffer.makeComputeCommandEncoder(),
let outputTexture = makeTextureFromCVPixelBuffer(pixelBuffer: outputPixelBuffer, textureFormat: .bgra8Unorm) else { return nil }
commandEncoder.label = "Shader"
commandEncoder.setComputePipelineState(shaderPipline)
commandEncoder.setTexture(intermediateTexture, index: 1)
commandEncoder.setTexture(outputTexture, index: 0)
let w = shaderPipline.threadExecutionWidth
let h = shaderPipline.maxTotalThreadsPerThreadgroup / w
let threadsPerThreadgroup = MTLSizeMake(w, h, 1)
let threadgroupsPerGrid = MTLSize(width: (intermediateTexture.width + w - 1) / w, height: (intermediateTexture.height + h - 1) / h, depth: 1)
commandEncoder.dispatchThreadgroups(threadgroupsPerGrid, threadsPerThreadgroup: threadsPerThreadgroup)
commandEncoder.endEncoding()
commandBuffer.commit()
return outputPixelBuffer
}
No idea what im doing wrong. any ideas?

Tensorflow lite image output different between python and iOS/Android

I convert Keras model to TF lite the output dimension is (1, 256, 256, 1).
the result on python is correct, but when I try to construct the image on ios swift the result is wrong.
Here is the code, that I use to construct an UIImage from a list of output.
// helper function
---------------------------------------
// MARK: - Extensions
extension Data {
init<T>(copyingBufferOf array: [T]) {
self = array.withUnsafeBufferPointer(Data.init)
}
/// Convert a Data instance to Array representation.
func toArray<T>(type: T.Type) -> [T] where T: ExpressibleByIntegerLiteral {
var array = [T](repeating: 0, count: self.count/MemoryLayout<T>.stride)
_ = array.withUnsafeMutableBytes { copyBytes(to: $0) }
return array
}
}
func imageFromSRGBColorArray(pixels: [UInt32], width: Int, height: Int) -> UIImage?
{
guard width > 0 && height > 0 else { return nil }
guard pixels.count == width * height else { return nil }
// Make a mutable copy
var data = pixels
// Convert array of pixels to a CGImage instance.
let cgImage = data.withUnsafeMutableBytes { (ptr) -> CGImage in
let ctx = CGContext(
data: ptr.baseAddress,
width: width,
height: height,
bitsPerComponent: 8,
bytesPerRow: MemoryLayout<UInt32>.size * width,
space: CGColorSpace(name: CGColorSpace.sRGB)!,
bitmapInfo: CGBitmapInfo.byteOrder32Little.rawValue
+ CGImageAlphaInfo.premultipliedFirst.rawValue
)!
return ctx.makeImage()!
}
// Convert the CGImage instance to an UIImage instance.
return UIImage(cgImage: cgImage)
}
let results = outputTensor.data.toArray(type: UInt32.self)
let maskImage = imageFromSRGBColorArray(pixels: results, width: 256, height: 256)
the result I get is completely wrong compared to python.
I think the function imageFromSRGBColorArray is not correct.
can anyone help me to figure out the problem?

Crash specialized String.imageSize(),

I have a crash in this extension method of String:
func imageSize() -> CGSize {
// self = "https://s3-eu-west-1.amazonaws.com/mimg.haraj.com.sa/userfiles30/2018-8-6/524x334-1_-E7VSb5T20mOouX.jpg"
var width = 0
var height = 0
let split0 = self.split(separator: "/")
if split0.count > 0 {
let split1 = split0.last?.split(separator: "-")
if (split1?.count)! > 0 {
let split2 = split1?.first?.decomposedStringWithCanonicalMapping.split(separator: "x")
width = (split2?.first?.decomposedStringWithCanonicalMapping.toInt())!
if (split2?.count)! > 1 {
// let split2 = split1![1].decomposedStringWithCanonicalMapping.split(separator: "-")
height = (split2?.last?.decomposedStringWithCanonicalMapping.toInt())!
}
}
}
return CGSize(width: width, height: height)
}
The crash is on line return CGSize(width: width, height: height)
I have created an NSString version like this to use the same above method:
#objc extension NSString {
func imageSize1() -> CGSize {
return (self as String).imageSize()
}
}
This is then called from obj-c code:
CGSize imageSize = [url imageSize1];
Examples of url are:
https://s3-eu-west-1.amazonaws.com/mimg.haraj.com.sa/userfiles30/2019-02-07/675x900-1_-CdC62Y2hcV7208.jpg
https://s3-eu-west-1.amazonaws.com/mimg.haraj.com.sa/userfiles30/2019-02-07/675x900-1_-697e3no8ec2E1I.jpg
https://s3-eu-west-1.amazonaws.com/mimg.haraj.com.sa/userfiles30/2019-02-07/675x900-1_-8Af5D20wh9b62z.jpg
What this imageSize() method does is that it parses the image size from the url. The urls above contain the sizes 675x900 -> widthxheight.
In rare case we encounter a url where there is no information of the size and the url is not in the format above. So if the size is not found CGSize = (0 , 0) is returned.
I have tested this method on all expected scenarios.
But due to some reasons the method is causing crashes. May be I missed/messed something.
Here is the link to Crashlytics issue.
Any help would be appreciated.
Try don't use force unwrap !
let exampleString1 = "https://s3-eu-west-1.amazonaws.com/mimg.haraj.com.sa/userfiles30/2018-8-6/524x334-1_-E7VSb5T20mOouX.jpg"
let exampleString2 = "https://s3-eu-west-1.amazonaws.com/mimg.haraj.com.sa/userfiles30/2019-02-07/675x900-1_-697e3no8ec2E1I.jpg"
let exampleString3 = "https://s3-eu-west-1.amazonaws.com/mimg.haraj.com.sa/userfiles30/2019-02-07/675x900-1_-CdC62Y2hcV7208.jpg"
extension String {
func imageSize() -> CGSize? {
// last url component
guard let imageName = self.split(separator: "/").last else { return nil }
guard let imageSizeString = imageName.split(separator: "-").first else { return nil }
let sizes = imageSizeString.split(separator: "x")
guard let first = sizes.first,
let last = sizes.last,
let wight = Int(String(first)),
let height = Int(String(last))
else { return nil }
return CGSize(width: wight, height: height)
}
}
exampleString1.imageSize() // Optional((524.0, 334.0))
exampleString2.imageSize() // Optional((675.0, 900.0))
exampleString3.imageSize() // Optional((675.0, 900.0))
Also try to use guard let and return nil if something wrong. For example Url schema can be changed
The crash is most likely due to force unwrapping optionals. There are several cases in your code where you're using it, which will lead to a runtime error if file name in your URL has a different format than you expect. Try
func imageSize() -> CGSize {
// self = "https://s3-eu-west-1.amazonaws.com/mimg.haraj.com.sa/userfiles30/2018-8-6/524x334-1_-E7VSb5T20mOouX.jpg"
var width = 0
var height = 0
let split0 = self.split(separator: "/")
if let split1 = split0.last?.split(separator: "-")
{
if let split2 = split1.first?.decomposedStringWithCanonicalMapping.split(separator: "x")
{
width = (split2.first?.decomposedStringWithCanonicalMapping.toInt()) ?? 0
if split2.count > 1 {
height = (split2.last?.decomposedStringWithCanonicalMapping.toInt()) ?? 0
}
}
}
return CGSize(width: width, height: height)
}

How to convert YUV frames (from OTVideoFrame) to CVPixelBuffer

I need to convert YUV Frames to CVPixelBuffer that I get from OTVideoFrame Class
This class provides an array of planes in the video frame which contains three elements for y,u,v frame each at index 0,1,2.
#property (nonatomic, retain) NSPointerArray *planes
and format of the video frame
#property (nonatomic, retain) OTVideoFormat *format
That contains Properties like width, height, bytesPerRow etc. of the frame
I need to add filter to the image I receive in the form of OTVideoFrame, I have already tried these answers :
How to convert from YUV to CIImage for iOS
Create CVPixelBuffer from YUV with IOSurface backed
These two links have the solutions in Objective-C but I want to do it in swift. One of the answers in second link was in swift but it lacks some information about the YUVFrame struct that the answer has reference to.
The Format that I receive is NV12
Here is what I have been trying to do till now but I don't know how to proceed next :-
/**
* Calcualte the size of each plane from OTVideoFrame.
*
* #param frame The frame to render.
* #return tuple containing three elements for size of each plane
*/
fileprivate func calculatePlaneSize(forFrame frame: OTVideoFrame)
-> (ySize: Int, uSize: Int, vSize: Int){
guard let frameFormat = frame.format
else {
return (0, 0 ,0)
}
let baseSize = Int(frameFormat.imageWidth * frameFormat.imageHeight) * MemoryLayout<GLubyte>.size
return (baseSize, baseSize / 4, baseSize / 4)
}
/**
* Renders a frame to the video renderer.
*
* #param frame The frame to render.
*/
func renderVideoFrame(_ frame: OTVideoFrame) {
let planeSize = calculatePlaneSize(forFrame: frame)
let yPlane = UnsafeMutablePointer<GLubyte>.allocate(capacity: planeSize.ySize)
let uPlane = UnsafeMutablePointer<GLubyte>.allocate(capacity: planeSize.uSize)
let vPlane = UnsafeMutablePointer<GLubyte>.allocate(capacity: planeSize.vSize)
memcpy(yPlane, frame.planes?.pointer(at: 0), planeSize.ySize)
memcpy(uPlane, frame.planes?.pointer(at: 1), planeSize.uSize)
memcpy(vPlane, frame.planes?.pointer(at: 2), planeSize.vSize)
let yStride = frame.format!.bytesPerRow.object(at: 0) as! Int
// multiply chroma strides by 2 as bytesPerRow represents 2x2 subsample
let uStride = frame.format!.bytesPerRow.object(at: 1) as! Int
let vStride = frame.format!.bytesPerRow.object(at: 2) as! Int
let width = frame.format!.imageWidth
let height = frame.format!.imageHeight
var pixelBuffer: CVPixelBuffer? = nil
var err: CVReturn;
err = CVPixelBufferCreate(kCFAllocatorDefault, Int(width), Int(height), kCVPixelFormatType_420YpCbCr8BiPlanarVideoRange, nil, &pixelBuffer)
if (err != 0) {
NSLog("Error at CVPixelBufferCreate %d", err)
fatalError()
}
}
Taking Guidance from those two links I tried to create Pixel buffer but I got stuck every time at this point because the conversion of the Objective-C code after this is not similar to what we have in Swift 3.
For those who are looking for a fast solution, I did with swift Accelerate
using vImageConvert_AnyToAny(_:_:_:_:_:) function.
import Foundation
import Accelerate
import UIKit
import OpenTok
class Accelerater{
var infoYpCbCrToARGB = vImage_YpCbCrToARGB()
init() {
_ = configureYpCbCrToARGBInfo()
}
func configureYpCbCrToARGBInfo() -> vImage_Error {
print("Configuring")
var pixelRange = vImage_YpCbCrPixelRange(Yp_bias: 0,
CbCr_bias: 128,
YpRangeMax: 255,
CbCrRangeMax: 255,
YpMax: 255,
YpMin: 1,
CbCrMax: 255,
CbCrMin: 0)
let error = vImageConvert_YpCbCrToARGB_GenerateConversion(
kvImage_YpCbCrToARGBMatrix_ITU_R_601_4!,
&pixelRange,
&infoYpCbCrToARGB,
kvImage420Yp8_Cb8_Cr8,
kvImageARGB8888,
vImage_Flags(kvImagePrintDiagnosticsToConsole))
print("Configration done \(error)")
return error
}
public func convertFrameVImageYUV(toUIImage frame: OTVideoFrame, flag: Bool) -> UIImage {
var result: UIImage? = nil
let width = frame.format?.imageWidth ?? 0
let height = frame.format?.imageHeight ?? 0
var pixelBuffer: CVPixelBuffer? = nil
_ = CVPixelBufferCreate(kCFAllocatorDefault, Int(width), Int(height), kCVPixelFormatType_32BGRA, nil, &pixelBuffer)
_ = convertFrameVImageYUV(frame, to: pixelBuffer)
var ciImage: CIImage? = nil
if let pixelBuffer = pixelBuffer {
ciImage = CIImage(cvPixelBuffer: pixelBuffer)
}
let temporaryContext = CIContext(options: nil)
var uiImage: CGImage? = nil
if let ciImage = ciImage {
uiImage = temporaryContext.createCGImage(ciImage, from: CGRect(x: 0, y: 0, width: CVPixelBufferGetWidth(pixelBuffer!), height: CVPixelBufferGetHeight(pixelBuffer!)))
}
if let uiImage = uiImage {
result = UIImage(cgImage: uiImage)
}
CVPixelBufferUnlockBaseAddress(pixelBuffer!, [])
return result!
}
func convertFrameVImageYUV(_ frame: OTVideoFrame, to pixelBufferRef: CVPixelBuffer?) -> vImage_Error{
let start = CFAbsoluteTimeGetCurrent()
if pixelBufferRef == nil {
print("No PixelBuffer refrance found")
return vImage_Error(kvImageInvalidParameter)
}
let width = frame.format?.imageWidth ?? 0
let height = frame.format?.imageHeight ?? 0
let subsampledWidth = frame.format!.imageWidth/2
let subsampledHeight = frame.format!.imageHeight/2
print("subsample height \(subsampledHeight) \(subsampledWidth)")
let planeSize = calculatePlaneSize(forFrame: frame)
print("ysize : \(planeSize.ySize) \(planeSize.uSize) \(planeSize.vSize)")
let yPlane = UnsafeMutablePointer<GLubyte>.allocate(capacity: planeSize.ySize)
let uPlane = UnsafeMutablePointer<GLubyte>.allocate(capacity: planeSize.uSize)
let vPlane = UnsafeMutablePointer<GLubyte>.allocate(capacity: planeSize.vSize)
memcpy(yPlane, frame.planes?.pointer(at: 0), planeSize.ySize)
memcpy(uPlane, frame.planes?.pointer(at: 1), planeSize.uSize)
memcpy(vPlane, frame.planes?.pointer(at: 2), planeSize.vSize)
let yStride = frame.format!.bytesPerRow.object(at: 0) as! Int
// multiply chroma strides by 2 as bytesPerRow represents 2x2 subsample
let uStride = frame.format!.bytesPerRow.object(at: 1) as! Int
let vStride = frame.format!.bytesPerRow.object(at: 2) as! Int
var yPlaneBuffer = vImage_Buffer(data: yPlane, height: vImagePixelCount(height), width: vImagePixelCount(width), rowBytes: yStride)
var uPlaneBuffer = vImage_Buffer(data: uPlane, height: vImagePixelCount(subsampledHeight), width: vImagePixelCount(subsampledWidth), rowBytes: uStride)
var vPlaneBuffer = vImage_Buffer(data: vPlane, height: vImagePixelCount(subsampledHeight), width: vImagePixelCount(subsampledWidth), rowBytes: vStride)
CVPixelBufferLockBaseAddress(pixelBufferRef!, .readOnly)
let pixelBufferData = CVPixelBufferGetBaseAddress(pixelBufferRef!)
let rowBytes = CVPixelBufferGetBytesPerRow(pixelBufferRef!)
var destinationImageBuffer = vImage_Buffer()
destinationImageBuffer.data = pixelBufferData
destinationImageBuffer.height = vImagePixelCount(height)
destinationImageBuffer.width = vImagePixelCount(width)
destinationImageBuffer.rowBytes = rowBytes
var permuteMap: [UInt8] = [3, 2, 1, 0] // BGRA
let convertError = vImageConvert_420Yp8_Cb8_Cr8ToARGB8888(&yPlaneBuffer, &uPlaneBuffer, &vPlaneBuffer, &destinationImageBuffer, &infoYpCbCrToARGB, &permuteMap, 255, vImage_Flags(kvImagePrintDiagnosticsToConsole))
CVPixelBufferUnlockBaseAddress(pixelBufferRef!, [])
yPlane.deallocate()
uPlane.deallocate()
vPlane.deallocate()
let end = CFAbsoluteTimeGetCurrent()
print("Decoding time \((end-start)*1000)")
return convertError
}
fileprivate func calculatePlaneSize(forFrame frame: OTVideoFrame)
-> (ySize: Int, uSize: Int, vSize: Int)
{
guard let frameFormat = frame.format
else {
return (0, 0 ,0)
}
let baseSize = Int(frameFormat.imageWidth * frameFormat.imageHeight) * MemoryLayout<GLubyte>.size
return (baseSize, baseSize / 4, baseSize / 4)
}
}
Performance tested on iPhone7, one frame conversion is less than a millisecond.
Here's what worked for me (I've taken your function and changed it a bit):
func createPixelBufferWithVideoFrame(_ frame: OTVideoFrame) -> CVPixelBuffer? {
if let fLock = frameLock {
fLock.lock()
let planeSize = calculatePlaneSize(forFrame: frame)
let yPlane = UnsafeMutablePointer<GLubyte>.allocate(capacity: planeSize.ySize)
let uPlane = UnsafeMutablePointer<GLubyte>.allocate(capacity: planeSize.uSize)
let vPlane = UnsafeMutablePointer<GLubyte>.allocate(capacity: planeSize.vSize)
memcpy(yPlane, frame.planes?.pointer(at: 0), planeSize.ySize)
memcpy(uPlane, frame.planes?.pointer(at: 1), planeSize.uSize)
memcpy(vPlane, frame.planes?.pointer(at: 2), planeSize.vSize)
let width = frame.format!.imageWidth
let height = frame.format!.imageHeight
var pixelBuffer: CVPixelBuffer? = nil
var err: CVReturn;
err = CVPixelBufferCreate(kCFAllocatorDefault, Int(width), Int(height), kCVPixelFormatType_420YpCbCr8BiPlanarVideoRange, nil, &pixelBuffer)
if (err != 0) {
NSLog("Error at CVPixelBufferCreate %d", err)
return nil
}
if let pixelBuffer = pixelBuffer {
CVPixelBufferLockBaseAddress(pixelBuffer, .readOnly)
let yPlaneTo = CVPixelBufferGetBaseAddressOfPlane(pixelBuffer, 0)
memcpy(yPlaneTo, yPlane, planeSize.ySize)
let uvRow: Int = planeSize.uSize*2/Int(width)
let halfWidth: Int = Int(width)/2
if let uPlaneTo = CVPixelBufferGetBaseAddressOfPlane(pixelBuffer, 1) {
let uvPlaneTo = uPlaneTo.bindMemory(to: GLubyte.self, capacity: Int(uvRow*halfWidth*2))
for i in 0..<uvRow {
for j in 0..<halfWidth {
let dataIndex: Int = Int(i) * Int(halfWidth) + Int(j)
let uIndex: Int = (i * Int(width)) + Int(j) * 2
let vIndex: Int = uIndex + 1
uvPlaneTo[uIndex] = uPlane[dataIndex]
uvPlaneTo[vIndex] = vPlane[dataIndex]
}
}
}
}
fLock.unlock()
return pixelBuffer
}
return nil
}

Resources