Copy a CVPixelBuffer on any iOS device

I'm having a great deal of difficulty coming up with code that reliably copies a CVPixelBuffer on any iOS device. My first attempt worked fine until I tried it on an iPad Pro:
extension CVPixelBuffer {
    func deepcopy() -> CVPixelBuffer? {
        let width = CVPixelBufferGetWidth(self)
        let height = CVPixelBufferGetHeight(self)
        let format = CVPixelBufferGetPixelFormatType(self)
        var pixelBufferCopyOptional: CVPixelBuffer?
        CVPixelBufferCreate(nil, width, height, format, nil, &pixelBufferCopyOptional)
        if let pixelBufferCopy = pixelBufferCopyOptional {
            CVPixelBufferLockBaseAddress(self, kCVPixelBufferLock_ReadOnly)
            CVPixelBufferLockBaseAddress(pixelBufferCopy, 0)
            let baseAddress = CVPixelBufferGetBaseAddress(self)
            let dataSize = CVPixelBufferGetDataSize(self)
            let target = CVPixelBufferGetBaseAddress(pixelBufferCopy)
            memcpy(target, baseAddress, dataSize)
            CVPixelBufferUnlockBaseAddress(pixelBufferCopy, 0)
            CVPixelBufferUnlockBaseAddress(self, kCVPixelBufferLock_ReadOnly)
        }
        return pixelBufferCopyOptional
    }
}
The above crashes on an iPad Pro because CVPixelBufferGetDataSize(self) is slightly larger than CVPixelBufferGetDataSize(pixelBufferCopy), so the memcpy writes to unallocated memory.
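For what it's worth, the extra bytes come from per-row padding, and the mismatch is easy to see by logging both buffers' layouts before the memcpy. A minimal diagnostic sketch (just Core Video getters, not part of my app code):
import CoreVideo

// Sketch: log the layout values that differ between the source and the freshly created copy.
func logLayout(_ label: String, _ buffer: CVPixelBuffer) {
    print("\(label): width=\(CVPixelBufferGetWidth(buffer))",
          "bytesPerRow=\(CVPixelBufferGetBytesPerRow(buffer))",
          "dataSize=\(CVPixelBufferGetDataSize(buffer))")
}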
So I gave up with that and tried this:
func copy() -> CVPixelBuffer?
{
    precondition(CFGetTypeID(self) == CVPixelBufferGetTypeID(), "copy() cannot be called on a non-CVPixelBuffer")
    var _copy: CVPixelBuffer?
    CVPixelBufferCreate(
        nil,
        CVPixelBufferGetWidth(self),
        CVPixelBufferGetHeight(self),
        CVPixelBufferGetPixelFormatType(self),
        CVBufferGetAttachments(self, .shouldPropagate),
        &_copy)
    guard let copy = _copy else { return nil }
    CVPixelBufferLockBaseAddress(self, .readOnly)
    CVPixelBufferLockBaseAddress(copy, [])
    defer
    {
        CVPixelBufferUnlockBaseAddress(copy, [])
        CVPixelBufferUnlockBaseAddress(self, .readOnly)
    }
    for plane in 0 ..< CVPixelBufferGetPlaneCount(self)
    {
        let dest = CVPixelBufferGetBaseAddressOfPlane(copy, plane)
        let source = CVPixelBufferGetBaseAddressOfPlane(self, plane)
        let height = CVPixelBufferGetHeightOfPlane(self, plane)
        let bytesPerRow = CVPixelBufferGetBytesPerRowOfPlane(self, plane)
        memcpy(dest, source, height * bytesPerRow)
    }
    return copy
}
That works on both my test devices, but it's just reached actual customers and it turns out it crashes on the iPad 6 (and only that device so far). It's an EXC_BAD_ACCESS on the call to memcpy() again.
Seems crazy that there isn't a simple API call for this given how hard it seems to be to make it work reliably. Or am I making it harder than it needs to be? Thanks for any advice!

This question and answer combo is solid gold. Let me add value with a slight refactor and some control flow to account for CVPixelBuffers that do not have planes.
// Assumed error type for the throw below; not included in the original answer.
enum PixelBufferCopyError: Error {
    case allocationFailed
}

public extension CVPixelBuffer {
    func copy() throws -> CVPixelBuffer {
        precondition(CFGetTypeID(self) == CVPixelBufferGetTypeID(), "copy() cannot be called on a non-CVPixelBuffer")
        var _copy: CVPixelBuffer?
        let width = CVPixelBufferGetWidth(self)
        let height = CVPixelBufferGetHeight(self)
        let formatType = CVPixelBufferGetPixelFormatType(self)
        let attachments = CVBufferGetAttachments(self, .shouldPropagate)
        CVPixelBufferCreate(nil, width, height, formatType, attachments, &_copy)
        guard let copy = _copy else {
            throw PixelBufferCopyError.allocationFailed
        }
        CVPixelBufferLockBaseAddress(self, .readOnly)
        CVPixelBufferLockBaseAddress(copy, [])
        defer {
            CVPixelBufferUnlockBaseAddress(copy, [])
            CVPixelBufferUnlockBaseAddress(self, .readOnly)
        }
        let pixelBufferPlaneCount: Int = CVPixelBufferGetPlaneCount(self)
        if pixelBufferPlaneCount == 0 {
            let dest = CVPixelBufferGetBaseAddress(copy)
            let source = CVPixelBufferGetBaseAddress(self)
            let height = CVPixelBufferGetHeight(self)
            let bytesPerRowSrc = CVPixelBufferGetBytesPerRow(self)
            let bytesPerRowDest = CVPixelBufferGetBytesPerRow(copy)
            if bytesPerRowSrc == bytesPerRowDest {
                memcpy(dest, source, height * bytesPerRowSrc)
            } else {
                var startOfRowSrc = source
                var startOfRowDest = dest
                for _ in 0..<height {
                    memcpy(startOfRowDest, startOfRowSrc, min(bytesPerRowSrc, bytesPerRowDest))
                    startOfRowSrc = startOfRowSrc?.advanced(by: bytesPerRowSrc)
                    startOfRowDest = startOfRowDest?.advanced(by: bytesPerRowDest)
                }
            }
        } else {
            for plane in 0 ..< pixelBufferPlaneCount {
                let dest = CVPixelBufferGetBaseAddressOfPlane(copy, plane)
                let source = CVPixelBufferGetBaseAddressOfPlane(self, plane)
                let height = CVPixelBufferGetHeightOfPlane(self, plane)
                let bytesPerRowSrc = CVPixelBufferGetBytesPerRowOfPlane(self, plane)
                let bytesPerRowDest = CVPixelBufferGetBytesPerRowOfPlane(copy, plane)
                if bytesPerRowSrc == bytesPerRowDest {
                    memcpy(dest, source, height * bytesPerRowSrc)
                } else {
                    var startOfRowSrc = source
                    var startOfRowDest = dest
                    for _ in 0..<height {
                        memcpy(startOfRowDest, startOfRowSrc, min(bytesPerRowSrc, bytesPerRowDest))
                        startOfRowSrc = startOfRowSrc?.advanced(by: bytesPerRowSrc)
                        startOfRowDest = startOfRowDest?.advanced(by: bytesPerRowDest)
                    }
                }
            }
        }
        return copy
    }
}
Valid with Swift 5. To provide a little more background: the AVCaptureVideoDataOutput videoSettings property accepts many pixel formats, and not all of them are planar, especially the ones ML models tend to need.
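For example, requesting non-planar 32BGRA frames (a common choice for ML input) looks roughly like this; videoOutput is just a placeholder for an output attached to your capture session:
import AVFoundation

// Sketch: ask for non-planar BGRA frames, so copy() above takes the planeCount == 0 path.
let videoOutput = AVCaptureVideoDataOutput()
videoOutput.videoSettings = [
    kCVPixelBufferPixelFormatTypeKey as String: kCVPixelFormatType_32BGRA
]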

The second implementation looks quite solid. The only problem I can imagine is that a plane in the new pixel buffer is allocated with a different stride length (bytes per row). The stride length is based on width × (bytes per pixel) and then rounded up in an unspecified way to achieve optimal memory access.
So check if:
CVPixelBufferGetBytesPerRowOfPlane(self, plane) == CVPixelBufferGetBytesPerRowOfPlane(copy, plane)
If not, copy the pixel plane row by row:
for plane in 0 ..< CVPixelBufferGetPlaneCount(self)
{
    let dest = CVPixelBufferGetBaseAddressOfPlane(copy, plane)
    let source = CVPixelBufferGetBaseAddressOfPlane(self, plane)
    let height = CVPixelBufferGetHeightOfPlane(self, plane)
    let bytesPerRowSrc = CVPixelBufferGetBytesPerRowOfPlane(self, plane)
    let bytesPerRowDest = CVPixelBufferGetBytesPerRowOfPlane(copy, plane)
    if bytesPerRowSrc == bytesPerRowDest {
        memcpy(dest, source, height * bytesPerRowSrc)
    } else {
        var startOfRowSrc = source
        var startOfRowDest = dest
        for _ in 0..<height {
            memcpy(startOfRowDest, startOfRowSrc, min(bytesPerRowSrc, bytesPerRowDest))
            startOfRowSrc = startOfRowSrc?.advanced(by: bytesPerRowSrc)
            startOfRowDest = startOfRowDest?.advanced(by: bytesPerRowDest)
        }
    }
}
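Either way, a typical call site copies the frame inside the AVCaptureVideoDataOutput delegate callback; with the throwing copy() extension from the previous answer it might look like this (delegate wiring assumed, just a sketch):
import AVFoundation

// Sketch: copy the frame before holding onto it, so the capture pool gets its buffer back quickly.
// This belongs in an AVCaptureVideoDataOutputSampleBufferDelegate.
func captureOutput(_ output: AVCaptureOutput,
                   didOutput sampleBuffer: CMSampleBuffer,
                   from connection: AVCaptureConnection) {
    guard let pixelBuffer = CMSampleBufferGetImageBuffer(sampleBuffer),
          let copiedBuffer = try? pixelBuffer.copy() else { return }
    // ... hand copiedBuffer to slower processing without stalling the capture session.
}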

Related

How do I get the distance of a specific coordinate from the screen size depthMap of ARDepthData?

I am trying to get the distance of a specific coordinate from a depthMap resized to the screen size, but it is not working.
I have tried to implement the following steps.
1. Convert the depthMap to a CIImage, then resize the image to the orientation and size of the screen using an affine transformation.
2. Convert the transformed image to a screen-sized CVPixelBuffer.
3. Given a coordinate (x, y), get the distance in meters stored in the CVPixelBuffer from a one-dimensional array, indexed by width * y + x.
I have implemented the above procedure, but I cannot get the appropriate index from the one-dimensional array. What should I do?
The code for the procedure is shown below.
1.
let depthMap = depthData.depthMap
// convert the depthMap to CIImage
let image = CIImage(cvPixelBuffer: depthMap)
let imageSize = CGSize(width: depthMap.width, height: depthMap.height)
// 1) Convert the captured image to 0.0–1.0 normalized coordinates
let normalizeTransform = CGAffineTransform(scaleX: 1.0/imageSize.width, y: 1.0/imageSize.height)
// 2) "Flip the Y axis (for some mysterious reason this is only necessary in portrait mode)", so transform the coordinates in the portrait case.
// Not only the Y axis but also the X axis needs to be flipped.
let interfaceOrientation = self.arView.window!.windowScene!.interfaceOrientation
let flipTransform = (interfaceOrientation.isPortrait) ? CGAffineTransform(scaleX: -1, y: -1).translatedBy(x: -1, y: -1) : .identity
// 3) Move to the screen's orientation and position on the captured image
let displayTransform = frame.displayTransform(for: interfaceOrientation, viewportSize: arView.bounds.size)
// 4) Convert from the 0.0–1.0 coordinate system to the screen coordinate system
let toViewPortTransform = CGAffineTransform(scaleX: arView.bounds.size.width, y: arView.bounds.size.height)
// 5) Apply transforms 1–4 and crop the transformed image to the screen size
let transformedImage = image.transformed(by: normalizeTransform.concatenating(flipTransform).concatenating(displayTransform).concatenating(toViewPortTransform)).cropped(to: arView.bounds)
// convert the converted image to a screen-sized CVPixelBuffer
if let convertDepthMap = transformedImage.pixelBuffer(cgSize: arView.bounds.size) {
previewImage.image = transformedImage.toUIImage()
DispatchQueue.main.async {
self.processDepthData(convertDepthMap)
}
}
// The process of acquiring CVPixelBuffer is implemented in extension
extension CIImage {
func toUIImage() -> UIImage {
UIImage(ciImage: self)
}
func pixelBuffer(cgSize size:CGSize) -> CVPixelBuffer? {
var pixelBuffer: CVPixelBuffer?
let attrs = [kCVPixelBufferCGImageCompatibilityKey: kCFBooleanTrue,
kCVPixelBufferCGBitmapContextCompatibilityKey: kCFBooleanTrue] as CFDictionary
let width:Int = Int(size.width)
let height:Int = Int(size.height)
CVPixelBufferCreate(kCFAllocatorDefault,
width,
height,
kCVPixelFormatType_DepthFloat32,
attrs,
&pixelBuffer)
// put bytes into pixelBuffer
let context = CIContext()
context.render(self, to: pixelBuffer!)
return pixelBuffer
}
}
private func processDepthData(_ depthMap: CVPixelBuffer) {
CVPixelBufferLockBaseAddress(depthMap, .readOnly)
let width = CVPixelBufferGetWidth(depthMap)
let height = CVPixelBufferGetHeight(depthMap)
if let baseAddress = CVPixelBufferGetBaseAddress(depthMap) {
let mutablePointer = baseAddress.bindMemory(to: Float32.self, capacity: width*height)
let bufferPointer = UnsafeBufferPointer(start: mutablePointer, count: width*height)
let depthArray = Array(bufferPointer)
CVPixelBufferUnlockBaseAddress(depthMap, .readOnly)
// index = width * y + x to trying to get the distance in meters for the coordinate of (300, 100), but it gets the distance for another coordinate
print(depthArray[width * 100 + 300])
}
}
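Independent of the resizing step, one thing worth checking in processDepthData: CVPixelBuffer rows can be padded, so indexing a flat Float32 array by width * y + x is only safe when CVPixelBufferGetBytesPerRow equals width * 4. A stride-aware read looks roughly like this (a sketch, not a confirmed fix for this question):
import CoreVideo

// Sketch: read the depth value at (x, y) honoring the buffer's row stride.
func depthValue(in depthMap: CVPixelBuffer, x: Int, y: Int) -> Float32? {
    CVPixelBufferLockBaseAddress(depthMap, .readOnly)
    defer { CVPixelBufferUnlockBaseAddress(depthMap, .readOnly) }
    guard let base = CVPixelBufferGetBaseAddress(depthMap),
          x < CVPixelBufferGetWidth(depthMap),
          y < CVPixelBufferGetHeight(depthMap) else { return nil }
    let bytesPerRow = CVPixelBufferGetBytesPerRow(depthMap)
    let rowStart = base.advanced(by: y * bytesPerRow)
    return rowStart.assumingMemoryBound(to: Float32.self)[x]
}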

My custom metal image filter is slow. How can I make it faster?

I've seen a lot of other people's online tutorials that are able to hit the 0.0X-second mark when filtering an image. Meanwhile my code here takes 1.09 seconds to filter an image (just to reduce the brightness by half).
Edit after first comment:
Time was measured with two methods:
Date() time interval, from when the "apply filter" button is tapped until the apply-filter function finishes running
building it on an iPhone and counting manually with the timer on my watch
Since I'm new to Metal and kernel stuff, I don't really know the difference between my code and the tutorials that achieve faster results. Which part of my code can be improved, or done with a different approach, to make it a lot faster?
Here's my kernel code:
#include <metal_stdlib>
using namespace metal;
kernel void black(
texture2d<float, access::write> outTexture [[texture(0)]],
texture2d<float, access::read> inTexture [[texture(1)]],
uint2 id [[thread_position_in_grid]]) {
float3 val = inTexture.read(id).rgb;
float r = val.r / 4;
float g = val.g / 4;
float b = val.b / 2;
float4 out = float4(r, g, b, 1.0);
outTexture.write(out.rgba, id);
}
This is my Swift code:
import Metal
import MetalKit
// UIImage -> CGImage -> MTLTexture -> COMPUTE HAPPENS |
// UIImage <- CGImage <- MTLTexture <--
class Filter {
var device: MTLDevice
var defaultLib: MTLLibrary?
var grayscaleShader: MTLFunction?
var commandQueue: MTLCommandQueue?
var commandBuffer: MTLCommandBuffer?
var commandEncoder: MTLComputeCommandEncoder?
var pipelineState: MTLComputePipelineState?
var inputImage: UIImage
var height, width: Int
// most devices have a limit of 512 threads per group
let threadsPerBlock = MTLSize(width: 32, height: 32, depth: 1)
init(){
print("initialized")
self.device = MTLCreateSystemDefaultDevice()!
print(device)
// changes: I did use do/try/catch, and the bundle parameter, when making the default library
let frameworkBundle = Bundle(for: type(of: self))
print(frameworkBundle)
self.defaultLib = device.makeDefaultLibrary()
self.grayscaleShader = defaultLib?.makeFunction(name: "black")
self.commandQueue = self.device.makeCommandQueue()
self.commandBuffer = self.commandQueue?.makeCommandBuffer()
self.commandEncoder = self.commandBuffer?.makeComputeCommandEncoder()
//ERROR HERE
if let shader = grayscaleShader {
print("in")
self.pipelineState = try? self.device.makeComputePipelineState(function: shader)
} else { fatalError("unable to make compute pipeline") }
self.inputImage = UIImage(named: "stockImage")!
self.height = Int(self.inputImage.size.height)
self.width = Int(self.inputImage.size.width)
}
func getCGImage(from uiimg: UIImage) -> CGImage? {
UIGraphicsBeginImageContext(uiimg.size)
uiimg.draw(in: CGRect(origin: .zero, size: uiimg.size))
let contextImage = UIGraphicsGetImageFromCurrentImageContext()
UIGraphicsEndImageContext()
return contextImage?.cgImage
}
func getMTLTexture(from cgimg: CGImage) -> MTLTexture {
let textureLoader = MTKTextureLoader(device: self.device)
do{
let texture = try textureLoader.newTexture(cgImage: cgimg, options: nil)
let textureDescriptor = MTLTextureDescriptor.texture2DDescriptor(pixelFormat: texture.pixelFormat, width: width, height: height, mipmapped: false)
textureDescriptor.usage = [.shaderRead, .shaderWrite]
return texture
} catch {
fatalError("Couldn't convert CGImage to MTLtexture")
}
}
func getCGImage(from mtlTexture: MTLTexture) -> CGImage? {
var data = Array<UInt8>(repeatElement(0, count: 4*width*height))
mtlTexture.getBytes(&data,
bytesPerRow: 4*width,
from: MTLRegionMake2D(0, 0, width, height),
mipmapLevel: 0)
let bitmapInfo = CGBitmapInfo(rawValue: (CGBitmapInfo.byteOrder32Big.rawValue | CGImageAlphaInfo.premultipliedLast.rawValue))
let colorSpace = CGColorSpaceCreateDeviceRGB()
let context = CGContext(data: &data,
width: width,
height: height,
bitsPerComponent: 8,
bytesPerRow: 4*width,
space: colorSpace,
bitmapInfo: bitmapInfo.rawValue)
return context?.makeImage()
}
func getUIImage(from cgimg: CGImage) -> UIImage? {
return UIImage(cgImage: cgimg)
}
func getEmptyMTLTexture() -> MTLTexture? {
let textureDescriptor = MTLTextureDescriptor.texture2DDescriptor(
pixelFormat: MTLPixelFormat.rgba8Unorm,
width: width,
height: height,
mipmapped: false)
textureDescriptor.usage = [.shaderRead, .shaderWrite]
return self.device.makeTexture(descriptor: textureDescriptor)
}
func getInputMTLTexture() -> MTLTexture? {
if let inputImage = getCGImage(from: self.inputImage) {
return getMTLTexture(from: inputImage)
}
else { fatalError("Unable to convert Input image to MTLTexture") }
}
func getBlockDimensions() -> MTLSize {
let blockWidth = width / self.threadsPerBlock.width
let blockHeight = height / self.threadsPerBlock.height
return MTLSizeMake(blockWidth, blockHeight, 1)
}
func applyFilter() -> UIImage? {
print("start")
let date = Date()
print(date)
if let encoder = self.commandEncoder, let buffer = self.commandBuffer,
let outputTexture = getEmptyMTLTexture(), let inputTexture = getInputMTLTexture() {
encoder.setTextures([outputTexture, inputTexture], range: 0..<2)
encoder.setComputePipelineState(self.pipelineState!)
encoder.dispatchThreadgroups(self.getBlockDimensions(), threadsPerThreadgroup: threadsPerBlock)
encoder.endEncoding()
buffer.commit()
buffer.waitUntilCompleted()
guard let outputImage = getCGImage(from: outputTexture) else { fatalError("Couldn't obtain CGImage from MTLTexture") }
print("stop")
let date2 = Date()
print(date2.timeIntervalSince(date))
return getUIImage(from: outputImage)
} else { fatalError("optional unwrapping failed") }
}
}
In case someone still needs the answer, I found a different approach, which is to make it a custom CIFilter. It works pretty fast and is super easy to understand!
You are using UIImage and CGImage. These objects are stored in CPU memory.
You need to implement the code using just CIImage or MTLTexture.
These objects are stored in GPU memory and have the best performance.
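As a rough illustration of that advice (a sketch using a built-in filter rather than a custom CIFilter subclass): keep the pipeline in CIImage and reuse one Metal-backed CIContext instead of bouncing through UIImage/CGImage on every frame:
import CoreImage
import Metal

// Sketch: one shared, Metal-backed CIContext (creating a context per image is expensive).
let ciContext = CIContext(mtlDevice: MTLCreateSystemDefaultDevice()!)

// Halve the RGB values with a built-in color matrix; a custom CIFilter would slot in the same way.
func halveBrightness(of image: CIImage) -> CGImage? {
    let filtered = image.applyingFilter("CIColorMatrix", parameters: [
        "inputRVector": CIVector(x: 0.5, y: 0, z: 0, w: 0),
        "inputGVector": CIVector(x: 0, y: 0.5, z: 0, w: 0),
        "inputBVector": CIVector(x: 0, y: 0, z: 0.5, w: 0)
    ])
    return ciContext.createCGImage(filtered, from: filtered.extent)
}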

Depth map normalization

I am applying the CIDepthBlurEffect filter to a PHAsset as follows:
PHImageManager.default().requestImageDataAndOrientation(for: self.asset!, options: imageOptions) { (data, responseString, imageOrientation, info) in
if data != nil {
DispatchQueue.main.async {
if let depthImage = CIImage(data: data!, options: [CIImageOption.auxiliaryDisparity : true])?.oriented(imageOrientation) {
if let newMainImage = CIImage(data: data!)?.oriented(imageOrientation) {
let filter = CIFilter(name : "CIDepthBlurEffect",
parameters: [kCIInputImageKey : newMainImage,
kCIInputDisparityImageKey: depthImage,
"inputAperture" : withRadius])
self.imageView.image = UIImage(ciImage: filter!.outputImage!)
}
}
}
}
}
This works great most of the time. However, sometimes the filter's blur planes seem to be off, almost as if the Z plane is wrong or shifted.
Does this method require the depth data to be normalized?
Previously, when CVPixelBuffer was used, I was using this extension:
final func normalize() -> CVPixelBuffer {
let width = CVPixelBufferGetWidth(self)
let height = CVPixelBufferGetHeight(self)
CVPixelBufferLockBaseAddress(self, CVPixelBufferLockFlags(rawValue: 0))
let floatBuffer = unsafeBitCast(CVPixelBufferGetBaseAddress(self), to: UnsafeMutablePointer<Float>.self)
var minPixel: Float = 1.0
var maxPixel: Float = 0.0
for y in 0 ..< height {
for x in 0 ..< width {
let pixel = floatBuffer[y * width + x]
minPixel = min(pixel, minPixel)
maxPixel = max(pixel, maxPixel)
}
}
let range = maxPixel - minPixel
for y in 0 ..< height {
for x in 0 ..< width {
let pixel = floatBuffer[y * width + x]
floatBuffer[y * width + x] = (pixel - minPixel) / range
}
}
CVPixelBufferUnlockBaseAddress(self, CVPixelBufferLockFlags(rawValue: 0))
return self
}
Is there such a way to do this to a CIImage?
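Not an authoritative answer, but one way to get the same min/max normalization without dropping back to a CVPixelBuffer is to let Core Image compute the extrema and then remap with a color matrix. A sketch (assuming iOS 12+ for CIAreaMinMax, and reading the 2×1 min/max image back as floats to avoid 8-bit quantization):
import CoreImage

// Sketch: normalize a single-channel CIImage (values in the red channel) to 0...1.
func normalized(_ image: CIImage, context: CIContext) -> CIImage? {
    // CIAreaMinMax reduces the extent to a 2x1 image: pixel 0 = minimum, pixel 1 = maximum.
    guard let minMax = CIFilter(name: "CIAreaMinMax", parameters: [
        kCIInputImageKey: image,
        kCIInputExtentKey: CIVector(cgRect: image.extent)
    ])?.outputImage else { return nil }

    // Read the two pixels back as 32-bit floats.
    var pixels = [Float](repeating: 0, count: 8)
    context.render(minMax,
                   toBitmap: &pixels,
                   rowBytes: 8 * MemoryLayout<Float>.size,
                   bounds: CGRect(x: 0, y: 0, width: 2, height: 1),
                   format: .RGBAf,
                   colorSpace: nil)
    let minValue = pixels[0]
    let maxValue = pixels[4]
    let range = max(maxValue - minValue, .leastNonzeroMagnitude)

    // (pixel - min) / range, applied via CIColorMatrix.
    let scale = CGFloat(1 / range)
    let bias = CGFloat(-minValue / range)
    return image.applyingFilter("CIColorMatrix", parameters: [
        "inputRVector": CIVector(x: scale, y: 0, z: 0, w: 0),
        "inputBiasVector": CIVector(x: bias, y: 0, z: 0, w: 0)
    ])
}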

Having trouble with input image with iOS Swift TensorFlowLite Image Classification Model?

I've been trying to add a plant recognition classifier to my app through a Firebase cloud-hosted ML model, and I've gotten close. The problem is, I'm pretty sure I'm messing up the input image data somewhere along the way. My classifier is churning out nonsense probabilities/results, while testing the same classifier through a Python script gives me accurate results.
The model requires a 224x224 image with 3 channels, scaled to [0, 1]. I've done all this but can't seem to get the CGImage right coming from the Camera/ImagePicker. Here is the bit of code that processes the input image:
if let imageData = info[.originalImage] as? UIImage {
DispatchQueue.main.async {
let resizedImage = imageData.scaledImage(with: CGSize(width:224, height:224))
let ciImage = CIImage(image: resizedImage!)
let CGcontext = CIContext(options: nil)
let image : CGImage = CGcontext.createCGImage(ciImage!, from: ciImage!.extent)!
guard let context = CGContext(
data: nil,
width: image.width, height: image.height,
bitsPerComponent: 8, bytesPerRow: image.width * 4,
space: CGColorSpaceCreateDeviceRGB(),
bitmapInfo: CGImageAlphaInfo.noneSkipFirst.rawValue
) else {
return
}
context.draw(image, in: CGRect(x: 0, y: 0, width: image.width, height: image.height))
guard let imageData = context.data else { return }
print("Image data showing as: \(imageData)")
var inputData = Data()
do {
for row in 0 ..< 224 {
for col in 0 ..< 224 {
let offset = 4 * (row * context.width + col)
// (Ignore offset 0, the unused alpha channel)
let red = imageData.load(fromByteOffset: offset+1, as: UInt8.self)
let green = imageData.load(fromByteOffset: offset+2, as: UInt8.self)
let blue = imageData.load(fromByteOffset: offset+3, as: UInt8.self)
// Normalize channel values to [0.0, 1.0].
var normalizedRed = Float32(red) / 255.0
var normalizedGreen = Float32(green) / 255.0
var normalizedBlue = Float32(blue) / 255.0
// Append normalized values to Data object in RGB order.
let elementSize = MemoryLayout.size(ofValue: normalizedRed)
var bytes = [UInt8](repeating: 0, count: elementSize)
memcpy(&bytes, &normalizedRed, elementSize)
inputData.append(&bytes, count: elementSize)
memcpy(&bytes, &normalizedGreen, elementSize)
inputData.append(&bytes, count: elementSize)
memcpy(&bytes, &normalizedBlue, elementSize)
inputData.append(&bytes, count: elementSize)
}
}
print("Successfully added inputData")
self.parent.invokeInterpreter(inputData: inputData)
} catch let error {
print("Failed to add input: \(error)")
}
}
}
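For what it's worth, the same [0, 1] Float32 RGB packing can be done in one pass over a plain float array and converted to Data at the end; this sketch assumes the same pixel layout as the loop above (byte 0 unused alpha, then R, G, B), with imageData being the same context.data pointer:
// Sketch: build the interleaved RGB float buffer, then copy its bytes into Data once.
var floats = [Float32]()
floats.reserveCapacity(224 * 224 * 3)
for row in 0..<224 {
    for col in 0..<224 {
        let offset = 4 * (row * 224 + col)
        for channel in 1...3 { // skip byte 0 (unused alpha)
            let value = imageData.load(fromByteOffset: offset + channel, as: UInt8.self)
            floats.append(Float32(value) / 255.0)
        }
    }
}
let inputData = floats.withUnsafeBufferPointer { Data(buffer: $0) }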
Afterwards, I process the inputData with the following:
func invokeInterpreter(inputData: Data) {
do {
var interpreter = try Interpreter(modelPath: ProfileUserData.sharedUserData.modelPath)
var labels: [String] = []
try interpreter.allocateTensors()
try interpreter.copy(inputData, toInputAt: 0)
try interpreter.invoke()
let output = try interpreter.output(at: 0)
switch output.dataType {
case .uInt8:
guard let quantization = output.quantizationParameters else {
print("No results returned because the quantization values for the output tensor are nil.")
return
}
let quantizedResults = [UInt8](output.data)
let results = quantizedResults.map {
quantization.scale * Float(Int($0) - quantization.zeroPoint)
}
let sum = results.reduce(0, +)
print("Sum of all dequantized results is: \(sum)")
print("Count of dequantized results is: \(results.indices.count)")
let filename = "plantLabels"
let fileExtension = "csv"
guard let labelPath = Bundle.main.url(forResource: filename, withExtension: fileExtension) else {
print("Labels file not found in bundle. Please check labels file.")
return
}
do {
let contents = try String(contentsOf: labelPath, encoding: .utf8)
labels = contents.components(separatedBy: .newlines)
print("Count of label rows is: \(labels.indices.count)")
} catch {
fatalError("Labels file named \(filename).\(fileExtension) cannot be read. Please add a " +
"valid labels file and try again.")
}
let zippedResults = zip(labels.indices, results)
// Sort the zipped results by confidence value in descending order.
let sortedResults = zippedResults.sorted { $0.1 > $1.1 }.prefix(3)
print("Printing sortedResults: \(sortedResults)")
case .float32:
print("Output tensor data type [Float32] is unsupported for this model.")
default:
print("Output tensor data type \(output.dataType) is unsupported for this model.")
return
}
} catch {
//Error with interpreter
print("Error with running interpreter: \(error.localizedDescription)")
}
}
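When the outputs look like nonsense, it can also help to confirm what the model actually expects at input 0 before copying data in. With the same TensorFlowLiteSwift API this might look like the following sketch (model path taken from the question):
import TensorFlowLite

// Sketch: print the expected input tensor shape and element type.
func logModelInputSpec() throws {
    let interpreter = try Interpreter(modelPath: ProfileUserData.sharedUserData.modelPath)
    try interpreter.allocateTensors()
    let input = try interpreter.input(at: 0)
    print("Input shape: \(input.shape.dimensions), type: \(input.dataType)")
    // For a Float32 model, inputData.count should equal 1 * 224 * 224 * 3 * 4 bytes.
}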

Issue applying a Shader after a MPSImageLanczosScale on Apple Metal

I'm getting a weird result when I apply a shader to an MTLTexture after applying an MPSImageLanczosScale, even when the transform has scale = 1, translationX = 0, and translationY = 0.
It works well if I don't apply the MPSImageLanczosScale. Below you can see the result without and with the MPSImageLanczosScale applied.
My render method looks like this:
func filter(pixelBuffer: CVPixelBuffer) -> CVPixelBuffer? {
guard let commandQueue = commandQueue, var commandBuffer = commandQueue.makeCommandBuffer() else {
print("Failed to create Metal command queue")
CVMetalTextureCacheFlush(textureCache!, 0)
return nil
}
var newPixelBuffer: CVPixelBuffer?
CVPixelBufferPoolCreatePixelBuffer(kCFAllocatorDefault, outputPixelBufferPool!, &newPixelBuffer)
guard var outputPixelBuffer = newPixelBuffer else {
print("Allocation failure: Could not get pixel buffer from pool (\(self.description))")
return nil
}
guard let inputTexture = makeTextureFromCVPixelBuffer(pixelBuffer: pixelBuffer, textureFormat: .bgra8Unorm) else {
return nil
}
guard var intermediateTexture = makeTextureFromCVPixelBuffer(pixelBuffer: outputPixelBuffer, textureFormat: .bgra8Unorm) else {
return nil
}
let imageLanczosScale = MPSImageLanczosScale(device: metalDevice)
var transform = MPSScaleTransform(scaleX: Double(scale), scaleY: Double(scale), translateX: Double(translationX), translateY: Double(translationY))
withUnsafePointer(to: &transform) { (transformPtr: UnsafePointer<MPSScaleTransform>) -> () in
imageLanczosScale.scaleTransform = transformPtr
}
imageLanczosScale.encode(commandBuffer: commandBuffer, sourceTexture: inputTexture, destinationTexture: intermediateTexture)
guard let commandEncoder = commandBuffer.makeComputeCommandEncoder(),
let outputTexture = makeTextureFromCVPixelBuffer(pixelBuffer: outputPixelBuffer, textureFormat: .bgra8Unorm) else { return nil }
commandEncoder.label = "Shader"
commandEncoder.setComputePipelineState(shaderPipline)
commandEncoder.setTexture(intermediateTexture, index: 1)
commandEncoder.setTexture(outputTexture, index: 0)
let w = shaderPipline.threadExecutionWidth
let h = shaderPipline.maxTotalThreadsPerThreadgroup / w
let threadsPerThreadgroup = MTLSizeMake(w, h, 1)
let threadgroupsPerGrid = MTLSize(width: (intermediateTexture.width + w - 1) / w, height: (intermediateTexture.height + h - 1) / h, depth: 1)
commandEncoder.dispatchThreadgroups(threadgroupsPerGrid, threadsPerThreadgroup: threadsPerThreadgroup)
commandEncoder.endEncoding()
commandBuffer.commit()
return outputPixelBuffer
}
No idea what I'm doing wrong. Any ideas?
