Implement CoreML Custom Layer With Two Inputs - Metal

I have a TensorFlow graph that I want to convert to CoreML, but it uses some operations that CoreML lacks, which I will have to implement as custom layers.
The two operations I'm focusing on now are Sin and FloorDiv.
Sin was quite straightforward: I could follow this tutorial, and I have a working Swift class and Metal kernel that do the job, which I tested with a toy CoreML file:
import Foundation
import CoreML
import Accelerate

@objc(Sin) class Sin: NSObject, MLCustomLayer {
    let sinPipeline: MTLComputePipelineState

    required init(parameters: [String : Any]) throws {
        print(#function, parameters)
        let sinFunction = GPUDispatch.sharedInstance.library.makeFunction(name: "sin")!
        sinPipeline = try! GPUDispatch.sharedInstance.device.makeComputePipelineState(
            function: sinFunction)
        super.init()
    }

    func setWeightData(_ weights: [Data]) throws {
        print(#function, weights)
    }

    func outputShapes(forInputShapes inputShapes: [[NSNumber]]) throws -> [[NSNumber]] {
        print(#function, inputShapes)
        return inputShapes
    }

    // CPU path: one output per input, computed with Accelerate.
    func evaluate(inputs: [MLMultiArray], outputs: [MLMultiArray]) throws {
        for i in 0..<inputs.count {
            let input = inputs[i]
            let output = outputs[i]
            var count = Int32(input.count)
            let iptr = UnsafeMutablePointer<Float>(OpaquePointer(input.dataPointer))
            let optr = UnsafeMutablePointer<Float>(OpaquePointer(output.dataPointer))
            vvsinf(optr, iptr, &count)
        }
    }

    // GPU path: dispatch(pipeline:texture:) is a helper extension from the tutorial code.
    func encode(commandBuffer: MTLCommandBuffer,
                inputs: [MTLTexture], outputs: [MTLTexture]) throws {
        if let encoder = commandBuffer.makeComputeCommandEncoder() {
            for i in 0..<inputs.count {
                encoder.setTexture(inputs[i], index: 0)
                encoder.setTexture(outputs[i], index: 1)
                encoder.dispatch(pipeline: sinPipeline, texture: inputs[i])
            }
            // End encoding once, after all dispatches: an ended encoder
            // cannot accept further commands.
            encoder.endEncoding()
        }
    }
}
and in Sin.metal:
#include <metal_stdlib>
using namespace metal;

kernel void sin(
    texture2d_array<half, access::read> inTexture [[texture(0)]],
    texture2d_array<half, access::write> outTexture [[texture(1)]],
    ushort3 gid [[thread_position_in_grid]])
{
    if (gid.x >= outTexture.get_width() ||
        gid.y >= outTexture.get_height()) {
        return;
    }
    const float4 x = float4(inTexture.read(gid.xy, gid.z));
    const float4 y = sin(x);
    outTexture.write(half4(y), gid.xy, gid.z);
}
What I don't understand is how this would work if the custom layer has two inputs, as I would need for FloorDiv, which computes floor(x / y).
How would I adapt the Sin class above to produce something like sin(x*y), even if it's just on the CPU? Are there any other good tutorials for this sort of thing?

The pattern is different from what I expected, but it's quite obvious now that I've played with the code some more.
This is a class that implements FloorDiv:
import Foundation
import CoreML
import Accelerate

@objc(FloorDiv) class FloorDiv: NSObject, MLCustomLayer {
    let floorDivPipeline: MTLComputePipelineState

    required init(parameters: [String : Any]) throws {
        print(#function, parameters)
        let floorDivFunction = GPUDispatch.sharedInstance.library.makeFunction(name: "floordiv")!
        floorDivPipeline = try! GPUDispatch.sharedInstance.device.makeComputePipelineState(
            function: floorDivFunction)
        super.init()
    }

    func setWeightData(_ weights: [Data]) throws {
        print(#function, weights)
    }

    func outputShapes(forInputShapes inputShapes: [[NSNumber]]) throws -> [[NSNumber]] {
        print(#function, inputShapes)
        return inputShapes
    }

    // CPU path: the two inputs arrive as inputs[0] and inputs[1];
    // the single result goes into outputs[0].
    func evaluate(inputs: [MLMultiArray], outputs: [MLMultiArray]) throws {
        let numerator = inputs[0]
        let denominator = inputs[1]
        let output = outputs[0]
        assert(numerator.count == denominator.count)
        var count = Int32(numerator.count)
        let numerator_ptr = UnsafeMutablePointer<Float>(OpaquePointer(numerator.dataPointer))
        let denominator_ptr = UnsafeMutablePointer<Float>(OpaquePointer(denominator.dataPointer))
        let output_ptr = UnsafeMutablePointer<Float>(OpaquePointer(output.dataPointer))
        vvdivf(output_ptr, numerator_ptr, denominator_ptr, &count)
        vvfloorf(output_ptr, output_ptr, &count)
    }

    // GPU path: bind both input textures and the output texture at distinct indices.
    func encode(commandBuffer: MTLCommandBuffer,
                inputs: [MTLTexture], outputs: [MTLTexture]) throws {
        if let encoder = commandBuffer.makeComputeCommandEncoder() {
            encoder.setTexture(inputs[0], index: 0)
            encoder.setTexture(inputs[1], index: 1)
            encoder.setTexture(outputs[0], index: 2)
            encoder.dispatch(pipeline: floorDivPipeline, texture: inputs[0])
            encoder.endEncoding()
        }
    }
}
And here is the Metal kernel:
#include <metal_stdlib>
using namespace metal;

kernel void floordiv(
    texture2d_array<half, access::read> inTexture [[texture(0)]],
    texture2d_array<half, access::read> inTexture2 [[texture(1)]],
    texture2d_array<half, access::write> outTexture [[texture(2)]],
    ushort3 gid [[thread_position_in_grid]])
{
    if (gid.x >= outTexture.get_width() ||
        gid.y >= outTexture.get_height()) {
        return;
    }
    const float4 x = float4(inTexture.read(gid.xy, gid.z));
    const float4 x2 = float4(inTexture2.read(gid.xy, gid.z));
    const float4 y = floor(x / x2);
    outTexture.write(half4(y), gid.xy, gid.z);
}

Related

Rotating Metal texture 180 degrees

I added sample code if someone wants to try fixing it: https://www.dropbox.com/s/6t6neo40qjganra/To%20be%20fixed.zip?dl=1
I'm making an AR app using the Vuforia SDK. Their sample code renders the video background using Metal. It works fine in its original orientation, but I need to change it to portrait. The problem is that after changing it, the video is rendered upside down. Detected targets are in the correct orientation, so I think this should be fixed in the Metal rendering class. Could someone help me do that? Below is the code I'm using to draw the background. How can I rotate it 180 degrees?
class MetalRenderer {
    private var mMetalDevice: MTLDevice
    private var mVideoBackgroundPipelineState: MTLRenderPipelineState!
    private var mUniformColorShaderPipelineState: MTLRenderPipelineState!
    private var mTexturedVertexShaderPipelineState: MTLRenderPipelineState!
    private var mDefaultSamplerState: MTLSamplerState?
    private var mVideoBackgroundVertices: MTLBuffer!
    private var mVideoBackgroundIndices: MTLBuffer!
    private var mVideoBackgroundTextureCoordinates: MTLBuffer!

    /// Initialize the renderer ready for use
    init(metalDevice: MTLDevice, layer: CAMetalLayer, library: MTLLibrary?, textureDepth: MTLTexture) {
        mMetalDevice = metalDevice
        let stateDescriptor = MTLRenderPipelineDescriptor()

        //
        // Video background
        //
        stateDescriptor.vertexFunction = library?.makeFunction(name: "texturedVertex")
        stateDescriptor.fragmentFunction = library?.makeFunction(name: "texturedFragment")
        stateDescriptor.colorAttachments[0].pixelFormat = layer.pixelFormat
        stateDescriptor.depthAttachmentPixelFormat = textureDepth.pixelFormat
        // And create the pipeline state with the descriptor
        do {
            try mVideoBackgroundPipelineState = metalDevice.makeRenderPipelineState(descriptor: stateDescriptor)
        } catch {
            print("Failed to create video background render pipeline state:", error)
        }

        //
        // Augmentations
        //
        // Create pipeline for transparent object overlays
        stateDescriptor.vertexFunction = library?.makeFunction(name: "uniformColorVertex")
        stateDescriptor.fragmentFunction = library?.makeFunction(name: "uniformColorFragment")
        stateDescriptor.colorAttachments[0].pixelFormat = layer.pixelFormat
        stateDescriptor.colorAttachments[0].isBlendingEnabled = true
        stateDescriptor.colorAttachments[0].rgbBlendOperation = .add
        stateDescriptor.colorAttachments[0].alphaBlendOperation = .add
        stateDescriptor.colorAttachments[0].sourceRGBBlendFactor = .sourceAlpha
        stateDescriptor.colorAttachments[0].sourceAlphaBlendFactor = .sourceAlpha
        stateDescriptor.colorAttachments[0].destinationRGBBlendFactor = .oneMinusSourceAlpha
        stateDescriptor.colorAttachments[0].destinationAlphaBlendFactor = .oneMinusSourceAlpha
        stateDescriptor.depthAttachmentPixelFormat = textureDepth.pixelFormat
        do {
            try mUniformColorShaderPipelineState = metalDevice.makeRenderPipelineState(descriptor: stateDescriptor)
        } catch {
            print("Failed to create augmentation render pipeline state:", error)
            return
        }

        // Create pipeline for rendering textures
        stateDescriptor.vertexFunction = library?.makeFunction(name: "texturedVertex")
        stateDescriptor.fragmentFunction = library?.makeFunction(name: "texturedFragment")
        do {
            try mTexturedVertexShaderPipelineState = metalDevice.makeRenderPipelineState(descriptor: stateDescriptor)
        } catch {
            print("Failed to create guide view render pipeline state:", error)
            return
        }

        mDefaultSamplerState = MetalRenderer.defaultSampler(device: metalDevice)

        // Allocate space for rendering data for video background
        mVideoBackgroundVertices = mMetalDevice.makeBuffer(length: MemoryLayout<Float>.size * 3 * 4, options: [.optionCPUCacheModeWriteCombined])
        mVideoBackgroundTextureCoordinates = mMetalDevice.makeBuffer(length: MemoryLayout<Float>.size * 2 * 4, options: [.optionCPUCacheModeWriteCombined])
        mVideoBackgroundIndices = mMetalDevice.makeBuffer(length: MemoryLayout<UInt16>.size * 6, options: [.optionCPUCacheModeWriteCombined])
    }

    /// Render the video background
    func renderVideoBackground(encoder: MTLRenderCommandEncoder?, projectionMatrix: MTLBuffer, mesh: VuforiaMesh) {
        // Copy mesh data into metal buffers
        mVideoBackgroundVertices.contents().copyMemory(from: mesh.vertices, byteCount: MemoryLayout<Float>.size * Int(mesh.numVertices) * 3)
        mVideoBackgroundTextureCoordinates.contents().copyMemory(from: mesh.textureCoordinates, byteCount: MemoryLayout<Float>.size * Int(mesh.numVertices) * 2)
        mVideoBackgroundIndices.contents().copyMemory(from: mesh.indices, byteCount: MemoryLayout<CShort>.size * Int(mesh.numIndices))
        // Set the render pipeline state
        encoder?.setRenderPipelineState(mVideoBackgroundPipelineState)
        // Set the vertex buffer
        encoder?.setVertexBuffer(mVideoBackgroundVertices, offset: 0, index: 0)
        // Set the projection matrix
        encoder?.setVertexBuffer(projectionMatrix, offset: 0, index: 1)
        // Set the texture coordinate buffer
        encoder?.setVertexBuffer(mVideoBackgroundTextureCoordinates, offset: 0, index: 2)
        encoder?.setFragmentSamplerState(mDefaultSamplerState, index: 0)
        // Draw the geometry
        encoder?.drawIndexedPrimitives(
            type: .triangle,
            indexCount: 6,
            indexType: .uint16,
            indexBuffer: mVideoBackgroundIndices,
            indexBufferOffset: 0
        )
    }
}

extension MetalRenderer {
    class func defaultSampler(device: MTLDevice) -> MTLSamplerState? {
        let sampler = MTLSamplerDescriptor()
        sampler.minFilter = .linear
        sampler.magFilter = .linear
        sampler.mipFilter = .linear
        sampler.maxAnisotropy = 1
        sampler.sAddressMode = .clampToEdge
        sampler.tAddressMode = .clampToEdge
        sampler.rAddressMode = .clampToEdge
        sampler.normalizedCoordinates = true
        sampler.lodMinClamp = 0
        sampler.lodMaxClamp = .greatestFiniteMagnitude
        return device.makeSamplerState(descriptor: sampler)
    }
}
Adding the code from the view that creates the renderer:
import UIKit
import MetalKit

protocol VuforiaViewDelegate: AnyObject {
    func renderFrame(vuforiaView: VuforiaView)
}

class VuforiaView: UIView {
    weak var delegate: VuforiaViewDelegate?
    var mVuforiaStarted = false
    private var mConfigurationChanged = true
    private var mRenderer: MetalRenderer!
    private var mMetalDevice: MTLDevice!
    private var mMetalCommandQueue: MTLCommandQueue!
    private var mCommandExecutingSemaphore: DispatchSemaphore!
    private var mDepthStencilState: MTLDepthStencilState!
    private var mDepthTexture: MTLTexture!
    private var mVideoBackgroundProjectionBuffer: MTLBuffer!
    private lazy var metalLayer = layer as! CAMetalLayer

    override class var layerClass: AnyClass { CAMetalLayer.self }

    // Transformations and variables - constantly updated by Vuforia frame updates
    private var viewport = MTLViewport()
    private var trackableProjection = matrix_float4x4()
    private var trackableModelView = matrix_float4x4()
    private var trackableScaledModelView = matrix_float4x4()
    private(set) var worldOriginProjectionMatrix = matrix_float4x4()
    private(set) var worldOriginModelViewMatrix = matrix_float4x4()
    private(set) var targetPose = matrix_float4x4()
    private(set) var targetSize = simd_float3()

    override init(frame: CGRect) {
        super.init(frame: frame)
        setup()
    }

    required init?(coder: NSCoder) {
        super.init(coder: coder)
        setup()
    }

    private func setup() {
        contentScaleFactor = UIScreen.main.nativeScale
        // Get the system default metal device
        mMetalDevice = MTLCreateSystemDefaultDevice()
        // Metal command queue
        mMetalCommandQueue = mMetalDevice.makeCommandQueue()
        // Create a dispatch semaphore, used to synchronise command execution
        mCommandExecutingSemaphore = DispatchSemaphore(value: 1)
        // Configure the CAMetalLayer and set its frame to match that of the view
        let layer = self.layer as! CAMetalLayer
        layer.device = mMetalDevice
        layer.pixelFormat = .bgra8Unorm
        layer.framebufferOnly = true
        layer.contentsScale = contentScaleFactor
        // Get the default library from the bundle (Metal shaders)
        let library = mMetalDevice.makeDefaultLibrary()
        // Create a depth texture that is needed when rendering the augmentation.
        let screenSize = UIScreen.main.bounds.size
        let depthTextureDescriptor = MTLTextureDescriptor.texture2DDescriptor(
            pixelFormat: .depth32Float,
            width: Int(screenSize.width * contentScaleFactor),
            height: Int(screenSize.height * contentScaleFactor),
            mipmapped: false
        )
        depthTextureDescriptor.usage = .renderTarget
        mDepthTexture = mMetalDevice.makeTexture(descriptor: depthTextureDescriptor)
        // Video background projection matrix buffer
        mVideoBackgroundProjectionBuffer = mMetalDevice.makeBuffer(length: MemoryLayout<Float>.size * 16, options: [])
        // Fragment depth stencil
        let depthStencilDescriptor = MTLDepthStencilDescriptor()
        depthStencilDescriptor.depthCompareFunction = .less
        depthStencilDescriptor.isDepthWriteEnabled = true
        mDepthStencilState = mMetalDevice.makeDepthStencilState(descriptor: depthStencilDescriptor)
        mRenderer = MetalRenderer(
            metalDevice: mMetalDevice,
            layer: layer,
            library: library,
            textureDepth: mDepthTexture
        )
    }

    private func configureVuforia() {
        let orientationValue: Int32 = {
            let orientation = UIApplication.shared.windows.first(where: { $0.isKeyWindow })?.windowScene?.interfaceOrientation ?? .portrait
            switch orientation {
            case .portrait: return 0
            case .portraitUpsideDown: return 1
            case .landscapeLeft: return 2
            case .landscapeRight: return 3
            case .unknown: return 4
            @unknown default: return 4
            }
        }()
        let screenSize = UIScreen.main.bounds.size
        configureRendering(
            Int32(screenSize.width * contentScaleFactor),
            Int32(screenSize.height * contentScaleFactor),
            orientationValue
        )
    }

    @objc private func renderFrameVuforia() {
        objc_sync_enter(self)
        if mVuforiaStarted {
            if mConfigurationChanged {
                mConfigurationChanged = false
                configureVuforia()
            }
            renderFrameVuforiaInternal()
            delegate?.renderFrame(vuforiaView: self)
        }
        objc_sync_exit(self)
    }

    private func renderFrameVuforiaInternal() {
        // Check if the camera is started
        guard isCameraStarted() else { return }

        // ========== Set up ==========
        var viewportsValue: [Double] = [0.0, 0.0, Double(metalLayer.drawableSize.width), Double(metalLayer.drawableSize.height), 0.0, 1.0]

        // --- Command buffer ---
        // Get the command buffer from the command queue
        let commandBuffer = mMetalCommandQueue.makeCommandBuffer()

        // Get the next drawable from the CAMetalLayer
        let drawable = metalLayer.nextDrawable()

        // It's possible for nextDrawable to return nil, which means a call to
        // renderCommandEncoderWithDescriptor will fail
        guard drawable != nil else { return }

        // Wait for exclusive access to the GPU
        let _ = mCommandExecutingSemaphore.wait(timeout: .distantFuture)

        // --- Render pass descriptor ---
        // Set up a render pass descriptor
        let renderPassDescriptor = MTLRenderPassDescriptor()
        // Draw to the drawable's texture
        renderPassDescriptor.colorAttachments[0].texture = drawable?.texture
        // Clear the colour attachment in case there is no video frame
        renderPassDescriptor.colorAttachments[0].loadAction = .clear
        // Store the data in the texture when rendering is complete
        renderPassDescriptor.colorAttachments[0].storeAction = .store
        // Use textureDepth for depth operations.
        renderPassDescriptor.depthAttachment.texture = mDepthTexture

        // Get a command encoder to encode into the command buffer
        let encoder = commandBuffer?.makeRenderCommandEncoder(descriptor: renderPassDescriptor)

        if prepareToRender(
            &viewportsValue,
            UnsafeMutableRawPointer(Unmanaged.passRetained(mMetalDevice!).toOpaque()),
            UnsafeMutableRawPointer(Unmanaged.passRetained(drawable!.texture).toOpaque()),
            UnsafeMutableRawPointer(Unmanaged.passRetained(encoder!).toOpaque())
        ) {
            viewport.originX = viewportsValue[0]
            viewport.originY = viewportsValue[1]
            viewport.width = viewportsValue[2]
            viewport.height = viewportsValue[3]
            viewport.znear = viewportsValue[4]
            viewport.zfar = viewportsValue[5]
            encoder?.setViewport(viewport)

            // Once the camera is initialized we can get the video background rendering values
            getVideoBackgroundProjection(mVideoBackgroundProjectionBuffer.contents())
            // Call the renderer to draw the video background
            mRenderer.renderVideoBackground(encoder: encoder, projectionMatrix: mVideoBackgroundProjectionBuffer, mesh: getVideoBackgroundMesh())

            encoder?.setDepthStencilState(mDepthStencilState)

            getOrigin(
                &worldOriginProjectionMatrix.columns,
                &worldOriginModelViewMatrix.columns
            )
            getImageTargetResult(
                &trackableProjection.columns,
                &trackableModelView.columns,
                &trackableScaledModelView.columns,
                &targetPose.columns,
                &targetSize
            )
        }

        // Pass Metal context data to Vuforia Engine (we may have changed the encoder since
        // calling Vuforia::Renderer::begin)
        finishRender(
            UnsafeMutableRawPointer(Unmanaged.passRetained(drawable!.texture).toOpaque()),
            UnsafeMutableRawPointer(Unmanaged.passRetained(encoder!).toOpaque())
        )

        // ========== Finish Metal rendering ==========
        encoder?.endEncoding()
        // Command completed handler
        commandBuffer?.addCompletedHandler { _ in self.mCommandExecutingSemaphore.signal() }
        // Present the drawable when the command buffer has been executed (Metal
        // calls to CoreAnimation to tell it to put the texture on the display when
        // the rendering is complete)
        commandBuffer?.present(drawable!)
        // Commit the command buffer for execution as soon as possible
        commandBuffer?.commit()
    }
}
Another problem is that in portrait mode something is wrong with the aspect ratio: the camera background is drawn distorted. But that is a subject for another question.
Shaders.metal:
/*===============================================================================
Copyright (c) 2020, PTC Inc. All rights reserved.
Vuforia is a trademark of PTC Inc., registered in the United States and other
countries.
===============================================================================*/
#include <metal_stdlib>
using namespace metal;

// === Texture sampling shader ===
struct VertexTextureOut
{
    float4 m_Position [[ position ]];
    float2 m_TexCoord;
};

vertex VertexTextureOut texturedVertex(constant packed_float3* pPosition [[ buffer(0) ]],
                                       constant float4x4* pMVP [[ buffer(1) ]],
                                       constant float2* pTexCoords [[ buffer(2) ]],
                                       uint vid [[ vertex_id ]])
{
    VertexTextureOut out;
    float4 in(pPosition[vid], 1.0f);
    out.m_Position = *pMVP * in;
    out.m_TexCoord = pTexCoords[vid];
    return out;
}

fragment half4 texturedFragment(VertexTextureOut inFrag [[ stage_in ]],
                                texture2d<half> tex2D [[ texture(0) ]],
                                sampler sampler2D [[ sampler(0) ]])
{
    return tex2D.sample(sampler2D, inFrag.m_TexCoord);
}

// === Uniform color shader ===
struct VertexOut
{
    float4 m_Position [[ position ]];
};

vertex VertexOut uniformColorVertex(constant packed_float3* pPosition [[ buffer(0) ]],
                                    constant float4x4* pMVP [[ buffer(1) ]],
                                    uint vid [[ vertex_id ]])
{
    VertexOut out;
    float4 in(pPosition[vid], 1.0f);
    out.m_Position = *pMVP * in;
    return out;
}

fragment float4 uniformColorFragment(constant float4 &color [[ buffer(0) ]])
{
    return color;
}

// === Vertex color shader ===
struct VertexColorOut
{
    float4 m_Position [[ position ]];
    float4 m_Color;
};

vertex VertexColorOut vertexColorVertex(constant packed_float3* pPosition [[ buffer(0) ]],
                                        constant float4* pColor [[ buffer(1) ]],
                                        constant float4x4* pMVP [[ buffer(2) ]],
                                        uint vid [[ vertex_id ]])
{
    VertexColorOut out;
    float4 in(pPosition[vid], 1.0f);
    out.m_Position = *pMVP * in;
    out.m_Color = pColor[vid];
    return out;
}

fragment float4 vertexColorFragment(VertexColorOut inFrag [[ stage_in ]])
{
    return inFrag.m_Color;
}
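One hedged suggestion (mine, not part of the original post): a 180-degree rotation in screen space simply negates clip-space x and y, so you could fold a Z-axis rotation into the video background projection matrix before it is copied into mVideoBackgroundProjectionBuffer. A minimal sketch; whether Vuforia's configureRendering should handle the orientation instead depends on the SDK setup:
import simd

// Sketch only: premultiplying by a 180-degree rotation about Z flips the
// rendered background, leaving the rest of the pipeline untouched.
func rotated180(_ projection: simd_float4x4) -> simd_float4x4 {
    let rotation = simd_float4x4(simd_quatf(angle: .pi, axis: SIMD3<Float>(0, 0, 1)))
    return rotation * projection
}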

Problems (wrong scale, aspect ratio) while rendering an MTLTexture to an MTKView after applying compute shaders

I am trying to process video frames from the camera using Metal compute shaders and display them to the user. The problem is with displaying the modified frames: the output contains stacked copies of the processed frame, some of them clipped, and they don't fill the screen completely.
P.S. I am new to both iOS and Metal.
So far, I have identified the variables that control this:
1. Number of thread groups launched
2. MTKView's drawable size
3. Sampling id in the Metal shader
I have played around with these without good results.
Below are the code and my output.
The function that sets up the MTKView:
func initMetalView() {
    metalView = MTKView(frame: view.frame, device: metalDevice)
    metalView.delegate = self
    metalView.framebufferOnly = false
    metalView.colorPixelFormat = .bgra8Unorm
    metalView.autoResizeDrawable = false
    metalView.drawableSize = CGSize(width: 1920, height: 1080)
    metalView.layer.transform = CATransform3DMakeRotation(CGFloat(Float.pi), 0.0, 1.0, 0.0)
    view.insertSubview(metalView, at: 0)
}
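An aside on the setup above (my observation, not from the original post): with autoResizeDrawable disabled, the drawable stays fixed at 1920x1080 regardless of the view's size or the camera frame's actual dimensions, and the layer then scales whatever the kernel wrote to fit the view. Re-matching the drawable to the incoming frames once they arrive is a cheap sanity check:
// Sketch only: keep the drawable the same size as the frames being written.
// inputTexture is the property set by the capture delegate below.
if let inputTexture = inputTexture {
    metalView.drawableSize = CGSize(width: inputTexture.width,
                                    height: inputTexture.height)
}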
The AVCaptureVideoDataOutputSampleBufferDelegate used to convert the CMSampleBuffer to an MTLTexture:
extension ViewController: AVCaptureVideoDataOutputSampleBufferDelegate {
    func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
        // sample buffer -> image buffer -> CoreVideo metal texture -> MTL texture
        guard let cvImageBuffer = CMSampleBufferGetImageBuffer(sampleBuffer)
        else { fatalError("can't get image buffer") }
        var textureCache: CVMetalTextureCache?
        guard CVMetalTextureCacheCreate(kCFAllocatorDefault, nil, metalDevice, nil, &textureCache) == kCVReturnSuccess else { fatalError("cant create texture cache") }
        let width = CVPixelBufferGetWidth(cvImageBuffer)
        let height = CVPixelBufferGetHeight(cvImageBuffer)
        var imageTexture: CVMetalTexture?
        let result = CVMetalTextureCacheCreateTextureFromImage(kCFAllocatorDefault, textureCache!, cvImageBuffer, nil, MTLPixelFormat.bgra8Unorm, width, height, 0, &imageTexture)
        guard let unwrappedImageTexture = imageTexture,
              result == kCVReturnSuccess
        else { fatalError("failed to create texture from image") }
        inputTexture = CVMetalTextureGetTexture(unwrappedImageTexture)
    }
}
The MTKViewDelegate used to apply the shader to inputTexture and display the output texture in metalView:
extension ViewController: MTKViewDelegate {
    func mtkView(_ view: MTKView, drawableSizeWillChange size: CGSize) {
    }

    func draw(in view: MTKView) {
        guard let inputTexture = inputTexture,
              let commandQueue = commandQueue,
              let commandBuffer = commandQueue.makeCommandBuffer(),
              let encoder = commandBuffer.makeComputeCommandEncoder(),
              let pipelineState = pipelineState
        else { return }
        encoder.setComputePipelineState(pipelineState)
        encoder.setTextures([metalView.currentDrawable!.texture, inputTexture], range: 0..<2)
        encoder.dispatchThreadgroups(MTLSizeMake(inputTexture.width/16, inputTexture.height/16, 1), threadsPerThreadgroup: threadsPerBlock)
        // inputTexture w:1920, h:1080
        encoder.endEncoding()
        commandBuffer.present(metalView.currentDrawable!)
        commandBuffer.commit()
    }
}
The Metal compute shader:
#include <metal_stdlib>
using namespace metal;

kernel void blacky(texture2d<float, access::write> outTexture [[texture(0)]],
                   texture2d<float, access::read> inTexture [[texture(1)]],
                   uint2 id [[thread_position_in_grid]]) {
    uint2 flipped_id = uint2(id.y, id.x);
    float3 val = inTexture.read(flipped_id).rgb;
    float g = (val.r + val.g + val.b) / 3.0;
    float4 out = float4(g, g, g, 1);
    outTexture.write(out.rgba, id);
}
You can see the current output here: https://i.imgur.com/hVDox3U
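A hedged reading of the draw call above (mine, not an accepted answer): the grid is sized from the input texture with truncating division while the kernel writes to the drawable, which can have a different size, and flipped_id transposes coordinates, so for a 1920x1080 texture it reads out of bounds whenever id.x exceeds 1079. Sizing the grid from the texture being written, with ceiling division and a bounds check in the kernel, guarantees each drawable pixel is written exactly once:
// Sketch only: size the dispatch from the texture being *written*.
let outTexture = metalView.currentDrawable!.texture
let w = pipelineState.threadExecutionWidth
let h = pipelineState.maxTotalThreadsPerThreadgroup / w
let threadsPerGroup = MTLSize(width: w, height: h, depth: 1)
let threadgroups = MTLSize(width: (outTexture.width + w - 1) / w,   // ceiling division
                           height: (outTexture.height + h - 1) / h,
                           depth: 1)
encoder.dispatchThreadgroups(threadgroups, threadsPerThreadgroup: threadsPerGroup)
In the kernel, pair this with an early return such as if (id.x >= outTexture.get_width() || id.y >= outTexture.get_height()) return; before reading or writing.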

Shadertoy shader port to Metal slow on iOS

I'm trying to learn Metal shaders, so I ported this mountain-generation shader I found on Shadertoy to Metal:
https://www.shadertoy.com/view/llsGW7
The port works, but it is very slow on iOS. It is reasonably fast on OS X, but it gets slower when I increase the window size, and it is also slow within an OS X playground.
I've done the tutorials on MetalKit.org and read the Apple docs on the Metal shading language, but I feel I'm conceptually lacking in how everything works under the hood. If anything in this code jumps out as obviously slowing things down, I would be very grateful to learn what it is. I am not sure if the slowdown is due to the shader code itself or to the way it's all set up.
Here's the Metal shader:
#include <metal_stdlib>
using namespace metal;

constexpr sampler textureSampler(coord::normalized,
                                 address::repeat,
                                 min_filter::linear,
                                 mag_filter::linear,
                                 mip_filter::linear);

kernel void compute(texture2d<float, access::write> output [[texture(0)]],
                    texture2d<float, access::sample> input [[texture(1)]],
                    constant float &timer [[buffer(0)]],
                    uint2 gid [[thread_position_in_grid]])
{
    int width = input.get_width();
    int height = input.get_height();
    float2 uv = float2(gid) / float2(width, height);
    float4 p = float4(uv, 1, 1) - 0.5;
    p.y = -p.y;
    float4 d = p * 0.5;
    float4 t;
    float4 c;
    p.z += timer * 200;
    d.y -= 0.2;
    for (float i = 1.7; i > 0.0; i -= 0.002) {
        float s = 0.5;
        t = input.sample(textureSampler, 0.3 + p.xz * s / 3e3) / (s += s);
        // this makes it purple
        c = float4(1.0, -0.9, 0.8, 9.0) + d.x - t*i;
        // c = float4(1.0, 0.9, 0.8, 9.0) + d.x - t*i;
        if (t.x > p.y * .01 + 1.3) {
            break;
        }
        p += d;
    }
    output.write(c, gid);
}
And here is the subclass of MTKView I'm using to render the shader:
import Cocoa
import MetalKit

class MetalView: MTKView {
    var queue: MTLCommandQueue!
    var cps: MTLComputePipelineState!
    var timer: Float = 0
    var timerBuffer: MTLBuffer!
    var shaderName: String!
    var texture: MTLTexture!

    required public init(coder: NSCoder) {
        super.init(coder: coder)
        self.framebufferOnly = false
        self.preferredFramesPerSecond = 60
        registerShaders()
        setupTexture()
    }

    func setupTexture() {
        let path = Bundle.main.path(forResource: "texture", ofType: "jpg")
        let textureLoader = MTKTextureLoader(device: device!)
        texture = try! textureLoader.newTexture(withContentsOf: URL(fileURLWithPath: path!), options: nil)
    }

    func registerShaders() {
        device = MTLCreateSystemDefaultDevice()!
        queue = device!.makeCommandQueue()
        do {
            let library = device!.newDefaultLibrary()!
            let kernel = library.makeFunction(name: "compute")!
            cps = try device!.makeComputePipelineState(function: kernel)
        } catch let e {
            Swift.print("\(e)")
        }
        timerBuffer = device!.makeBuffer(length: MemoryLayout<Float>.size, options: [])
    }

    override public func draw(_ dirtyRect: CGRect) {
        if let drawable = currentDrawable {
            let commandBuffer = queue.makeCommandBuffer()
            let commandEncoder = commandBuffer.makeComputeCommandEncoder()
            commandEncoder.setComputePipelineState(cps)
            commandEncoder.setTexture(drawable.texture, at: 0)
            commandEncoder.setTexture(texture, at: 1)
            commandEncoder.setBuffer(timerBuffer, offset: 0, at: 0)
            update()
            let threadGroupCount = MTLSizeMake(8, 8, 1)
            let threadGroups = MTLSizeMake(drawable.texture.width / threadGroupCount.width, drawable.texture.height / threadGroupCount.height, 1)
            commandEncoder.dispatchThreadgroups(threadGroups, threadsPerThreadgroup: threadGroupCount)
            commandEncoder.endEncoding()
            commandBuffer.present(drawable)
            commandBuffer.commit()
        }
    }

    func update() {
        timer += Float(1.0 / TimeInterval(self.preferredFramesPerSecond))
        let bufferPointer = timerBuffer.contents()
        memcpy(bufferPointer, &timer, MemoryLayout<Float>.size)
    }
}
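One hedged observation (mine, not an answer from the original thread): the kernel itself is the likely bottleneck, since the raymarch loop runs up to ~850 iterations (1.7 / 0.002), each with a texture sample, for every pixel, so cost scales linearly with drawable area. A cheap way to confirm this is to render at a reduced drawable size and let the layer scale the result up:
// Sketch only: halve the render resolution (a quarter of the pixel work) to
// test whether the per-pixel raymarch is what's slow. The 0.5 factor is arbitrary.
self.autoResizeDrawable = false
self.drawableSize = CGSize(width: self.bounds.width * 0.5,
                           height: self.bounds.height * 0.5)
Coarsening the loop step in the shader (e.g. i -= 0.004 instead of 0.002) similarly trades image quality for roughly half the work.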

iOS: CIColorKernel applyWithExtent warning message

I'm trying to make my own CIFilter with a CIKernel.
The code is here:
var kernel: CIKernel?
var inputImage: CIImage?

override init() {
    super.init()
    self.kernel = createKernel()
}

required init(coder aDecoder: NSCoder) {
    super.init(coder: aDecoder)!
    self.kernel = createKernel()
}

func outputCustomImage() -> CIImage? {
    if let inputImage = self.inputImage {
        let dod = inputImage.extent.insetBy(dx: -1, dy: -1)
        let args = [inputImage as AnyObject]
        let callback: CIKernelROICallback = { (index, rect) in
            return rect.insetBy(dx: -1, dy: -1)
        }
        return kernel!.applyWithExtent(dod, roiCallback: callback, arguments: args)
    }
    return nil
}

private func createKernel() -> CIKernel {
    let kernelString =
        "kernel vec4 RGB_to_GBR(sampler source_image)\n" +
        "{\n" +
        "    vec4 originalColor, twistedColor;\n" +
        "    originalColor = sample(source_image, samplerCoord(source_image));\n" +
        "    twistedColor.r = originalColor.g;\n" +
        "    twistedColor.g = originalColor.b;\n" +
        "    twistedColor.b = originalColor.r;\n" +
        "    twistedColor.a = originalColor.a;\n" +
        "    return twistedColor;\n" +
        "}\n"
    return CIKernel(string: kernelString)!
}
and the warning message is:
Note: CIColorKernel applyWithExtent:roiCallback:arguments: ignores callback and is not recomended. Use applyWithExtent:arguments: instead.
I think maybe the source code you supplied is different from the code generating the warning. For the filter you're writing, you only need a CIColorKernel, which would generate that warning if you're supplying an ROI callback.
A general kernel (CIKernel) is useful if you need to access other pixels, for example if you were writing a blur filter. Since you are only interested in the current pixel, stick with a color kernel and use apply(withExtent:arguments:).
Generally, if you are subclassing CIFilter, you override outputImage. Take a look at this version (I've also simplified your kernel code):
class RGB_to_GBR: CIFilter {
    let kernel: CIColorKernel = {
        let kernelString =
            "kernel vec4 RGB_to_GBR(__sample pixel)\n" +
            "{\n" +
            "    vec4 twistedColor = pixel.gbra;\n" +
            "    return twistedColor;\n" +
            "}\n"
        return CIColorKernel(string: kernelString)!
    }()

    var inputImage: CIImage?

    override var outputImage: CIImage? {
        guard let inputImage = inputImage else {
            return nil
        }
        return kernel.apply(withExtent: inputImage.extent,
                            arguments: [inputImage])
    }
}
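For completeness, a hedged usage sketch for the filter above (the CIImage/CIContext plumbing here is illustrative, not part of the answer; sourceUIImage is assumed to exist):
let filter = RGB_to_GBR()
filter.inputImage = CIImage(image: sourceUIImage)
if let output = filter.outputImage {
    // Render the swizzled result back to a CGImage for display.
    let context = CIContext()
    let result = context.createCGImage(output, from: output.extent)
}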

How can I return the values of a void function?

I have a function that calculates the FFT of the mic input.
The goal is to create a framework where calling the run function returns the float array with all the bands.
It all works fine now, but I don't know how to return the array produced by the gotSomeAudio function from the run function.
Thank you very much for any help.
@objc
public class FFT: NSObject {
    var audioInput: TempiAudioInput!

    @objc
    public func run() -> Array<Float> {
        let audioInputCallback: TempiAudioInputCallback = { (numberOfFrames, timeStamp, inout samples: [Float]) -> Void in
            self.gotSomeAudio(numberOfFrames, timeStamp: timeStamp, samples: samples)
        }
        audioInput = TempiAudioInput(audioInputCallback: audioInputCallback, sampleRate: 44100, numberOfChannels: 1)
        audioInput.startRecording()
        // how can I return the array from the gotSomeAudio function?
        return xyz
    }

    @objc
    public func gotSomeAudio(numberOfFrames: Int, timeStamp: Double, samples: [Float]) -> Array<Float> {
        let fft = TempiFFT(withSize: numberOfFrames, sampleRate: 44100)
        // Setting a window type reduces errors
        fft.windowType = TempiFFTWindowType.hanning
        // Perform the FFT
        fft.fftForward(samples)
        // Map FFT data to logical bands. This gives 4 bands per octave across 7 octaves = 28 bands.
        //fft.calculateLogarithmicBands(minFrequency: 100, maxFrequency: 11025, bandsPerOctave: 4)
        //fft.calculateLinearBands(minFrequency: 0, maxFrequency: fft.nyquistFrequency, numberOfBands: Int(screenWidth))
        fft.calculateLogarithmicBands(minFrequency: 100, maxFrequency: 11025, bandsPerOctave: 4)
        // Process some data
        return fft.bandFrequencies
    }
}
You can pass it to a completion callback instead of returning it:
public func run(complete: @escaping ([Float]) -> Void) {
    let audioInputCallback: TempiAudioInputCallback = { (numberOfFrames, timeStamp, samples) -> Void in
        complete(self.gotSomeAudio(numberOfFrames, timeStamp: timeStamp, samples: samples))
    }
    ...
}
and call it like this:
myInstance.run() { floatArray in
    // Use floatArray here.
}
