I've been trying for days to debug this code, which produces a video from CGImages. The CGImages are created from CGBitmapContexts I draw into in the app code. Here I've simplified it to draw a single diagonal yellow line and write a number of frames of that static image. Every frame of the video written to the output path comes out identically distorted.
import Foundation
import CoreGraphics
import CoreMedia
import QuartzCore
import AVFoundation
import UIKit // for UIColor, used below
func exportVideo(
    width: Int = 500,
    height: Int = 500,
    numberOfFrames: Int = 100
) {
    let vidURL = URL(fileURLWithPath: "/Users/me/Desktop/testVideo.mp4")
    try? FileManager.default.removeItem(at: vidURL)

    let settings: [String: Any] = [
        AVVideoCodecKey: AVVideoCodecType.h264,
        AVVideoWidthKey: width,
        AVVideoHeightKey: height
    ]
    let assetWriter = try! AVAssetWriter(url: vidURL, fileType: .mp4) // was .m4v; match the .mp4 extension
    let writerInput = AVAssetWriterInput(mediaType: AVMediaType.video, outputSettings: settings)
    assetWriter.add(writerInput)
    let queue = DispatchQueue.global(qos: .background)
    writerInput.expectsMediaDataInRealTime = false
    let inputAdaptor = AVAssetWriterInputPixelBufferAdaptor(assetWriterInput: writerInput, sourcePixelBufferAttributes: nil)
    assetWriter.startWriting()
    assetWriter.startSession(atSourceTime: CMTime.zero)
    writerInput.requestMediaDataWhenReady(on: queue) {
        for i in 0..<numberOfFrames where writerInput.isReadyForMoreMediaData {
            guard let buffer = newPixelBufferFrom(width: width, height: height) else {
                fatalError()
            }
            inputAdaptor.append(
                buffer,
                withPresentationTime: CMTime(seconds: Double(i), preferredTimescale: CMTimeScale(10))
            )
        }
        writerInput.markAsFinished()
        assetWriter.finishWriting { }
    }
}
private func newPixelBufferFrom(
    width: Int,
    height: Int
) -> CVPixelBuffer? {
    let options: [String: Any] = [
        kCVPixelBufferCGImageCompatibilityKey as String: true,
        kCVPixelBufferCGBitmapContextCompatibilityKey as String: true
    ]
    var pxbuffer: CVPixelBuffer?
    let status = CVPixelBufferCreate(kCFAllocatorDefault,
                                     width,
                                     height,
                                     kCVPixelFormatType_32ARGB,
                                     options as CFDictionary?,
                                     &pxbuffer)
    assert(status == kCVReturnSuccess && pxbuffer != nil, "newPixelBuffer failed")
    CVPixelBufferLockBaseAddress(pxbuffer!, CVPixelBufferLockFlags(rawValue: 0))
    let pxdata = CVPixelBufferGetBaseAddress(pxbuffer!)
    let rgbColorSpace = CGColorSpaceCreateDeviceRGB()
    guard let context = CGContext(
        data: pxdata,
        width: width,
        height: height,
        bitsPerComponent: 8,
        bytesPerRow: 4 * width,
        space: rgbColorSpace,
        bitmapInfo: CGImageAlphaInfo.premultipliedFirst.rawValue
    ) else {
        fatalError()
    }
    context.setStrokeColor(UIColor.yellow.cgColor)
    context.setLineWidth(5)
    context.move(to: .init(x: 0, y: 0))
    context.addLine(to: .init(x: width, y: height))
    context.strokePath()
    CVPixelBufferUnlockBaseAddress(pxbuffer!, CVPixelBufferLockFlags(rawValue: 0))
    return pxbuffer
}
So I stumbled across "the answer" to my problem here.
It turns out that the width and height need to be a multiple of 4. I'll only say that I hope this post helps a future poor soul, because the error codes, warnings, API members, and docs totally failed to help me.
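A complementary defense, whatever the dimensions, is to stop assuming the buffer is tightly packed and instead hand CGContext the row stride Core Video actually allocated. Core Video may pad each row for alignment, so when the width is awkward, bytesPerRow can exceed 4 * width and every row of a tightly-packed drawing gets skewed. A minimal sketch of the same function with that one change (my variant, not the original poster's code):

private func newPixelBufferFrom(width: Int, height: Int) -> CVPixelBuffer? {
    let options: [String: Any] = [
        kCVPixelBufferCGImageCompatibilityKey as String: true,
        kCVPixelBufferCGBitmapContextCompatibilityKey as String: true
    ]
    var pxbuffer: CVPixelBuffer?
    guard CVPixelBufferCreate(kCFAllocatorDefault, width, height,
                              kCVPixelFormatType_32ARGB,
                              options as CFDictionary, &pxbuffer) == kCVReturnSuccess,
          let buffer = pxbuffer else { return nil }
    CVPixelBufferLockBaseAddress(buffer, [])
    defer { CVPixelBufferUnlockBaseAddress(buffer, []) }
    guard let context = CGContext(
        data: CVPixelBufferGetBaseAddress(buffer),
        width: width,
        height: height,
        bitsPerComponent: 8,
        // Ask Core Video for the real stride instead of assuming 4 * width.
        bytesPerRow: CVPixelBufferGetBytesPerRow(buffer),
        space: CGColorSpaceCreateDeviceRGB(),
        bitmapInfo: CGImageAlphaInfo.premultipliedFirst.rawValue
    ) else { return nil }
    context.setStrokeColor(UIColor.yellow.cgColor)
    context.setLineWidth(5)
    context.move(to: .zero)
    context.addLine(to: CGPoint(x: width, y: height))
    context.strokePath()
    return buffer
}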
Related
The following code successfully converts an array of images into a video. However, the fps parameter has no effect on the frame rate, so the frame rate is always 1.
The AVVideoMaxKeyFrameIntervalKey and AVVideoMaxKeyFrameIntervalDurationKey settings have no effect on the output.
How do I set a frame rate of 30 fps?
func createVideo(images: [UIImage], durationPerImage: Double = 1, fps: Double = 30) {
    let filePath = URL(fileURLWithPath: NSTemporaryDirectory() + "video.mp4")
    let size = images[0].size
    let fileManager = FileManager.default
    if fileManager.fileExists(atPath: filePath.path) {
        try? fileManager.removeItem(at: filePath)
    }
    let bitRate = size.width * size.height * fps * 4
    let videoWriter = try? AVAssetWriter(outputURL: filePath, fileType: AVFileType.mp4)
    let videoSettings: [String: Any] = [
        AVVideoCodecKey: AVVideoCodecType.h264,
        AVVideoWidthKey: size.width,
        AVVideoHeightKey: size.height,
        AVVideoCompressionPropertiesKey: [
            AVVideoAverageBitRateKey: bitRate,
            AVVideoMaxKeyFrameIntervalKey: fps,
            AVVideoMaxKeyFrameIntervalDurationKey: (1 / fps)
        ],
    ]
    let bufferAttributes: [String: Any] = [
        kCVPixelBufferPixelFormatTypeKey as String: Int(kCVPixelFormatType_32ARGB),
        kCVPixelBufferWidthKey as String: size.width,
        kCVPixelBufferHeightKey as String: size.height
    ]
    let videoWriterInput = AVAssetWriterInput(mediaType: AVMediaType.video, outputSettings: videoSettings)
    videoWriterInput.expectsMediaDataInRealTime = true
    videoWriter?.add(videoWriterInput)
    let pixelBufferAdaptor = AVAssetWriterInputPixelBufferAdaptor(assetWriterInput: videoWriterInput, sourcePixelBufferAttributes: bufferAttributes)
    videoWriter?.startWriting()
    videoWriter?.startSession(atSourceTime: CMTime.zero)
    var time = CMTime.zero
    for image in images {
        if videoWriter?.status == .unknown {
            videoWriter?.startWriting()
        }
        if videoWriter?.status == .failed {
            print("Error: \(String(describing: videoWriter?.error))")
            return
        }
        var pixelBuffer: CVPixelBuffer?
        let status = CVPixelBufferPoolCreatePixelBuffer(kCFAllocatorDefault, pixelBufferAdaptor.pixelBufferPool!, &pixelBuffer)
        if status != kCVReturnSuccess { return }
        CVPixelBufferLockBaseAddress(pixelBuffer!, [])
        let data = CVPixelBufferGetBaseAddress(pixelBuffer!)
        let rgbColorSpace = CGColorSpaceCreateDeviceRGB()
        let context = CGContext(data: data, width: Int(size.width), height: Int(size.height), bitsPerComponent: 8, bytesPerRow: CVPixelBufferGetBytesPerRow(pixelBuffer!), space: rgbColorSpace, bitmapInfo: CGImageAlphaInfo.noneSkipFirst.rawValue)
        context?.translateBy(x: 0, y: size.height)
        context?.scaleBy(x: 1.0, y: -1.0)
        UIGraphicsPushContext(context!)
        image.draw(in: CGRect(x: 0, y: 0, width: size.width, height: size.height))
        UIGraphicsPopContext()
        CVPixelBufferUnlockBaseAddress(pixelBuffer!, [])
        if !pixelBufferAdaptor.append(pixelBuffer!, withPresentationTime: time) { return }
        time = CMTimeAdd(time, CMTimeMakeWithSeconds(durationPerImage, preferredTimescale: Int32(fps)))
    }
    videoWriterInput.markAsFinished()
    videoWriter?.finishWriting {
        let tracks = AVAsset(url: filePath).tracks(withMediaType: .video)
        print("FPS", tracks.first!.nominalFrameRate, tracks.first!.naturalTimeScale)
    }
}
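A reading of the problem, not a confirmed answer: the encoded frame rate falls out of the presentation timestamps you append, while AVVideoMaxKeyFrameIntervalKey and AVVideoMaxKeyFrameIntervalDurationKey only control keyframe (GOP) spacing. Since time advances by CMTimeMakeWithSeconds(durationPerImage, ...) and durationPerImage defaults to 1, each image occupies a full second of the timeline, which is exactly the 1 fps observed. A sketch of timestamps that yield 30 fps:

// Sketch: at 30 fps, frame i should be presented at i/30 seconds.
// An integer timescale keeps the arithmetic exact.
func presentationTime(forFrame index: Int, fps: Int32 = 30) -> CMTime {
    return CMTime(value: CMTimeValue(index), timescale: fps)
}

// Inside the loop over images, append each buffer at the frame's time:
// pixelBufferAdaptor.append(pixelBuffer!, withPresentationTime: presentationTime(forFrame: i))
// Equivalently, keep createVideo as written and call it with durationPerImage = 1.0 / 30.0.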
Right now I am developing a module that needs to create a video from an array of CGImages, and while doing that processing my application crashes at some point. I am not able to figure out the exact reason for the crash.
Can anyone tell me whether I am going in the right direction? Should I convert [CGImage] to a video, or do I need to choose another approach?
I also tried converting each CGImage to a UIImage and creating the video from those, but I still face the same issue.
I am getting the image data as [UInt8], so what would be the correct approach to convert the image format and create the video?
To create the video from [CGImage], I am following the approach below.
I convert the [UInt8] data to a CGImage using CGDataProvider, convert the CGImage to a UIImage, collect the UIImages into an array, and then merge the images into a video.
Here is my code to create a CGImage from the data:
private(set) var data: [UInt8]

var cgImage: CGImage? {
    let colorSpaceRef = CGColorSpaceCreateDeviceRGB()
    let bitsPerComponent = 8
    let bitsPerPixel = channels * bitsPerComponent
    let bytesPerRow = channels * width
    let totalBytes = height * bytesPerRow
    let bitmapInfo = CGBitmapInfo(rawValue: channels == 3 ? CGImageAlphaInfo.none.rawValue : CGImageAlphaInfo.last.rawValue)
    let provider = CGDataProvider(dataInfo: nil,
                                  data: data,
                                  size: totalBytes,
                                  releaseData: { _, _, _ in })!
    return CGImage(width: width,
                   height: height,
                   bitsPerComponent: bitsPerComponent,
                   bitsPerPixel: bitsPerPixel,
                   bytesPerRow: bytesPerRow,
                   space: colorSpaceRef,
                   bitmapInfo: bitmapInfo,
                   provider: provider,
                   decode: nil,
                   shouldInterpolate: false,
                   intent: CGColorRenderingIntent.perceptual)
}
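One observation (an assumption on my part, not a verified diagnosis): CGDataProvider(dataInfo:data:size:releaseData:) does not copy the bytes, and passing the Swift array `data` directly bridges a pointer that is only guaranteed valid for the duration of that call. A CGImage backed by a dangling provider would plausibly crash later, inside context.draw, after a few frames, which matches the symptom described below. A sketch of a variant where the provider owns its own copy of the bytes:

// Sketch: wrap the bytes in Data so the provider retains its own copy
// instead of borrowing the array's storage.
guard let provider = CGDataProvider(data: Data(data) as CFData) else {
    return nil
}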
My app is crashing in this function, at the point where I repeatedly draw images into the context:

context!.draw(cgImage, in: CGRect(x: 0, y: 0, width: frameWidth, height: frameHeight))

If I use images from the bundle and create the video with this code, it works fine. When I use CGImages created from the [UInt8] data, it starts crashing after writing 3-4 images.
func newPixelBufferFrom(cgImage: CGImage) -> CVPixelBuffer? {
    autoreleasepool {
        let options: [String: Any] = [kCVPixelBufferCGImageCompatibilityKey as String: true,
                                      kCVPixelBufferCGBitmapContextCompatibilityKey as String: true]
        var pxbuffer: CVPixelBuffer?
        let frameWidth = self.videoSettings[AVVideoWidthKey] as! Int
        let frameHeight = self.videoSettings[AVVideoHeightKey] as! Int
        let status = CVPixelBufferCreate(kCFAllocatorDefault, frameWidth, frameHeight, kCVPixelFormatType_32ARGB, options as CFDictionary?, &pxbuffer)
        assert(status == kCVReturnSuccess && pxbuffer != nil, "newPixelBuffer failed")
        CVPixelBufferLockBaseAddress(pxbuffer!, CVPixelBufferLockFlags(rawValue: 0))
        let pxdata = CVPixelBufferGetBaseAddress(pxbuffer!)
        let rgbColorSpace = CGColorSpaceCreateDeviceRGB()
        let context = CGContext(data: pxdata, width: frameWidth, height: frameHeight, bitsPerComponent: 8, bytesPerRow: CVPixelBufferGetBytesPerRow(pxbuffer!), space: rgbColorSpace, bitmapInfo: CGImageAlphaInfo.noneSkipFirst.rawValue)
        assert(context != nil, "context is nil")
        context!.concatenate(CGAffineTransform.identity)
        context!.draw(cgImage, in: CGRect(x: 0, y: 0, width: frameWidth, height: frameHeight))
        CVPixelBufferUnlockBaseAddress(pxbuffer!, CVPixelBufferLockFlags(rawValue: 0))
        return pxbuffer
    }
}
Here is the code I am using to create the video from the array of images:
typealias CXEMovieMakerCompletion = (URL) -> Void
typealias CXEMovieMakerUIImageExtractor = (AnyObject) -> UIImage?

public class CXEImagesToVideo: NSObject {
    var assetWriter: AVAssetWriter!
    var writeInput: AVAssetWriterInput!
    var bufferAdapter: AVAssetWriterInputPixelBufferAdaptor!
    var videoSettings: [String: Any]!
    var frameTime: CMTime!
    var fileURL: URL!
    var completionBlock: CXEMovieMakerCompletion?
    var movieMakerUIImageExtractor: CXEMovieMakerUIImageExtractor?

    public class func videoSettings(codec: String, width: Int, height: Int) -> [String: Any] {
        if Int(width) % 16 != 0 {
            print("warning: video settings width must be divisible by 16")
        }
        let videoSettings: [String: Any] = [AVVideoCodecKey: AVVideoCodecType.h264,
                                            AVVideoWidthKey: width,
                                            AVVideoHeightKey: height]
        return videoSettings
    }

    public init(videoSettings: [String: Any], frameTime: CMTime) {
        super.init()
        self.frameTime = frameTime
        let paths = NSSearchPathForDirectoriesInDomains(.documentDirectory, .userDomainMask, true)
        let tempPath = paths[0] + "/exprotvideo1.mp4"
        if FileManager.default.fileExists(atPath: tempPath) {
            guard (try? FileManager.default.removeItem(atPath: tempPath)) != nil else {
                print("remove path failed")
                return
            }
        }
        self.fileURL = URL(fileURLWithPath: tempPath)
        self.assetWriter = try! AVAssetWriter(url: self.fileURL, fileType: AVFileType.mp4)
        self.videoSettings = videoSettings
        self.writeInput = AVAssetWriterInput(mediaType: AVMediaType.video, outputSettings: videoSettings)
        assert(self.assetWriter.canAdd(self.writeInput), "add failed")
        self.assetWriter.add(self.writeInput)
        let bufferAttributes: [String: Any] = [kCVPixelBufferPixelFormatTypeKey as String: Int(kCVPixelFormatType_32ARGB)]
        self.bufferAdapter = AVAssetWriterInputPixelBufferAdaptor(assetWriterInput: self.writeInput, sourcePixelBufferAttributes: bufferAttributes)
        self.frameTime = CMTimeMake(value: 1, timescale: 10)
    }

    func createMovieFrom(urls: [URL], withCompletion: @escaping CXEMovieMakerCompletion) {
        self.createMovieFromSource(images: urls as [AnyObject],
                                   extractor: { (inputObject: AnyObject) -> UIImage? in
                                       return UIImage(data: try! Data(contentsOf: inputObject as! URL))
                                   }, withCompletion: withCompletion)
    }

    func createMovieFrom(images: [UIImage], withCompletion: @escaping CXEMovieMakerCompletion) {
        DispatchQueue.main.async {
            self.createMovieFromSource(images: images,
                                       extractor: { (inputObject: AnyObject) -> UIImage? in
                                           return inputObject as? UIImage
                                       }, withCompletion: withCompletion)
        }
    }

    func imageFromLayer(layer: CALayer) -> UIImage {
        UIGraphicsBeginImageContextWithOptions(layer.frame.size, layer.isOpaque, 0)
        layer.render(in: UIGraphicsGetCurrentContext()!)
        let outputImage = UIGraphicsGetImageFromCurrentImageContext()
        UIGraphicsEndImageContext()
        return outputImage!
    }

    func createMovieFromSource(images: [AnyObject], extractor: @escaping CXEMovieMakerUIImageExtractor, withCompletion: @escaping CXEMovieMakerCompletion) {
        self.completionBlock = withCompletion
        self.assetWriter.startWriting()
        self.assetWriter.startSession(atSourceTime: CMTime.zero)
        let mediaInputQueue = DispatchQueue(label: "Main") // DispatchQueue(label: "mediaInputQueue")
        var i = 0
        let frameNumber = images.count
        self.writeInput.requestMediaDataWhenReady(on: mediaInputQueue) {
            while true {
                if i >= frameNumber {
                    break
                }
                if self.writeInput.isReadyForMoreMediaData {
                    var sampleBuffer: CVPixelBuffer?
                    autoreleasepool {
                        let temp = images[i]
                        let img = extractor(temp)
                        if img == nil {
                            i += 1
                            print("Warning: could not extract one of the frames")
                            //continue
                        }
                        // Use the extracted UIImage; `temp` may be a URL, not an image.
                        sampleBuffer = self.newPixelBufferFrom(cgImage: img!.cgImage!)
                    }
                    if sampleBuffer != nil {
                        if i == 0 {
                            self.bufferAdapter.append(sampleBuffer!, withPresentationTime: CMTime.zero)
                        } else {
                            let value = i - 1
                            let lastTime = CMTimeMake(value: Int64(value), timescale: self.frameTime.timescale)
                            let presentTime = CMTimeAdd(lastTime, self.frameTime)
                            self.bufferAdapter.append(sampleBuffer!, withPresentationTime: presentTime)
                        }
                        i = i + 1
                    }
                }
            }
            self.writeInput.markAsFinished()
            self.assetWriter.finishWriting {
                DispatchQueue.main.sync {
                    self.completionBlock!(self.fileURL)
                }
            }
        }
    }

    func newPixelBufferFrom(cgImage: CGImage) -> CVPixelBuffer? {
        autoreleasepool {
            let options: [String: Any] = [kCVPixelBufferCGImageCompatibilityKey as String: true,
                                          kCVPixelBufferCGBitmapContextCompatibilityKey as String: true]
            var pxbuffer: CVPixelBuffer?
            let frameWidth = self.videoSettings[AVVideoWidthKey] as! Int
            let frameHeight = self.videoSettings[AVVideoHeightKey] as! Int
            let status = CVPixelBufferCreate(kCFAllocatorDefault, frameWidth, frameHeight, kCVPixelFormatType_32ARGB, options as CFDictionary?, &pxbuffer)
            assert(status == kCVReturnSuccess && pxbuffer != nil, "newPixelBuffer failed")
            CVPixelBufferLockBaseAddress(pxbuffer!, CVPixelBufferLockFlags(rawValue: 0))
            let pxdata = CVPixelBufferGetBaseAddress(pxbuffer!)
            let rgbColorSpace = CGColorSpaceCreateDeviceRGB()
            let context = CGContext(data: pxdata, width: frameWidth, height: frameHeight, bitsPerComponent: 8, bytesPerRow: CVPixelBufferGetBytesPerRow(pxbuffer!), space: rgbColorSpace, bitmapInfo: CGImageAlphaInfo.noneSkipFirst.rawValue)
            assert(context != nil, "context is nil")
            // context?.clear(CGRect(x: 0, y: 0, width: frameWidth, height: frameHeight))
            context!.concatenate(CGAffineTransform.identity)
            context!.draw(cgImage, in: CGRect(x: 0, y: 0, width: frameWidth, height: frameHeight))
            CVPixelBufferUnlockBaseAddress(pxbuffer!, CVPixelBufferLockFlags(rawValue: 0))
            return pxbuffer
        }
    }
}
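For reference, a minimal sketch of how this class might be driven (my example, assuming `myImages` is a prepared [UIImage]; the settings helper and initializer are the ones defined above):

let settings = CXEImagesToVideo.videoSettings(codec: AVVideoCodecType.h264.rawValue,
                                              width: 640, height: 480)
let movieMaker = CXEImagesToVideo(videoSettings: settings,
                                  frameTime: CMTimeMake(value: 1, timescale: 10))
movieMaker.createMovieFrom(images: myImages) { fileURL in
    print("video written to", fileURL)
}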
I want to generate an MPEG video from a few images (as frames).
After I finish writing the video file, I tried saving it to Photos, but iOS considers it incompatible:

let compatible = AVAsset(url: video_url).isCompatibleWithSavedPhotosAlbum
print("COMPATIBILITY", compatible) // false

Then I tried creating an AVPlayer to play the video, and it fails to play, so the video file must be corrupt somehow.
I reviewed my code closely but couldn't spot the problem. Please help.
Here is my code:
class VideoWriter {
    var url: URL?
    var assetWriter: AVAssetWriter?

    init(url: URL) {
        self.url = url
        do {
            try self.assetWriter = AVAssetWriter(url: self.url!, fileType: AVFileTypeMPEG4)
        } catch {
            print("Fail to create assetWriter")
        }
    }

    func writeFrames(frames: [UIImage], finishedHandler: @escaping () -> Void) {
        let settings: [String: Any] = [
            AVVideoCodecKey: AVVideoCodecH264,
            AVVideoWidthKey: 480, //CANVAS_SIZE * 4 / 3,
            AVVideoHeightKey: 360 //CANVAS_SIZE
        ]
        let assetWriterInput = AVAssetWriterInput(mediaType: AVMediaTypeVideo, outputSettings: settings)
        self.assetWriter?.add(assetWriterInput)
        let bufferAttributes: [String: Any] = [kCVPixelBufferPixelFormatTypeKey as String: Int(kCVPixelFormatType_32ARGB)]
        let bufferAdapter = AVAssetWriterInputPixelBufferAdaptor(assetWriterInput: assetWriterInput, sourcePixelBufferAttributes: bufferAttributes)
        let frameTime = CMTimeMake(1, 30)
        self.assetWriter?.startWriting()
        self.assetWriter?.startSession(atSourceTime: kCMTimeZero)

        // write the frames here
        let mediaInputQueue = DispatchQueue(label: "mediaInputQueue")
        var i = 0
        let frameNumber = frames.count
        assetWriterInput.requestMediaDataWhenReady(on: mediaInputQueue) {
            while true {
                if i >= frameNumber {
                    break
                }
                if assetWriterInput.isReadyForMoreMediaData {
                    let image = frames[i]
                    print("writing frame ", i)
                    let pixelBuffer = self.newPixelBufferFrom(cgImage: image.cgImage!)
                    var time: CMTime
                    if i == 0 {
                        time = kCMTimeZero
                    } else {
                        let value = i - 1
                        let lastTime = CMTimeMake(Int64(value), frameTime.timescale)
                        time = CMTimeAdd(lastTime, frameTime)
                    }
                    bufferAdapter.append(pixelBuffer!, withPresentationTime: time)
                    i += 1
                }
            }
            assetWriterInput.markAsFinished()
            self.assetWriter?.finishWriting(completionHandler: {
                Thread.sleep(forTimeInterval: 0.5)
                DispatchQueue.main.sync {
                    print("Completed?", self.assetWriter?.status == AVAssetWriterStatus.completed)
                    finishedHandler()
                }
            })
        }
    }

    func newPixelBufferFrom(cgImage: CGImage) -> CVPixelBuffer? {
        let options: [String: Any] = [kCVPixelBufferCGImageCompatibilityKey as String: true,
                                      kCVPixelBufferCGBitmapContextCompatibilityKey as String: true]
        var pxbuffer: CVPixelBuffer?
        let frameWidth = 480 //CANVAS_SIZE
        let frameHeight = 360 //CANVAS_SIZE
        let status = CVPixelBufferCreate(kCFAllocatorDefault, frameWidth, frameHeight, kCVPixelFormatType_32ARGB, options as CFDictionary?, &pxbuffer)
        // TODO: throw exception in case of error, don't use assert
        assert(status == kCVReturnSuccess && pxbuffer != nil, "newPixelBuffer failed")
        CVPixelBufferLockBaseAddress(pxbuffer!, CVPixelBufferLockFlags(rawValue: 0))
        let pxdata = CVPixelBufferGetBaseAddress(pxbuffer!)
        let rgbColorSpace = CGColorSpaceCreateDeviceRGB()
        let context = CGContext(data: pxdata, width: frameWidth, height: frameHeight, bitsPerComponent: 8, bytesPerRow: CVPixelBufferGetBytesPerRow(pxbuffer!), space: rgbColorSpace, bitmapInfo: CGImageAlphaInfo.noneSkipFirst.rawValue)
        // TODO: throw exception in case of error, don't use assert
        assert(context != nil, "context is nil")
        context!.concatenate(CGAffineTransform.identity)
        context!.draw(cgImage, in: CGRect(x: 0, y: 0, width: cgImage.width, height: cgImage.height))
        CVPixelBufferUnlockBaseAddress(pxbuffer!, CVPixelBufferLockFlags(rawValue: 0))
        return pxbuffer
    }
}
By the way, I didn't add an audio input; is that necessary for an MPEG file?
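Two things stand out here (my observations, not a confirmed diagnosis). First, an audio track is not required; an MP4 containing only a video track is valid and playable. Second, the pixel buffer is created at 480x360, but the draw call uses the source image's own dimensions, so any frame that is not exactly 480x360 is drawn at the wrong scale. A sketch of the draw call pinned to the buffer's size, using the frameWidth/frameHeight names from newPixelBufferFrom above:

// Sketch: draw into the full buffer rect rather than the image's native rect.
context!.draw(cgImage, in: CGRect(x: 0, y: 0, width: frameWidth, height: frameHeight))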
I am trying to get Apple's sample Core ML Models that were demoed at the 2017 WWDC to function correctly. I am using the GoogLeNet to try and classify images (see the Apple Machine Learning Page). The model takes a CVPixelBuffer as an input. I have an image called imageSample.jpg that I'm using for this demo. My code is below:
var sample = UIImage(named: "imageSample")?.cgImage
let bufferThree = getCVPixelBuffer(sample!)
let model = GoogLeNetPlaces()
guard let output = try? model.prediction(input: GoogLeNetPlacesInput.init(sceneImage: bufferThree!)) else {
    fatalError("Unexpected runtime error.")
}
print(output.sceneLabel)
I am always getting the unexpected runtime error in the output rather than an image classification. My code to convert the image is below:
func getCVPixelBuffer(_ image: CGImage) -> CVPixelBuffer? {
    let imageWidth = Int(image.width)
    let imageHeight = Int(image.height)
    let attributes: [NSObject: AnyObject] = [
        kCVPixelBufferCGImageCompatibilityKey: true as AnyObject,
        kCVPixelBufferCGBitmapContextCompatibilityKey: true as AnyObject
    ]
    var pxbuffer: CVPixelBuffer? = nil
    CVPixelBufferCreate(kCFAllocatorDefault,
                        imageWidth,
                        imageHeight,
                        kCVPixelFormatType_32ARGB,
                        attributes as CFDictionary?,
                        &pxbuffer)
    if let _pxbuffer = pxbuffer {
        let flags = CVPixelBufferLockFlags(rawValue: 0)
        CVPixelBufferLockBaseAddress(_pxbuffer, flags)
        let pxdata = CVPixelBufferGetBaseAddress(_pxbuffer)
        let rgbColorSpace = CGColorSpaceCreateDeviceRGB()
        let context = CGContext(data: pxdata,
                                width: imageWidth,
                                height: imageHeight,
                                bitsPerComponent: 8,
                                bytesPerRow: CVPixelBufferGetBytesPerRow(_pxbuffer),
                                space: rgbColorSpace,
                                bitmapInfo: CGImageAlphaInfo.premultipliedFirst.rawValue)
        if let _context = context {
            _context.draw(image, in: CGRect.init(x: 0, y: 0, width: imageWidth, height: imageHeight))
        } else {
            CVPixelBufferUnlockBaseAddress(_pxbuffer, flags)
            return nil
        }
        CVPixelBufferUnlockBaseAddress(_pxbuffer, flags)
        return _pxbuffer
    }
    return nil
}
I got this code from a previous StackOverflow post (last answer here). I recognize that the code may not be correct, but I have no idea how to do this myself. I believe this is the section that contains the error. The model calls for the following type of input: Image<RGB,224,224>
You don't need to do a bunch of image mangling yourself to use a Core ML model with an image — the new Vision framework can do that for you.
import Vision
import CoreML

let model = try VNCoreMLModel(for: MyCoreMLGeneratedModelClass().model)
let request = VNCoreMLRequest(model: model, completionHandler: myResultsMethod)
let handler = VNImageRequestHandler(url: myImageURL)
try handler.perform([request]) // perform(_:) throws

func myResultsMethod(request: VNRequest, error: Error?) {
    guard let results = request.results as? [VNClassificationObservation]
        else { fatalError("huh") }
    for classification in results {
        print(classification.identifier, // the scene label
              classification.confidence)
    }
}
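A small usage note (my addition, not part of the original answer): perform(_:) throws and does blocking work, so in an app it is typically wrapped in a do/catch off the main thread, along these lines:

DispatchQueue.global(qos: .userInitiated).async {
    do {
        try handler.perform([request])
    } catch {
        print("Vision request failed: \(error)")
    }
}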
The WWDC17 session on Vision should have a bit more info — it's tomorrow afternoon.
You can use pure Core ML, but you should resize the image to (224, 224):
DispatchQueue.global(qos: .userInitiated).async {
    // Resnet50 expects an image 224 x 224, so we should resize and crop the source image
    let inputImageSize: CGFloat = 224.0
    let minLen = min(image.size.width, image.size.height)
    let resizedImage = image.resize(to: CGSize(width: inputImageSize * image.size.width / minLen, height: inputImageSize * image.size.height / minLen))
    let croppedToSquareImage = resizedImage.cropToSquare()
    guard let pixelBuffer = croppedToSquareImage?.pixelBuffer() else {
        fatalError()
    }
    guard let classifierOutput = try? self.classifier.prediction(image: pixelBuffer) else {
        fatalError()
    }
    DispatchQueue.main.async {
        self.title = classifierOutput.classLabel
    }
}

// ...

extension UIImage {
    func resize(to newSize: CGSize) -> UIImage {
        UIGraphicsBeginImageContextWithOptions(CGSize(width: newSize.width, height: newSize.height), true, 1.0)
        self.draw(in: CGRect(x: 0, y: 0, width: newSize.width, height: newSize.height))
        let resizedImage = UIGraphicsGetImageFromCurrentImageContext()!
        UIGraphicsEndImageContext()
        return resizedImage
    }

    func cropToSquare() -> UIImage? {
        guard let cgImage = self.cgImage else {
            return nil
        }
        var imageHeight = self.size.height
        var imageWidth = self.size.width
        if imageHeight > imageWidth {
            imageHeight = imageWidth
        } else {
            imageWidth = imageHeight
        }
        let size = CGSize(width: imageWidth, height: imageHeight)
        let x = ((CGFloat(cgImage.width) - size.width) / 2).rounded()
        let y = ((CGFloat(cgImage.height) - size.height) / 2).rounded()
        let cropRect = CGRect(x: x, y: y, width: size.width, height: size.height)
        if let croppedCgImage = cgImage.cropping(to: cropRect) {
            return UIImage(cgImage: croppedCgImage, scale: 0, orientation: self.imageOrientation)
        }
        return nil
    }

    func pixelBuffer() -> CVPixelBuffer? {
        let width = self.size.width
        let height = self.size.height
        let attrs = [kCVPixelBufferCGImageCompatibilityKey: kCFBooleanTrue,
                     kCVPixelBufferCGBitmapContextCompatibilityKey: kCFBooleanTrue] as CFDictionary
        var pixelBuffer: CVPixelBuffer?
        let status = CVPixelBufferCreate(kCFAllocatorDefault,
                                         Int(width),
                                         Int(height),
                                         kCVPixelFormatType_32ARGB,
                                         attrs,
                                         &pixelBuffer)
        guard let resultPixelBuffer = pixelBuffer, status == kCVReturnSuccess else {
            return nil
        }
        CVPixelBufferLockBaseAddress(resultPixelBuffer, CVPixelBufferLockFlags(rawValue: 0))
        let pixelData = CVPixelBufferGetBaseAddress(resultPixelBuffer)
        let rgbColorSpace = CGColorSpaceCreateDeviceRGB()
        guard let context = CGContext(data: pixelData,
                                      width: Int(width),
                                      height: Int(height),
                                      bitsPerComponent: 8,
                                      bytesPerRow: CVPixelBufferGetBytesPerRow(resultPixelBuffer),
                                      space: rgbColorSpace,
                                      bitmapInfo: CGImageAlphaInfo.noneSkipFirst.rawValue) else {
            return nil
        }
        context.translateBy(x: 0, y: height)
        context.scaleBy(x: 1.0, y: -1.0)
        UIGraphicsPushContext(context)
        self.draw(in: CGRect(x: 0, y: 0, width: width, height: height))
        UIGraphicsPopContext()
        CVPixelBufferUnlockBaseAddress(resultPixelBuffer, CVPixelBufferLockFlags(rawValue: 0))
        return resultPixelBuffer
    }
}
The expected image size for the inputs can be found in the .mlmodel file's description in Xcode.
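If you'd rather check at runtime than in Xcode, the same information is available on the compiled model's description (a sketch, using the GoogLeNetPlaces class from the question):

let model = GoogLeNetPlaces().model
for (name, description) in model.modelDescription.inputDescriptionsByName {
    if let constraint = description.imageConstraint {
        // e.g. "sceneImage 224 224"
        print(name, constraint.pixelsWide, constraint.pixelsHigh)
    }
}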
A demo project that uses both pure CoreML and Vision variants you can find here: https://github.com/handsomecode/iOS11-Demos/tree/coreml_vision/CoreML/CoreMLDemo
If the input is a UIImage, rather than a URL, and you want to use VNImageRequestHandler, you can use CIImage.
func updateClassifications(for image: UIImage) {
    let orientation = CGImagePropertyOrientation(image.imageOrientation)
    guard let ciImage = CIImage(image: image) else { return }
    let handler = VNImageRequestHandler(ciImage: ciImage, orientation: orientation)
    try? handler.perform([request]) // `request` as built in the Vision answer above
}
From Classifying Images with Vision and Core ML
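Note that CGImagePropertyOrientation has no built-in initializer taking a UIImage.Orientation; the Apple sample cited above defines one as an extension, roughly:

import ImageIO
import UIKit

extension CGImagePropertyOrientation {
    // Maps UIKit's orientation cases onto the EXIF-style values Vision expects.
    init(_ uiOrientation: UIImage.Orientation) {
        switch uiOrientation {
        case .up: self = .up
        case .upMirrored: self = .upMirrored
        case .down: self = .down
        case .downMirrored: self = .downMirrored
        case .left: self = .left
        case .leftMirrored: self = .leftMirrored
        case .right: self = .right
        case .rightMirrored: self = .rightMirrored
        @unknown default: self = .up
        }
    }
}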
I have a UIImage array with a lot of UIImage objects, and I use the methods mentioned in the link to export the image array to a video. Everything works, but the performance of converting the UIImage array to CVPixelBuffers is terrible:
private func newPixelBufferFrom(cgImage: CGImage) -> CVPixelBuffer? {
    let options: [String: Any] = [kCVPixelBufferCGImageCompatibilityKey as String: true,
                                  kCVPixelBufferCGBitmapContextCompatibilityKey as String: true]
    var pxbuffer: CVPixelBuffer?
    let frameWidth = self.videoSettings[AVVideoWidthKey] as! Int
    let frameHeight = self.videoSettings[AVVideoHeightKey] as! Int
    let status = CVPixelBufferCreate(kCFAllocatorDefault, frameWidth, frameHeight, kCVPixelFormatType_32ARGB, options as CFDictionary?, &pxbuffer)
    assert(status == kCVReturnSuccess && pxbuffer != nil, "newPixelBuffer failed")
    CVPixelBufferLockBaseAddress(pxbuffer!, CVPixelBufferLockFlags(rawValue: 0))
    let pxdata = CVPixelBufferGetBaseAddress(pxbuffer!)
    let rgbColorSpace = CGColorSpaceCreateDeviceRGB()
    let context = CGContext(data: pxdata, width: frameWidth, height: frameHeight, bitsPerComponent: 8, bytesPerRow: CVPixelBufferGetBytesPerRow(pxbuffer!), space: rgbColorSpace, bitmapInfo: CGImageAlphaInfo.noneSkipFirst.rawValue)
    assert(context != nil, "context is nil")
    context!.concatenate(CGAffineTransform.identity)
    context!.draw(cgImage, in: CGRect(x: 0, y: 0, width: frameWidth, height: frameHeight))
    CVPixelBufferUnlockBaseAddress(pxbuffer!, CVPixelBufferLockFlags(rawValue: 0))
    return pxbuffer
}
Could you give me some ideas?
Thanks!
I solved my problem.
In my video-editing application (like iMovie), I need to convert one image into a video and make the image (now a video) movable.
The UIImage array all derives from a single UIImage in essence, so I avoid calling newPixelBufferFrom repeatedly and call it only once. The following code is faster:
var sampleBuffer: CVPixelBuffer?
var pxDataBuffer: CVPixelBuffer?

let options: [String: Any] = [kCVPixelBufferCGImageCompatibilityKey as String: true,
                              kCVPixelBufferCGBitmapContextCompatibilityKey as String: true]

let frameHeight = self.videoSettings[AVVideoHeightKey] as! Int
let frameWidth = self.videoSettings[AVVideoWidthKey] as! Int
let originHeight = frameWidth * img!.cgImage!.height / img!.cgImage!.width
let heightDifference = originHeight - frameHeight
let frameCounts = self.duration * Int(self.frameTime.timescale)
let spacingOfHeight = heightDifference / frameCounts

sampleBuffer = self.newPixelBufferFrom(cgImage: img!.cgImage!)
assert(sampleBuffer != nil)

var presentTime = CMTimeMake(value: 1, timescale: self.frameTime.timescale)
var stepRows = 0

for i in 0..<frameCounts {
    CVPixelBufferLockBaseAddress(sampleBuffer!, CVPixelBufferLockFlags(rawValue: 0))
    let pointer = CVPixelBufferGetBaseAddress(sampleBuffer!)
    var pxData = pointer?.assumingMemoryBound(to: UInt8.self)
    let bytes = CVPixelBufferGetBytesPerRow(sampleBuffer!) * stepRows
    pxData = pxData?.advanced(by: bytes)
    let status = CVPixelBufferCreateWithBytes(kCFAllocatorDefault, frameWidth, frameHeight, kCVPixelFormatType_32ARGB, pxData!, CVPixelBufferGetBytesPerRow(sampleBuffer!), nil, nil, options as CFDictionary?, &pxDataBuffer)
    assert(status == kCVReturnSuccess && pxDataBuffer != nil, "newPixelBuffer failed")
    CVPixelBufferUnlockBaseAddress(sampleBuffer!, CVPixelBufferLockFlags(rawValue: 0))

    while !self.writeInput.isReadyForMoreMediaData {
        usleep(100)
    }
    if self.writeInput.isReadyForMoreMediaData {
        if i == 0 {
            self.bufferAdapter.append(pxDataBuffer!, withPresentationTime: CMTime.zero) // was an undefined `zeroTime`
        } else {
            self.bufferAdapter.append(pxDataBuffer!, withPresentationTime: presentTime)
        }
        presentTime = CMTimeAdd(presentTime, self.frameTime)
    }
    stepRows += spacingOfHeight
}
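The speedup comes from rendering the source image into a pixel buffer exactly once; each frame afterward is just CVPixelBufferCreateWithBytes wrapping a row-offset pointer into that same backing store, so no per-frame CGContext drawing happens. One caveat worth adding (my observation, not from the original answer): the wrapped buffers share memory with sampleBuffer, so sampleBuffer must outlive them until the writer has consumed every appended frame.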