Issue grabbing Audio Data from AVCaptureDataOutputSynchronizer (Swift) - iOS

I am trying to use AVCaptureDataOutputSynchronizer to capture media from multiple capture outputs. I am having trouble grabbing the audio data in dataOutputSynchronizer(_:didOutput:): the guard statement for audioData always fails. I have no issue grabbing the videoData, and I am able to display the frame using the code below.
I wonder if it's an issue with AVCaptureSynchronizedSampleBufferData, but the documentation describes AVCaptureSynchronizedSampleBufferData as a container for video or audio samples collected using synchronized capture.
I am also not sure whether I am configuring my AVCaptureAudioDataOutput correctly. Here is my ViewController class initializing the session and retrieving the video and audio data.
Note that I am running this on an iPhone XS with iOS 12. The code is written in Swift. Any help with debugging this would be great!
//
// ViewController.swift
// AudioVideoSyncTest
//
// Created by Andrew Mendez on 2/23/19.
// Copyright © 2019 Andrew Mendez. All rights reserved.
//
import UIKit
import AVFoundation
class ViewController: UIViewController,AVCaptureDataOutputSynchronizerDelegate,AVCaptureVideoDataOutputSampleBufferDelegate,AVCaptureAudioDataOutputSampleBufferDelegate {
var capSession:AVCaptureSession!
var videoDataOutput = AVCaptureVideoDataOutput()
var audioDataOutput = AVCaptureAudioDataOutput()
var dataOutputSynchronizer:AVCaptureDataOutputSynchronizer!
var dataOutputQueue = DispatchQueue(label: "com.amendez.dataOutputQueue")
@IBOutlet var imageView: UIImageView!
override func viewDidLoad() {
super.viewDidLoad()
// Do any additional setup after loading the view, typically from a nib.
initSession()
capSession.startRunning()
}
func initSession(){
capSession = AVCaptureSession()
capSession.sessionPreset = .photo
let dualCameraDevice = AVCaptureDevice.default(.builtInDualCamera, for: .video, position: .unspecified)
let audioDevice = AVCaptureDevice.default(for: .audio)
let videoInput = try? AVCaptureDeviceInput(device: dualCameraDevice!)
let audioInput = try? AVCaptureDeviceInput(device: audioDevice!)
do{
if capSession.canAddInput(videoInput!) == true{
capSession.addInput(videoInput!)
}else{print("Issue input camera")}
if capSession.canAddInput(audioInput!) == true {
capSession.addInput(audioInput!)
} else {print("Issue Adding audio input")}
// configuring outputs
// video output
videoDataOutput.videoSettings = [kCVPixelBufferPixelFormatTypeKey as String: Int(kCVPixelFormatType_32BGRA)]
videoDataOutput.alwaysDiscardsLateVideoFrames = true
videoDataOutput.setSampleBufferDelegate(self, queue: dataOutputQueue)
guard capSession.canAddOutput(videoDataOutput) else { fatalError() }
capSession.addOutput(videoDataOutput)
let videoConnection = videoDataOutput.connection(with: .video)
videoConnection!.videoOrientation = .portrait
// audio
guard capSession.canAddOutput(audioDataOutput) else {
print("FAILED");fatalError()}
audioDataOutput.setSampleBufferDelegate(self , queue: dataOutputQueue)
capSession.addOutput(audioDataOutput)
// synchronizer
dataOutputSynchronizer = AVCaptureDataOutputSynchronizer(dataOutputs: [
videoDataOutput,
audioDataOutput])
dataOutputSynchronizer.setDelegate(self, queue: dataOutputQueue)
}catch{
print("Error Config Input")
}
}
@IBAction func startRecord(_ sender: Any) {
}
@IBAction func stopRecord(_ sender: Any) {
}
func dataOutputSynchronizer(_ synchronizer: AVCaptureDataOutputSynchronizer, didOutput synchronizedDataCollection: AVCaptureSynchronizedDataCollection) {
//get video data
guard let videoData = synchronizedDataCollection.synchronizedData(for: videoDataOutput) as? AVCaptureSynchronizedSampleBufferData else{
return
}
guard !videoData.sampleBufferWasDropped else{
print("Dropped video:\(videoData)")
return
}
let pixBuffer = CMSampleBufferGetImageBuffer(videoData.sampleBuffer)
DispatchQueue.main.async {
self.imageView.image = UIImage(ciImage: CIImage(cvImageBuffer:pixBuffer! ))
}
//get audio data
guard let audioData = synchronizedDataCollection.synchronizedData(for: audioDataOutput) as? AVCaptureSynchronizedSampleBufferData else{
print("Error getting Audio Buffer")
return
}
guard !audioData.sampleBufferWasDropped else{
print("Dropped audio:\(audioData)")
return
}
print(audioData.sampleBuffer)
}
}
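For reference, here is a minimal sketch (my own assumption, not a confirmed fix) of a synchronizer callback that handles each output independently, using the videoDataOutput and audioDataOutput properties from the class above. As far as I understand, a given AVCaptureSynchronizedDataCollection only contains entries for outputs that actually have data at that time, so returning early when one output is missing can skip the other:
func dataOutputSynchronizer(_ synchronizer: AVCaptureDataOutputSynchronizer, didOutput synchronizedDataCollection: AVCaptureSynchronizedDataCollection) {
    // Video: present only when this collection carries a video sample.
    if let videoData = synchronizedDataCollection.synchronizedData(for: videoDataOutput) as? AVCaptureSynchronizedSampleBufferData, !videoData.sampleBufferWasDropped {
        // handle videoData.sampleBuffer ...
    }
    // Audio: checked independently instead of returning early when video is missing or dropped.
    if let audioData = synchronizedDataCollection.synchronizedData(for: audioDataOutput) as? AVCaptureSynchronizedSampleBufferData, !audioData.sampleBufferWasDropped {
        // handle audioData.sampleBuffer ...
    }
}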

Related

Play & Mix Background Audio with Video Camera Recording

I'm currently debugging the video camera model that I'm using to record video and audio. I would like the camera to keep playing background audio if something is already playing, and to record with the mic over that audio. I initially got my AVCaptureSession to work smoothly as intended by adding the microphone input during setup, but that automatically stops background audio playback as soon as the camera view is set up.
I have been working on the following solution, where I add the audio input only when I start recording and attempt to remove it once I stop recording. Here is my current code:
import SwiftUI
import AVFoundation
// MARK: Camera View Model
class CameraViewModel: NSObject,ObservableObject,AVCaptureFileOutputRecordingDelegate, AVCapturePhotoCaptureDelegate{
@Published var session = AVCaptureSession()
@Published var alert = false
@Published var output = AVCaptureMovieFileOutput()
@Published var preview : AVCaptureVideoPreviewLayer!
// MARK: Video Recorder Properties
@Published var isRecording: Bool = false
@Published var recordedURLs: [URL] = []
@Published var previewURL: URL?
@Published var showPreview: Bool = false
// Set up is called after necessary permissions are acquired
func setUp(){
do{
self.session.beginConfiguration()
let cameraDevice = AVCaptureDevice.default(.builtInWideAngleCamera, for: .video, position: .front)
if cameraDevice != nil {
/* old code that added audio input on open that worked as intended
let videoInput = try AVCaptureDeviceInput(device: cameraDevice!)
let audioDevice = AVCaptureDevice.default(for: .audio)
let audioInput = try AVCaptureDeviceInput(device: audioDevice!)
if self.session.canAddInput(videoInput) && self.session.canAddInput(audioInput){ //MARK: Audio Input
self.session.addInput(videoInput)
self.session.addInput(audioInput)
self.videoDeviceInput = videoInput
} */
// new code that only adds video input
let videoInput = try AVCaptureDeviceInput(device: cameraDevice!)
if self.session.canAddInput(videoInput) {
self.session.addInput(videoInput)
self.videoDeviceInput = videoInput
}
if self.session.canAddOutput(self.output){
self.session.addOutput(self.output)
}
if self.session.canAddOutput(self.photoOutput){
self.session.addOutput(self.photoOutput)
}
//for audio mixing, make sure this is default set to true
self.session.automaticallyConfiguresApplicationAudioSession = true
self.session.commitConfiguration()
}
}
catch{
print(error.localizedDescription)
}
}
//start recording is called upon a user input which now attaches the mic input
func startRecording() {
// here is how I'm mixing the background audio and adding the microphone input when the camera starts recording
do
{
try AVAudioSession.sharedInstance().setActive(false)
try AVAudioSession.sharedInstance().setCategory(AVAudioSession.Category.ambient)
try AVAudioSession.sharedInstance().setCategory(.playAndRecord, mode: .default, options: AVAudioSession.CategoryOptions.mixWithOthers)
try AVAudioSession.sharedInstance().setMode(AVAudioSession.Mode.videoRecording)
try AVAudioSession.sharedInstance().setActive(true)
let audioDevice = AVCaptureDevice.default(for: .audio)
let audioInput = try AVCaptureDeviceInput(device: audioDevice!)
if self.session.canAddInput(audioInput){
self.session.automaticallyConfiguresApplicationAudioSession = false
self.session.addInput(audioInput)
}
} catch {
print("Can't Set Audio Session Category: \(error)")
}
// MARK: Temporary URL for recording Video
let tempURL = NSTemporaryDirectory() + "\(Date()).mov"
//Need to correct image orientation before moving further
if let videoOutputConnection = output.connection(with: .video) {
//For frontCamera settings to capture mirror image
if self.videoDeviceInput.device.position == .front {
videoOutputConnection.automaticallyAdjustsVideoMirroring = false
videoOutputConnection.isVideoMirrored = true
} else {
videoOutputConnection.automaticallyAdjustsVideoMirroring = true
}
}
output.startRecording(to: URL(fileURLWithPath: tempURL), recordingDelegate: self)
isRecording = true
}
//stop recording removes the audio input
func stopRecording(){
output.stopRecording()
isRecording = false
self.flashOn = false
// stop recording is where I believe I'm doing something wrong when I remove the audio input
do{
try AVAudioSession.sharedInstance().setActive(false)
let audioDevice = AVCaptureDevice.default(for: .audio)
let audioInput = try AVCaptureDeviceInput(device: audioDevice!)
self.session.removeInput(audioInput)
let audioSession = AVAudioSession.sharedInstance()
try audioSession.setCategory(.ambient, mode: .default, options: [.mixWithOthers])
try AVAudioSession.sharedInstance().setActive(true)
} catch {
print("Error occurred while removing audio device input: \(error)")
}
}
}
I also added the following lines in my AppDelegate launch method:
/*
below is for mixing audio
*/
let audioSession = AVAudioSession.sharedInstance()
do {
try audioSession.setCategory(.ambient, mode: .default, options: [.mixWithOthers])
} catch {
print("Failed to set audio session category.")
}
I believe I'm on the right track: the first time the app opens, background audio plays smoothly (there is a small camera flash), and once I start recording, the background audio is mixed with the microphone input as intended. I was able to confirm this in the preview shown in a new view. However, once I dismiss the preview of the recorded URL and go back to the camera, the microphone input stops working completely.
I also receive this error in my console:
AVAudioSession_iOS.mm:1271 Deactivating an audio session that has running I/O. All I/O should be stopped or paused prior to deactivating the audio session.
When I looked online, the advice was to stop or pause an AVPlayer, but I'm unsure where I'm even using an AVPlayer here. I also noticed that people suggested creating two capture sessions, one for audio and one for video, but I was struggling to get that working as well, so I went ahead with this approach.
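For what it's worth, here is a rough sketch (my own reading of that console warning, not a verified fix) of removing the audio input while the capture session is reconfiguring, and only then deactivating the shared audio session, so there is no running audio I/O at deactivation time:
import AVFoundation

// Hypothetical helper: detach the microphone input before touching AVAudioSession.
// `input` should be the same AVCaptureDeviceInput instance that was added earlier;
// removing a freshly created input has no effect on the session.
func detachAudio(from session: AVCaptureSession, input: AVCaptureDeviceInput?) {
    session.beginConfiguration()
    if let input = input {
        session.removeInput(input)
    }
    session.commitConfiguration()
    do {
        // Deactivate only after the session no longer performs audio I/O.
        try AVAudioSession.sharedInstance().setActive(false, options: .notifyOthersOnDeactivation)
        try AVAudioSession.sharedInstance().setCategory(.ambient, mode: .default, options: [.mixWithOthers])
    } catch {
        print("Audio session teardown failed: \(error)")
    }
}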
Editing for minimal reproducible example:
Here is the camera view model and I've attached the necessary views in a separate answer:
import SwiftUI
import AVFoundation
// MARK: Camera View Model
class CameraViewModel: NSObject,ObservableObject,AVCaptureFileOutputRecordingDelegate, AVCapturePhotoCaptureDelegate{
@Published var session = AVCaptureSession()
@Published var alert = false
@Published var output = AVCaptureMovieFileOutput()
@Published var preview : AVCaptureVideoPreviewLayer!
// MARK: Video Recorder Properties
@Published var isRecording: Bool = false
@Published var recordedURLs: [URL] = []
@Published var previewURL: URL?
@Published var showPreview: Bool = false
// Top Progress Bar
@Published var recordedDuration: CGFloat = 0
// Maximum 15 seconds
@Published var maxDuration: CGFloat = 15
//for photo
// since were going to read pic data....
@Published var photoOutput = AVCapturePhotoOutput()
@Published var isTaken = false
@Published var picData = Data(count: 0)
@Published var thumbnailData = Data(count: 0)
@Published var flashOn = false
@objc dynamic var videoDeviceInput: AVCaptureDeviceInput!
private let sessionQueue = DispatchQueue(label: "session queue")
// MARK: Device Configuration Properties
private let videoDeviceDiscoverySession = AVCaptureDevice.DiscoverySession(deviceTypes: [.builtInWideAngleCamera, .builtInDualCamera, .builtInTrueDepthCamera], mediaType: .video, position: .unspecified)
@AppStorage("camerapermission") var camerapermission = 0
func checkPermission(){
switch AVCaptureDevice.authorizationStatus(for: .video) {
case .authorized:
self.checkAudioPermission()
return
case .notDetermined:
AVCaptureDevice.requestAccess(for: .video) { (status) in
if status{
self.checkAudioPermission()
}
}
case .denied:
self.camerapermission = 2
self.alert.toggle()
return
default:
return
}
}
func checkAudioPermission() {
switch AVAudioSession.sharedInstance().recordPermission {
case .granted :
print("permission granted")
self.camerapermission = 1
setUp()
case .denied:
print("permission denied")
self.camerapermission = 2
self.alert.toggle()
case .undetermined:
print("request permission here")
AVAudioSession.sharedInstance().requestRecordPermission({ granted in
if granted {
print("permission granted here")
DispatchQueue.main.async {
self.camerapermission = 1
}
self.setUp()
}
})
default:
print("unknown")
}
}
func setUp(){
do{
self.session.beginConfiguration()
let cameraDevice = AVCaptureDevice.default(.builtInWideAngleCamera, for: .video, position: .front)
if cameraDevice != nil {
let videoInput = try AVCaptureDeviceInput(device: cameraDevice!)
// let audioDevice = AVCaptureDevice.default(for: .audio)
// let audioInput = try AVCaptureDeviceInput(device: audioDevice!)
// if self.session.canAddInput(videoInput) && self.session.canAddInput(audioInput){ //MARK: Audio Input
// self.session.addInput(videoInput)
// self.session.addInput(audioInput)
// self.videoDeviceInput = videoInput
// }
//
/* mixing code buggy */
if self.session.canAddInput(videoInput) {
self.session.addInput(videoInput)
self.videoDeviceInput = videoInput
}
if self.session.canAddOutput(self.output){
self.session.addOutput(self.output)
}
if self.session.canAddOutput(self.photoOutput){
self.session.addOutput(self.photoOutput)
}
//for audio mixing, make sure this is default set to true
self.session.automaticallyConfiguresApplicationAudioSession = true
self.session.commitConfiguration()
}
}
catch{
print(error.localizedDescription)
}
}
public func set(zoom: CGFloat){
let factor = zoom < 1 ? 1 : zoom
let device = self.videoDeviceInput.device
do {
try device.lockForConfiguration()
device.videoZoomFactor = factor
device.unlockForConfiguration()
}
catch {
print(error.localizedDescription)
}
}
func changeCamera() {
sessionQueue.async {
if self.videoDeviceInput != nil {
let currentVideoDevice = self.videoDeviceInput.device
let currentPosition = currentVideoDevice.position
let preferredPosition: AVCaptureDevice.Position
let preferredDeviceType: AVCaptureDevice.DeviceType
switch currentPosition {
case .unspecified, .front:
preferredPosition = .back
preferredDeviceType = .builtInWideAngleCamera
case .back:
preferredPosition = .front
preferredDeviceType = .builtInWideAngleCamera
@unknown default:
print("Unknown capture position. Defaulting to back, dual-camera.")
preferredPosition = .back
preferredDeviceType = .builtInWideAngleCamera
}
let devices = self.videoDeviceDiscoverySession.devices
var newVideoDevice: AVCaptureDevice? = nil
// First, seek a device with both the preferred position and device type. Otherwise, seek a device with only the preferred position.
if let device = devices.first(where: { $0.position == preferredPosition && $0.deviceType == preferredDeviceType }) {
newVideoDevice = device
} else if let device = devices.first(where: { $0.position == preferredPosition }) {
newVideoDevice = device
}
if let videoDevice = newVideoDevice {
do {
let videoDeviceInput = try AVCaptureDeviceInput(device: videoDevice)
self.session.beginConfiguration()
// Remove the existing device input first, because AVCaptureSession doesn't support
// simultaneous use of the rear and front cameras.
self.session.removeInput(self.videoDeviceInput)
// MARK: Audio Input
if self.session.canAddInput(videoDeviceInput){
self.session.addInput(videoDeviceInput)
self.videoDeviceInput = videoDeviceInput
}
if self.session.canAddOutput(self.output){
self.session.addOutput(self.output)
}
if self.session.canAddOutput(self.photoOutput){
self.session.addOutput(self.photoOutput)
}
self.session.commitConfiguration()
} catch {
print("Error occurred while creating video device input: \(error)")
}
}
}
}
}
// take and retake functions...
func switchFlash() {
self.flashOn.toggle()
}
func takePic(){
let settings = AVCapturePhotoSettings()
if flashOn {
settings.flashMode = .on
} else {
settings.flashMode = .off
}
//Need to correct image orientation before moving further
if let photoOutputConnection = photoOutput.connection(with: .video) {
//For frontCamera settings to capture mirror image
if self.videoDeviceInput.device.position == .front {
photoOutputConnection.automaticallyAdjustsVideoMirroring = false
photoOutputConnection.isVideoMirrored = true
} else {
photoOutputConnection.automaticallyAdjustsVideoMirroring = true
}
}
self.photoOutput.capturePhoto(with: settings, delegate: self)
print("retaking a photo taken...")
DispatchQueue.global(qos: .background).async {
//self.session.stopRunning()
DispatchQueue.main.async {
withAnimation{self.isTaken.toggle()}
}
}
}
func reTake(){
DispatchQueue.global(qos: .background).async {
self.session.startRunning()
DispatchQueue.main.async {
withAnimation{self.isTaken.toggle()}
//clearing ...
self.flashOn = false
self.picData = Data(count: 0)
}
}
}
func photoOutput(_ output: AVCapturePhotoOutput, didFinishProcessingPhoto photo: AVCapturePhoto, error: Error?) {
if error != nil{
return
}
print("pic taken...")
guard let imageData = photo.fileDataRepresentation() else{return}
self.picData = imageData
}
func startRecording() {
/* mixing code buggy */
do
{
try AVAudioSession.sharedInstance().setActive(false)
try AVAudioSession.sharedInstance().setCategory(AVAudioSession.Category.ambient)
try AVAudioSession.sharedInstance().setCategory(.playAndRecord, mode: .default, options: AVAudioSession.CategoryOptions.mixWithOthers)
try AVAudioSession.sharedInstance().setMode(AVAudioSession.Mode.videoRecording)
try AVAudioSession.sharedInstance().setActive(true)
let audioDevice = AVCaptureDevice.default(for: .audio)
let audioInput = try AVCaptureDeviceInput(device: audioDevice!)
if self.session.canAddInput(audioInput){
self.session.automaticallyConfiguresApplicationAudioSession = false
self.session.addInput(audioInput)
}
} catch {
print("Can't Set Audio Session Category: \(error)")
}
// MARK: Temporary URL for recording Video
let tempURL = NSTemporaryDirectory() + "\(Date()).mov"
//Need to correct image orientation before moving further
if let videoOutputConnection = output.connection(with: .video) {
//For frontCamera settings to capture mirror image
if self.videoDeviceInput.device.position == .front {
videoOutputConnection.automaticallyAdjustsVideoMirroring = false
videoOutputConnection.isVideoMirrored = true
} else {
videoOutputConnection.automaticallyAdjustsVideoMirroring = true
}
}
output.startRecording(to: URL(fileURLWithPath: tempURL), recordingDelegate: self)
isRecording = true
}
func stopRecording(){
output.stopRecording()
isRecording = false
self.flashOn = false
/* mixing code buggy */
do{
try AVAudioSession.sharedInstance().setActive(false)
let audioDevice = AVCaptureDevice.default(for: .audio)
let audioInput = try AVCaptureDeviceInput(device: audioDevice!)
self.session.removeInput(audioInput)
let audioSession = AVAudioSession.sharedInstance()
try audioSession.setCategory(.ambient, mode: .default, options: [.mixWithOthers])
try AVAudioSession.sharedInstance().setActive(true)
} catch {
print("Error occurred while removing audio device input: \(error)")
}
}
func generateThumbnail() {
let image = self.imageFromVideo(url: previewURL!, at: 0)
DispatchQueue.main.async {
self.thumbnailData = image?.pngData() ?? Data(count: 0)
}
}
func imageFromVideo(url: URL, at time: TimeInterval) -> UIImage? {
let asset = AVURLAsset(url: url)
let assetIG = AVAssetImageGenerator(asset: asset)
assetIG.appliesPreferredTrackTransform = true
assetIG.apertureMode = AVAssetImageGenerator.ApertureMode.encodedPixels
let cmTime = CMTime(seconds: time, preferredTimescale: 60)
let thumbnailImageRef: CGImage
do {
thumbnailImageRef = try assetIG.copyCGImage(at: cmTime, actualTime: nil)
print("SUCESS: THUMBNAIL")
} catch let error {
print("Error: \(error)")
return nil
}
return UIImage(cgImage: thumbnailImageRef)
}
func restartSession() {
if !self.session.isRunning {
DispatchQueue.global(qos: .background).async {
self.session.startRunning()
}
}
}
func stopSession() {
// DispatchQueue.global(qos: .background).async {
self.session.stopRunning()
// }
}
func fileOutput(_ output: AVCaptureFileOutput, didFinishRecordingTo outputFileURL: URL, from connections: [AVCaptureConnection], error: Error?) {
if let error = error {
print(error.localizedDescription)
return
}
// CREATED SUCCESSFULLY
print(outputFileURL)
guard let data = try? Data(contentsOf: outputFileURL) else {
return
}
print("File size before compression: \(Double(data.count / 1048576)) mb")
self.recordedURLs.append(outputFileURL)
if self.recordedURLs.count == 1{
self.previewURL = outputFileURL
self.generateThumbnail()
return
}
/*
Below code can be ignored because only recording one url
*/
// CONVERTING URLs TO ASSETS
let assets = recordedURLs.compactMap { url -> AVURLAsset in
return AVURLAsset(url: url)
}
self.previewURL = nil
// MERGING VIDEOS
mergeVideos(assets: assets) { exporter in
exporter.exportAsynchronously {
if exporter.status == .failed{
// HANDLE ERROR
print(exporter.error!)
}
else{
if let finalURL = exporter.outputURL{
print(finalURL)
DispatchQueue.main.async {
self.previewURL = finalURL
print("inside final url")
}
}
}
}
}
}
func mergeVideos(assets: [AVURLAsset],completion: @escaping (_ exporter: AVAssetExportSession)->()){
let compostion = AVMutableComposition()
var lastTime: CMTime = .zero
guard let videoTrack = compostion.addMutableTrack(withMediaType: .video, preferredTrackID: Int32(kCMPersistentTrackID_Invalid)) else{return}
guard let audioTrack = compostion.addMutableTrack(withMediaType: .audio, preferredTrackID: Int32(kCMPersistentTrackID_Invalid)) else{return}
for asset in assets {
// Linking Audio and Video
do{
try videoTrack.insertTimeRange(CMTimeRange(start: .zero, duration: asset.duration), of: asset.tracks(withMediaType: .video)[0], at: lastTime)
// Safe Check if Video has Audio
if !asset.tracks(withMediaType: .audio).isEmpty{
try audioTrack.insertTimeRange(CMTimeRange(start: .zero, duration: asset.duration), of: asset.tracks(withMediaType: .audio)[0], at: lastTime)
}
}
catch{
// HANDLE ERROR
print(error.localizedDescription)
}
// Updating Last Time
lastTime = CMTimeAdd(lastTime, asset.duration)
}
// MARK: Temp Output URL
let tempURL = URL(fileURLWithPath: NSTemporaryDirectory() + "Reel-\(Date()).mp4")
// VIDEO IS ROTATED
// BRINGING BACK TO ORIGINAL TRANSFORM
let layerInstructions = AVMutableVideoCompositionLayerInstruction(assetTrack: videoTrack)
// MARK: Transform
var transform = CGAffineTransform.identity
transform = transform.rotated(by: 90 * (.pi / 180))
transform = transform.translatedBy(x: 0, y: -videoTrack.naturalSize.height)
layerInstructions.setTransform(transform, at: .zero)
let instructions = AVMutableVideoCompositionInstruction()
instructions.timeRange = CMTimeRange(start: .zero, duration: lastTime)
instructions.layerInstructions = [layerInstructions]
let videoComposition = AVMutableVideoComposition()
videoComposition.renderSize = CGSize(width: videoTrack.naturalSize.height, height: videoTrack.naturalSize.width)
videoComposition.instructions = [instructions]
videoComposition.frameDuration = CMTimeMake(value: 1, timescale: 30)
guard let exporter = AVAssetExportSession(asset: compostion, presetName: AVAssetExportPresetHighestQuality) else{return}
exporter.outputFileType = .mp4
exporter.outputURL = tempURL
exporter.videoComposition = videoComposition
completion(exporter)
}
func compressVideo(inputURL: URL,
outputURL: URL,
handler: @escaping (_ exportSession: AVAssetExportSession?) -> Void) {
let urlAsset = AVURLAsset(url: inputURL, options: nil)
guard let exportSession = AVAssetExportSession(asset: urlAsset,
presetName: AVAssetExportPresetMediumQuality) else {
handler(nil)
return
}
exportSession.outputURL = outputURL
exportSession.outputFileType = .mp4
exportSession.exportAsynchronously {
handler(exportSession)
}
}
}
You can ignore the merge-videos code, as there is only one recorded URL right now. You should be able to run this code if you've added camera and microphone permissions to your Info.plist.
It currently has the buggy mixing code, where background audio works the first time, but after restarting the audio session it no longer works. Any help would be greatly appreciated!

Anyone know how to use Apple's Vision framework for real-time text recognition?

I can't seem to find a way to avoid the document scanner and use AVFoundation instead. I'm trying to create a feature where the user can tap a button, scan text, and have it saved to a text view, without making the user tap the camera button, then Keep Scan, then Save, and so on.
I've got it working with object detection, but I can't get it to work for text recognition. So, is there any way to use Apple's Vision framework for real-time text recognition? Any help would be much appreciated.
For performance reasons, I'd prefer to not convert the CMSampleBuffer to a UIImage, and would instead use the following to create an AVCaptureVideoPreviewLayer for live video:
class CameraFeedView: UIView {
private var previewLayer: AVCaptureVideoPreviewLayer!
override class var layerClass: AnyClass {
return AVCaptureVideoPreviewLayer.self
}
init(frame: CGRect, session: AVCaptureSession, videoOrientation: AVCaptureVideoOrientation) {
super.init(frame: frame)
previewLayer = layer as? AVCaptureVideoPreviewLayer
previewLayer.session = session
previewLayer.videoGravity = .resizeAspect
previewLayer.connection?.videoOrientation = videoOrientation
}
required init?(coder: NSCoder) {
fatalError("init(coder:) has not been implemented")
}
}
Once you have this, you can work on the live video data using Vision:
class CameraViewController: UIViewController, AVCaptureVideoDataOutputSampleBufferDelegate {
private let videoDataOutputQueue = DispatchQueue(label: "CameraFeedDataOutput", qos: .userInitiated,
attributes: [], autoreleaseFrequency: .workItem)
private var drawingView: UILabel = {
let view = UILabel(frame: UIScreen.main.bounds)
view.font = UIFont.boldSystemFont(ofSize: 30.0)
view.textColor = .red
view.translatesAutoresizingMaskIntoConstraints = false
return view
}()
private var cameraFeedSession: AVCaptureSession?
private var cameraFeedView: CameraFeedView! //Wrap
override func viewDidLoad() {
super.viewDidLoad()
do {
try setupAVSession()
} catch {
print("setup av session failed")
}
}
func setupAVSession() throws {
// Create device discovery session for a wide angle camera
let wideAngle = AVCaptureDevice.DeviceType.builtInWideAngleCamera
let discoverySession = AVCaptureDevice.DiscoverySession(deviceTypes: [wideAngle], mediaType: .video, position: .back)
// Select a video device, make an input
guard let videoDevice = discoverySession.devices.first else {
print("Could not find a wide angle camera device.")
return
}
guard let deviceInput = try? AVCaptureDeviceInput(device: videoDevice) else {
print("Could not create video device input.")
return
}
let session = AVCaptureSession()
session.beginConfiguration()
// We prefer a 1080p video capture but if camera cannot provide it then fall back to highest possible quality
if videoDevice.supportsSessionPreset(.hd1920x1080) {
session.sessionPreset = .hd1920x1080
} else {
session.sessionPreset = .high
}
// Add a video input
guard session.canAddInput(deviceInput) else {
print("Could not add video device input to the session")
return
}
session.addInput(deviceInput)
let dataOutput = AVCaptureVideoDataOutput()
if session.canAddOutput(dataOutput) {
session.addOutput(dataOutput)
// Add a video data output
dataOutput.alwaysDiscardsLateVideoFrames = true
dataOutput.videoSettings = [
String(kCVPixelBufferPixelFormatTypeKey): Int(kCVPixelFormatType_420YpCbCr8BiPlanarFullRange)
]
dataOutput.setSampleBufferDelegate(self, queue: videoDataOutputQueue)
} else {
print("Could not add video data output to the session")
}
let captureConnection = dataOutput.connection(with: .video)
captureConnection?.preferredVideoStabilizationMode = .standard
captureConnection?.videoOrientation = .portrait
// Always process the frames
captureConnection?.isEnabled = true
session.commitConfiguration()
cameraFeedSession = session
// Get the interface orientation from the window scene to set the proper video orientation on the capture connection.
let videoOrientation: AVCaptureVideoOrientation
switch view.window?.windowScene?.interfaceOrientation {
case .landscapeRight:
videoOrientation = .landscapeRight
default:
videoOrientation = .portrait
}
// Create and setup video feed view
cameraFeedView = CameraFeedView(frame: view.bounds, session: session, videoOrientation: videoOrientation)
setupVideoOutputView(cameraFeedView)
cameraFeedSession?.startRunning()
}
The key functions to implement once you've got an AVCaptureSession set up are the delegate and request handler:
func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
let requestHandler = VNImageRequestHandler(cmSampleBuffer: sampleBuffer, orientation: .down)
let request = VNRecognizeTextRequest(completionHandler: textDetectHandler)
do {
// Perform the text-detection request.
try requestHandler.perform([request])
} catch {
print("Unable to perform the request: \(error).")
}
}
func textDetectHandler(request: VNRequest, error: Error?) {
guard let observations =
request.results as? [VNRecognizedTextObservation] else { return }
// Process each observation to find the recognized body pose points.
let recognizedStrings = observations.compactMap { observation in
// Return the string of the top VNRecognizedText instance.
return observation.topCandidates(1).first?.string
}
DispatchQueue.main.async {
self.drawingView.text = recognizedStrings.first
}
}
}
Note, you will probably want to process each of the recognizedStrings in order to choose the one with the highest confidence, but this is a proof of concept. You could also add a bounding box, and the docs have an example of that.
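As a rough illustration of that note (my own sketch, not part of the original answer), inside textDetectHandler one could pick the single highest-confidence candidate across all observations and keep its bounding box for drawing:
// Each observation offers ranked candidates; keep the best string overall,
// together with its normalized bounding box.
let best = observations
    .compactMap { observation -> (String, VNConfidence, CGRect)? in
        guard let candidate = observation.topCandidates(1).first else { return nil }
        return (candidate.string, candidate.confidence, observation.boundingBox)
    }
    .max { $0.1 < $1.1 }
if let (text, _, box) = best {
    // text is the recognized string; box is in normalized image coordinates
    // and can be converted to view coordinates to draw a rectangle.
    print(text, box)
}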

Real-time face detection from the live camera (not from a static image) using the Vision & AVFoundation frameworks

I need to detect a real face with the iPhone front camera, so I have used the Vision framework to achieve it. But it also detects the face in a static image (a photo of a person), which is not what I want. Here is my code snippet.
class ViewController {
func sessionPrepare() {
session = AVCaptureSession()
guard let session = session, let captureDevice = frontCamera else { return }
do {
let deviceInput = try AVCaptureDeviceInput(device: captureDevice)
session.beginConfiguration()
if session.canAddInput(deviceInput) {
session.addInput(deviceInput)
}
let output = AVCaptureVideoDataOutput()
output.videoSettings = [
String(kCVPixelBufferPixelFormatTypeKey) : Int(kCVPixelFormatType_420YpCbCr8BiPlanarFullRange)
]
output.alwaysDiscardsLateVideoFrames = true
if session.canAddOutput(output) {
session.addOutput(output)
}
session.commitConfiguration()
let queue = DispatchQueue(label: "output.queue")
output.setSampleBufferDelegate(self, queue: queue)
print("setup delegate")
} catch {
print("can't setup session")
}
}
}
It also detects a face from a static image if I hold one in front of the camera.
extension ViewController: AVCaptureVideoDataOutputSampleBufferDelegate {
func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
let pixelBuffer = CMSampleBufferGetImageBuffer(sampleBuffer)
let attachments = CMCopyDictionaryOfAttachments(kCFAllocatorDefault, sampleBuffer, kCMAttachmentMode_ShouldPropagate)
let ciImage = CIImage(cvImageBuffer: pixelBuffer!, options: attachments as! [String : Any]?)
let ciImageWithOrientation = ciImage.applyingOrientation(Int32(UIImageOrientation.leftMirrored.rawValue))
detectFace(on: ciImageWithOrientation)
}
}
func detectFace(on image: CIImage) {
try? faceDetectionRequest.perform([faceDetection], on: image)
if let results = faceDetection.results as? [VNFaceObservation] {
if !results.isEmpty {
faceLandmarks.inputFaceObservations = results
detectLandmarks(on: image)
DispatchQueue.main.async {
self.shapeLayer.sublayers?.removeAll()
}
}
}
}
func detectLandmarks(on image: CIImage) {
try? faceLandmarksDetectionRequest.perform([faceLandmarks], on: image)
if let landmarksResults = faceLandmarks.results as? [VNFaceObservation] {
for observation in landmarksResults {
DispatchQueue.main.async {
if let boundingBox = self.faceLandmarks.inputFaceObservations?.first?.boundingBox {
let faceBoundingBox = boundingBox.scaled(to: self.view.bounds.size)
//different types of landmarks
let faceContour = observation.landmarks?.faceContour
let leftEye = observation.landmarks?.leftEye
let rightEye = observation.landmarks?.rightEye
let nose = observation.landmarks?.nose
let lips = observation.landmarks?.innerLips
let leftEyebrow = observation.landmarks?.leftEyebrow
let rightEyebrow = observation.landmarks?.rightEyebrow
let noseCrest = observation.landmarks?.noseCrest
let outerLips = observation.landmarks?.outerLips
}
}
}
}
}
So is there any way to get this done using only real-time camera detection? I would be very grateful for your help and advice.
I needed to do the same, and after a lot of experiments I finally found this:
https://github.com/syaringan357/iOS-MobileFaceNet-MTCNN-FaceAntiSpoofing
It detects only live camera faces, but it does not use the Vision framework.

AVAssetWriterInput append fails with error code -11800 AVErrorUnknown -12780

I am trying to capture camera video in memory using AVCaptureSession so that I can later write the video data to a movie file. While I have been able to successfully start a capture session, I am not able to successfully write the CMSampleBuffers I've captured to a compressed movie file using AVAssetWriter.
Appending sample buffers using AVAssetWriterInput's append method fails and when I inspect the AVAssetWriter's error property, I get the following:
Error Domain=AVFoundationErrorDomain Code=-11800 "The operation could not be completed" UserInfo={NSUnderlyingError=0x17005d070 {Error Domain=NSOSStatusErrorDomain Code=-12780 "(null)"}, NSLocalizedFailureReason=An unknown error occurred (-12780), NSLocalizedDescription=The operation could not be completed}
As far as I can tell, -11800 indicates AVErrorUnknown; however, I have not been able to find information about the -12780 error code, which appears to be undocumented. Below I have pasted the main files from the example project I set up to demonstrate the issue.
Any guidance would be greatly appreciated. Thanks!
ViewController.swift
import UIKit
import AVFoundation
class ViewController: UIViewController, AVCaptureVideoDataOutputSampleBufferDelegate {
private let recordingClipQueue = DispatchQueue(label: "com.example.recordingClipQueue")
private let videoDataOutputQueue = DispatchQueue(label: "com.example.videoDataOutputQueue")
private let session = AVCaptureSession()
private var backfillSampleBufferList = [CMSampleBuffer]()
override func viewDidLoad() {
super.viewDidLoad()
session.sessionPreset = AVCaptureSessionPreset640x480
let videoDevice = AVCaptureDevice.defaultDevice(withMediaType: AVMediaTypeVideo);
let videoDeviceInput: AVCaptureDeviceInput;
do {
videoDeviceInput = try AVCaptureDeviceInput(device: videoDevice)
} catch {
print("Error creating device input from video device: \(error).")
return
}
guard session.canAddInput(videoDeviceInput) else {
print("Could not add video device input to capture session.")
return
}
session.addInput(videoDeviceInput)
let videoDataOutput = AVCaptureVideoDataOutput()
videoDataOutput.videoSettings = [ kCVPixelBufferPixelFormatTypeKey as NSString : Int(kCMPixelFormat_32BGRA) ]
videoDataOutput.alwaysDiscardsLateVideoFrames = true
videoDataOutput.setSampleBufferDelegate(self, queue: videoDataOutputQueue)
guard session.canAddOutput(videoDataOutput) else {
print("Could not add video data output to capture session.")
return
}
session.addOutput(videoDataOutput)
videoDataOutput.connection(withMediaType: AVMediaTypeVideo).isEnabled = true
session.startRunning()
}
private func backfillSizeInSeconds() -> Double {
if backfillSampleBufferList.count < 1 {
return 0.0
}
let earliestSampleBuffer = backfillSampleBufferList.first!
let latestSampleBuffer = backfillSampleBufferList.last!
let earliestSampleBufferPTS = CMSampleBufferGetOutputPresentationTimeStamp(earliestSampleBuffer).value
let latestSampleBufferPTS = CMSampleBufferGetOutputPresentationTimeStamp(latestSampleBuffer).value
let timescale = CMSampleBufferGetOutputPresentationTimeStamp(latestSampleBuffer).timescale
return Double(latestSampleBufferPTS - earliestSampleBufferPTS) / Double(timescale)
}
private func createClipFromBackfill() {
guard backfillSampleBufferList.count > 0 else {
print("createClipFromBackfill() called before any samples were recorded.")
return
}
let clipURL = URL(fileURLWithPath:
NSSearchPathForDirectoriesInDomains(.documentDirectory, .userDomainMask, true)[0] +
"/recorded_clip.mp4")
if FileManager.default.fileExists(atPath: clipURL.path) {
do {
try FileManager.default.removeItem(atPath: clipURL.path)
} catch {
print("Could not delete existing clip file: \(error).")
}
}
var _videoFileWriter: AVAssetWriter?
do {
_videoFileWriter = try AVAssetWriter(url: clipURL, fileType: AVFileTypeQuickTimeMovie)
} catch {
print("Could not create video file writer: \(error).")
return
}
guard let videoFileWriter = _videoFileWriter else {
print("Video writer was nil.")
return
}
let settingsAssistant = AVOutputSettingsAssistant(preset: AVOutputSettingsPreset640x480)!
guard videoFileWriter.canApply(outputSettings: settingsAssistant.videoSettings, forMediaType: AVMediaTypeVideo) else {
print("Video file writer could not apply video output settings.")
return
}
let earliestRecordedSampleBuffer = backfillSampleBufferList.first!
let _formatDescription = CMSampleBufferGetFormatDescription(earliestRecordedSampleBuffer)
guard let formatDescription = _formatDescription else {
print("Earliest recording pixel buffer format description was nil.")
return
}
let videoWriterInput = AVAssetWriterInput(mediaType: AVMediaTypeVideo,
outputSettings: settingsAssistant.videoSettings,
sourceFormatHint: formatDescription)
guard videoFileWriter.canAdd(videoWriterInput) else {
print("Could not add video writer input to video file writer.")
return
}
videoFileWriter.add(videoWriterInput)
guard videoFileWriter.startWriting() else {
print("Video file writer not ready to write file.")
return
}
videoFileWriter.startSession(atSourceTime: CMSampleBufferGetOutputPresentationTimeStamp(earliestRecordedSampleBuffer))
videoWriterInput.requestMediaDataWhenReady(on: recordingClipQueue) {
while videoWriterInput.isReadyForMoreMediaData {
if self.backfillSampleBufferList.count > 0 {
let sampleBufferToAppend = self.backfillSampleBufferList.first!.deepCopy()
let appendSampleBufferSucceeded = videoWriterInput.append(sampleBufferToAppend)
if !appendSampleBufferSucceeded {
print("Failed to append sample buffer to asset writer input: \(videoFileWriter.error!)")
print("Video file writer status: \(videoFileWriter.status.rawValue)")
}
self.backfillSampleBufferList.remove(at: 0)
} else {
videoWriterInput.markAsFinished()
videoFileWriter.finishWriting {
print("Saved clip to \(clipURL)")
}
break
}
}
}
}
// MARK: AVCaptureVideoDataOutputSampleBufferDelegate
func captureOutput(_ captureOutput: AVCaptureOutput!,
didOutputSampleBuffer sampleBuffer: CMSampleBuffer!,
from connection: AVCaptureConnection!) {
guard let buffer = sampleBuffer else {
print("Captured sample buffer was nil.")
return
}
let sampleBufferCopy = buffer.deepCopy()
backfillSampleBufferList.append(sampleBufferCopy)
if backfillSizeInSeconds() > 3.0 {
session.stopRunning()
createClipFromBackfill()
}
}
func captureOutput(_ captureOutput: AVCaptureOutput!,
didDrop sampleBuffer: CMSampleBuffer!,
from connection: AVCaptureConnection!) {
print("Sample buffer dropped.")
}
}
CVPixelBuffer+Copy.swift:
import CoreVideo
extension CVPixelBuffer {
func deepCopy() -> CVPixelBuffer {
precondition(CFGetTypeID(self) == CVPixelBufferGetTypeID(), "deepCopy() cannot copy a non-CVPixelBuffer")
var _copy : CVPixelBuffer?
CVPixelBufferCreate(
nil,
CVPixelBufferGetWidth(self),
CVPixelBufferGetHeight(self),
CVPixelBufferGetPixelFormatType(self),
CVBufferGetAttachments(self, CVAttachmentMode.shouldPropagate),
&_copy)
guard let copy = _copy else {
print("Pixel buffer copy was nil.")
fatalError()
}
CVBufferPropagateAttachments(self, copy)
CVPixelBufferLockBaseAddress(self, CVPixelBufferLockFlags.readOnly)
CVPixelBufferLockBaseAddress(copy, CVPixelBufferLockFlags(rawValue: 0))
let sourceBaseAddress = CVPixelBufferGetBaseAddress(self)
let copyBaseAddress = CVPixelBufferGetBaseAddress(copy)
memcpy(copyBaseAddress, sourceBaseAddress, CVPixelBufferGetHeight(self) * CVPixelBufferGetBytesPerRow(self))
CVPixelBufferUnlockBaseAddress(copy, CVPixelBufferLockFlags(rawValue: 0))
CVPixelBufferUnlockBaseAddress(self, CVPixelBufferLockFlags.readOnly)
return copy
}
}
CMSampleBuffer+Copy.swift:
import CoreMedia
extension CMSampleBuffer {
func deepCopy() -> CMSampleBuffer {
let _pixelBuffer = CMSampleBufferGetImageBuffer(self)
guard let pixelBuffer = _pixelBuffer else {
print("Pixel buffer to copy was nil.")
fatalError()
}
let pixelBufferCopy = pixelBuffer.deepCopy()
let _formatDescription = CMSampleBufferGetFormatDescription(self)
guard let formatDescription = _formatDescription else {
print("Format description to copy was nil.")
fatalError()
}
var timingInfo = kCMTimingInfoInvalid
let getTimingInfoResult = CMSampleBufferGetSampleTimingInfo(self, 0, &timingInfo)
guard getTimingInfoResult == noErr else {
print("Could not get timing info to copy: \(getTimingInfoResult).")
fatalError()
}
timingInfo.presentationTimeStamp = CMSampleBufferGetOutputPresentationTimeStamp(self)
var _copy : CMSampleBuffer?
let createCopyResult = CMSampleBufferCreateForImageBuffer(kCFAllocatorDefault,
pixelBufferCopy,
true,
nil,
nil,
formatDescription,
&timingInfo,
&_copy);
guard createCopyResult == noErr else {
print("Error creating copy of sample buffer: \(createCopyResult).")
fatalError()
}
guard let copy = _copy else {
print("Copied sample buffer was nil.")
fatalError()
}
return copy
}
}
I also ran into this while trying to synthesize videos. I finally figured out that -[AVAssetWriterInput appendSampleBuffer:] only works on device (as of iOS 11.2.6 anyway) if the underlying pixel buffer is backed by an IOSurface.
If you modify your CVPixelBuffer.deepCopy() method to include the (id)kCVPixelBufferIOSurfacePropertiesKey: @{} key-value pair in the attributes dictionary you pass to CVPixelBufferCreate, it'll probably work.
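In Swift, that suggestion corresponds roughly to the following sketch (my own adaptation, with a hypothetical helper name): pass an attributes dictionary containing kCVPixelBufferIOSurfacePropertiesKey to CVPixelBufferCreate when allocating the destination buffer used by deepCopy():
import CoreVideo

extension CVPixelBuffer {
    // Sketch: allocate an IOSurface-backed destination buffer for the copy,
    // so AVAssetWriterInput.append(_:) accepts the copied sample buffers on device.
    func makeIOSurfaceBackedCopyTarget() -> CVPixelBuffer? {
        let attributes: [String: Any] = [
            kCVPixelBufferIOSurfacePropertiesKey as String: [:] as [String: Any]
        ]
        var copy: CVPixelBuffer?
        CVPixelBufferCreate(kCFAllocatorDefault,
                            CVPixelBufferGetWidth(self),
                            CVPixelBufferGetHeight(self),
                            CVPixelBufferGetPixelFormatType(self),
                            attributes as CFDictionary,
                            &copy)
        return copy
    }
}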
After more research and experimentation, it appears using AVAssetWriterInputPixelBufferAdaptor to append the CVPixelBuffers of the CMSampleBuffers I'm storing to the AVAssetWriterInput works without generating an error.
Below is the modified version of ViewController.swift implementation that uses AVAssetWriterInputPixelBufferAdaptor to append pixel buffers.
ViewController.swift
import UIKit
import AVFoundation
import Photos
class ViewController: UIViewController, AVCaptureVideoDataOutputSampleBufferDelegate {
private let recordingClipQueue = DispatchQueue(label: "com.example.recordingClipQueue")
private let videoDataOutputQueue = DispatchQueue(label: "com.example.videoDataOutputQueue")
private let session = AVCaptureSession()
private var backfillSampleBufferList = [CMSampleBuffer]()
override func viewDidLoad() {
super.viewDidLoad()
session.sessionPreset = AVCaptureSessionPreset640x480
let videoDevice = AVCaptureDevice.defaultDevice(withMediaType: AVMediaTypeVideo);
let videoDeviceInput: AVCaptureDeviceInput;
do {
videoDeviceInput = try AVCaptureDeviceInput(device: videoDevice)
} catch {
print("Error creating device input from video device: \(error).")
return
}
guard session.canAddInput(videoDeviceInput) else {
print("Could not add video device input to capture session.")
return
}
session.addInput(videoDeviceInput)
let videoDataOutput = AVCaptureVideoDataOutput()
videoDataOutput.videoSettings = [ kCVPixelBufferPixelFormatTypeKey as NSString : Int(kCMPixelFormat_32BGRA) ]
videoDataOutput.alwaysDiscardsLateVideoFrames = true
videoDataOutput.setSampleBufferDelegate(self, queue: videoDataOutputQueue)
guard session.canAddOutput(videoDataOutput) else {
print("Could not add video data output to capture session.")
return
}
session.addOutput(videoDataOutput)
videoDataOutput.connection(withMediaType: AVMediaTypeVideo).isEnabled = true
session.startRunning()
}
private func backfillSizeInSeconds() -> Double {
if backfillSampleBufferList.count < 1 {
return 0.0
}
let earliestSampleBuffer = backfillSampleBufferList.first!
let latestSampleBuffer = backfillSampleBufferList.last!
let earliestSampleBufferPTS = CMSampleBufferGetOutputPresentationTimeStamp(earliestSampleBuffer).value
let latestSampleBufferPTS = CMSampleBufferGetOutputPresentationTimeStamp(latestSampleBuffer).value
let timescale = CMSampleBufferGetOutputPresentationTimeStamp(latestSampleBuffer).timescale
return Double(latestSampleBufferPTS - earliestSampleBufferPTS) / Double(timescale)
}
private func createClipFromBackfill() {
guard backfillSampleBufferList.count > 0 else {
print("createClipFromBackfill() called before any samples were recorded.")
return
}
let clipURL = URL(fileURLWithPath:
NSSearchPathForDirectoriesInDomains(.documentDirectory, .userDomainMask, true)[0] +
"/recorded_clip.mp4")
if FileManager.default.fileExists(atPath: clipURL.path) {
do {
try FileManager.default.removeItem(atPath: clipURL.path)
} catch {
print("Could not delete existing clip file: \(error).")
}
}
var _videoFileWriter: AVAssetWriter?
do {
_videoFileWriter = try AVAssetWriter(url: clipURL, fileType: AVFileTypeMPEG4)
} catch {
print("Could not create video file writer: \(error).")
return
}
guard let videoFileWriter = _videoFileWriter else {
print("Video writer was nil.")
return
}
let settingsAssistant = AVOutputSettingsAssistant(preset: AVOutputSettingsPreset640x480)!
guard videoFileWriter.canApply(outputSettings: settingsAssistant.videoSettings, forMediaType: AVMediaTypeVideo) else {
print("Video file writer could not apply video output settings.")
return
}
let earliestRecordedSampleBuffer = backfillSampleBufferList.first!
let _formatDescription = CMSampleBufferGetFormatDescription(earliestRecordedSampleBuffer)
guard let formatDescription = _formatDescription else {
print("Earliest recording pixel buffer format description was nil.")
return
}
let videoWriterInput = AVAssetWriterInput(mediaType: AVMediaTypeVideo,
outputSettings: settingsAssistant.videoSettings,
sourceFormatHint: formatDescription)
guard videoFileWriter.canAdd(videoWriterInput) else {
print("Could not add video writer input to video file writer.")
return
}
videoFileWriter.add(videoWriterInput)
let pixelAdapterBufferAttributes = [ kCVPixelBufferPixelFormatTypeKey as String : Int(kCMPixelFormat_32BGRA) ]
let pixelAdapter = AVAssetWriterInputPixelBufferAdaptor(assetWriterInput: videoWriterInput,
sourcePixelBufferAttributes: pixelAdapterBufferAttributes)
guard videoFileWriter.startWriting() else {
print("Video file writer not ready to write file.")
return
}
videoFileWriter.startSession(atSourceTime: CMSampleBufferGetOutputPresentationTimeStamp(earliestRecordedSampleBuffer))
videoWriterInput.requestMediaDataWhenReady(on: recordingClipQueue) {
while videoWriterInput.isReadyForMoreMediaData {
if self.backfillSampleBufferList.count > 0 {
let sampleBufferToAppend = self.backfillSampleBufferList.first!.deepCopy()
let appendSampleBufferSucceeded = pixelAdapter.append(CMSampleBufferGetImageBuffer(sampleBufferToAppend)!,
withPresentationTime: CMSampleBufferGetOutputPresentationTimeStamp(sampleBufferToAppend))
if !appendSampleBufferSucceeded {
print("Failed to append sample buffer to asset writer input: \(videoFileWriter.error!)")
print("Video file writer status: \(videoFileWriter.status.rawValue)")
}
self.backfillSampleBufferList.remove(at: 0)
} else {
videoWriterInput.markAsFinished()
videoFileWriter.finishWriting {
print("Saving clip to \(clipURL)")
}
break
}
}
}
}
// MARK: AVCaptureVideoDataOutputSampleBufferDelegate
func captureOutput(_ captureOutput: AVCaptureOutput!,
didOutputSampleBuffer sampleBuffer: CMSampleBuffer!,
from connection: AVCaptureConnection!) {
guard let buffer = sampleBuffer else {
print("Captured sample buffer was nil.")
return
}
let sampleBufferCopy = buffer.deepCopy()
backfillSampleBufferList.append(sampleBufferCopy)
if backfillSizeInSeconds() > 3.0 {
session.stopRunning()
createClipFromBackfill()
}
}
func captureOutput(_ captureOutput: AVCaptureOutput!,
didDrop sampleBuffer: CMSampleBuffer!,
from connection: AVCaptureConnection!) {
print("Sample buffer dropped.")
}
}
I ran into issues with the same error codes when creating CVPixelBuffers and CMSampleBuffers manually to create a video with individual frames rendered by CoreGraphics. I could solve the problem by using an AVAssetWriterInputPixelBufferAdaptor instead, as you suggested in your own answer. For some reason, this was only needed when the code was run on an actual device; on the simulator, manually creating the buffers worked fine.
I noticed that the same error codes, AVFoundationErrorDomain Code -11800 and NSOSStatusErrorDomain Code -12780, can also occur for other reasons, for example:
A file already exists at the destination URL provided to AVAssetWriter.
The destination URL is not a file URL (it must be created with URL.init(fileURLWithPath:) and not with URL.init(string:)).
(Posting this for the sake of completeness; your code already handles this correctly. A short sketch of both points follows.)
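A minimal sketch of avoiding both pitfalls (my own illustration; the output file name is assumed):
import AVFoundation
import Foundation

func makeWriter() throws -> AVAssetWriter {
    // Build a proper file URL (not URL(string:)) and remove any leftover file
    // before handing the URL to AVAssetWriter; both avoid the -11800/-12780 failure.
    let outputURL = URL(fileURLWithPath: NSTemporaryDirectory() + "clip.mov")
    try? FileManager.default.removeItem(at: outputURL)
    return try AVAssetWriter(url: outputURL, fileType: .mov)
}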

Swift iOS Record Video and Audio with AVFoundation

I was able to successfully grab the recorded video by following this question
here
Basically
Inherit from the AVCaptureFileOutputRecordingDelegate protocol
Loop through available devices
Create a session with the camera
Start recording
Stop recording
Get the recorded video by implementing the above protocol's method
But the file doesn't come with audio.
According to this question, I have to record audio separately and merge the video and audio using the mentioned classes.
But I have no idea how to implement video and audio recording at the same time.
for device in devices {
// Make sure this particular device supports video
if (device.hasMediaType(AVMediaTypeVideo)) {
// Finally check the position and confirm we've got the back camera
if(device.position == AVCaptureDevicePosition.Back) {
captureDevice = device as? AVCaptureDevice
if captureDevice != nil {
print("Capture device found")
beginSession()
}
}
}
}
In this loop, the only available device types are .Front and .Back.
The following is the way to record video with audio using the AVFoundation framework. The steps are:
1. Prepare the session:
self.captureSession = AVCaptureSession()
2. Prepare available video and audio devices:
let session = AVCaptureDevice.DiscoverySession.init(deviceTypes:[.builtInWideAngleCamera, .builtInMicrophone], mediaType: AVMediaType.video, position: AVCaptureDevice.Position.unspecified)
let cameras = (session.devices.compactMap{$0})
for camera in cameras {
if camera.position == .front {
self.frontCamera = camera
}
if camera.position == .back {
self.rearCamera = camera
try camera.lockForConfiguration()
camera.focusMode = .continuousAutoFocus
camera.unlockForConfiguration()
}
}
3. Prepare session inputs:
guard let captureSession = self.captureSession else {
throw CameraControllerError.captureSessionIsMissing
}
if let rearCamera = self.rearCamera {
self.rearCameraInput = try AVCaptureDeviceInput(device: rearCamera)
if captureSession.canAddInput(self.rearCameraInput!) {
captureSession.addInput(self.rearCameraInput!)
self.currentCameraPosition = .rear
} else {
throw CameraControllerError.inputsAreInvalid
}
} else if let frontCamera = self.frontCamera {
self.frontCameraInput = try AVCaptureDeviceInput(device: frontCamera)
if captureSession.canAddInput(self.frontCameraInput!) {
captureSession.addInput(self.frontCameraInput!)
self.currentCameraPosition = .front
} else {
throw CameraControllerError.inputsAreInvalid
}
} else {
throw CameraControllerError.noCamerasAvailable
}
// Add audio input
if let audioDevice = self.audioDevice {
self.audioInput = try AVCaptureDeviceInput(device: audioDevice)
if captureSession.canAddInput(self.audioInput!) {
captureSession.addInput(self.audioInput!)
} else {
throw CameraControllerError.inputsAreInvalid
}
}
4. Prepare output:
self.videoOutput = AVCaptureMovieFileOutput()
if captureSession.canAddOutput(self.videoOutput!) {
captureSession.addOutput(self.videoOutput!)
}
captureSession.startRunning()
5. Start recording:
func recordVideo(completion: @escaping (URL?, Error?) -> Void) {
guard let captureSession = self.captureSession, captureSession.isRunning else {
completion(nil, CameraControllerError.captureSessionIsMissing)
return
}
let paths = FileManager.default.urls(for: .documentDirectory, in: .userDomainMask)
let fileUrl = paths[0].appendingPathComponent("output.mp4")
try? FileManager.default.removeItem(at: fileUrl)
videoOutput!.startRecording(to: fileUrl, recordingDelegate: self)
self.videoRecordCompletionBlock = completion
}
6. Stop recording:
func stopRecording(completion: @escaping (Error?) -> Void) {
guard let captureSession = self.captureSession, captureSession.isRunning else {
completion(CameraControllerError.captureSessionIsMissing)
return
}
self.videoOutput?.stopRecording()
}
7. Implement the delegate:
func fileOutput(_ output: AVCaptureFileOutput, didFinishRecordingTo outputFileURL: URL, from connections: [AVCaptureConnection], error: Error?) {
if error == nil {
//do something
} else {
//do something
}
}
I took the idea from here: https://www.appcoda.com/avfoundation-swift-guide/
Here is the complete project https://github.com/rubaiyat6370/iOS-Tutorial/
Found the answer. This answer goes with this code.
It can simply be done by:
declare another capture device variable
loop through devices and initialize the camera and audio capture device variables
add the audio input to the session
Code:
var captureDevice : AVCaptureDevice?
var captureAudio :AVCaptureDevice?
Loop through devices and Initialize capture devices
var captureDeviceVideoFound: Bool = false
var captureDeviceAudioFound:Bool = false
// Loop through all the capture devices on this phone
for device in devices {
// Make sure this particular device supports video
if (device.hasMediaType(AVMediaTypeVideo)) {
// Finally check the position and confirm we've got the front camera
if(device.position == AVCaptureDevicePosition.Front) {
captureDevice = device as? AVCaptureDevice //initialize video
if captureDevice != nil {
print("Capture device found")
captureDeviceVideoFound = true;
}
}
}
if(device.hasMediaType(AVMediaTypeAudio)){
print("Capture device audio init")
captureAudio = device as? AVCaptureDevice //initialize audio
captureDeviceAudioFound = true
}
}
if(captureDeviceAudioFound && captureDeviceVideoFound){
beginSession()
}
Inside Session
try captureSession.addInput(AVCaptureDeviceInput(device: captureDevice))
try captureSession.addInput(AVCaptureDeviceInput(device: captureAudio))
This will output the video file with audio; no need to merge the audio or do anything else.
This Apple documentation helps.
I followed the answer from @Mumu, but it didn't work for me because the call to AVCaptureDevice.DiscoverySession.init was returning video devices only.
Here is my version that works on iOS 14, Swift 5:
var captureSession: AVCaptureSession? = nil
var camera: AVCaptureDevice? = nil
var microphone: AVCaptureDevice? = nil
var videoOutput: AVCaptureFileOutput? = nil
var previewLayer: AVCaptureVideoPreviewLayer? = nil
func findDevices() {
camera = nil
microphone = nil
//Search for video media type and we need back camera only
let session = AVCaptureDevice.DiscoverySession.init(deviceTypes:[.builtInWideAngleCamera],
mediaType: AVMediaType.video, position: AVCaptureDevice.Position.back)
var devices = (session.devices.compactMap{$0})
//Search for microphone
let asession = AVCaptureDevice.DiscoverySession.init(deviceTypes:[.builtInMicrophone],
mediaType: AVMediaType.audio, position: AVCaptureDevice.Position.unspecified)
//Combine all devices into one list
devices.append(contentsOf: asession.devices.compactMap{$0})
for device in devices {
if device.position == .back {
do {
try device.lockForConfiguration()
device.focusMode = .continuousAutoFocus
device.flashMode = .off
device.whiteBalanceMode = .continuousAutoWhiteBalance
device.unlockForConfiguration()
camera = device
} catch {
}
}
if device.hasMediaType(.audio) {
microphone = device
}
}
}
func initVideoRecorder()->Bool {
captureSession = AVCaptureSession()
guard let captureSession = captureSession else {return false}
captureSession.sessionPreset = .hd4K3840x2160
findDevices()
guard let camera = camera else { return false}
do {
let cameraInput = try AVCaptureDeviceInput(device: camera)
captureSession.addInput(cameraInput)
} catch {
self.camera = nil
return false
}
if let audio = microphone {
do {
let audioInput = try AVCaptureDeviceInput(device: audio)
captureSession.addInput(audioInput)
} catch {
}
}
videoOutput = AVCaptureMovieFileOutput()
if captureSession.canAddOutput(videoOutput!) {
captureSession.addOutput(videoOutput!)
captureSession.startRunning()
videoOutput?.connection(with: .video)?.videoOrientation = .landscapeRight
previewLayer = AVCaptureVideoPreviewLayer(session: captureSession)
previewLayer?.videoGravity = .resizeAspect
previewLayer?.connection?.videoOrientation = .landscapeRight
return true
}
return false
}
func startRecording()->Bool {
guard let captureSession = captureSession, captureSession.isRunning else {return false}
let paths = FileManager.default.urls(for: .documentDirectory, in: .userDomainMask)
let fileUrl = paths[0].appendingPathComponent(getVideoName())
try? FileManager.default.removeItem(at: fileUrl)
videoOutput?.startRecording(to: fileUrl, recordingDelegate: self)
return true
}
I had this problem also, but when I grouped adding the video input and the sound input together, the audio worked. This is my code for adding the inputs:
if (cameraSession.canAddInput(deviceInput) == true && cameraSession.canAddInput(audioDeviceInput) == true) {//detects if devices can be added
cameraSession.addInput(deviceInput)//adds video
cameraSession.addInput(audioDeviceInput)//adds audio
}
Also, I found that you have to add the video input first or else there won't be audio. I originally had them in two if statements, but putting them in one lets video and audio be recorded together. Hope this helps.
Record Video With Audio
//Get Video Device
if let devices = AVCaptureDevice.devices(withMediaType: AVMediaTypeVideo) as? [AVCaptureDevice] {
for device in devices {
if device.hasMediaType(AVMediaTypeVideo) {
if device.position == AVCaptureDevicePosition.back {
videoCaptureDevice = device
}
}
}
if videoCaptureDevice != nil {
do {
// Add Video Input
try self.captureSession.addInput(AVCaptureDeviceInput(device: videoCaptureDevice))
// Get Audio Device
let audioInput = AVCaptureDevice.defaultDevice(withMediaType: AVMediaTypeAudio)
//Add Audio Input
try self.captureSession.addInput(AVCaptureDeviceInput(device: audioInput))
self.previewLayer = AVCaptureVideoPreviewLayer(session: self.captureSession)
previewLayer.videoGravity = AVLayerVideoGravityResizeAspectFill
previewLayer.connection.videoOrientation = AVCaptureVideoOrientation.portrait
self.videoView.layer.addSublayer(self.previewLayer)
//Add File Output
self.captureSession.addOutput(self.movieOutput)
captureSession.startRunning()
} catch {
print(error)
}
}
}
For more details, refer to this link:
https://medium.com/@santhosh3386/ios-avcapturesession-record-video-with-audio-23c8f8c9a8f8
