A newbie to swift! I am trying to implement an app that converts speech to text using speech recognizer.
Problem
SFSpeechRecognizer().isAvailable is false
private let request = SFSpeechAudioBufferRecognitionRequest()
private var task: SFSpeechRecognitionTask?
private let engine = AVAudioEngine()
/// Starts streaming microphone audio into the speech recognition `request`.
///
/// The recognizer is validated before the audio engine is touched, so an
/// unsupported locale or an unavailable recognizer no longer leaves a tap
/// installed and the engine running.
func recognize() {
    // `SFSpeechRecognizer()` is failable: it returns nil when the current
    // locale is not supported.
    guard let systemRecognizer = SFSpeechRecognizer() else {
        return
    }
    guard systemRecognizer.isAvailable else {
        self.log(.debug, msg: "Entered this condition and stopped!")
        return
    }

    guard let node = engine.inputNode else {
        return
    }
    let recordingFormat = node.outputFormat(forBus: 0)
    node.installTap(onBus: 0, bufferSize: 1024, format: recordingFormat) { buffer, _ in
        // The stored property is called `request`; the previous code referenced
        // a non-existent `recognitionRequest`.
        self.request.append(buffer)
    }

    engine.prepare()
    do {
        try engine.start()
    } catch {
        // Do not leave the tap behind when the engine could not be started.
        node.removeTap(onBus: 0)
        print(error)
    }
}
Question
I am not sure why it stops in the simulator. Does the microphone work in the iPhone simulator?
Update
I tried testing with an audio file using the code below,
let audioFile = Bundle.main.url(forResource: "create_activity", withExtension: "m4a", subdirectory: "Sample Recordings")
let recognitionRequest = SFSpeechURLRecognitionRequest(url: audioFile!)
getting error which says, Error Domain=kAFAssistantErrorDomain Code=1101 "(null)"
It looks like the simulator gained access to the microphone with iOS 11.
Unfortunately I was not able to find any documentation confirming that, but can confirm this functionality with the following code sample. Works perfectly fine on iOS 11 simulator, but does nothing on iOS 10 simulator (or earlier).
import UIKit
import Speech
/// Demo view controller that requests microphone and speech-recognition
/// permission when its view loads, then streams microphone audio into an
/// `SFSpeechAudioBufferRecognitionRequest` and prints every transcription.
class ViewController: UIViewController {
    private var recognizer = SFSpeechRecognizer()
    private var request = SFSpeechAudioBufferRecognitionRequest()
    private let engine = AVAudioEngine()
    /// The running recognition task, kept so it can be cancelled if audio
    /// capture fails to start.
    private var task: SFSpeechRecognitionTask?

    override func viewDidLoad() {
        super.viewDidLoad()
        requestPermissions()
    }

    /// Asks for record permission first and, if granted, for speech
    /// recognition authorization. Recognition is prepared only when both
    /// have been granted.
    ///
    /// Do not forget to add `NSMicrophoneUsageDescription` and
    /// `NSSpeechRecognitionUsageDescription` to `Info.plist`.
    private func requestPermissions() {
        // Request recording permission
        AVAudioSession.sharedInstance().requestRecordPermission { [weak self] allowed in
            guard allowed else {
                print("AVAudioSession record permission: \(allowed).")
                return
            }
            // Request speech recognition authorization
            SFSpeechRecognizer.requestAuthorization { status in
                switch status {
                case .authorized:
                    // The authorization handler is not guaranteed to run on the
                    // main thread, so hop there before touching the engine.
                    DispatchQueue.main.async {
                        self?.prepareSpeechRecognition()
                    }
                case .notDetermined, .denied, .restricted:
                    print("SFSpeechRecognizer authorization status: \(status).")
                @unknown default:
                    print("SFSpeechRecognizer authorization status: \(status).")
                }
            }
        }
    }

    /// Creates the recognition task, installs a tap on the engine's input
    /// node that forwards buffers to `request`, and starts the engine.
    private func prepareSpeechRecognition() {
        // Check if recognizer is available (has failable initializer)
        guard let recognizer = recognizer else {
            print("SFSpeechRecognizer not supported.")
            return
        }
        guard recognizer.isAvailable else {
            print("SFSpeechRecognizer not available.")
            return
        }

        // Prepare recognition task
        task = recognizer.recognitionTask(with: request) { (result, error) in
            if let result = result {
                print("SFSpeechRecognizer result: \(result.bestTranscription.formattedString)")
            } else {
                print("SFSpeechRecognizer error: \(String(describing: error))")
            }
        }

        // Install tap to audio engine input node
        let inputNode = engine.inputNode
        let busNumber = 0
        let recordingFormat = inputNode.outputFormat(forBus: busNumber)
        inputNode.installTap(onBus: busNumber, bufferSize: 1024, format: recordingFormat) { [weak self] buffer, _ in
            self?.request.append(buffer)
        }

        // Prepare and start audio engine
        engine.prepare()
        do {
            try engine.start()
        } catch {
            // Undo the partial setup so a later attempt starts from a clean state.
            inputNode.removeTap(onBus: busNumber)
            task?.cancel()
            task = nil
            print(error)
        }
    }
}
Do not forget to add NSMicrophoneUsageDescription and NSSpeechRecognitionUsageDescription to Info.plist.
Related
I'm currently debugging the video camera model that I use to record video and audio. I would like background audio (if something is playing) to keep playing while the camera is open, and to record the microphone on top of that audio. Initially I got my AVCaptureSession to work smoothly by adding the microphone input during setup, but that automatically stops the background audio as soon as the camera view is set up.
I have been working on the following solution where I add the audio input only when I start recording and attempt to remove audio input once I stop recording. Here is my current code:
import SwiftUI
import AVFoundation
// MARK: Camera View Model
/// Camera view model (abridged listing): owns the capture session and
/// attaches the microphone only while a movie is being recorded, so that
/// background audio keeps playing while the camera is merely open.
///
/// NOTE: `photoOutput`, `videoDeviceInput` and `flashOn` are used below but
/// are not declared in this abridged listing.
class CameraViewModel: NSObject, ObservableObject, AVCaptureFileOutputRecordingDelegate, AVCapturePhotoCaptureDelegate {
    @Published var session = AVCaptureSession()
    @Published var alert = false
    @Published var output = AVCaptureMovieFileOutput()
    @Published var preview: AVCaptureVideoPreviewLayer!

    // MARK: Video Recorder Properties
    @Published var isRecording: Bool = false
    @Published var recordedURLs: [URL] = []
    @Published var previewURL: URL?
    @Published var showPreview: Bool = false

    /// The microphone input currently attached to `session`, if any.
    /// `stopRecording()` must remove this exact instance; creating a new
    /// `AVCaptureDeviceInput` there would not match the one that was added.
    private var audioDeviceInput: AVCaptureDeviceInput?

    /// Configures the session with the front camera and the movie/photo
    /// outputs. Called after the necessary permissions are acquired.
    func setUp() {
        guard let cameraDevice = AVCaptureDevice.default(.builtInWideAngleCamera, for: .video, position: .front) else {
            return
        }
        do {
            // Create the input before opening the configuration block so a
            // thrown error cannot leave `beginConfiguration()` uncommitted.
            let videoInput = try AVCaptureDeviceInput(device: cameraDevice)

            self.session.beginConfiguration()
            defer { self.session.commitConfiguration() }

            /* old code that added audio input on open that worked as intended
            let videoInput = try AVCaptureDeviceInput(device: cameraDevice!)
            let audioDevice = AVCaptureDevice.default(for: .audio)
            let audioInput = try AVCaptureDeviceInput(device: audioDevice!)
            if self.session.canAddInput(videoInput) && self.session.canAddInput(audioInput){ //MARK: Audio Input
            self.session.addInput(videoInput)
            self.session.addInput(audioInput)
            self.videoDeviceInput = videoInput
            } */

            // new code that only adds video input
            if self.session.canAddInput(videoInput) {
                self.session.addInput(videoInput)
                self.videoDeviceInput = videoInput
            }
            if self.session.canAddOutput(self.output) {
                self.session.addOutput(self.output)
            }
            if self.session.canAddOutput(self.photoOutput) {
                self.session.addOutput(self.photoOutput)
            }
            // for audio mixing, make sure this is default set to true
            self.session.automaticallyConfiguresApplicationAudioSession = true
        } catch {
            print(error.localizedDescription)
        }
    }

    /// Switches the shared audio session to a mixable play-and-record
    /// configuration, attaches the microphone input and starts recording to a
    /// temporary `.mov` file. Called upon user input.
    func startRecording() {
        do {
            let audioSession = AVAudioSession.sharedInstance()
            try audioSession.setActive(false)
            try audioSession.setCategory(.playAndRecord, mode: .default, options: AVAudioSession.CategoryOptions.mixWithOthers)
            try audioSession.setMode(AVAudioSession.Mode.videoRecording)
            try audioSession.setActive(true)

            // Attach the microphone only once; remember the instance for removal.
            if audioDeviceInput == nil, let audioDevice = AVCaptureDevice.default(for: .audio) {
                let audioInput = try AVCaptureDeviceInput(device: audioDevice)
                self.session.beginConfiguration()
                if self.session.canAddInput(audioInput) {
                    self.session.automaticallyConfiguresApplicationAudioSession = false
                    self.session.addInput(audioInput)
                    audioDeviceInput = audioInput
                }
                self.session.commitConfiguration()
            }
        } catch {
            print("Can't Set Audio Session Category: \(error)")
        }

        // MARK: Temporary URL for recording Video
        let tempURL = NSTemporaryDirectory() + "\(Date()).mov"

        // Need to correct image orientation before moving further
        if let videoOutputConnection = output.connection(with: .video) {
            // For frontCamera settings to capture mirror image
            if self.videoDeviceInput?.device.position == .front {
                videoOutputConnection.automaticallyAdjustsVideoMirroring = false
                videoOutputConnection.isVideoMirrored = true
            } else {
                videoOutputConnection.automaticallyAdjustsVideoMirroring = true
            }
        }
        output.startRecording(to: URL(fileURLWithPath: tempURL), recordingDelegate: self)
        isRecording = true
    }

    /// Stops recording, detaches the microphone input that `startRecording()`
    /// added and returns the audio session to the ambient, mixable category.
    func stopRecording() {
        output.stopRecording()
        isRecording = false
        self.flashOn = false

        // Remove the very same input instance that was added.
        // NOTE(review): `stopRecording()` presumably finishes asynchronously; if
        // recordings end up truncated, move this teardown into the
        // `fileOutput(_:didFinishRecordingTo:from:error:)` delegate callback.
        if let audioInput = audioDeviceInput {
            self.session.beginConfiguration()
            self.session.removeInput(audioInput)
            self.session.commitConfiguration()
            audioDeviceInput = nil
        }

        do {
            let audioSession = AVAudioSession.sharedInstance()
            // Deactivate only after the capture input has been detached.
            try audioSession.setActive(false)
            try audioSession.setCategory(.ambient, mode: .default, options: [.mixWithOthers])
            try audioSession.setActive(true)
        } catch {
            print("Error occurred while removing audio device input: \(error)")
        }
    }
}
I also added the following necessary lines in my AppDelegate launch method as well
/*
below is for mixing audio
*/
let audioSession = AVAudioSession.sharedInstance()
do {
try audioSession.setCategory(.ambient, mode: .default, options: [.mixWithOthers])
} catch {
print("Failed to set audio session category.")
}
I believe I'm going on the correct track as the first time the app opens, background audio plays smoothly and there is a small camera flash but once I start recording, it mixes the background audio well with the phone audio input as well. I was able to see this in the preview in a new view. However, once I dismiss the preview of the recorded url and go back to the camera, the phone audio mic input stops working completely.
I also receive this error in my console:
AVAudioSession_iOS.mm:1271 Deactivating an audio session that has running I/O. All I/O should be stopped or paused prior to deactivating the audio session.
When I looked online, it said to stop or pause AVPlayer but I'm unsure where I'm even using an AVPlayer session here. I also noticed that people suggested creating two capture sessions for the audio and video but I was struggling to get that working as well, so went ahead with this option.
Editing for minimal reproducible example:
Here is the camera view model and I've attached the necessary views in a separate answer:
import SwiftUI
import AVFoundation
// MARK: Camera View Model
/// View model that owns the capture session used for photo capture and short
/// video recording, and publishes the state the SwiftUI camera views bind to.
///
/// Audio handling: the microphone input is attached only while a movie is
/// being recorded (`startRecording()`) and detached again in
/// `stopRecording()`, so background audio keeps playing while the camera is
/// merely open.
class CameraViewModel: NSObject, ObservableObject, AVCaptureFileOutputRecordingDelegate, AVCapturePhotoCaptureDelegate {
    @Published var session = AVCaptureSession()
    @Published var alert = false
    @Published var output = AVCaptureMovieFileOutput()
    @Published var preview: AVCaptureVideoPreviewLayer!

    // MARK: Video Recorder Properties
    @Published var isRecording: Bool = false
    @Published var recordedURLs: [URL] = []
    @Published var previewURL: URL?
    @Published var showPreview: Bool = false

    // Top Progress Bar
    @Published var recordedDuration: CGFloat = 0
    // Maximum 15 seconds
    @Published var maxDuration: CGFloat = 15

    // MARK: Photo Properties
    @Published var photoOutput = AVCapturePhotoOutput()
    @Published var isTaken = false
    @Published var picData = Data(count: 0)
    @Published var thumbnailData = Data(count: 0)
    @Published var flashOn = false

    @objc dynamic var videoDeviceInput: AVCaptureDeviceInput!

    /// The microphone input currently attached to `session`, if any.
    /// `stopRecording()` must remove this exact instance; a freshly created
    /// `AVCaptureDeviceInput` would not match the one that was added.
    private var audioDeviceInput: AVCaptureDeviceInput?

    private let sessionQueue = DispatchQueue(label: "session queue")

    // MARK: Device Configuration Properties
    private let videoDeviceDiscoverySession = AVCaptureDevice.DiscoverySession(deviceTypes: [.builtInWideAngleCamera, .builtInDualCamera, .builtInTrueDepthCamera], mediaType: .video, position: .unspecified)

    /// Persisted permission state. The code below writes 1 once permissions
    /// are granted and 2 when they are denied.
    @AppStorage("camerapermission") var camerapermission = 0

    // MARK: Permissions

    /// Checks (and if needed requests) camera permission, then continues with
    /// the microphone permission check.
    func checkPermission() {
        switch AVCaptureDevice.authorizationStatus(for: .video) {
        case .authorized:
            self.checkAudioPermission()
        case .notDetermined:
            AVCaptureDevice.requestAccess(for: .video) { granted in
                guard granted else { return }
                // The completion handler is not guaranteed to run on the main
                // queue, and `checkAudioPermission()` mutates published state.
                DispatchQueue.main.async {
                    self.checkAudioPermission()
                }
            }
        case .denied:
            self.camerapermission = 2
            self.alert.toggle()
        default:
            return
        }
    }

    /// Checks (and if needed requests) microphone permission and configures
    /// the session once it is granted.
    func checkAudioPermission() {
        switch AVAudioSession.sharedInstance().recordPermission {
        case .granted:
            print("permission granted")
            self.camerapermission = 1
            setUp()
        case .denied:
            print("permission denied")
            self.camerapermission = 2
            self.alert.toggle()
        case .undetermined:
            print("request permission here")
            AVAudioSession.sharedInstance().requestRecordPermission({ granted in
                guard granted else { return }
                print("permission granted here")
                DispatchQueue.main.async {
                    self.camerapermission = 1
                }
                self.setUp()
            })
        default:
            print("unknown")
        }
    }

    // MARK: Session Setup

    /// Configures the session with the front camera and the movie/photo
    /// outputs. The microphone is intentionally not added here; see
    /// `startRecording()`.
    func setUp() {
        guard let cameraDevice = AVCaptureDevice.default(.builtInWideAngleCamera, for: .video, position: .front) else {
            return
        }
        do {
            // Create the input before opening the configuration block so a
            // thrown error cannot leave `beginConfiguration()` uncommitted.
            let videoInput = try AVCaptureDeviceInput(device: cameraDevice)

            self.session.beginConfiguration()
            defer { self.session.commitConfiguration() }

            if self.session.canAddInput(videoInput) {
                self.session.addInput(videoInput)
                self.videoDeviceInput = videoInput
            }
            if self.session.canAddOutput(self.output) {
                self.session.addOutput(self.output)
            }
            if self.session.canAddOutput(self.photoOutput) {
                self.session.addOutput(self.photoOutput)
            }
            // for audio mixing, make sure this is default set to true
            self.session.automaticallyConfiguresApplicationAudioSession = true
        } catch {
            print(error.localizedDescription)
        }
    }

    /// Applies a zoom factor to the current camera.
    /// - Parameter zoom: Requested factor; clamped to
    ///   `1...activeFormat.videoMaxZoomFactor` before it is applied.
    public func set(zoom: CGFloat) {
        guard let device = self.videoDeviceInput?.device else { return }
        let factor = min(max(zoom, 1), device.activeFormat.videoMaxZoomFactor)
        do {
            try device.lockForConfiguration()
            device.videoZoomFactor = factor
            device.unlockForConfiguration()
        } catch {
            print(error.localizedDescription)
        }
    }

    /// Swaps between the front and back wide-angle camera on `sessionQueue`.
    func changeCamera() {
        sessionQueue.async {
            guard let currentInput = self.videoDeviceInput else { return }

            let preferredDeviceType: AVCaptureDevice.DeviceType = .builtInWideAngleCamera
            let preferredPosition: AVCaptureDevice.Position
            switch currentInput.device.position {
            case .unspecified, .front:
                preferredPosition = .back
            case .back:
                preferredPosition = .front
            @unknown default:
                print("Unknown capture position. Defaulting to back, dual-camera.")
                preferredPosition = .back
            }

            // First, seek a device with both the preferred position and device
            // type. Otherwise, seek a device with only the preferred position.
            let devices = self.videoDeviceDiscoverySession.devices
            let candidate = devices.first(where: { $0.position == preferredPosition && $0.deviceType == preferredDeviceType })
                ?? devices.first(where: { $0.position == preferredPosition })
            guard let videoDevice = candidate else { return }

            do {
                let newInput = try AVCaptureDeviceInput(device: videoDevice)
                self.session.beginConfiguration()
                defer { self.session.commitConfiguration() }

                // Remove the existing device input first, because AVCaptureSession
                // doesn't support simultaneous use of the rear and front cameras.
                self.session.removeInput(currentInput)
                if self.session.canAddInput(newInput) {
                    self.session.addInput(newInput)
                    self.videoDeviceInput = newInput
                } else {
                    // Restore the previous camera rather than leaving the
                    // session without any video input.
                    self.session.addInput(currentInput)
                }
                if self.session.canAddOutput(self.output) {
                    self.session.addOutput(self.output)
                }
                if self.session.canAddOutput(self.photoOutput) {
                    self.session.addOutput(self.photoOutput)
                }
            } catch {
                print("Error occurred while creating video device input: \(error)")
            }
        }
    }

    // MARK: Photo Capture

    func switchFlash() {
        self.flashOn.toggle()
    }

    /// Captures a photo with the current flash preference and flips `isTaken`.
    func takePic() {
        let settings = AVCapturePhotoSettings()
        let requestedFlash: AVCaptureDevice.FlashMode = flashOn ? .on : .off
        // Only request a flash mode the current output reports as supported.
        if photoOutput.supportedFlashModes.contains(requestedFlash) {
            settings.flashMode = requestedFlash
        }

        // Need to correct image orientation before moving further
        if let photoOutputConnection = photoOutput.connection(with: .video) {
            // For frontCamera settings to capture mirror image
            if self.videoDeviceInput?.device.position == .front {
                photoOutputConnection.automaticallyAdjustsVideoMirroring = false
                photoOutputConnection.isVideoMirrored = true
            } else {
                photoOutputConnection.automaticallyAdjustsVideoMirroring = true
            }
        }
        self.photoOutput.capturePhoto(with: settings, delegate: self)
        print("retaking a photo taken...")
        DispatchQueue.main.async {
            withAnimation { self.isTaken.toggle() }
        }
    }

    /// Restarts the session and clears the previously captured photo state.
    func reTake() {
        DispatchQueue.global(qos: .background).async {
            self.session.startRunning()
            DispatchQueue.main.async {
                withAnimation { self.isTaken.toggle() }
                // clearing ...
                self.flashOn = false
                self.picData = Data(count: 0)
            }
        }
    }

    func photoOutput(_ output: AVCapturePhotoOutput, didFinishProcessingPhoto photo: AVCapturePhoto, error: Error?) {
        guard error == nil else { return }
        print("pic taken...")
        guard let imageData = photo.fileDataRepresentation() else { return }
        self.picData = imageData
    }

    // MARK: Video Recording

    /// Switches the shared audio session to a mixable play-and-record
    /// configuration, attaches the microphone input and starts recording to a
    /// temporary `.mov` file.
    func startRecording() {
        do {
            let audioSession = AVAudioSession.sharedInstance()
            try audioSession.setActive(false)
            try audioSession.setCategory(.playAndRecord, mode: .default, options: AVAudioSession.CategoryOptions.mixWithOthers)
            try audioSession.setMode(AVAudioSession.Mode.videoRecording)
            try audioSession.setActive(true)

            // Attach the microphone only once; remember the instance for removal.
            if audioDeviceInput == nil, let audioDevice = AVCaptureDevice.default(for: .audio) {
                let audioInput = try AVCaptureDeviceInput(device: audioDevice)
                self.session.beginConfiguration()
                if self.session.canAddInput(audioInput) {
                    self.session.automaticallyConfiguresApplicationAudioSession = false
                    self.session.addInput(audioInput)
                    audioDeviceInput = audioInput
                }
                self.session.commitConfiguration()
            }
        } catch {
            print("Can't Set Audio Session Category: \(error)")
        }

        // MARK: Temporary URL for recording Video
        let tempURL = NSTemporaryDirectory() + "\(Date()).mov"

        // Need to correct image orientation before moving further
        if let videoOutputConnection = output.connection(with: .video) {
            // For frontCamera settings to capture mirror image
            if self.videoDeviceInput?.device.position == .front {
                videoOutputConnection.automaticallyAdjustsVideoMirroring = false
                videoOutputConnection.isVideoMirrored = true
            } else {
                videoOutputConnection.automaticallyAdjustsVideoMirroring = true
            }
        }
        output.startRecording(to: URL(fileURLWithPath: tempURL), recordingDelegate: self)
        isRecording = true
    }

    /// Stops recording, detaches the microphone input that `startRecording()`
    /// added and returns the audio session to the ambient, mixable category.
    func stopRecording() {
        output.stopRecording()
        isRecording = false
        self.flashOn = false

        // Remove the very same input instance that was added.
        // NOTE(review): `stopRecording()` presumably finishes asynchronously; if
        // recordings end up truncated, move this teardown into
        // `fileOutput(_:didFinishRecordingTo:from:error:)`.
        if let audioInput = audioDeviceInput {
            self.session.beginConfiguration()
            self.session.removeInput(audioInput)
            self.session.commitConfiguration()
            audioDeviceInput = nil
        }

        do {
            let audioSession = AVAudioSession.sharedInstance()
            // Deactivate only after the capture input has been detached.
            try audioSession.setActive(false)
            try audioSession.setCategory(.ambient, mode: .default, options: [.mixWithOthers])
            try audioSession.setActive(true)
        } catch {
            print("Error occurred while removing audio device input: \(error)")
        }
    }

    // MARK: Thumbnails

    /// Renders the first frame of `previewURL` into `thumbnailData` (PNG).
    /// Does nothing when there is no preview URL.
    func generateThumbnail() {
        guard let url = previewURL else { return }
        let image = self.imageFromVideo(url: url, at: 0)
        DispatchQueue.main.async {
            self.thumbnailData = image?.pngData() ?? Data(count: 0)
        }
    }

    /// Extracts a still frame from a video.
    /// - Parameters:
    ///   - url: Location of the video asset.
    ///   - time: Position of the frame, in seconds.
    /// - Returns: The frame as a `UIImage`, or nil if it could not be generated.
    func imageFromVideo(url: URL, at time: TimeInterval) -> UIImage? {
        let asset = AVURLAsset(url: url)
        let assetIG = AVAssetImageGenerator(asset: asset)
        assetIG.appliesPreferredTrackTransform = true
        assetIG.apertureMode = AVAssetImageGenerator.ApertureMode.encodedPixels

        let cmTime = CMTime(seconds: time, preferredTimescale: 60)
        do {
            let thumbnailImageRef = try assetIG.copyCGImage(at: cmTime, actualTime: nil)
            print("SUCESS: THUMBNAIL")
            return UIImage(cgImage: thumbnailImageRef)
        } catch let error {
            print("Error: \(error)")
            return nil
        }
    }

    // MARK: Session Lifecycle

    func restartSession() {
        if !self.session.isRunning {
            DispatchQueue.global(qos: .background).async {
                self.session.startRunning()
            }
        }
    }

    func stopSession() {
        self.session.stopRunning()
    }

    // MARK: AVCaptureFileOutputRecordingDelegate

    func fileOutput(_ output: AVCaptureFileOutput, didFinishRecordingTo outputFileURL: URL, from connections: [AVCaptureConnection], error: Error?) {
        if let error = error {
            print(error.localizedDescription)
            return
        }
        // CREATED SUCCESSFULLY
        print(outputFileURL)
        guard let data = try? Data(contentsOf: outputFileURL) else {
            return
        }
        // Convert before dividing so the fractional megabytes are not lost.
        print("File size before compression: \(Double(data.count) / 1048576) mb")
        self.recordedURLs.append(outputFileURL)
        if self.recordedURLs.count == 1 {
            self.previewURL = outputFileURL
            self.generateThumbnail()
            return
        }

        /*
         Below code can be ignored because only recording one url
         */
        // CONVERTING URLs TO ASSETS
        let assets = recordedURLs.map { AVURLAsset(url: $0) }
        self.previewURL = nil
        // MERGING VIDEOS
        mergeVideos(assets: assets) { exporter in
            exporter.exportAsynchronously {
                if exporter.status == .failed {
                    // HANDLE ERROR
                    if let exportError = exporter.error {
                        print(exportError)
                    }
                } else if let finalURL = exporter.outputURL {
                    print(finalURL)
                    DispatchQueue.main.async {
                        self.previewURL = finalURL
                        print("inside final url")
                    }
                }
            }
        }
    }

    // MARK: Merging & Compression

    /// Concatenates the given assets into one composition and hands back a
    /// configured (not yet started) export session.
    /// - Parameters:
    ///   - assets: Clips to append, in order.
    ///   - completion: Receives the export session. It is not called when the
    ///     composition tracks or the export session cannot be created.
    func mergeVideos(assets: [AVURLAsset], completion: @escaping (_ exporter: AVAssetExportSession) -> ()) {
        let composition = AVMutableComposition()
        var lastTime: CMTime = .zero

        guard let videoTrack = composition.addMutableTrack(withMediaType: .video, preferredTrackID: Int32(kCMPersistentTrackID_Invalid)) else { return }
        guard let audioTrack = composition.addMutableTrack(withMediaType: .audio, preferredTrackID: Int32(kCMPersistentTrackID_Invalid)) else { return }

        for asset in assets {
            // Skip clips without a video track instead of indexing out of range.
            guard let sourceVideo = asset.tracks(withMediaType: .video).first else { continue }
            let clipRange = CMTimeRange(start: .zero, duration: asset.duration)
            // Linking Audio and Video
            do {
                try videoTrack.insertTimeRange(clipRange, of: sourceVideo, at: lastTime)
                // Safe Check if Video has Audio
                if let sourceAudio = asset.tracks(withMediaType: .audio).first {
                    try audioTrack.insertTimeRange(clipRange, of: sourceAudio, at: lastTime)
                }
            } catch {
                // HANDLE ERROR
                print(error.localizedDescription)
            }
            // Updating Last Time
            lastTime = CMTimeAdd(lastTime, asset.duration)
        }

        // MARK: Temp Output URL
        let tempURL = URL(fileURLWithPath: NSTemporaryDirectory() + "Reel-\(Date()).mp4")

        // VIDEO IS ROTATED
        // BRINGING BACK TO ORIGINAL TRANSFORM
        let layerInstructions = AVMutableVideoCompositionLayerInstruction(assetTrack: videoTrack)

        // MARK: Transform
        var transform = CGAffineTransform.identity
        transform = transform.rotated(by: 90 * (.pi / 180))
        transform = transform.translatedBy(x: 0, y: -videoTrack.naturalSize.height)
        layerInstructions.setTransform(transform, at: .zero)

        let instructions = AVMutableVideoCompositionInstruction()
        instructions.timeRange = CMTimeRange(start: .zero, duration: lastTime)
        instructions.layerInstructions = [layerInstructions]

        let videoComposition = AVMutableVideoComposition()
        // Width and height are swapped to match the 90-degree rotation above.
        videoComposition.renderSize = CGSize(width: videoTrack.naturalSize.height, height: videoTrack.naturalSize.width)
        videoComposition.instructions = [instructions]
        videoComposition.frameDuration = CMTimeMake(value: 1, timescale: 30)

        guard let exporter = AVAssetExportSession(asset: composition, presetName: AVAssetExportPresetHighestQuality) else { return }
        exporter.outputFileType = .mp4
        exporter.outputURL = tempURL
        exporter.videoComposition = videoComposition
        completion(exporter)
    }

    /// Re-encodes a video at medium quality as MP4.
    /// - Parameters:
    ///   - inputURL: Source video.
    ///   - outputURL: Destination of the compressed file.
    ///   - handler: Called with the export session once the export has
    ///     finished, or with nil if the session could not be created.
    func compressVideo(inputURL: URL,
                       outputURL: URL,
                       handler: @escaping (_ exportSession: AVAssetExportSession?) -> Void) {
        let urlAsset = AVURLAsset(url: inputURL, options: nil)
        guard let exportSession = AVAssetExportSession(asset: urlAsset,
                                                       presetName: AVAssetExportPresetMediumQuality) else {
            handler(nil)
            return
        }
        exportSession.outputURL = outputURL
        exportSession.outputFileType = .mp4
        exportSession.exportAsynchronously {
            handler(exportSession)
        }
    }
}
You can ignore the merge videos code as there is only one recorded url but right now, you should be able to run this code if you've added camera and microphone permissions to your info.plist.
It currently has the buggy mixing code where background audio does work the first time but after restarting audio session, it no longer works. Any help would be greatly appreciated!
Today I created a small app to try the ShazamKit music detection ability in iOS 15. I followed a tutorial on YouTube; I have an Apple developer membership and have enabled the ShazamKit service for this app identifier.
In short, I want to detect a song metadata with ShazamKit from the audio file inside app.
The problem is that neither delegate method, didFind nor didNotFindMatchFor, fires, even though I have generated the signature successfully. I would expect at least didNotFindMatchFor to be called with an error when no match is found, but it isn't.
It's a pretty new feature, there is not that much related stuff I could find. Appreciate for any help.
More info: I do find some stuff using audioEngine, however that use output from Microphone, if user listen music with a headphone, that would be not possible. In my case I want to use the file itself since my production app is a music player, which stores a lot audio files in sandbox.
import ShazamKit
import UIKit
/// Test view controller with a single "Recognize" button that builds a
/// ShazamKit signature from a bundled audio file and asks an `SHSession`
/// to match it.
class ViewController: UIViewController {
    lazy var recoButton: UIButton = {
        let button = UIButton(frame: CGRect(x: 0, y: 0, width: 120, height: 60))
        button.layer.cornerRadius = 8
        button.backgroundColor = .brown
        button.setTitle("Recognize", for: .normal)
        button.addTarget(self, action: #selector(recognizeSong), for: .touchUpInside)
        return button
    }()

    override func viewDidLoad() {
        super.viewDidLoad()
        view.addSubview(recoButton)
        recoButton.center = view.center
    }

    /// Button action: reads part of the bundled track into a PCM buffer,
    /// generates a signature from it and submits it for matching. Results are
    /// delivered through the `SHSessionDelegate` callbacks.
    @objc func recognizeSong(_ sender: UIButton) {
        print("reco button tapped")
        // ShazamKit is available from iOS 15
        if #available(iOS 15, *) {
            // session
            let session = SHSession()
            // delegate
            session.delegate = self
            do {
                // get track
                guard let url = Bundle.main.url(forResource: "Baby One More Time", withExtension: "mp3") else {
                    print("url is NULLLL")
                    return
                }
                // create audio file
                let file = try AVAudioFile(forReading: url)
                // NOTE(review): the divisor 26 is unexplained; presumably it limits
                // how much of the track is read. Confirm the intended duration.
                let frameCapacity = AVAudioFrameCount(file.length / 26)
                // Audio -> Buffer
                guard let buffer = AVAudioPCMBuffer(pcmFormat: file.processingFormat, frameCapacity: frameCapacity) else {
                    print("Failed to create a buffer")
                    return
                }
                // Read file into buffer
                try file.read(into: buffer)
                // SignatureGenerator
                let generator = SHSignatureGenerator()
                try generator.append(buffer, at: nil)
                // create signature
                let signature = generator.signature()
                // try to match
                session.match(signature)
            } catch {
                print(error)
            }
        } else {
            // unavailable alert
        }
    }
}
// MARK: - SHSessionDelegate
extension ViewController: SHSessionDelegate {
    /// Called when the session found a match; prints title, artist and
    /// artwork URL of every matched media item (with placeholders for
    /// missing values).
    func session(_ session: SHSession, didFind match: SHMatch) {
        print("Match found!")
        // get results
        for mediaItem in match.mediaItems {
            print(mediaItem.title ?? "title")
            print(mediaItem.artist ?? "artist")
            print(mediaItem.artworkURL?.absoluteURL ?? "artwork url")
        }
    }

    /// Called when no match was found; prints the error when one is supplied.
    func session(_ session: SHSession, didNotFindMatchFor signature: SHSignature, error: Error?) {
        guard let failure = error else { return }
        print(failure)
    }
}
Based on today's testing, I found that the input audio needs to be converted to AVAudioFormat(standardFormatWithSampleRate: 44100, channels: 1) using the built-in AVAudioConverter. Once the signature is generated from the converted output buffer, the music is recognized.
I pick 10+ music files for a test run, all of them could be detected except one. And the interesting thing is this music file could be detected by Shazam app, I have no idea what is the reason as there is no error is shown for the un-detected music song.
Anyway, now it is worked. Update code as below, it is just a combination of several functions for test purpose, you should separate them into different functions for production.
/// Button action: converts the first chunk of a bundled track to the standard
/// 44.1 kHz mono format, generates a ShazamKit signature from the converted
/// audio and submits it for matching.
///
/// Matching is attempted only when conversion and signature input succeeded;
/// a thrown error no longer falls through to matching an empty signature.
@objc func recognizeSong(_ sender: UIButton) {
    print("reco button tapped")
    // ShazamKit is available from iOS 15
    if #available(iOS 15, *) {
        // session
        let session = SHSession()
        session.delegate = self

        guard let url = Bundle.main.url(forResource: "You Belong With Me", withExtension: "mp3") else {
            return
        }
        guard let audioFormat = AVAudioFormat(standardFormatWithSampleRate: 44100, channels: 1) else {
            return
        }

        // Buffer capacity in frames: ten seconds at 44.1 kHz.
        let bufferCapacity: AVAudioFrameCount = 44100 * 10
        let generator = SHSignatureGenerator()
        do {
            let audioFile = try AVAudioFile(forReading: url)
            guard let inputBuffer = AVAudioPCMBuffer(pcmFormat: audioFile.processingFormat, frameCapacity: bufferCapacity),
                  let outputBuffer = AVAudioPCMBuffer(pcmFormat: audioFormat, frameCapacity: bufferCapacity) else {
                return
            }

            // Supplies the converter with data read from the file.
            let inputBlock: AVAudioConverterInputBlock = { _, outStatus in
                do {
                    try audioFile.read(into: inputBuffer)
                    outStatus.pointee = .haveData
                    return inputBuffer
                } catch {
                    // A failed read at or past the end of the file means the
                    // stream is finished; anything else is "no data for now".
                    if audioFile.framePosition >= audioFile.length {
                        outStatus.pointee = .endOfStream
                    } else {
                        outStatus.pointee = .noDataNow
                    }
                    return nil
                }
            }

            guard let converter = AVAudioConverter(from: audioFile.processingFormat, to: audioFormat) else {
                return
            }

            var conversionError: NSError?
            let status = converter.convert(to: outputBuffer, error: &conversionError, withInputFrom: inputBlock)
            if status == .error || status == .endOfStream {
                // Surface the reason instead of failing silently.
                if let conversionError = conversionError {
                    print(conversionError)
                }
                return
            }

            try generator.append(outputBuffer, at: nil)
            if status == .inputRanDry {
                return
            }
        } catch {
            print(error)
            // Nothing usable was appended to the generator; do not match.
            return
        }

        // create signature
        let signature = generator.signature()
        // try to match
        session.match(signature)
    } else {
        // unavailable alert
    }
}
}
Reference: Apple forums
I'm implementing a speech recognition module for an app. It works fine, however there are some additional things that I need to do. For example, I need to know if a user is speaking or shouting. I know, I can achieve that by knowing the frequency of the sound. Here is how I implement it:
let audioEngine = AVAudioEngine()
let speechRecognizer: SFSpeechRecognizer? = SFSpeechRecognizer()
let request = SFSpeechAudioBufferRecognitionRequest()
var recognitionTask = SFSpeechRecognitionTask()
/// Starts capturing microphone audio and running speech recognition on it.
///
/// The recognizer that actually performs the task (`speechRecognizer`) is the
/// one checked for availability, and it is checked before the audio engine is
/// started, so an unavailable recognizer leaves no tap installed and no
/// engine running.
func recordAndRecognizeSpeech() {
    // `speechRecognizer` is nil when the current locale is not supported.
    guard let recognizer = speechRecognizer, recognizer.isAvailable else {
        return
    }

    let node = audioEngine.inputNode
    let recordingFormat = node.outputFormat(forBus: 0)
    node.installTap(onBus: 0, bufferSize: 1024, format: recordingFormat) { (buffer, _) in
        self.request.append(buffer)
    }

    audioEngine.prepare()
    do {
        try audioEngine.start()
    } catch {
        // Do not leave the tap behind when the engine could not be started.
        node.removeTap(onBus: 0)
        return print(error)
    }

    recognitionTask = recognizer.recognitionTask(with: request, resultHandler: { (result, error) in
        //Handling speech recognition tasks here
    })
}
This works fine for the speech recognition, but how can I get the frequency or amplitude value of the sound?
Running on iOS 10 I am able to load a sample from file into an AVAudioUnitSampler on demand and it plays correctly.
However when I run the same code on iOS 11 the audio doesn't play. In order to get it to work I have to preload the audio.
Has something changed in iOS 11 that causes this to no longer work?
The app is designed to play from a selection of about 100 samples, some of them longer than others, so I can't load them all into memory in advance and thus need to load them on demand.
I've tried this on iOS 11, 11.0.1 and 11.0.2.
Any suggestions?
This is my initial version (works on iOS 10 but not iOS 11):
var audioEngine:AVAudioEngine!
var mixer:AVAudioMixerNode!
var sampler:AVAudioUnitSampler!
/// Builds the audio graph: creates the engine, starts it, then attaches a
/// sampler and connects it to the main mixer. No sample is loaded here.
override func viewDidLoad() {
    super.viewDidLoad()

    let engine = AVAudioEngine()
    audioEngine = engine

    let mainMixer = engine.mainMixerNode
    mixer = mainMixer
    mainMixer.volume = 1.0

    // The engine is started before the sampler is attached.
    do {
        try engine.start()
    } catch {
        print(error)
    }

    let samplerUnit = AVAudioUnitSampler()
    sampler = samplerUnit
    engine.attach(samplerUnit)
    engine.connect(samplerUnit, to: mainMixer, format: nil)
}
/// Loads the bundled sample "a73.wav" into the sampler on demand and
/// immediately triggers note 60 at full velocity on channel 0.
@IBAction func audio1Tapped(_ sender: Any) {
    guard let audioPath = Bundle.main.url(forResource: "a73", withExtension: "wav") else {
        print("Failed to find audio file")
        return
    }
    do {
        try sampler.loadAudioFiles(at: [audioPath])
        sampler.startNote(60, withVelocity: 127, onChannel: 0)
    } catch {
        print(error.localizedDescription)
    }
}
The following works on iOS 11:
var audioEngine:AVAudioEngine!
var mixer:AVAudioMixerNode!
var sampler:AVAudioUnitSampler!
/// Builds the audio graph (engine, main mixer, sampler) and preloads the
/// bundled sample "a73.wav" into the sampler so it is ready before the
/// first tap.
override func viewDidLoad() {
    super.viewDidLoad()

    let engine = AVAudioEngine()
    audioEngine = engine

    let mainMixer = engine.mainMixerNode
    mixer = mainMixer
    mainMixer.volume = 1.0

    do {
        try engine.start()
    } catch {
        print(error)
    }

    let samplerUnit = AVAudioUnitSampler()
    sampler = samplerUnit
    engine.attach(samplerUnit)
    engine.connect(samplerUnit, to: mainMixer, format: nil)

    // Preload the sample.
    guard let audioPath = Bundle.main.url(forResource: "a73", withExtension: "wav") else {
        print("Failed to find audio file")
        return
    }
    do {
        try samplerUnit.loadAudioFiles(at: [audioPath])
    } catch {
        print(error.localizedDescription)
    }
}
/// Triggers note 60 at full velocity on channel 0 using the sample that
/// `viewDidLoad()` preloaded into the sampler.
///
/// - Parameter sender: The control that triggered the action (unused).
@IBAction func audio1Tapped(_ sender: Any) {
    sampler.startNote(60, withVelocity: 127, onChannel: 0)
}
I am trying to capture camera video in memory using AVCaptureSession so that I can later write the video data to a movie file. While I have been able to successfully start a capture session, I am not able to successfully write the CMSampleBuffers I've captured to a compressed movie file using AVAssetWriter.
Appending sample buffers using AVAssetWriterInput's append method fails and when I inspect the AVAssetWriter's error property, I get the following:
Error Domain=AVFoundationErrorDomain Code=-11800 "The operation could not be completed" UserInfo={NSUnderlyingError=0x17005d070 {Error Domain=NSOSStatusErrorDomain Code=-12780 "(null)"}, NSLocalizedFailureReason=An unknown error occurred (-12780), NSLocalizedDescription=The operation could not be completed}
As far as I can tell, -11800 indicates an AVErrorUnknown; however, I have not been able to find information about the -12780 error code, which appears to be undocumented. Below I have pasted the main files of the example project I set up to demonstrate the issue.
Any guidance would be greatly appreciated. Thanks!
ViewController.swift
import UIKit
import AVFoundation
class ViewController: UIViewController, AVCaptureVideoDataOutputSampleBufferDelegate {
private let recordingClipQueue = DispatchQueue(label: "com.example.recordingClipQueue")
private let videoDataOutputQueue = DispatchQueue(label: "com.example.videoDataOutputQueue")
private let session = AVCaptureSession()
private var backfillSampleBufferList = [CMSampleBuffer]()
/// Configures the capture session (640x480 preset, default video device,
/// BGRA video data output delivered on `videoDataOutputQueue`) and starts it.
/// Every setup failure is logged and aborts configuration.
override func viewDidLoad() {
    super.viewDidLoad()
    session.sessionPreset = AVCaptureSessionPreset640x480

    // The default-device lookup is implicitly unwrapped; bind it explicitly so a
    // missing camera is reported instead of crashing on the nil value.
    guard let videoDevice = AVCaptureDevice.defaultDevice(withMediaType: AVMediaTypeVideo) else {
        print("No video capture device is available.")
        return
    }

    let videoDeviceInput: AVCaptureDeviceInput
    do {
        videoDeviceInput = try AVCaptureDeviceInput(device: videoDevice)
    } catch {
        print("Error creating device input from video device: \(error).")
        return
    }
    guard session.canAddInput(videoDeviceInput) else {
        print("Could not add video device input to capture session.")
        return
    }
    session.addInput(videoDeviceInput)

    let videoDataOutput = AVCaptureVideoDataOutput()
    videoDataOutput.videoSettings = [ kCVPixelBufferPixelFormatTypeKey as NSString : Int(kCMPixelFormat_32BGRA) ]
    videoDataOutput.alwaysDiscardsLateVideoFrames = true
    videoDataOutput.setSampleBufferDelegate(self, queue: videoDataOutputQueue)
    guard session.canAddOutput(videoDataOutput) else {
        print("Could not add video data output to capture session.")
        return
    }
    session.addOutput(videoDataOutput)

    // Optional chaining: a missing video connection no longer traps.
    videoDataOutput.connection(withMediaType: AVMediaTypeVideo)?.isEnabled = true
    session.startRunning()
}
/// Returns the time span, in seconds, between the output presentation
/// timestamps of the first and last buffered sample buffers.
///
/// - Returns: `0.0` when the backfill list is empty; otherwise the span in seconds.
private func backfillSizeInSeconds() -> Double {
    guard let earliestSampleBuffer = backfillSampleBufferList.first,
        let latestSampleBuffer = backfillSampleBufferList.last else {
        return 0.0
    }
    // Subtract as CMTime values so the result stays correct even if the two
    // timestamps carry different timescales (raw `.value`s are not comparable then).
    let span = CMTimeSubtract(CMSampleBufferGetOutputPresentationTimeStamp(latestSampleBuffer),
                              CMSampleBufferGetOutputPresentationTimeStamp(earliestSampleBuffer))
    return CMTimeGetSeconds(span)
}
// Writes the buffered sample buffers to "recorded_clip.mp4" in the documents
// directory through an AVAssetWriter / AVAssetWriterInput pair. Each failure
// along the way is printed and aborts the export. Buffers are appended on
// `recordingClipQueue` and removed from `backfillSampleBufferList` as they go.
private func createClipFromBackfill() {
guard backfillSampleBufferList.count > 0 else {
print("createClipFromBackfill() called before any samples were recorded.")
return
}
// NOTE(review): the path ends in ".mp4" while the writer below is created with
// AVFileTypeQuickTimeMovie — confirm the container/extension pairing is intended.
let clipURL = URL(fileURLWithPath:
NSSearchPathForDirectoriesInDomains(.documentDirectory, .userDomainMask, true)[0] +
"/recorded_clip.mp4")
// Remove any clip left at the destination; a failed delete is only logged.
if FileManager.default.fileExists(atPath: clipURL.path) {
do {
try FileManager.default.removeItem(atPath: clipURL.path)
} catch {
print("Could not delete existing clip file: \(error).")
}
}
var _videoFileWriter: AVAssetWriter?
do {
_videoFileWriter = try AVAssetWriter(url: clipURL, fileType: AVFileTypeQuickTimeMovie)
} catch {
print("Could not create video file writer: \(error).")
return
}
guard let videoFileWriter = _videoFileWriter else {
print("Video writer was nil.")
return
}
// Force-unwrapped: traps if no settings assistant exists for this preset.
let settingsAssistant = AVOutputSettingsAssistant(preset: AVOutputSettingsPreset640x480)!
guard videoFileWriter.canApply(outputSettings: settingsAssistant.videoSettings, forMediaType: AVMediaTypeVideo) else {
print("Video file writer could not apply video output settings.")
return
}
// Safe to force-unwrap: the guard at the top ensured the list is non-empty.
let earliestRecordedSampleBuffer = backfillSampleBufferList.first!
let _formatDescription = CMSampleBufferGetFormatDescription(earliestRecordedSampleBuffer)
guard let formatDescription = _formatDescription else {
print("Earliest recording pixel buffer format description was nil.")
return
}
// The first buffer's format description is passed as the source format hint.
let videoWriterInput = AVAssetWriterInput(mediaType: AVMediaTypeVideo,
outputSettings: settingsAssistant.videoSettings,
sourceFormatHint: formatDescription)
guard videoFileWriter.canAdd(videoWriterInput) else {
print("Could not add video writer input to video file writer.")
return
}
videoFileWriter.add(videoWriterInput)
guard videoFileWriter.startWriting() else {
print("Video file writer not ready to write file.")
return
}
// The session starts at the earliest buffer's output presentation timestamp.
videoFileWriter.startSession(atSourceTime: CMSampleBufferGetOutputPresentationTimeStamp(earliestRecordedSampleBuffer))
videoWriterInput.requestMediaDataWhenReady(on: recordingClipQueue) {
// Drain the backfill list for as long as the input accepts more data.
while videoWriterInput.isReadyForMoreMediaData {
if self.backfillSampleBufferList.count > 0 {
// The head buffer is deep-copied again before being appended.
let sampleBufferToAppend = self.backfillSampleBufferList.first!.deepCopy()
let appendSampleBufferSucceeded = videoWriterInput.append(sampleBufferToAppend)
if !appendSampleBufferSucceeded {
// `error!` is force-unwrapped here: traps if the writer reports no error.
print("Failed to append sample buffer to asset writer input: \(videoFileWriter.error!)")
print("Video file writer status: \(videoFileWriter.status.rawValue)")
}
// The buffer is dropped from the list whether or not the append succeeded.
self.backfillSampleBufferList.remove(at: 0)
} else {
// Nothing left to append: finish the input and finalize the file.
videoWriterInput.markAsFinished()
videoFileWriter.finishWriting {
print("Saved clip to \(clipURL)")
}
break
}
}
}
}
// MARK: AVCaptureVideoDataOutputSampleBufferDelegate
/// Stores a deep copy of each delivered sample buffer in the backfill list.
/// Once the list spans more than 3 seconds, the session is stopped and the
/// clip is written out.
func captureOutput(_ captureOutput: AVCaptureOutput!,
                   didOutputSampleBuffer sampleBuffer: CMSampleBuffer!,
                   from connection: AVCaptureConnection!) {
    if let captured = sampleBuffer {
        // Keep a private copy rather than the delivered buffer itself.
        backfillSampleBufferList.append(captured.deepCopy())

        let hasEnoughFootage = backfillSizeInSeconds() > 3.0
        if hasEnoughFootage {
            session.stopRunning()
            createClipFromBackfill()
        }
    } else {
        print("Captured sample buffer was nil.")
    }
}
/// Logs a message for the `didDrop` delegate callback; the arguments are not inspected.
func captureOutput(_ captureOutput: AVCaptureOutput!,
                   didDrop sampleBuffer: CMSampleBuffer!,
                   from connection: AVCaptureConnection!) {
    let message = "Sample buffer dropped."
    print(message)
}
}
CVPixelBuffer+Copy.swift:
import CoreVideo
extension CVPixelBuffer {
    /// Creates a new pixel buffer with the same dimensions and pixel format as
    /// the receiver and copies the pixel data and propagated attachments into it.
    ///
    /// Pixel data is copied row by row (and plane by plane for planar formats)
    /// so the copy stays correct when the new buffer's bytes-per-row differs
    /// from the source's.
    ///
    /// - Returns: The newly created copy. Traps if the copy cannot be created
    ///   or a base address is unavailable.
    func deepCopy() -> CVPixelBuffer {
        precondition(CFGetTypeID(self) == CVPixelBufferGetTypeID(), "deepCopy() cannot copy a non-CVPixelBuffer")

        // Copies `rowCount` rows between two regions that may have different strides.
        func copyRows(from source: UnsafeMutableRawPointer?,
                      sourceBytesPerRow: Int,
                      to destination: UnsafeMutableRawPointer?,
                      destinationBytesPerRow: Int,
                      rowCount: Int) {
            guard let source = source, let destination = destination else {
                print("Pixel buffer base address was nil.")
                fatalError()
            }
            if sourceBytesPerRow == destinationBytesPerRow {
                // Identical layout: one contiguous copy is enough.
                memcpy(destination, source, rowCount * sourceBytesPerRow)
                return
            }
            // Strides differ: copy only the bytes both rows can hold.
            let bytesToCopy = min(sourceBytesPerRow, destinationBytesPerRow)
            for row in 0..<rowCount {
                memcpy(destination + row * destinationBytesPerRow,
                       source + row * sourceBytesPerRow,
                       bytesToCopy)
            }
        }

        var _copy: CVPixelBuffer?
        // NOTE(review): the propagated attachments are passed as the creation
        // attributes, as in the original code — confirm this is intended.
        CVPixelBufferCreate(
            nil,
            CVPixelBufferGetWidth(self),
            CVPixelBufferGetHeight(self),
            CVPixelBufferGetPixelFormatType(self),
            CVBufferGetAttachments(self, CVAttachmentMode.shouldPropagate),
            &_copy)
        guard let copy = _copy else {
            print("Pixel buffer copy was nil.")
            fatalError()
        }
        CVBufferPropagateAttachments(self, copy)

        CVPixelBufferLockBaseAddress(self, CVPixelBufferLockFlags.readOnly)
        CVPixelBufferLockBaseAddress(copy, CVPixelBufferLockFlags(rawValue: 0))
        defer {
            CVPixelBufferUnlockBaseAddress(copy, CVPixelBufferLockFlags(rawValue: 0))
            CVPixelBufferUnlockBaseAddress(self, CVPixelBufferLockFlags.readOnly)
        }

        if CVPixelBufferIsPlanar(self) {
            // Planar formats keep each plane at its own address with its own stride.
            for plane in 0..<CVPixelBufferGetPlaneCount(self) {
                copyRows(from: CVPixelBufferGetBaseAddressOfPlane(self, plane),
                         sourceBytesPerRow: CVPixelBufferGetBytesPerRowOfPlane(self, plane),
                         to: CVPixelBufferGetBaseAddressOfPlane(copy, plane),
                         destinationBytesPerRow: CVPixelBufferGetBytesPerRowOfPlane(copy, plane),
                         rowCount: CVPixelBufferGetHeightOfPlane(self, plane))
            }
        } else {
            copyRows(from: CVPixelBufferGetBaseAddress(self),
                     sourceBytesPerRow: CVPixelBufferGetBytesPerRow(self),
                     to: CVPixelBufferGetBaseAddress(copy),
                     destinationBytesPerRow: CVPixelBufferGetBytesPerRow(copy),
                     rowCount: CVPixelBufferGetHeight(self))
        }
        return copy
    }
}
CMSampleBuffer+Copy.swift:
import CoreMedia
extension CMSampleBuffer {
    /// Builds a new sample buffer around a deep copy of the receiver's image
    /// buffer, reusing the receiver's format description and the timing info
    /// of its first sample, with the presentation timestamp replaced by the
    /// receiver's output presentation timestamp.
    ///
    /// - Returns: The copied sample buffer. Traps (after logging) on any failure.
    func deepCopy() -> CMSampleBuffer {
        guard let sourceImage = CMSampleBufferGetImageBuffer(self) else {
            print("Pixel buffer to copy was nil.")
            fatalError()
        }
        let clonedImage = sourceImage.deepCopy()

        guard let videoFormat = CMSampleBufferGetFormatDescription(self) else {
            print("Format description to copy was nil.")
            fatalError()
        }

        // Start from the first sample's timing, then override the presentation time.
        var timing = kCMTimingInfoInvalid
        let timingStatus = CMSampleBufferGetSampleTimingInfo(self, 0, &timing)
        if timingStatus != noErr {
            print("Could not get timing info to copy: \(timingStatus).")
            fatalError()
        }
        timing.presentationTimeStamp = CMSampleBufferGetOutputPresentationTimeStamp(self)

        var duplicate: CMSampleBuffer?
        let creationStatus = CMSampleBufferCreateForImageBuffer(kCFAllocatorDefault,
                                                                clonedImage,
                                                                true,
                                                                nil,
                                                                nil,
                                                                videoFormat,
                                                                &timing,
                                                                &duplicate)
        if creationStatus != noErr {
            print("Error creating copy of sample buffer: \(creationStatus).")
            fatalError()
        }
        if let result = duplicate {
            return result
        }
        print("Copied sample buffer was nil.")
        fatalError()
    }
}
I also ran into this while trying to synthesize videos. I finally figured out that -[AVAssetWriterInput appendSampleBuffer:] only works on device (as of iOS 11.2.6 anyway) if the underlying pixel buffer is backed by an IOSurface.
If you modify your CVPixelBuffer.deepCopy() method to include the (id)kCVPixelBufferIOSurfacePropertiesKey: @{} key-value pair in the attributes dictionary you pass to CVPixelBufferCreate, it'll probably work.
After more research and experimentation, it appears using AVAssetWriterInputPixelBufferAdaptor to append the CVPixelBuffers of the CMSampleBuffers I'm storing to the AVAssetWriterInput works without generating an error.
Below is the modified version of ViewController.swift implementation that uses AVAssetWriterInputPixelBufferAdaptor to append pixel buffers.
ViewController.swift
import UIKit
import AVFoundation
import Photos
class ViewController: UIViewController, AVCaptureVideoDataOutputSampleBufferDelegate {
private let recordingClipQueue = DispatchQueue(label: "com.example.recordingClipQueue")
private let videoDataOutputQueue = DispatchQueue(label: "com.example.videoDataOutputQueue")
private let session = AVCaptureSession()
private var backfillSampleBufferList = [CMSampleBuffer]()
/// Configures the capture session (640x480 preset, default video device,
/// BGRA video data output delivered on `videoDataOutputQueue`) and starts it.
/// Every setup failure is logged and aborts configuration.
override func viewDidLoad() {
    super.viewDidLoad()
    session.sessionPreset = AVCaptureSessionPreset640x480

    // The default-device lookup is implicitly unwrapped; bind it explicitly so a
    // missing camera is reported instead of crashing on the nil value.
    guard let videoDevice = AVCaptureDevice.defaultDevice(withMediaType: AVMediaTypeVideo) else {
        print("No video capture device is available.")
        return
    }

    let videoDeviceInput: AVCaptureDeviceInput
    do {
        videoDeviceInput = try AVCaptureDeviceInput(device: videoDevice)
    } catch {
        print("Error creating device input from video device: \(error).")
        return
    }
    guard session.canAddInput(videoDeviceInput) else {
        print("Could not add video device input to capture session.")
        return
    }
    session.addInput(videoDeviceInput)

    let videoDataOutput = AVCaptureVideoDataOutput()
    videoDataOutput.videoSettings = [ kCVPixelBufferPixelFormatTypeKey as NSString : Int(kCMPixelFormat_32BGRA) ]
    videoDataOutput.alwaysDiscardsLateVideoFrames = true
    videoDataOutput.setSampleBufferDelegate(self, queue: videoDataOutputQueue)
    guard session.canAddOutput(videoDataOutput) else {
        print("Could not add video data output to capture session.")
        return
    }
    session.addOutput(videoDataOutput)

    // Optional chaining: a missing video connection no longer traps.
    videoDataOutput.connection(withMediaType: AVMediaTypeVideo)?.isEnabled = true
    session.startRunning()
}
/// Returns the time span, in seconds, between the output presentation
/// timestamps of the first and last buffered sample buffers.
///
/// - Returns: `0.0` when the backfill list is empty; otherwise the span in seconds.
private func backfillSizeInSeconds() -> Double {
    guard let earliestSampleBuffer = backfillSampleBufferList.first,
        let latestSampleBuffer = backfillSampleBufferList.last else {
        return 0.0
    }
    // Subtract as CMTime values so the result stays correct even if the two
    // timestamps carry different timescales (raw `.value`s are not comparable then).
    let span = CMTimeSubtract(CMSampleBufferGetOutputPresentationTimeStamp(latestSampleBuffer),
                              CMSampleBufferGetOutputPresentationTimeStamp(earliestSampleBuffer))
    return CMTimeGetSeconds(span)
}
/// Writes the buffered sample buffers to "recorded_clip.mp4" in the documents
/// directory, appending each frame's pixel buffer through an
/// `AVAssetWriterInputPixelBufferAdaptor`.
///
/// Every setup failure is logged and aborts the export. Frames are appended on
/// `recordingClipQueue` and removed from `backfillSampleBufferList` as they go.
private func createClipFromBackfill() {
    guard let earliestRecordedSampleBuffer = backfillSampleBufferList.first else {
        print("createClipFromBackfill() called before any samples were recorded.")
        return
    }

    let clipURL = URL(fileURLWithPath:
        NSSearchPathForDirectoriesInDomains(.documentDirectory, .userDomainMask, true)[0] +
        "/recorded_clip.mp4")

    // Remove any clip left at the destination; a failed delete is only logged.
    if FileManager.default.fileExists(atPath: clipURL.path) {
        do {
            try FileManager.default.removeItem(atPath: clipURL.path)
        } catch {
            print("Could not delete existing clip file: \(error).")
        }
    }

    // The initializer either throws or yields a writer, so no optional is needed.
    let videoFileWriter: AVAssetWriter
    do {
        videoFileWriter = try AVAssetWriter(url: clipURL, fileType: AVFileTypeMPEG4)
    } catch {
        print("Could not create video file writer: \(error).")
        return
    }

    guard let settingsAssistant = AVOutputSettingsAssistant(preset: AVOutputSettingsPreset640x480) else {
        print("Could not create output settings assistant for the 640x480 preset.")
        return
    }
    let videoSettings = settingsAssistant.videoSettings
    guard videoFileWriter.canApply(outputSettings: videoSettings, forMediaType: AVMediaTypeVideo) else {
        print("Video file writer could not apply video output settings.")
        return
    }

    guard let formatDescription = CMSampleBufferGetFormatDescription(earliestRecordedSampleBuffer) else {
        print("Earliest recording pixel buffer format description was nil.")
        return
    }

    // The first buffer's format description is passed as the source format hint.
    let videoWriterInput = AVAssetWriterInput(mediaType: AVMediaTypeVideo,
                                              outputSettings: videoSettings,
                                              sourceFormatHint: formatDescription)
    guard videoFileWriter.canAdd(videoWriterInput) else {
        print("Could not add video writer input to video file writer.")
        return
    }
    videoFileWriter.add(videoWriterInput)

    // The adaptor is created before startWriting(), as in the original sequence.
    let pixelAdapterBufferAttributes = [ kCVPixelBufferPixelFormatTypeKey as String : Int(kCMPixelFormat_32BGRA) ]
    let pixelAdapter = AVAssetWriterInputPixelBufferAdaptor(assetWriterInput: videoWriterInput,
                                                            sourcePixelBufferAttributes: pixelAdapterBufferAttributes)

    guard videoFileWriter.startWriting() else {
        print("Video file writer not ready to write file.")
        return
    }
    // The session starts at the earliest buffer's output presentation timestamp.
    videoFileWriter.startSession(atSourceTime: CMSampleBufferGetOutputPresentationTimeStamp(earliestRecordedSampleBuffer))

    videoWriterInput.requestMediaDataWhenReady(on: recordingClipQueue) {
        // Drain the backfill list for as long as the input accepts more data.
        while videoWriterInput.isReadyForMoreMediaData {
            guard let nextSampleBuffer = self.backfillSampleBufferList.first else {
                // Nothing left to append: finish the input and finalize the file.
                videoWriterInput.markAsFinished()
                videoFileWriter.finishWriting {
                    print("Saving clip to \(clipURL)")
                }
                break
            }

            let sampleBufferToAppend = nextSampleBuffer.deepCopy()
            if let pixelBuffer = CMSampleBufferGetImageBuffer(sampleBufferToAppend) {
                let appendSampleBufferSucceeded = pixelAdapter.append(
                    pixelBuffer,
                    withPresentationTime: CMSampleBufferGetOutputPresentationTimeStamp(sampleBufferToAppend))
                if !appendSampleBufferSucceeded {
                    // The writer's error is optional; do not trap while reporting a failure.
                    let reason = videoFileWriter.error.map { "\($0)" } ?? "unknown error"
                    print("Failed to append sample buffer to asset writer input: \(reason)")
                    print("Video file writer status: \(videoFileWriter.status.rawValue)")
                }
            } else {
                print("Sample buffer had no image buffer to append.")
            }
            // The buffer is dropped from the list whether or not the append succeeded.
            self.backfillSampleBufferList.removeFirst()
        }
    }
}
// MARK: AVCaptureVideoDataOutputSampleBufferDelegate
/// Stores a deep copy of each delivered sample buffer in the backfill list.
/// Once the list spans more than 3 seconds, the session is stopped and the
/// clip is written out.
func captureOutput(_ captureOutput: AVCaptureOutput!,
                   didOutputSampleBuffer sampleBuffer: CMSampleBuffer!,
                   from connection: AVCaptureConnection!) {
    if sampleBuffer == nil {
        print("Captured sample buffer was nil.")
        return
    }
    // Keep a private copy rather than the delivered buffer itself.
    let retainedCopy: CMSampleBuffer = sampleBuffer.deepCopy()
    backfillSampleBufferList.append(retainedCopy)

    let bufferedSeconds = backfillSizeInSeconds()
    if bufferedSeconds > 3.0 {
        session.stopRunning()
        createClipFromBackfill()
    }
}
/// Logs a message for the `didDrop` delegate callback; the arguments are not inspected.
func captureOutput(_ captureOutput: AVCaptureOutput!,
                   didDrop sampleBuffer: CMSampleBuffer!,
                   from connection: AVCaptureConnection!) {
    let message = "Sample buffer dropped."
    print(message)
}
}
I ran into issues with the same error codes when creating CVPixelBuffers and CMSampleBuffers manually to create a video with individual frames rendered by CoreGraphics. I could solve the problem by using an AVAssetWriterInputPixelBufferAdaptor instead, like you suggested in your own answer. For some reason, this was only needed when the code was run on an actual device. On the simulator, manually creating the buffers worked fine.
I noticed that the same error codes AVFoundationErrorDomain Code -11800 and NSOSStatusErrorDomain Code -12780 can also occur for other reasons, for example:
A file already exists at the destination URL provided to AVAssetWriter.
The destination URL is not a file URL (it must be created with URL.init(fileURLWithPath:) and not with URL.init(string:)).
(Posting this for the sake of completeness, your code already handles this correctly.)