I'm writing a first in first out recording app that buffers up to 2.5 mins of audio using AudioQueue. I've got most of it figured out but I'm at a roadblock trying to crop audio data.
I've seen people do it with AVAssetExportSession but it seems like it wouldn't be performant to export a new track every time the AudioQueueInputCallback is called.
I'm not married to using AVAssetExportSession by any means, so if anyone has a better idea I'm open to it.
Here's where I'm doing my write and was hoping to execute the crop.
// Estimated duration of the record file before this callback's packets are written.
// kAudioFilePropertyEstimatedDuration is only an estimate, so treat the value as approximate.
var beforeSeconds = TimeInterval()
var propertySize = UInt32(MemoryLayout.size(ofValue: beforeSeconds))
var osStatus = AudioFileGetProperty(audioRecorder.recordFile!, kAudioFilePropertyEstimatedDuration, &propertySize, &beforeSeconds)
if numPackets > 0 {
    // Write the captured packets to disk and keep the result: a failed write must not
    // advance the packet index, otherwise later packets would land at the wrong offset.
    let writeStatus = AudioFileWritePackets(audioRecorder.recordFile!,
                                            false,
                                            buffer.mAudioDataByteSize,
                                            packetDescriptions,
                                            audioRecorder.recordPacket,
                                            &numPackets,
                                            buffer.mAudioData)
    if writeStatus == noErr {
        audioRecorder.recordPacket += Int64(numPackets) // up the packet index

        // Estimated duration after the write (again, only an estimate).
        var afterSeconds = TimeInterval()
        var afterPropertySize = UInt32(MemoryLayout.size(ofValue: afterSeconds))
        let afterStatus = AudioFileGetProperty(audioRecorder.recordFile!, kAudioFilePropertyEstimatedDuration, &afterPropertySize, &afterSeconds)
        assert(afterStatus == noErr, "couldn't get record time")

        // Once the file holds 2.5 minutes, report how much audio this write added.
        if afterSeconds >= 150.0 {
            print("hit max buffer!")
            audioRecorder.onBufferMax?(afterSeconds - beforeSeconds)
        }
    } else {
        print("couldn't write packets: \(writeStatus)")
    }
}
Here's where the callback is executed
/// Exports a window of the temporary recording once it is at least 150 seconds long.
///
/// - Parameter difference: Seconds to skip at the start of the recording; the exported range
///   runs from `difference` to `WYNDRConstants.maxTimeInterval + difference`.
func onBufferMax(_ difference: Double) {
    let asset = AVAsset(url: tempFilePath)
    let duration = CMTimeGetSeconds(asset.duration)
    guard duration >= 150.0 else { return }
    guard let exporter = AVAssetExportSession(asset: asset, presetName: AVAssetExportPresetAppleM4A) else {
        print("exporter init failed")
        return
    }

    // NOTE(review): the file is named ".caf" but the output type is M4A — confirm this pairing is accepted.
    let outputURL = getDocumentsDirectory().appendingPathComponent("buffered.caf") // helper function that calls the FileManager
    // An export session does not overwrite an existing file, so clear the previous export first.
    try? FileManager.default.removeItem(at: outputURL)
    exporter.outputURL = outputURL
    exporter.outputFileType = AVFileTypeAppleM4A

    // `difference` is normally a fraction of a second; a timescale of 1 would truncate it to 0,
    // so build the times from seconds with a finer timescale.
    let timescale: CMTimeScale = 600
    let startTime = CMTime(seconds: difference, preferredTimescale: timescale)
    let endTime = CMTime(seconds: WYNDRConstants.maxTimeInterval + difference, preferredTimescale: timescale)
    exporter.timeRange = CMTimeRangeFromTimeToTime(startTime, endTime)

    exporter.exportAsynchronously(completionHandler: {
        switch exporter.status {
        case .completed:
            print("export successful")
        case .failed:
            print("failed to export")
        case .cancelled:
            print("canceled export")
        default:
            // Any other status means the export has not actually finished.
            print("export did not complete")
        }
    })
}
A ring buffer is a useful structure for storing, either in memory or on disk, the most recent n seconds of audio. Here is a simple solution that stores the audio in memory, presented in the traditional UIViewController format.
N.B 2.5 minutes of 44.1kHz audio stored as floats requires about 26MB of RAM, which is on the heavy side for a mobile device.
import AVFoundation
/// Keeps roughly the most recent 2.5 minutes of microphone audio in memory as a list of
/// tap buffers and writes them to an AAC file when recording stops.
class ViewController: UIViewController {
    let engine = AVAudioEngine()
    /// Number of frames that make up 2.5 minutes at the input sample rate.
    var requiredSamples: AVAudioFrameCount = 0
    /// Buffers in arrival order; the oldest buffer is at index 0.
    var ringBuffer: [AVAudioPCMBuffer] = []
    /// Sum of `frameLength` over every buffer currently in `ringBuffer`.
    var ringBufferSizeInSamples: AVAudioFrameCount = 0

    /// Installs a tap on the input node and starts the engine.
    func startRecording() {
        let input = engine.inputNode!
        let bus = 0
        let inputFormat = input.inputFormat(forBus: bus)
        requiredSamples = AVAudioFrameCount(inputFormat.sampleRate * 2.5 * 60)
        // Capture self weakly: the engine is owned by this controller and keeps the tap block alive.
        input.installTap(onBus: bus, bufferSize: 512, format: inputFormat) { [weak self] (buffer, time) -> Void in
            self?.appendAudioBuffer(buffer)
        }
        do {
            try engine.start()
        } catch {
            print("could not start audio engine: \(error)")
        }
    }

    /// Appends a buffer and drops the oldest ones while the remainder still covers `requiredSamples`.
    func appendAudioBuffer(_ buffer: AVAudioPCMBuffer) {
        ringBuffer.append(buffer)
        ringBufferSizeInSamples += buffer.frameLength
        // Loop rather than test once: delivered buffers need not all be the same length,
        // so a single append can make more than one old buffer redundant.
        while let oldest = ringBuffer.first,
              ringBufferSizeInSamples - oldest.frameLength >= requiredSamples {
            ringBuffer.removeFirst()
            ringBufferSizeInSamples -= oldest.frameLength
        }
    }

    /// Stops the engine and writes the buffered audio to `foo.m4a` in the documents directory.
    func stopRecording() {
        engine.stop()
        let url = FileManager.default.urls(for: .documentDirectory, in: .userDomainMask).first!.appendingPathComponent("foo.m4a")
        let settings: [String : Any] = [AVFormatIDKey: Int(kAudioFormatMPEG4AAC)]
        // write ring buffer to file.
        do {
            let file = try AVAudioFile(forWriting: url, settings: settings)
            for buffer in ringBuffer {
                try file.write(from: buffer)
            }
        } catch {
            print("could not write ring buffer: \(error)")
        }
    }

    override func viewDidLoad() {
        super.viewDidLoad()
        // example usage
        startRecording()
        DispatchQueue.main.asyncAfter(deadline: .now() + 4*60) { [weak self] in
            print("stopping")
            self?.stopRecording()
        }
    }
}
Related
I am currently using Microsoft Azure Cognitive Speech SDK to play text to speech.
I am able to get the data from the Stream which is provided in the following format (reference):
.audio16Khz32KBitRateMonoMp3
This is set like this:
// PCM format description: 32-bit float samples, 16 kHz, one channel, non-interleaved.
// The initializer result is force-unwrapped, so a nil result crashes at initialization.
private let inputFormat = AVAudioFormat(
commonFormat: .pcmFormatFloat32,
sampleRate: 16000,
channels: 1,
interleaved: false
)!
I'm using AVAudioEngine & AVAudioPlayerNode:
// Engine and player node that make up the playback graph.
let engine = AVAudioEngine()
let player = AVAudioPlayerNode()
// Attaches the player, connects it to the main mixer using `inputFormat`, and starts the engine.
override func viewDidLoad() {
super.viewDidLoad()
let mainMixer = engine.mainMixerNode
engine.attach(player)
engine.connect(player, to: mainMixer, format: inputFormat)
// `try!` crashes if the engine fails to start.
try! engine.start()
}
I am able to play this back with some success using the following:
/// Synthesizes `dialogue` with the Azure Speech SDK, appends every streamed chunk to the file at
/// `recordingPath`, and after each chunk re-reads the whole file and schedules it on `audioPlayer`.
///
/// - Parameters:
///   - dialogue: SSML passed to the synthesizer.
///   - audioPlayer: Player node the decoded buffers are scheduled on.
///   - completion: Called from a buffer completion handler once `audioAsset` is set and the
///     player's current time has reached the asset's duration.
func playAudio(dialogue: String, audioPlayer: AVAudioPlayerNode, then completion: @escaping () -> Void) {
    audioAsset = nil
    // Start from an empty file on every call.
    try? FileManager.default.removeItem(at: recordingPath)
    FileManager.default.createFile(atPath: recordingPath.path, contents: nil, attributes: nil)
    do {
        let configuration = try SPXSpeechConfiguration(subscription: Microsoft.key, region: Microsoft.region)
        configuration.setSpeechSynthesisOutputFormat(.audio16Khz32KBitRateMonoMp3)
        let synthesizer = try SPXSpeechSynthesizer(speechConfiguration: configuration, audioConfiguration: nil)
        let speechResult = try synthesizer.startSpeakingSsml(dialogue)
        let stream = try SPXAudioDataStream(from: speechResult)
        guard
            let mutableFile = FileHandle(forWritingAtPath: recordingPath.path),
            let streamData = NSMutableData(capacity: Int(bufferCapacity))
        else {
            fatalError()
        }
        while stream.read(streamData, length: bufferCapacity) > 0 {
            mutableFile.write(streamData as Data)
            mutableFile.seekToEndOfFile()
            do {
                // The whole file is decoded again for every chunk, so the cost grows with each pass.
                let buffer = try readFileIntoBuffer(audioUrl: recordingPath)
                audioPlayer.scheduleBuffer(buffer, at: currentBufferTime(buffer: buffer)) { [weak self] in
                    guard let self = self else { return }
                    if let audioAsset = self.audioAsset, audioPlayer.currentTime >= CMTimeGetSeconds(audioAsset.duration) {
                        DispatchQueue.main.async {
                            audioPlayer.stop()
                        }
                        completion()
                    }
                }
                audioPlayer.play()
            } catch {
                print("Unable To Play Azure Buffer Stream \(error)")
            }
        }
        print("Did Complete Azure Buffer Rendering To File")
        audioAsset = AVURLAsset.init(url: recordingPath, options: nil)
        mutableFile.closeFile()
    } catch {
        print("Unable To Run Azure Vocder \(error)")
    }
}
With my Buffer creation function being as follows:
/// Returns an `AVAudioTime` whose sample time is the buffer's duration in whole seconds
/// (fractional part dropped), expressed at a rate of 1.
func currentBufferTime(buffer: AVAudioPCMBuffer) -> AVAudioTime {
    let seconds: TimeInterval = Double(buffer.frameLength) / buffer.format.sampleRate
    let wholeSeconds = AVAudioFramePosition(seconds)
    return AVAudioTime(sampleTime: wholeSeconds, atRate: 1)
}
/// Reads the entire audio file at `audioUrl` into a single PCM buffer in the file's processing format.
///
/// - Parameter audioUrl: Location of the audio file to decode.
/// - Returns: A buffer holding every frame of the file.
/// - Throws: Any error from opening or reading the file, or `CocoaError(.fileReadUnknown)` when a
///   buffer for the file's length cannot be created (instead of crashing on a force-unwrap).
func readFileIntoBuffer(audioUrl: URL) throws -> AVAudioPCMBuffer {
    let audioFile = try AVAudioFile(forReading: audioUrl)
    guard
        // `length` is a 64-bit frame count; reject values a frame capacity cannot represent.
        let frameCount = AVAudioFrameCount(exactly: audioFile.length),
        let audioBuffer = AVAudioPCMBuffer(pcmFormat: audioFile.processingFormat, frameCapacity: frameCount)
    else {
        throw CocoaError(.fileReadUnknown)
    }
    try audioFile.read(into: audioBuffer)
    return audioBuffer
}
The issue is that this is not performant and the CPU is around 100% for a significant amount of time when running the function.
As such my question is what is a more optimum way of reading the data into a PCM Buffer?
I have looked at many examples and there doesn't seem to be any thing which works. For example:
/// Wraps raw PCM bytes in an `AVAudioPCMBuffer` of the given format.
///
/// The bytes are copied into the buffer's first audio buffer, so this suits mono or interleaved
/// data whose sample layout already matches `format`. It does not decode compressed audio.
///
/// - Parameters:
///   - format: PCM format describing the layout of `data`.
///   - data: Raw sample bytes.
/// - Returns: A buffer with `frameLength` set to the number of whole frames in `data`, or `nil`
///   when `data` holds no complete frame or the buffer cannot be created.
func toPCMBuffer(format: AVAudioFormat, data: NSData) -> AVAudioPCMBuffer? {
    let bytesPerFrame = format.streamDescription.pointee.mBytesPerFrame
    guard bytesPerFrame > 0, let byteCount = UInt32(exactly: data.length) else { return nil }

    let frameCount = byteCount / bytesPerFrame
    guard frameCount > 0,
          let buffer = AVAudioPCMBuffer(pcmFormat: format, frameCapacity: frameCount) else { return nil }
    buffer.frameLength = frameCount

    // Go through the audio buffer list rather than a typed channel pointer: the typed accessors
    // are nil whenever the buffer's sample type differs from the accessor's.
    let destination = buffer.mutableAudioBufferList.pointee.mBuffers
    guard let bytes = destination.mData else { return nil }
    data.getBytes(bytes, length: min(data.length, Int(destination.mDataByteSize)))
    return buffer
}
I'm trying to use AVAudioEngine instead of AVAudioPlayer because I need to do some per-packet processing as the audio is playing, but before I can get that far, I need to convert the 16-bit 8khz mono audio data to stereo so the AVAudioEngine will play it. This is my (incomplete) attempt to do it. I'm currently stuck at how to make AVAudioConverter do the mono-to-stereo conversion. If I don't use the AVAudioConverter, the iOS runtime complains that the input format doesn't match the output format. If I do use it (as below), the runtime doesn't complain, but the audio does not play back properly (likely because i'm not doing the mono-to-stereo conversion correctly). Any assistance is appreciated!
// Copies raw 16-bit mono PCM bytes into an input buffer, converts them to the main mixer's
// output format with AVAudioConverter, and starts the engine.
// Does nothing when `audioData` is nil or empty, or when a format or buffer cannot be created.
private func loadAudioData(audioData: Data?) {
// Load audio data into player
guard let audio = audioData else {return}
do {
let inputAudioFormat = AVAudioFormat(commonFormat: .pcmFormatInt16, sampleRate: Double(sampleRate), channels: 1, interleaved: false)
// The mixer's output format is read here, before the player node is attached or connected.
let outputAudioFormat = self.audioEngine.mainMixerNode.outputFormat(forBus: 0)
if inputAudioFormat != nil {
let inputStreamDescription = inputAudioFormat?.streamDescription.pointee
let outputStreamDescription = outputAudioFormat.streamDescription.pointee
let count = UInt32(audio.count)
if inputStreamDescription != nil && count > 0 {
if let ibpf = inputStreamDescription?.mBytesPerFrame {
let inputFrameCapacity = count / ibpf
// Output capacity is derived from the input byte count and the output bytes-per-frame;
// the ratio between the two sample rates is not taken into account.
let outputFrameCapacity = count / outputStreamDescription.mBytesPerFrame
self.pcmInputBuffer = AVAudioPCMBuffer(pcmFormat: inputAudioFormat!, frameCapacity: inputFrameCapacity)
self.pcmOutputBuffer = AVAudioPCMBuffer(pcmFormat: outputAudioFormat, frameCapacity: outputFrameCapacity)
if let input = self.pcmInputBuffer, let output = self.pcmOutputBuffer {
self.pcmConverter = AVAudioConverter(from: inputAudioFormat!, to: outputAudioFormat)
input.frameLength = input.frameCapacity
// Copy the raw bytes into channel 0 of the input buffer.
let b = UnsafeMutableBufferPointer(start: input.int16ChannelData?[0], count: input.stride * Int(inputFrameCapacity))
let bytesCopied = audio.copyBytes(to: b)
assert(bytesCopied == count)
audioEngine.attach(playerNode)
audioEngine.connect(playerNode, to: audioEngine.mainMixerNode, format: nil)
// The input block reports `.haveData` and returns the same buffer on every call,
// and conversion errors are discarded (`error: nil`).
self.pcmConverter?.convert(to: output, error: nil) { packets, status in
status.pointee = .haveData
return self.pcmInputBuffer // I know this is wrong, but i'm not sure how to do it correctly
}
// The converted `output` buffer is not scheduled on the player node in this version.
try audioEngine.start()
}
}
}
}
}
}
Speculative, incorrect answer
How about pcmConverter?.channelMap = [0, 0]?
Actual answer
You don't need to use the audio converter's channel map, because mono-to-stereo AVAudioConverters seem to duplicate the mono channel by default. The main problems were that outputFrameCapacity was wrong, and that you read the main mixer's outputFormat before calling audioEngine.prepare() or starting the engine.
Assuming sampleRate = 8000, an amended solution looks like this:
/// Converts raw 16-bit mono PCM bytes to the main mixer's output format and plays them.
///
/// - Parameter audioData: Raw Int16 mono samples at `sampleRate`; `nil` or empty data is ignored.
/// - Throws: The converter's error when conversion fails, or any error from starting the engine.
private func loadAudioData(audioData: Data?) throws {
    // Load audio data into player
    guard let audio = audioData else { return }

    // Build the graph and prepare the engine first so the mixer reports its real output format.
    audioEngine.attach(playerNode)
    audioEngine.connect(playerNode, to: audioEngine.mainMixerNode, format: nil)
    audioEngine.prepare() // https://stackoverflow.com/a/70392017/22147

    let outputAudioFormat = audioEngine.mainMixerNode.outputFormat(forBus: 0)
    guard let inputAudioFormat = AVAudioFormat(commonFormat: .pcmFormatInt16, sampleRate: Double(sampleRate), channels: 1, interleaved: false) else { return }

    let count = UInt32(audio.count)
    let bytesPerInputFrame = inputAudioFormat.streamDescription.pointee.mBytesPerFrame
    guard count > 0, bytesPerInputFrame > 0 else { return }

    // Scale the frame count by the sample-rate ratio and round up so the tail is not cut off.
    let inputFrameCapacity = count / bytesPerInputFrame
    let rateRatio = outputAudioFormat.sampleRate / inputAudioFormat.sampleRate
    let outputFrameCapacity = AVAudioFrameCount((Double(inputFrameCapacity) * rateRatio).rounded(.up))

    pcmInputBuffer = AVAudioPCMBuffer(pcmFormat: inputAudioFormat, frameCapacity: inputFrameCapacity)
    pcmOutputBuffer = AVAudioPCMBuffer(pcmFormat: outputAudioFormat, frameCapacity: outputFrameCapacity)
    guard let input = pcmInputBuffer, let output = pcmOutputBuffer else { return }
    guard let converter = AVAudioConverter(from: inputAudioFormat, to: outputAudioFormat) else { return }
    pcmConverter = converter

    // Copy the raw bytes into channel 0 of the input buffer.
    input.frameLength = input.frameCapacity
    let samples = UnsafeMutableBufferPointer(start: input.int16ChannelData?[0], count: input.stride * Int(inputFrameCapacity))
    let bytesCopied = audio.copyBytes(to: samples)
    assert(bytesCopied == Int(inputFrameCapacity) * Int(bytesPerInputFrame))

    // Hand the input buffer to the converter exactly once, then signal end of stream;
    // returning it on every call would feed the same audio in again.
    var inputSupplied = false
    var conversionError: NSError?
    let status = converter.convert(to: output, error: &conversionError) { _, inputStatus in
        if inputSupplied {
            inputStatus.pointee = .endOfStream
            return nil
        }
        inputSupplied = true
        inputStatus.pointee = .haveData
        return input
    }
    if status == .error, let conversionError = conversionError {
        throw conversionError
    }

    try audioEngine.start()
    playerNode.scheduleBuffer(output, completionHandler: nil)
    playerNode.play()
}
I am building an app that needs to perform analysis on the audio it receives from the microphone in real time. In my app, I also need to play a beep sound and start recording audio at the same time; in other words, I can't play the beep sound and then start recording. This introduces the problem of hearing the beep sound in my recording (this might be because I am playing the beep sound through the speaker, but unfortunately I cannot compromise in this regard either). Since the beep sound is just a tone of about 2350 Hz, I was wondering how I could exclude that range of frequencies (say from 2300 Hz to 2400 Hz) in my recordings and prevent it from influencing my audio samples. After doing some googling I came up with what I think might be the solution, a band-stop filter. According to Wikipedia: "a band-stop filter or band-rejection filter is a filter that passes most frequencies unaltered, but attenuates those in a specific range to very low levels". This seems like what I need to exclude frequencies from 2300 Hz to 2400 Hz in my recordings (or at least for the first second of the recording while the beep sound is playing). My question is: how would I implement this with AVAudioEngine? Is there a way I can turn off the filter after the first second of the recording, when the beep sound is done playing, without stopping the recording?
Since I am new to working with audio with AVAudioEngine (I've always just stuck to the higher levels of AVFoundation) I followed this tutorial to help me create a class to handle all the messy stuff. This is what my code looks like:
// Records microphone input to a CAF file through an AVAudioEngine graph while a beep file
// is played through the same engine.
class Recorder {
// Recording lifecycle states tracked in `state`.
enum RecordingState {
case recording, paused, stopped
}
private var engine: AVAudioEngine!
// Mixer that receives the microphone input; the recording tap is installed on it.
private var mixerNode: AVAudioMixerNode!
private var state: RecordingState = .stopped
// Player node used for the beep sound.
private var audioPlayer = AVAudioPlayerNode()
// Configures the audio session, then builds and prepares the engine.
init() {
setupSession()
setupEngine()
}
// Sets the session to play-and-record and activates it; errors are ignored via `try?`.
fileprivate func setupSession() {
let session = AVAudioSession.sharedInstance()
//The original tutorial sets the category to .record
//try? session.setCategory(.record)
try? session.setCategory(.playAndRecord, options: [.mixWithOthers, .defaultToSpeaker])
try? session.setActive(true, options: .notifyOthersOnDeactivation)
}
// Creates the engine and mixer, attaches the nodes, wires the graph, and prepares the engine.
fileprivate func setupEngine() {
engine = AVAudioEngine()
mixerNode = AVAudioMixerNode()
// Set volume to 0 to avoid audio feedback while recording.
mixerNode.volume = 0
engine.attach(mixerNode)
//Attach the audio player node
engine.attach(audioPlayer)
makeConnections()
// Prepare the engine in advance, in order for the system to allocate the necessary resources.
engine.prepare()
}
// Wires input -> mixerNode -> main mixer and audioPlayer -> main mixer, then schedules the beep file.
fileprivate func makeConnections() {
let inputNode = engine.inputNode
let inputFormat = inputNode.outputFormat(forBus: 0)
engine.connect(inputNode, to: mixerNode, format: inputFormat)
let mainMixerNode = engine.mainMixerNode
// Mono Float32 at the input's sample rate for the mixer -> main mixer connection.
let mixerFormat = AVAudioFormat(commonFormat: .pcmFormatFloat32, sampleRate: inputFormat.sampleRate, channels: 1, interleaved: false)
engine.connect(mixerNode, to: mainMixerNode, format: mixerFormat)
//AudioPlayer Connection
// Force-unwraps and `try!` here crash if "beep.mp3" is missing from the bundle or unreadable.
let path = Bundle.main.path(forResource: "beep.mp3", ofType:nil)!
let url = URL(fileURLWithPath: path)
let file = try! AVAudioFile(forReading: url)
engine.connect(audioPlayer, to: mainMixerNode, format: nil)
audioPlayer.scheduleFile(file, at: nil)
}
//MARK: Start Recording Function
// Installs a tap on `mixerNode` that writes each buffer to "recording.caf" in the documents
// directory, then starts the engine and the beep player.
// Throws if the output file cannot be created or the engine fails to start.
func startRecording() throws {
print("Start Recording!")
let tapNode: AVAudioNode = mixerNode
let format = tapNode.outputFormat(forBus: 0)
let documentURL = FileManager.default.urls(for: .documentDirectory, in: .userDomainMask)[0]
// AVAudioFile uses the Core Audio Format (CAF) to write to disk.
// So we're using the caf file extension.
let file = try AVAudioFile(forWriting: documentURL.appendingPathComponent("recording.caf"), settings: format.settings)
tapNode.installTap(onBus: 0, bufferSize: 4096, format: format, block: {
(buffer, time) in
// Write errors are ignored via `try?`.
try? file.write(from: buffer)
print(buffer.description)
print(buffer.stride)
// Copies channel 0's samples into an array; the array is not used further in this block.
let floatArray = Array(UnsafeBufferPointer(start: buffer.floatChannelData![0], count:Int(buffer.frameLength)))
})
try engine.start()
audioPlayer.play()
state = .recording
}
//MARK: Other recording functions
// Restarts the engine and marks the recorder as recording.
func resumeRecording() throws {
try engine.start()
state = .recording
}
// Pauses the engine and marks the recorder as paused.
func pauseRecording() {
engine.pause()
state = .paused
}
// Removes the recording tap, stops the engine, and marks the recorder as stopped.
func stopRecording() {
// Remove existing taps on nodes
mixerNode.removeTap(onBus: 0)
engine.stop()
state = .stopped
}
}
AVAudioUnitEQ supports a band-stop filter.
Perhaps something like:
// Build a one-band EQ, route player -> EQ -> main mixer, then configure the single band
// as an active band-stop filter centred on 2350 Hz.
let eq = AVAudioUnitEQ(numberOfBands: 1)
engine.attach(eq)
engine.connect(eq, to: engine.mainMixerNode, format: nil)
engine.connect(player, to: eq, format: nil)
let stopBand = eq.bands[0]
stopBand.frequency = 2350
stopBand.filterType = .bandStop
stopBand.bypass = false
A slightly more complete answer, linked to an IBAction; in this example, I use .parametric for the filter type, with more bands than required, to give a broader insight on how to use it:
/// Plays the bundled "yoursong.m4a" through a four-band parametric EQ whose third band
/// cuts the region around 2350 Hz, then stops the engine shortly after the file finishes.
@IBAction func PlayWithEQ(_ sender: Any) {
    self.engine.stop()
    self.engine = AVAudioEngine()
    let player = AVAudioPlayerNode()
    // The song is a bundled resource, so a missing or unreadable file is a programmer error.
    let url = Bundle.main.url(forResource:"yoursong", withExtension: "m4a")!
    let f = try! AVAudioFile(forReading: url)
    self.engine.attach(player)

    // adding eq effect node
    // One entry per band: centre frequency in Hz, gain in dB, bandwidth.
    // Only the 2350 Hz band changes the signal; the others are flat and are there to show the API.
    let bandSettings: [(frequency: Float, gain: Float, bandwidth: Float)] = [
        (frequency: 125, gain: 0.0, bandwidth: 1),
        (frequency: 250, gain: 0.0, bandwidth: 0.5),
        // filter of interest, rejecting 2350Hz (adjust bandwidth as needed)
        (frequency: 2350, gain: -60.0, bandwidth: 1),
        (frequency: 8000, gain: 0.0, bandwidth: 1)
    ]
    let effect = AVAudioUnitEQ(numberOfBands: bandSettings.count)
    for (band, setting) in zip(effect.bands, bandSettings) {
        band.filterType = .parametric
        band.frequency = setting.frequency
        band.gain = setting.gain
        band.bandwidth = setting.bandwidth
        // Bands are bypassed unless enabled explicitly; without this the EQ has no effect.
        band.bypass = false
    }

    self.engine.attach(effect)
    self.engine.connect(player, to: effect, format: f.processingFormat)
    let mixer = self.engine.mainMixerNode
    self.engine.connect(effect, to: mixer, format: f.processingFormat)
    player.scheduleFile(f, at: nil) {
        // Stop the engine a moment after the scheduled file completes.
        delay(0.05) {
            if self.engine.isRunning {
                self.engine.stop()
            }
        }
    }
    self.engine.prepare()
    try! self.engine.start()
    player.play()
}
I'm trying to install a tap on the output audio that is played in my app. I have no issue catching buffers from the microphone input, but when it comes to catching sound that goes through the speaker, the earpiece, or whatever the output device is, it does not succeed. Am I missing something?
In my example I'm trying to catch the audio buffer from an audio file that an AVPLayer is playing. But let's pretend I don't have access directly to the AVPlayer instance.
The goal is to perform Speech Recognition on an audio stream.
// Configures the audio session for play-and-record, installs a tap on the engine's output node,
// starts the engine, and then starts playback on `player`.
// Throws if the session cannot be configured or activated, or if the engine fails to start.
func catchAudioBuffers() throws {
let audioSession = AVAudioSession.sharedInstance()
try audioSession.setCategory(.playAndRecord, mode: .voiceChat, options: .allowBluetooth)
try audioSession.setActive(true)
let outputNode = audioEngine.outputNode
let recordingFormat = outputNode.outputFormat(forBus: 0)
// The tap is installed on the output node with an explicit (non-nil) format.
outputNode.installTap(onBus: 0, bufferSize: 1024, format: recordingFormat) { (buffer: AVAudioPCMBuffer, when: AVAudioTime) in
// PROCESS AUDIO BUFFER
}
audioEngine.prepare()
try audioEngine.start()
// For example I am playing an audio conversation with an AVPlayer and a local file.
// NOTE(review): `player` is not attached to `audioEngine` anywhere in this function — confirm
// whether its audio passes through the tapped node at all.
player.playSound()
}
This code results in a:
AVAEInternal.h:76 required condition is false: [AVAudioIONodeImpl.mm:1057:SetOutputFormat: (_isInput)]
*** Terminating app due to uncaught exception 'com.apple.coreaudio.avfaudio', reason: 'required condition is false: _isInput'
I was facing the same problem and during 2 days of brainstorming found the following.
Apple says that For AVAudioOutputNode, tap format must be specified as nil. I'm not sure that it's important but in my case, that finally worked, format was nil.
You need to start recording and don't forget to stop it.
Removing tap is really important, otherwise you will have file that you can't open.
Try to save the file with the same audio settings that you used in source file.
Here's my code that finally worked. It was partly taken from this question Saving Audio After Effect in iOS.
// Plays `audioFile` through a rate/pitch node plus optional echo and reverb nodes, and at the
// same time records the main mixer's output to "effected.caf" through a tap.
func playSound() {
// Effect parameters read from `effect`; a nil value leaves the node's setting untouched.
let rate: Float? = effect.speed
let pitch: Float? = effect.pitch
let echo: Bool? = effect.echo
let reverb: Bool? = effect.reverb
// initialize audio engine components
audioEngine = AVAudioEngine()
// node for playing audio
audioPlayerNode = AVAudioPlayerNode()
audioEngine.attach(audioPlayerNode)
// node for adjusting rate/pitch
let changeRatePitchNode = AVAudioUnitTimePitch()
if let pitch = pitch {
changeRatePitchNode.pitch = pitch
}
if let rate = rate {
changeRatePitchNode.rate = rate
}
audioEngine.attach(changeRatePitchNode)
// node for echo (a distortion unit loaded with the multiEcho1 preset)
let echoNode = AVAudioUnitDistortion()
echoNode.loadFactoryPreset(.multiEcho1)
audioEngine.attach(echoNode)
// node for reverb
let reverbNode = AVAudioUnitReverb()
reverbNode.loadFactoryPreset(.cathedral)
reverbNode.wetDryMix = 50
audioEngine.attach(reverbNode)
// connect nodes: only the effects that are switched on are placed in the chain
if echo == true && reverb == true {
connectAudioNodes(audioPlayerNode, changeRatePitchNode, echoNode, reverbNode, audioEngine.mainMixerNode, audioEngine.outputNode)
} else if echo == true {
connectAudioNodes(audioPlayerNode, changeRatePitchNode, echoNode, audioEngine.mainMixerNode, audioEngine.outputNode)
} else if reverb == true {
connectAudioNodes(audioPlayerNode, changeRatePitchNode, reverbNode, audioEngine.mainMixerNode, audioEngine.outputNode)
} else {
connectAudioNodes(audioPlayerNode, changeRatePitchNode, audioEngine.mainMixerNode, audioEngine.outputNode)
}
// schedule to play and start the engine!
audioPlayerNode.stop()
audioPlayerNode.scheduleFile(audioFile, at: nil) {
// Work out how much audio is still to be played: remaining frames divided by the sample
// rate, and additionally by the playback rate when one was supplied.
var delayInSeconds: Double = 0
if let lastRenderTime = self.audioPlayerNode.lastRenderTime, let playerTime = self.audioPlayerNode.playerTime(forNodeTime: lastRenderTime) {
if let rate = rate {
delayInSeconds = Double(self.audioFile.length - playerTime.sampleTime) / Double(self.audioFile.processingFormat.sampleRate) / Double(rate)
} else {
delayInSeconds = Double(self.audioFile.length - playerTime.sampleTime) / Double(self.audioFile.processingFormat.sampleRate)
}
}
// schedule a stop timer for when audio finishes playing
self.stopTimer = Timer(timeInterval: delayInSeconds, target: self, selector: #selector(EditViewController.stopAudio), userInfo: nil, repeats: false)
RunLoop.main.add(self.stopTimer!, forMode: RunLoop.Mode.default)
}
do {
try audioEngine.start()
} catch {
showAlert(Alerts.AudioEngineError, message: String(describing: error))
return
}
//Try to save
let dirPaths: String = (NSSearchPathForDirectoriesInDomains(.libraryDirectory, .userDomainMask, true)[0]) + "/sounds/"
let tmpFileUrl = URL(fileURLWithPath: dirPaths + "effected.caf")
//Save the tmpFileUrl into a global variable to not lose it (not important if you want to do something else)
// NOTE(review): `filePath` is not defined in this function and differs from `tmpFileUrl` — confirm which path is intended.
filteredOutputURL = URL(fileURLWithPath: filePath)
do{
print(dirPaths)
// Output settings: 44.1 kHz, AAC, mono, medium encoder quality.
let settings = [AVSampleRateKey : NSNumber(value: Float(44100.0)),
AVFormatIDKey : NSNumber(value: Int32(kAudioFormatMPEG4AAC)),
AVNumberOfChannelsKey : NSNumber(value: 1),
AVEncoderAudioQualityKey : NSNumber(value: Int32(AVAudioQuality.medium.rawValue))]
// `try!` crashes if the output file cannot be created.
self.newAudio = try! AVAudioFile(forWriting: tmpFileUrl as URL, settings: settings)
let length = self.audioFile.length
audioEngine.mainMixerNode.installTap(onBus: 0, bufferSize: 4096, format: nil) {
(buffer: AVAudioPCMBuffer?, time: AVAudioTime!) -> Void in
//Let us know when to stop saving the file, otherwise saving infinitely
// Keeps writing while the output file is no longer than the source file's frame count.
if (self.newAudio.length) <= length {
do{
try self.newAudio.write(from: buffer!)
} catch _{
print("Problem Writing Buffer")
}
} else {
//if we dont remove it, will keep on tapping infinitely
self.audioEngine.mainMixerNode.removeTap(onBus: 0)
}
}
}
// play the recording!
audioPlayerNode.play()
}
/// Stops playback and recording: stops the player node, removes the main-mixer tap,
/// cancels the stop timer, updates the UI, and stops and resets the engine.
/// Exposed to Objective-C because it is the target of the stop timer's selector.
@objc func stopAudio() {
    if let audioPlayerNode = audioPlayerNode {
        audioPlayerNode.stop()
        // The tap must be removed so the recorded file is finalized and readable.
        audioEngine?.mainMixerNode.removeTap(onBus: 0)
    }
    stopTimer?.invalidate()
    configureUI(.notPlaying)
    if let audioEngine = audioEngine {
        audioEngine.stop()
        audioEngine.reset()
    }
    isPlaying = false
}
I'm trying to record segments of audio and recombine them without producing a gap in audio.
The eventual goal is to also have video, but I've found that audio itself creates gaps when combined with ffmpeg -f concat -i list.txt -c copy out.mp4
If I put the audio in an HLS playlist, there are also gaps, so I don't think this is unique to ffmpeg.
The idea is that samples come in continuously, and my controller routes samples to the proper AVAssetWriter. How do I eliminate gaps in audio?
import Foundation
import UIKit
import AVFoundation
// Routes captured audio sample buffers into a rotating set of AVAssetWriters so that the
// recording is split into segments: a new writer is created once per second, becomes the
// current writer on the next captured buffer, and the previous writer is finished.
class StreamController: UIViewController, AVCaptureAudioDataOutputSampleBufferDelegate, AVCaptureVideoDataOutputSampleBufferDelegate {
// Writer/input being finished after a switch.
var closingAudioInput: AVAssetWriterInput?
var closingAssetWriter: AVAssetWriter?
// Writer/input currently receiving samples.
var currentAudioInput: AVAssetWriterInput?
var currentAssetWriter: AVAssetWriter?
// Writer/input created by the timer and waiting to become current.
var nextAudioInput: AVAssetWriterInput?
var nextAssetWriter: AVAssetWriter?
var videoHelper: VideoHelper?
// Wall-clock reference used only for the log timestamps.
var startTime: NSTimeInterval = 0
// Queue on which finished writers are closed.
let closeAssetQueue: dispatch_queue_t = dispatch_queue_create("closeAssetQueue", nil);
// Creates the first segment writer, starts the capture session, and schedules a new
// segment writer every second.
override func viewDidLoad() {
super.viewDidLoad()
startTime = NSDate().timeIntervalSince1970
createSegmentWriter()
videoHelper = VideoHelper()
videoHelper!.delegate = self
videoHelper!.startSession()
NSTimer.scheduledTimerWithTimeInterval(1, target: self, selector: "createSegmentWriter", userInfo: nil, repeats: true)
}
// Builds the next writer and its audio input for a fresh output path and starts writing.
// Any file already at that path is removed first.
func createSegmentWriter() {
print("Creating segment writer at t=\(NSDate().timeIntervalSince1970 - self.startTime)")
let outputPath = OutputFileNameHelper.instance.pathForOutput()
OutputFileNameHelper.instance.incrementSegmentIndex()
try? NSFileManager.defaultManager().removeItemAtPath(outputPath)
nextAssetWriter = try! AVAssetWriter(URL: NSURL(fileURLWithPath: outputPath), fileType: AVFileTypeMPEG4)
nextAssetWriter!.shouldOptimizeForNetworkUse = true
let audioSettings: [String:AnyObject] = EncodingSettings.AUDIO
nextAudioInput = AVAssetWriterInput(mediaType: AVMediaTypeAudio, outputSettings: audioSettings)
nextAudioInput!.expectsMediaDataInRealTime = true
nextAssetWriter?.addInput(nextAudioInput!)
nextAssetWriter!.startWriting()
}
// Finishes the closing writer once a switch has happened and its audio input has been
// marked as finished, then clears the flags and the closing references.
func closeWriterIfNecessary() {
if closing && audioFinished {
closing = false
audioFinished = false
let outputFile = closingAssetWriter?.outputURL.pathComponents?.last
closingAssetWriter?.finishWritingWithCompletionHandler() {
let delta = NSDate().timeIntervalSince1970 - self.startTime
print("segment \(outputFile!) finished at t=\(delta)")
}
self.closingAudioInput = nil
self.closingAssetWriter = nil
}
}
// Set after the closing input has been marked as finished.
var audioFinished = false
// Set when a writer switch has left an older writer to be closed.
var closing = false
// Capture callback: switches to the next writer when one is ready, forwards audio buffers
// to `captureAudioSample`, and queues a check for closing the old writer.
func captureOutput(captureOutput: AVCaptureOutput!, didOutputSampleBuffer sampleBuffer: CMSampleBufferRef, fromConnection connection: AVCaptureConnection!) {
if let nextWriter = nextAssetWriter {
// A non-zero raw status — presumably anything other than "unknown" — is treated as
// ready; confirm against AVAssetWriterStatus.
if nextWriter.status.rawValue != 0 {
if (currentAssetWriter != nil) {
closing = true
}
var sampleTiming: CMSampleTimingInfo = kCMTimingInfoInvalid
CMSampleBufferGetSampleTimingInfo(sampleBuffer, 0, &sampleTiming)
print("Switching asset writers at t=\(NSDate().timeIntervalSince1970 - self.startTime)")
// Rotate: current -> closing, next -> current.
closingAssetWriter = currentAssetWriter
closingAudioInput = currentAudioInput
currentAssetWriter = nextAssetWriter
currentAudioInput = nextAudioInput
nextAssetWriter = nil
nextAudioInput = nil
// The new writer's session starts at the presentation time of this buffer.
currentAssetWriter?.startSessionAtSourceTime(sampleTiming.presentationTimeStamp)
}
}
// Video buffers are ignored; only audio buffers are written.
if let _ = captureOutput as? AVCaptureVideoDataOutput {
} else if let _ = captureOutput as? AVCaptureAudioDataOutput {
captureAudioSample(sampleBuffer)
}
dispatch_async(closeAssetQueue) {
self.closeWriterIfNecessary()
}
}
// Logs the buffer's presentation time in seconds, prefixed with `prefix`.
func printTimingInfo(sampleBuffer: CMSampleBufferRef, prefix: String) {
var sampleTiming: CMSampleTimingInfo = kCMTimingInfoInvalid
CMSampleBufferGetSampleTimingInfo(sampleBuffer, 0, &sampleTiming)
let presentationTime = Double(sampleTiming.presentationTimeStamp.value) / Double(sampleTiming.presentationTimeStamp.timescale)
print("\(prefix):\(presentationTime)")
}
// Appends an audio buffer to the appropriate input.
// While a switch is pending, the buffer goes to the closing input only (not to the current
// one) and that input is then marked as finished. In either branch the buffer is dropped
// when the input is not ready for more data.
func captureAudioSample(sampleBuffer: CMSampleBufferRef) {
printTimingInfo(sampleBuffer, prefix: "A")
if (closing && !audioFinished) {
if closingAudioInput?.readyForMoreMediaData == true {
closingAudioInput?.appendSampleBuffer(sampleBuffer)
}
closingAudioInput?.markAsFinished()
audioFinished = true
} else {
if currentAudioInput?.readyForMoreMediaData == true {
currentAudioInput?.appendSampleBuffer(sampleBuffer)
}
}
}
}
With packet formats like AAC you have silent priming frames (a.k.a encoder delay) at the beginning and remainder frames at the end (when your audio length is not a multiple of the packet size). In your case it's 2112 of them at the beginning of every file. Priming and remainder frames break the possibility of concatenating the files without transcoding them, so you can't really blame ffmpeg -c copy for not producing seamless output.
I'm not sure where this leaves you with video - obviously audio is synced to the video, even in the presence of priming frames.
It all depends on how you intend to concatenate the final audio (and eventually video). If you're doing it yourself using AVFoundation, then you can detect and account for priming/remainder frames using
CMGetAttachment(buffer, kCMSampleBufferAttachmentKey_TrimDurationAtStart, NULL)
CMGetAttachment(audioBuffer, kCMSampleBufferAttachmentKey_TrimDurationAtEnd, NULL)
As a short-term solution, you can switch to a non-"packetised" format to get gapless files that can be concatenated with ffmpeg.
e.g.
AVFormatIDKey: kAudioFormatAppleIMA4, fileType: AVFileTypeAIFC, suffix ".aifc" or
AVFormatIDKey: kAudioFormatLinearPCM, fileType: AVFileTypeWAVE, suffix ".wav"
p.s. you can see priming & remainder frames and packet sizes using the ubiquitous afinfo tool.
afinfo chunk.mp4
Data format: 2 ch, 44100 Hz, 'aac ' (0x00000000) 0 bits/channel, 0 bytes/packet, 1024 frames/packet, 0 bytes/frame
...
audio 39596 valid frames + 2112 priming + 276 remainder = 41984
...
Not sure if this helps you but if you have a bunch of MP4s you can use this code to combine them:
/// Appends the audio of each file in `audioFileUrls`, in order, into one composition and
/// exports the result as an M4A file in the documents directory.
///
/// - Parameters:
///   - audioFileUrls: `NSURL`s of the audio files to merge, in playback order.
///   - callback: Called with the output URL when the export completes, otherwise with `nil`
///     and an error describing the first failure.
func mergeAudioFiles(audioFileUrls: NSArray, callback: (url: NSURL?, error: NSError?)->()) {
    // Error reported when an input cannot be used or the export session cannot be created.
    let invalidInputError = NSError(domain: NSCocoaErrorDomain, code: NSFileReadUnknownError, userInfo: nil)

    // Create the audio composition
    let composition = AVMutableComposition()

    // Merge: each file gets its own track, inserted at the composition's current end.
    for element in audioFileUrls {
        guard let fileUrl = element as? NSURL else {
            callback(url: nil, error: invalidInputError)
            return
        }
        let asset = AVURLAsset(URL: fileUrl)
        // Report a file without an audio track instead of crashing on an index out of range.
        guard let track = asset.tracksWithMediaType(AVMediaTypeAudio).first else {
            callback(url: nil, error: invalidInputError)
            return
        }
        let compositionAudioTrack: AVMutableCompositionTrack = composition.addMutableTrackWithMediaType(AVMediaTypeAudio, preferredTrackID: CMPersistentTrackID())
        let timeRange = CMTimeRange(start: CMTimeMake(0, 600), duration: track.timeRange.duration)
        do {
            try compositionAudioTrack.insertTimeRange(timeRange, ofTrack: track, atTime: composition.duration)
        } catch let error as NSError {
            callback(url: nil, error: error)
            return
        }
    }

    // Create output url
    let format = NSDateFormatter()
    format.dateFormat="yyyy-MM-dd-HH-mm-ss"
    let currentFileName = "recording-\(format.stringFromDate(NSDate()))-merge.m4a"
    print(currentFileName)
    let documentsDirectory = NSFileManager.defaultManager().URLsForDirectory(.DocumentDirectory, inDomains: .UserDomainMask)[0]
    let outputUrl = documentsDirectory.URLByAppendingPathComponent(currentFileName)
    print(outputUrl.absoluteString)

    // Export it
    guard let assetExport = AVAssetExportSession(asset: composition, presetName: AVAssetExportPresetAppleM4A) else {
        callback(url: nil, error: invalidInputError)
        return
    }
    assetExport.outputFileType = AVFileTypeAppleM4A
    assetExport.outputURL = outputUrl
    assetExport.exportAsynchronouslyWithCompletionHandler({ () -> Void in
        // Only a completed export produces a usable file; failed and cancelled exports report the error.
        switch assetExport.status {
        case AVAssetExportSessionStatus.Completed:
            callback(url: assetExport.outputURL, error: nil)
        default:
            callback(url: nil, error: assetExport.error)
        }
    })
}