Low latency input/output AudioQueue - iOS

I have two iOS AudioQueues - one input that feeds samples directly to one output. Unfortunately, there is an echo effect that is quite noticeable :(
Is it possible to do low-latency audio using AudioQueues, or do I really need to use AudioUnits? (I have tried the Novocaine framework, which uses AudioUnits, and there the latency is much smaller. I've also noticed this framework seems to use fewer CPU resources. Unfortunately, I was unable to use this framework in my Swift project without major changes to it.)
Here are some extracts of my code, which is mainly written in Swift, except for the callbacks, which need to be implemented in C.
private let audioStreamBasicDescription = AudioStreamBasicDescription(
mSampleRate: 16000,
mFormatID: AudioFormatID(kAudioFormatLinearPCM),
mFormatFlags: AudioFormatFlags(kAudioFormatFlagsNativeFloatPacked),
mBytesPerPacket: 4,
mFramesPerPacket: 1,
mBytesPerFrame: 4,
mChannelsPerFrame: 1,
mBitsPerChannel: 32,
mReserved: 0)
private let numberOfBuffers = 80
private let bufferSize: UInt32 = 256
private var active = false
private var inputQueue: AudioQueueRef = nil
private var outputQueue: AudioQueueRef = nil
private var inputBuffers = [AudioQueueBufferRef]()
private var outputBuffers = [AudioQueueBufferRef]()
private var headOfFreeOutputBuffers: AudioQueueBufferRef = nil
// callbacks implemented in Swift
private func audioQueueInputCallback(inputBuffer: AudioQueueBufferRef) {
if active {
if headOfFreeOutputBuffers != nil {
let outputBuffer = headOfFreeOutputBuffers
headOfFreeOutputBuffers = AudioQueueBufferRef(outputBuffer.memory.mUserData)
outputBuffer.memory.mAudioDataByteSize = inputBuffer.memory.mAudioDataByteSize
memcpy(outputBuffer.memory.mAudioData, inputBuffer.memory.mAudioData, Int(inputBuffer.memory.mAudioDataByteSize))
assert(AudioQueueEnqueueBuffer(outputQueue, outputBuffer, 0, nil) == 0)
} else {
println(__FUNCTION__ + ": out-of-output-buffers!")
}
assert(AudioQueueEnqueueBuffer(inputQueue, inputBuffer, 0, nil) == 0)
}
}
private func audioQueueOutputCallback(outputBuffer: AudioQueueBufferRef) {
if active {
outputBuffer.memory.mUserData = UnsafeMutablePointer<Void>(headOfFreeOutputBuffers)
headOfFreeOutputBuffers = outputBuffer
}
}
func start() {
var error: NSError?
audioSession.setCategory(AVAudioSessionCategoryPlayAndRecord, withOptions: .allZeros, error: &error)
dumpError(error, functionName: "AVAudioSessionCategoryPlayAndRecord")
audioSession.setPreferredSampleRate(16000, error: &error)
dumpError(error, functionName: "setPreferredSampleRate")
audioSession.setPreferredIOBufferDuration(0.005, error: &error)
dumpError(error, functionName: "setPreferredIOBufferDuration")
audioSession.setActive(true, error: &error)
dumpError(error, functionName: "setActive(true)")
assert(active == false)
active = true
// cannot provide callbacks to AudioQueueNewInput/AudioQueueNewOutput directly from Swift, so we need to go through C functions
assert(MyAudioQueueConfigureInputQueueAndCallback(audioStreamBasicDescription, &inputQueue, audioQueueInputCallback) == 0)
assert(MyAudioQueueConfigureOutputQueueAndCallback(audioStreamBasicDescription, &outputQueue, audioQueueOutputCallback) == 0)
for (var i = 0; i < numberOfBuffers; i++) {
var audioQueueBufferRef: AudioQueueBufferRef = nil
assert(AudioQueueAllocateBuffer(inputQueue, bufferSize, &audioQueueBufferRef) == 0)
assert(AudioQueueEnqueueBuffer(inputQueue, audioQueueBufferRef, 0, nil) == 0)
inputBuffers.append(audioQueueBufferRef)
assert(AudioQueueAllocateBuffer(outputQueue, bufferSize, &audioQueueBufferRef) == 0)
outputBuffers.append(audioQueueBufferRef)
audioQueueBufferRef.memory.mUserData = UnsafeMutablePointer<Void>(headOfFreeOutputBuffers)
headOfFreeOutputBuffers = audioQueueBufferRef
}
assert(AudioQueueStart(inputQueue, nil) == 0)
assert(AudioQueueStart(outputQueue, nil) == 0)
}
And then my C-code to set up the callbacks back to Swift:
static void MyAudioQueueAudioInputCallback(void * inUserData, AudioQueueRef inAQ, AudioQueueBufferRef inBuffer, const AudioTimeStamp * inStartTime,
UInt32 inNumberPacketDescriptions, const AudioStreamPacketDescription * inPacketDescs) {
void(^block)(AudioQueueBufferRef) = (__bridge void(^)(AudioQueueBufferRef))inUserData;
block(inBuffer);
}
static void MyAudioQueueAudioOutputCallback(void *inUserData, AudioQueueRef inAQ, AudioQueueBufferRef inBuffer) {
void(^block)(AudioQueueBufferRef) = (__bridge void(^)(AudioQueueBufferRef))inUserData;
block(inBuffer);
}
OSStatus MyAudioQueueConfigureInputQueueAndCallback(AudioStreamBasicDescription inFormat, AudioQueueRef *inAQ, void(^callback)(AudioQueueBufferRef)) {
return AudioQueueNewInput(&inFormat, MyAudioQueueAudioInputCallback, (__bridge_retained void *)([callback copy]), nil, nil, 0, inAQ);
}
OSStatus MyAudioQueueConfigureOutputQueueAndCallback(AudioStreamBasicDescription inFormat, AudioQueueRef *inAQ, void(^callback)(AudioQueueBufferRef)) {
return AudioQueueNewOutput(&inFormat, MyAudioQueueAudioOutputCallback, (__bridge_retained void *)([callback copy]), nil, nil, 0, inAQ);
}

After a good while I found this great post using AudioUnits instead of AudioQueues. I just ported it to Swift and then simply added:
audioSession.setPreferredIOBufferDuration(0.005, error: &error)
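For context, that call only requests roughly 5 ms I/O buffers; the hardware may grant something different. A minimal sketch of the equivalent session setup in current Swift (the code above is Swift 1.x), purely for illustration:

import AVFoundation

// Ask for ~5 ms I/O buffers; check ioBufferDuration afterwards to see what was actually granted.
func configureLowLatencySession() throws {
    let session = AVAudioSession.sharedInstance()
    try session.setCategory(.playAndRecord, mode: .default, options: [])
    try session.setPreferredSampleRate(16_000)
    try session.setPreferredIOBufferDuration(0.005)
    try session.setActive(true)
    print("granted: \(session.sampleRate) Hz, \(session.ioBufferDuration) s I/O buffers")
}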

If you're recording audio from a microphone and playing it back within earshot of that microphone, then due to the audio throughput not being instantaneous, some of your previous output will make it into the new input, hence the echo. This phenomenon is called feedback.
This is a structural problem, so changing the recording API won't help (although changing your recording/playback buffer sizes will give you control over the delay in the echo). You can either play back the audio in such a way that the microphone can't hear it (e.g. not at all, or through headphones) or go down the rabbit hole of echo cancellation.
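As a minimal illustration of the headphones option (a sketch, not part of the original answer): you could gate live monitoring on the current output route, for example:

import AVFoundation

// Only monitor the mic live when the current output route cannot leak back into the
// microphone. The list of ports considered safe here is illustrative, not exhaustive.
func outputRouteIsFeedbackSafe() -> Bool {
    let safePorts: [AVAudioSession.Port] = [.headphones, .bluetoothA2DP, .bluetoothHFP]
    return AVAudioSession.sharedInstance().currentRoute.outputs
        .allSatisfy { safePorts.contains($0.portType) }
}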

Related

Get mic data callbacks every 20 milliseconds for a VoIP app

I am developing a VoIP calling app, and I am now at the stage where I need to transfer the voice data to the server. For that I want to get real-time audio data from the mic with 20-millisecond callbacks.
I have searched many links, but I am unable to find a solution, as I am new to the audio frameworks.
Details
We have our own stack, similar to WebRTC, which delivers RTP data from the remote side every 20 milliseconds and asks for 20 milliseconds of data from the mic. What I am trying to achieve is to get 20 milliseconds of data from the mic and pass it straight to the stack, so I need to know how to do that. The audio format is pcmFormatInt16 and the sample rate is 8000 Hz, with 20 milliseconds of data per callback.
I have looked at AVAudioEngine, AUAudioUnit, AVCaptureSession, etc.
1. I am using AVAudioSession and AUAudioUnit, but setPreferredIOBufferDuration on the audio session is not being set to the exact value I specify. As a result I am not getting the exact data size. Can anybody help me with setPreferredIOBufferDuration?
2. Another issue is that auAudioUnit.outputProvider() gives inputData as an UnsafeMutableAudioBufferListPointer. The inputData list has two elements and I want only one. Can anybody help me convert it into a data format that can be played with AVAudioPlayer?
I have followed the link below:
https://gist.github.com/hotpaw2/ba815fc23b5d642705f2b1dedfaf0107
let hwSRate = audioSession.sampleRate
try audioSession.setActive(true)
print("native Hardware rate : \(hwSRate)")
try audioSession.setPreferredIOBufferDuration(preferredIOBufferDuration)
try audioSession.setPreferredSampleRate(8000) // at 8000.0 Hz
print("Changed native Hardware rate : \(audioSession.sampleRate) buffer duration \(audioSession.ioBufferDuration)")
try auAudioUnit = AUAudioUnit(componentDescription: self.audioComponentDescription)
auAudioUnit.outputProvider = { // AURenderPullInputBlock
(actionFlags, timestamp, frameCount, inputBusNumber, inputData) -> AUAudioUnitStatus in
if let block = self.renderBlock { // AURenderBlock?
let err : OSStatus = block(actionFlags,
timestamp,
frameCount,
1,
inputData,
.none)
if err == noErr {
// save samples from current input buffer to circular buffer
print("inputData = \(inputData) and frameCount: \(frameCount)")
self.recordMicrophoneInputSamples(
inputDataList: inputData,
frameCount: UInt32(frameCount) )
}
}
let err2 : AUAudioUnitStatus = noErr
return err2
}
Log:
Changed native Hardware rate : 8000.0 buffer duration 0.01600000075995922
Try to get 40 ms of data from the audio interface and then split it up into 20 ms chunks.
Also check whether you are able to set the sampling frequency (8 kHz) of the audio interface.
The render block will give you callbacks according to the setup accepted by the hardware for the AUAudioUnit and the audio session. We have to manage a buffer ourselves if we want a different input size from the mic. Output to the speaker should be the same size the hardware expects, like 128, 256, 512 bytes, etc.
try audioSession.setPreferredSampleRate(sampleRateProvided) // at 48000.0
try audioSession.setPreferredIOBufferDuration(preferredIOBufferDuration)
These values can differ from our preferred size. That is why we have to use buffering logic to get our preferred input size; a sketch of that idea follows.
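For illustration, a minimal sketch of that buffering logic, assuming 8 kHz mono Int16 audio so that one 20 ms chunk is 160 frames (320 bytes); the class name and the deliverChunk callback are hypothetical, not part of the answer above:

import Foundation

// Hypothetical accumulator that re-chunks whatever size the hardware delivers
// into fixed 20 ms packets for the VoIP stack.
final class TwentyMsChunker {
    private var pending = Data()
    private let chunkBytes = 160 * MemoryLayout<Int16>.size   // 20 ms at 8 kHz, mono Int16
    var deliverChunk: ((Data) -> Void)?                        // hand each 20 ms chunk to the stack

    // Call from the input handler with whatever byte count the render block produced.
    func append(bytes: UnsafeRawPointer, byteCount: Int) {
        pending.append(Data(bytes: bytes, count: byteCount))
        while pending.count >= chunkBytes {
            deliverChunk?(pending.prefix(chunkBytes))
            pending.removeFirst(chunkBytes)
        }
    }
}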
Link: https://gist.github.com/hotpaw2/ba815fc23b5d642705f2b1dedfaf0107
renderBlock = auAudioUnit.renderBlock
if ( enableRecording
&& micPermissionGranted
&& audioSetupComplete
&& audioSessionActive
&& isRecording == false ) {
auAudioUnit.inputHandler = { (actionFlags, timestamp, frameCount, inputBusNumber) in
if let block = self.renderBlock { // AURenderBlock?
var bufferList = AudioBufferList(
mNumberBuffers: 1,
mBuffers: AudioBuffer(
mNumberChannels: audioFormat!.channelCount,
mDataByteSize: 0,
mData: nil))
let err : OSStatus = block(actionFlags,
timestamp,
frameCount,
inputBusNumber,
&bufferList,
.none)
if err == noErr {
// save samples from current input buffer to circular buffer
print("inputData = \(bufferList.mBuffers.mDataByteSize) and frameCount: \(frameCount) and count: \(count)")
count += 1
if !self.isMuteState {
self.recordMicrophoneInputSamples(
inputDataList: &bufferList,
frameCount: UInt32(frameCount) )
}
}
}
}
auAudioUnit.isInputEnabled = true
auAudioUnit.outputProvider = { ( // AURenderPullInputBlock?
actionFlags,
timestamp,
frameCount,
inputBusNumber,
inputDataList ) -> AUAudioUnitStatus in
if let block = self.renderBlock {
if let dataReceived = self.getInputDataForConsumption() {
let mutabledata = NSMutableData(data: dataReceived)
var bufferListSpeaker = AudioBufferList(
mNumberBuffers: 1,
mBuffers: AudioBuffer(
mNumberChannels: 1,
mDataByteSize: 0,
mData: nil))
let err : OSStatus = block(actionFlags,
timestamp,
frameCount,
1,
&bufferListSpeaker,
.none)
if err == noErr {
bufferListSpeaker.mBuffers.mDataByteSize = UInt32(mutabledata.length)
bufferListSpeaker.mBuffers.mData = mutabledata.mutableBytes
inputDataList[0] = bufferListSpeaker
print("Output Provider mDataByteSize: \(inputDataList[0].mBuffers.mDataByteSize) output FrameCount: \(frameCount)")
return err
} else {
print("Output Provider \(err)")
return err
}
}
}
return 0
}
auAudioUnit.isOutputEnabled = true
do {
circInIdx = 0 // initialize circular buffer pointers
circOutIdx = 0
circoutSpkIdx = 0
circInSpkIdx = 0
try auAudioUnit.allocateRenderResources()
try auAudioUnit.startHardware() // equivalent to AudioOutputUnitStart ???
isRecording = true
} catch let e {
print(e)
}

Piping AudioKit Microphone to Google Speech-to-Text

I'm trying to get AudioKit to pipe the microphone to Google's Speech-to-Text API as seen here, but I'm not entirely sure how to go about it.
To prepare the audio for the Speech-to-Text engine, you need to set up the encoding and pass it through in chunks. In the example Google uses, they use Apple's AVFoundation, but I'd like to use AudioKit so I can perform some pre-processing, such as cutting off low amplitudes, etc.
I believe the right way to do this is to use a Tap:
First, I should match the format by:
var asbd = AudioStreamBasicDescription()
asbd.mSampleRate = 16000.0
asbd.mFormatID = kAudioFormatLinearPCM
asbd.mFormatFlags = kAudioFormatFlagIsSignedInteger | kAudioFormatFlagIsPacked
asbd.mBytesPerPacket = 2
asbd.mFramesPerPacket = 1
asbd.mBytesPerFrame = 2
asbd.mChannelsPerFrame = 1
asbd.mBitsPerChannel = 16
AudioKit.format = AVAudioFormat(streamDescription: &asbd)!
Then create a tap such as:
open class TestTap {
internal let bufferSize: UInt32 = 1_024
@objc public init(_ input: AKNode?) {
input?.avAudioNode.installTap(onBus: 0, bufferSize: bufferSize, format: AudioKit.format) { buffer, _ in
// do work here
}
}
}
But I wasn't able to work out the right way to handle this data and send it to the Google Speech-to-Text API via the streamAudioData method in real time with AudioKit. Perhaps I am going about this the wrong way?
UPDATE:
I've created a Tap as such:
open class TestTap {
internal var audioData = NSMutableData()
internal let bufferSize: UInt32 = 1_024
func toData(buffer: AVAudioPCMBuffer) -> NSData {
let channelCount = 2 // given the PCMBuffer channel count is 2
let channels = UnsafeBufferPointer(start: buffer.floatChannelData, count: channelCount)
return NSData(bytes: channels[0], length:Int(buffer.frameCapacity * buffer.format.streamDescription.pointee.mBytesPerFrame))
}
@objc public init(_ input: AKNode?) {
input?.avAudioNode.installTap(onBus: 0, bufferSize: bufferSize, format: AudioKit.format) { buffer, _ in
self.audioData.append(self.toData(buffer: buffer) as Data)
// We recommend sending samples in 100ms chunks (from Google)
let chunkSize: Int /* bytes/chunk */ = Int(0.1 /* seconds/chunk */
* AudioKit.format.sampleRate /* samples/second */
* 2 /* bytes/sample */ )
if self.audioData.length > chunkSize {
SpeechRecognitionService
.sharedInstance
.streamAudioData(self.audioData,
completion: { response, error in
if let error = error {
print("ERROR: \(error.localizedDescription)")
SpeechRecognitionService.sharedInstance.stopStreaming()
} else if let response = response {
print(response)
}
})
self.audioData = NSMutableData()
}
}
}
}
and in viewDidLoad:, I'm setting AudioKit up with:
AKSettings.sampleRate = 16_000
AKSettings.bufferLength = .shortest
However, Google complains with:
ERROR: Audio data is being streamed too fast. Please stream audio data approximately at real time.
I've tried changing multiple parameters such as the chunk size to no avail.
I found the solution here.
Final code for my Tap is:
open class GoogleSpeechToTextStreamingTap {
internal var converter: AVAudioConverter!
@objc public init(_ input: AKNode?, sampleRate: Double = 16000.0) {
let format = AVAudioFormat(commonFormat: AVAudioCommonFormat.pcmFormatInt16, sampleRate: sampleRate, channels: 1, interleaved: false)!
self.converter = AVAudioConverter(from: AudioKit.format, to: format)
self.converter?.sampleRateConverterAlgorithm = AVSampleRateConverterAlgorithm_Normal
self.converter?.sampleRateConverterQuality = .max
let sampleRateRatio = AKSettings.sampleRate / sampleRate
let inputBufferSize = 4410 // 100ms of 44.1K = 4410 samples.
input?.avAudioNode.installTap(onBus: 0, bufferSize: AVAudioFrameCount(inputBufferSize), format: nil) { buffer, time in
let capacity = Int(Double(buffer.frameCapacity) / sampleRateRatio)
let bufferPCM16 = AVAudioPCMBuffer(pcmFormat: format, frameCapacity: AVAudioFrameCount(capacity))!
var error: NSError? = nil
self.converter?.convert(to: bufferPCM16, error: &error) { inNumPackets, outStatus in
outStatus.pointee = AVAudioConverterInputStatus.haveData
return buffer
}
let channel = UnsafeBufferPointer(start: bufferPCM16.int16ChannelData!, count: 1)
let data = Data(bytes: channel[0], count: capacity * 2)
SpeechRecognitionService
.sharedInstance
.streamAudioData(data,
completion: { response, error in
if let error = error {
print("ERROR: \(error.localizedDescription)")
SpeechRecognitionService.sharedInstance.stopStreaming()
} else if let response = response {
print(response)
}
})
}
}
}
You can likely record using AKNodeRecorder, and pass along the buffer from the resulting AKAudioFile to the API. If you wanted more real-time, you could try installing a tap on the avAudioNode property of the AKNode you want to record and pass the buffers to the API continuously.
However, I'm curious why you see the need for pre-processing - I'm sure the Google API is plenty optimized for recordings produced by the sample code you noted.
I've had a lot of success / fun with the iOS Speech API. Not sure if there's a reason you want to go with the Google API, but I'd consider checking it out and seeing if it might better serve your needs if you haven't already.
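For reference, a minimal sketch of that built-in route (Apple's Speech framework fed from an AVAudioEngine tap); speech authorization must already have been granted, and the names and buffer size here are illustrative rather than AudioKit-specific:

import AVFoundation
import Speech

func startAppleSpeechRecognition(engine: AVAudioEngine) throws -> SFSpeechRecognitionTask? {
    guard let recognizer = SFSpeechRecognizer(locale: Locale(identifier: "en-US")) else { return nil }
    let request = SFSpeechAudioBufferRecognitionRequest()

    let input = engine.inputNode
    input.installTap(onBus: 0, bufferSize: 1024, format: input.outputFormat(forBus: 0)) { buffer, _ in
        request.append(buffer)   // stream microphone buffers into the recognition request
    }
    engine.prepare()
    try engine.start()

    return recognizer.recognitionTask(with: request) { result, _ in
        if let result = result {
            print(result.bestTranscription.formattedString)
        }
    }
}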
Hope this helps!

Deep Copy of Audio CMSampleBuffer

I am trying to create a copy of a CMSampleBuffer as returned by captureOutput in an AVCaptureAudioDataOutputSampleBufferDelegate.
The problem I am having is that frames coming from the delegate method captureOutput:didOutputSampleBuffer:fromConnection: get dropped after I retain them in a CFArray for a long time.
Obviously, I need to create deep copies of incoming buffers for further processing. I also know that CMSampleBufferCreateCopy only creates shallow copies.
A few related questions have been asked on SO:
Pulling data from a CMSampleBuffer in order to create a deep copy
Creating copy of CMSampleBuffer in Swift returns OSStatus -12743 (Invalid Media Format)
Deep Copy of CMImageBuffer or CVImageBuffer
But none of them helps me use the CMSampleBufferCreate function, with its 12 parameters, correctly:
CMSampleBufferRef copyBuffer;
CMBlockBufferRef data = CMSampleBufferGetDataBuffer(sampleBuffer);
CMFormatDescriptionRef formatDescription = CMSampleBufferGetFormatDescription(sampleBuffer);
CMItemCount itemCount = CMSampleBufferGetNumSamples(sampleBuffer);
CMTime duration = CMSampleBufferGetDuration(sampleBuffer);
CMTime presentationStamp = CMSampleBufferGetPresentationTimeStamp(sampleBuffer);
CMSampleTimingInfo timingInfo;
timingInfo.duration = duration;
timingInfo.presentationTimeStamp = presentationStamp;
timingInfo.decodeTimeStamp = CMSampleBufferGetDecodeTimeStamp(sampleBuffer);
size_t sampleSize = CMBlockBufferGetDataLength(data);
CMBlockBufferRef sampleData;
if (CMBlockBufferCopyDataBytes(data, 0, sampleSize, &sampleData) != kCMBlockBufferNoErr) {
VLog(@"error during copying sample buffer");
}
// Here I tried data and sampleData CMBlockBuffer instance, but no success
OSStatus status = CMSampleBufferCreate(kCFAllocatorDefault, data, isDataReady, nil, nil, formatDescription, itemCount, 1, &timingInfo, 1, &sampleSize, &copyBuffer);
if (!self.sampleBufferArray) {
self.sampleBufferArray = CFArrayCreateMutable(NULL, 0, &kCFTypeArrayCallBacks);
//EXC_BAD_ACCESS crash when trying to add sampleBuffer to the array
CFArrayAppendValue(self.sampleBufferArray, copyBuffer);
} else {
CFArrayAppendValue(self.sampleBufferArray, copyBuffer);
}
How do you deep copy an audio CMSampleBuffer? Feel free to use any language (Swift/Objective-C) in your answers.
Here is a working solution I finally implemented. I sent this snippet to Apple Developer Technical Support and asked them to check whether it is a correct way to copy an incoming sample buffer. The basic idea is to copy the AudioBufferList, then create a CMSampleBuffer and set the AudioBufferList on that sample buffer.
AudioBufferList audioBufferList;
CMBlockBufferRef blockBuffer;
//Create an AudioBufferList containing the data from the CMSampleBuffer,
//and a CMBlockBuffer which references the data in that AudioBufferList.
CMSampleBufferGetAudioBufferListWithRetainedBlockBuffer(sampleBuffer, NULL, &audioBufferList, sizeof(audioBufferList), NULL, NULL, 0, &blockBuffer);
NSUInteger size = sizeof(audioBufferList);
char buffer[size];
memcpy(buffer, &audioBufferList, size);
//This is the Audio data.
NSData *bufferData = [NSData dataWithBytes:buffer length:size];
const void *copyBufferData = [bufferData bytes];
copyBufferData = (char *)copyBufferData;
CMSampleBufferRef copyBuffer = NULL;
OSStatus status = -1;
/* Format Description */
AudioStreamBasicDescription audioFormat = *CMAudioFormatDescriptionGetStreamBasicDescription((CMAudioFormatDescriptionRef) CMSampleBufferGetFormatDescription(sampleBuffer));
CMFormatDescriptionRef format = NULL;
status = CMAudioFormatDescriptionCreate(kCFAllocatorDefault, &audioFormat, 0, nil, 0, nil, nil, &format);
CMFormatDescriptionRef formatdes = NULL;
status = CMFormatDescriptionCreate(NULL, kCMMediaType_Audio, 'lpcm', NULL, &formatdes);
if (status != noErr)
{
NSLog(@"Error in CMAudioFormatDescriptionCreator");
CFRelease(blockBuffer);
return;
}
/* Create sample Buffer */
CMItemCount framesCount = CMSampleBufferGetNumSamples(sampleBuffer);
CMSampleTimingInfo timing = {.duration= CMTimeMake(1, 44100), .presentationTimeStamp= CMSampleBufferGetPresentationTimeStamp(sampleBuffer), .decodeTimeStamp= CMSampleBufferGetDecodeTimeStamp(sampleBuffer)};
status = CMSampleBufferCreate(kCFAllocatorDefault, nil , NO,nil,nil,format, framesCount, 1, &timing, 0, nil, &copyBuffer);
if( status != noErr) {
NSLog(@"Error in CMSampleBufferCreate");
CFRelease(blockBuffer);
return;
}
/* Copy BufferList to Sample Buffer */
AudioBufferList receivedAudioBufferList;
memcpy(&receivedAudioBufferList, copyBufferData, sizeof(receivedAudioBufferList));
//Creates a CMBlockBuffer containing a copy of the data from the
//AudioBufferList.
status = CMSampleBufferSetDataBufferFromAudioBufferList(copyBuffer, kCFAllocatorDefault , kCFAllocatorDefault, 0, &receivedAudioBufferList);
if (status != noErr) {
NSLog(@"Error in CMSampleBufferSetDataBufferFromAudioBufferList");
CFRelease(blockBuffer);
return;
}
Code-Level Support answer:
This code looks OK (though you'll want to add some additional error checking). I've successfully tested it in an app that implements the AVCaptureAudioDataOutput delegate captureOutput:didOutputSampleBuffer:fromConnection: method to capture and record audio. The captured audio I'm getting when using this deep copy code appears to be the same as what I get when directly using the provided sample buffer (without the deep copy).
Apple Developer Technical Support
Couldn't find a decent answer doing this in Swift. Here's an extension:
extension CMSampleBuffer {
func deepCopy() -> CMSampleBuffer? {
guard let formatDesc = CMSampleBufferGetFormatDescription(self),
let data = self.data else {
return nil
}
let nFrames = CMSampleBufferGetNumSamples(self)
let pts = CMSampleBufferGetPresentationTimeStamp(self)
let dataBuffer = data.withUnsafeBytes { (buffer) -> CMBlockBuffer? in
var blockBuffer: CMBlockBuffer?
let length: Int = data.count
guard CMBlockBufferCreateWithMemoryBlock(
allocator: kCFAllocatorDefault,
memoryBlock: nil,
blockLength: length,
blockAllocator: nil,
customBlockSource: nil,
offsetToData: 0,
dataLength: length,
flags: 0,
blockBufferOut: &blockBuffer) == noErr else {
print("Failed to create block")
return nil
}
guard CMBlockBufferReplaceDataBytes(
with: buffer.baseAddress!,
blockBuffer: blockBuffer!,
offsetIntoDestination: 0,
dataLength: length) == noErr else {
print("Failed to move bytes for block")
return nil
}
return blockBuffer
}
guard let dataBuffer = dataBuffer else {
return nil
}
var newSampleBuffer: CMSampleBuffer?
CMAudioSampleBufferCreateReadyWithPacketDescriptions(
allocator: kCFAllocatorDefault,
dataBuffer: dataBuffer,
formatDescription: formatDesc,
sampleCount: nFrames,
presentationTimeStamp: pts,
packetDescriptions: nil,
sampleBufferOut: &newSampleBuffer
)
return newSampleBuffer
}
}
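A hypothetical usage of the extension above from an AVCaptureAudioDataOutputSampleBufferDelegate; the retainedBuffers array is illustrative, not part of the original answer:

import AVFoundation
import CoreMedia

final class AudioCaptureDelegate: NSObject, AVCaptureAudioDataOutputSampleBufferDelegate {
    private var retainedBuffers: [CMSampleBuffer] = []

    func captureOutput(_ output: AVCaptureOutput,
                       didOutput sampleBuffer: CMSampleBuffer,
                       from connection: AVCaptureConnection) {
        // Keep a deep copy so the capture pipeline's pooled buffer can be recycled.
        if let copy = sampleBuffer.deepCopy() {
            retainedBuffers.append(copy)
        }
    }
}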
LLooggaann's solution is simpler and works well; however, in case anyone is interested, I migrated the original solution to Swift 5.6:
extension CMSampleBuffer {
func deepCopy() -> CMSampleBuffer? {
var audioBufferList : AudioBufferList = AudioBufferList()
var blockBuffer : CMBlockBuffer?
let sizeOfAudioBufferList = MemoryLayout<AudioBufferList>.size
//Create an AudioBufferList containing the data from the CMSampleBuffer.
CMSampleBufferGetAudioBufferListWithRetainedBlockBuffer(self,
bufferListSizeNeededOut: nil,
bufferListOut: &audioBufferList,
bufferListSize: sizeOfAudioBufferList,
blockBufferAllocator: nil,
blockBufferMemoryAllocator: nil,
flags: 0,
blockBufferOut: &blockBuffer)
guard audioBufferList.mNumberBuffers == 1 else { return nil } //TODO: Make this generic for any number of buffers
/* Deep copy the audio buffer */
let audioBufferDataSize = Int(audioBufferList.mBuffers.mDataByteSize)
let audioBuffer = audioBufferList.mBuffers
let audioBufferDataCopyPointer = UnsafeMutableRawPointer.allocate(byteCount: audioBufferDataSize, alignment: 1)
defer {
audioBufferDataCopyPointer.deallocate()
}
memcpy(audioBufferDataCopyPointer, audioBufferList.mBuffers.mData, audioBufferDataSize)
let copiedAudioBuffer = AudioBuffer(mNumberChannels: audioBuffer.mNumberChannels,
mDataByteSize: audioBufferList.mBuffers.mDataByteSize,
mData: audioBufferDataCopyPointer)
/* Create a new audio buffer list with the deep copied audio buffer */
var copiedAudioBufferList = AudioBufferList(mNumberBuffers: 1, mBuffers: copiedAudioBuffer)
/* Copy audio format description, to be used in the new sample buffer */
guard let sampleBufferFormatDescription = CMSampleBufferGetFormatDescription(self) else { return nil }
/* Create copy of timing for new sample buffer */
var duration = CMSampleBufferGetDuration(self)
duration.value /= Int64(numSamples)
var timing = CMSampleTimingInfo(duration: duration,
presentationTimeStamp: CMSampleBufferGetPresentationTimeStamp(self),
decodeTimeStamp: CMSampleBufferGetDecodeTimeStamp(self))
/* New sample buffer preparation, using the audio format description, and the timing information. */
let sampleCount = CMSampleBufferGetNumSamples(self)
var newSampleBuffer : CMSampleBuffer?
guard CMSampleBufferCreate(allocator: kCFAllocatorDefault,
dataBuffer: nil,
dataReady: false,
makeDataReadyCallback: nil,
refcon: nil,
formatDescription: sampleBufferFormatDescription,
sampleCount: sampleCount,
sampleTimingEntryCount: 1,
sampleTimingArray: &timing,
sampleSizeEntryCount: 0,
sampleSizeArray: nil,
sampleBufferOut: &newSampleBuffer) == noErr else { return nil }
//Create a CMBlockBuffer containing a copy of the data from the AudioBufferList, add to new sample buffer.
let status = CMSampleBufferSetDataBufferFromAudioBufferList(newSampleBuffer!,
blockBufferAllocator: kCFAllocatorDefault,
blockBufferMemoryAllocator: kCFAllocatorDefault,
flags: 0,
bufferList: &copiedAudioBufferList)
guard status == noErr else { return nil }
return newSampleBuffer
}
}

Audio Recording AudioQueueStart buffer never filled

I am using AudioQueueStart in order to start recording on an iOS device and I want all the recording data streamed to me in buffers so that I can process them and send them to a server.
Basic functionality works great, however in my BufferFilled function I usually get < 10 bytes of data on every call. This feels very inefficient, especially since I have tried to set the buffer size to 16384 bytes (see the beginning of the startRecording method).
How can I make it fill up the buffer more before calling BufferFilled? Or do I need to add a second phase of buffering before sending to the server to achieve what I want?
OSStatus BufferFilled(void *aqData, SInt64 inPosition, UInt32 requestCount, const void *inBuffer, UInt32 *actualCount) {
AQRecorderState *pAqData = (AQRecorderState*)aqData;
NSData *audioData = [NSData dataWithBytes:inBuffer length:requestCount];
*actualCount = requestCount; // report the number of bytes actually consumed
//audioData is usually < 10 bytes, sometimes 100 bytes, but never close to 16384 bytes
return 0;
}
void HandleInputBuffer(void *aqData, AudioQueueRef inAQ, AudioQueueBufferRef inBuffer, const AudioTimeStamp *inStartTime, UInt32 inNumPackets, const AudioStreamPacketDescription *inPacketDesc) {
AQRecorderState *pAqData = (AQRecorderState*)aqData;
if (inNumPackets == 0 && pAqData->mDataFormat.mBytesPerPacket != 0)
inNumPackets = inBuffer->mAudioDataByteSize / pAqData->mDataFormat.mBytesPerPacket;
if(AudioFileWritePackets(pAqData->mAudioFile, false, inBuffer->mAudioDataByteSize, inPacketDesc, pAqData->mCurrentPacket, &inNumPackets, inBuffer->mAudioData) == noErr) {
pAqData->mCurrentPacket += inNumPackets;
}
if (pAqData->mIsRunning == 0)
return;
OSStatus error = AudioQueueEnqueueBuffer(pAqData->mQueue, inBuffer, 0, NULL);
}
void DeriveBufferSize(AudioQueueRef audioQueue, AudioStreamBasicDescription *ASBDescription, Float64 seconds, UInt32 *outBufferSize) {
static const int maxBufferSize = 0x50000;
int maxPacketSize = ASBDescription->mBytesPerPacket;
if (maxPacketSize == 0) {
UInt32 maxVBRPacketSize = sizeof(maxPacketSize);
AudioQueueGetProperty(audioQueue, kAudioQueueProperty_MaximumOutputPacketSize, &maxPacketSize, &maxVBRPacketSize);
}
Float64 numBytesForTime = ASBDescription->mSampleRate * maxPacketSize * seconds;
*outBufferSize = (UInt32)(numBytesForTime < maxBufferSize ? numBytesForTime : maxBufferSize);
}
OSStatus SetMagicCookieForFile (AudioQueueRef inQueue, AudioFileID inFile) {
OSStatus result = noErr;
UInt32 cookieSize;
if (AudioQueueGetPropertySize (inQueue, kAudioQueueProperty_MagicCookie, &cookieSize) == noErr) {
char* magicCookie =
(char *) malloc (cookieSize);
if (AudioQueueGetProperty (inQueue, kAudioQueueProperty_MagicCookie, magicCookie, &cookieSize) == noErr)
result = AudioFileSetProperty (inFile, kAudioFilePropertyMagicCookieData, cookieSize, magicCookie);
free(magicCookie);
}
return result;
}
- (void)startRecording {
aqData.mDataFormat.mFormatID = kAudioFormatMPEG4AAC;
aqData.mDataFormat.mSampleRate = 22050.0;
aqData.mDataFormat.mChannelsPerFrame = 1;
aqData.mDataFormat.mBitsPerChannel = 0;
aqData.mDataFormat.mBytesPerPacket = 0;
aqData.mDataFormat.mBytesPerFrame = 0;
aqData.mDataFormat.mFramesPerPacket = 1024;
aqData.mDataFormat.mFormatFlags = kMPEG4Object_AAC_Main;
AudioFileTypeID fileType = kAudioFileAAC_ADTSType;
aqData.bufferByteSize = 16384;
UInt32 defaultToSpeaker = TRUE;
AudioSessionSetProperty(kAudioSessionProperty_OverrideCategoryDefaultToSpeaker, sizeof(defaultToSpeaker), &defaultToSpeaker);
OSStatus status = AudioQueueNewInput(&aqData.mDataFormat, HandleInputBuffer, &aqData, NULL, kCFRunLoopCommonModes, 0, &aqData.mQueue);
UInt32 dataFormatSize = sizeof (aqData.mDataFormat);
status = AudioQueueGetProperty(aqData.mQueue, kAudioQueueProperty_StreamDescription, &aqData.mDataFormat, &dataFormatSize);
status = AudioFileInitializeWithCallbacks(&aqData, nil, BufferFilled, nil, nil, fileType, &aqData.mDataFormat, 0, &aqData.mAudioFile);
for (int i = 0; i < kNumberBuffers; ++i) {
status = AudioQueueAllocateBuffer (aqData.mQueue, aqData.bufferByteSize, &aqData.mBuffers[i]);
status = AudioQueueEnqueueBuffer (aqData.mQueue, aqData.mBuffers[i], 0, NULL);
}
aqData.mCurrentPacket = 0;
aqData.mIsRunning = true;
status = AudioQueueStart(aqData.mQueue, NULL);
}
UPDATE: I have logged the data that I receive and it is quite interesting: it almost seems like half of the "packets" are some kind of header and half are sound data. Could I assume this is just how the AAC encoding on iOS works? It writes a header in one buffer, then data in the next one, and so on. And it never wants more than around 170-180 bytes for each data chunk, and that is why it ignores my large buffer?
I solved this eventually. It turns out that, yes, the AAC encoding on iOS produces alternating small and large chunks of data. I added a second-phase buffer myself using NSMutableData and it worked perfectly.
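For illustration, the idea of that second-phase buffer expressed in Swift (the original used NSMutableData in Objective-C; the threshold and the sendToServer callback are assumptions, not the poster's actual code):

import Foundation

// Hypothetical second-phase buffer: accumulate the small chunks the AAC writer hands
// to BufferFilled and only flush to the network once enough data has built up.
final class SecondPhaseBuffer {
    private var staging = Data()
    private let flushThreshold: Int
    private let sendToServer: (Data) -> Void

    init(flushThreshold: Int = 16_384, sendToServer: @escaping (Data) -> Void) {
        self.flushThreshold = flushThreshold
        self.sendToServer = sendToServer
    }

    // Call from the write callback with each (possibly tiny) chunk.
    func append(_ chunk: Data) {
        staging.append(chunk)
        if staging.count >= flushThreshold {
            sendToServer(staging)
            staging.removeAll(keepingCapacity: true)
        }
    }
}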

Audio queue does not work as expected in iOS 10

Update
I resolved the recording problem on iOS 10. After adding the audio session configuration below before starting recording, it works as normal. But playback still hasn't been resolved.
Here's the solution:
NSError *error = nil;
// the category parameter depends on what you need
BOOL ret = [[AVAudioSession sharedInstance] setCategory:AVAudioSessionCategoryPlayAndRecord error:&error];
if (!ret) {
NSLog(@"Audio session category setup failed");
return;
}
// don't forget to setActive NO when finishing recording
ret = [[AVAudioSession sharedInstance] setActive:YES error:&error];
if (!ret)
{
NSLog(@"Audio session activation failed");
return;
}
Original
I am working on audio recording with Audio Queue Services in iOS. I followed Apple's official tutorial to implement the recording and playback parts. It was successfully tested on iOS 9.3 in the simulator but failed on iOS 10.3.1 on a real iPad.
For the recording part, the callback function invokes AudioFileWritePackets to save the audio into a file (see the code below). In iOS 9, ioNumPackets always has a non-zero value, but in iOS 10 it is always 0 during the first recording, and from the second recording onward it becomes normal. That is, the recording only works from the second attempt onward.
Here's some code about recording:
Callback function:
static void AudioInputCallback(void * inUserData, AudioQueueRef inAQ, AudioQueueBufferRef inBuffer, const AudioTimeStamp * inStartTime, UInt32 inNumPackets, const AudioStreamPacketDescription * inPacketDescs) {
NSLog(@"Input callback called");
RecordState * aqData = (RecordState*)inUserData;
if (aqData->isRecording == 0) return;
if (inNumPackets == 0 && aqData->dataFormat.mBytesPerPacket != 0)
inNumPackets = inBuffer->mAudioDataByteSize / aqData->dataFormat.mBytesPerPacket;
NSLog(@"inNumPackets = %d", inNumPackets);
// handler the data
if (outputToMobile){
OSStatus res = AudioFileWritePackets(aqData->audioFile, false, inBuffer->mAudioDataByteSize, inPacketDescs, aqData->currentPacket, &inNumPackets, inBuffer->mAudioData);
if(res == noErr)
aqData->currentPacket += inNumPackets;
}else{
}
// after handling the data, re-enqueue the buffer into the queue
AudioQueueEnqueueBuffer(aqData->queue, inBuffer, 0, NULL);
}
Start record function:
-(void)startRecording{
[self setupAudioFormat:&recordState.dataFormat];
recordState.currentPacket = 0;
OSStatus status;
status = AudioQueueNewInput(&recordState.dataFormat, AudioInputCallback, &recordState, CFRunLoopGetCurrent(), kCFRunLoopCommonModes, 0, &recordState.queue);
if (status == 0) {
UInt32 dataFormatSize = sizeof (recordState.dataFormat);
AudioQueueGetProperty (recordState.queue,kAudioQueueProperty_StreamDescription,&recordState.dataFormat,&dataFormatSize);
if (outputToMobile) {
[self createFile];
SetMagicCookieForFile(recordState.queue, recordState.audioFile);
}
DeriveBufferSize(recordState.queue, &recordState.dataFormat, 0.5, &recordState.bufferByteSize);
for (int i = 0; i < NUM_BUFFERS; i++) {
AudioQueueAllocateBuffer(recordState.queue, recordState.bufferByteSize, &recordState.buffers[i]);
AudioQueueEnqueueBuffer(recordState.queue, recordState.buffers[i], 0, NULL);
}
recordState.isRecording = true;
AudioQueueStart(recordState.queue, NULL);
}
}
For the playback part, the callback function invokes AudioFileReadPacketData to read the audio file (see the code below). As well, in iOS 9, ioNumPackets is always non-zero but in iOS 10, ioNumPackets is always always 0 so that nothing is output from iOS 10.
Here's some code about playback:
Callback function:
static void AudioOutputCallback(void *inUserData, AudioQueueRef inAQ, AudioQueueBufferRef inBuffer){
NSLog(@"Output callback called");
PlayState *aqData = (PlayState *)inUserData;
if (aqData->isPlaying == 0) return;
UInt32 numBytesReadFromFile;
UInt32 numPackets = aqData->numPacketsToRead;
AudioFileReadPacketData(aqData->audioFile, false, &numBytesReadFromFile, aqData->packetDesc, aqData->currentPacket, &numPackets, inBuffer->mAudioData);
NSLog(@"outNumPackets = %d", numPackets);
if (numPackets > 0) {
AudioQueueEnqueueBuffer(aqData->queue, inBuffer, aqData->packetDesc ? numPackets : 0, aqData->packetDesc);
aqData->currentPacket += numPackets;
} else {
AudioQueueStop(aqData->queue, false);
aqData->isPlaying = false;
}
}
Start playback function:
- (void)startPlaying{
playState.currentPacket = 0;
[self openFile];
UInt32 dataFormatSize = sizeof(playState.dataFormat);
AudioFileGetProperty(playState.audioFile, kAudioFilePropertyDataFormat, &dataFormatSize, &playState.dataFormat);
OSStatus status;
status = AudioQueueNewOutput(&playState.dataFormat, AudioOutputCallback, &playState, CFRunLoopGetCurrent(), kCFRunLoopCommonModes, 0, &playState.queue);
if (status == 0) {
playState.isPlaying = true;
UInt32 maxPacketSize;
UInt32 propertySize = sizeof(maxPacketSize);
AudioFileGetProperty(playState.audioFile,kAudioFilePropertyPacketSizeUpperBound,&propertySize,&maxPacketSize);
DeriveBufferSize(playState.dataFormat, maxPacketSize, 0.5, &playState.bufferByteSize, &playState.numPacketsToRead);
bool isFormatVBR = (playState.dataFormat.mBytesPerPacket == 0 ||playState.dataFormat.mFramesPerPacket == 0);
if (isFormatVBR) {
playState.packetDesc = (AudioStreamPacketDescription*) malloc (playState.numPacketsToRead * sizeof(AudioStreamPacketDescription));
} else {
playState.packetDesc = NULL;
}
//Set a Magic Cookie for a Playback Audio Queue
MyCopyEncoderCookieToQueue(playState.audioFile, playState.queue);
for (int i = 0; i < NUM_BUFFERS; i++) {
AudioQueueAllocateBuffer(playState.queue, playState.bufferByteSize, &playState.buffers[i]);
playState.buffers[i]->mAudioDataByteSize = playState.bufferByteSize;
AudioOutputCallback(&playState, playState.queue, playState.buffers[i]);
}
Float32 gain = 10.0;
AudioQueueSetParameter(playState.queue, kAudioQueueParam_Volume, gain);
AudioQueueStart(playState.queue, NULL);
}
}
This kind of incompatibility has really been upsetting me for several days. Feel free to ask me if you need more details. I hope someone can help me out. Thanks a lot.
