I am creating a VoIP app for iOS in Objective-C. Currently I am working on the audio part: recording audio data from the microphone, encoding it with Opus, decoding it, and then playing it back. For recording and playback I use an AudioUnit. I have also made a buffer implementation that allocates blocks of memory, each with an initially set size. There are three main methods:
- setBufferSize - sets the size of the buffer's sub-allocated spaces.
- writeDataToBuffer - creates a new space (if needed) and fills data into the current writing space.
- readDataFromBuffer - reads data from the current reading space.
I use this buffer to store the audio data, and it works well; I've tested it. If I skip Opus entirely (read audio data, store it in the buffer, read it back from the buffer, and play it), everything works great. The problem comes when I include Opus: it does encode and decode the audio data, but the quality is not good and there is some crackling as well. What am I doing wrong? Here are the relevant pieces of my code:
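The buffer interface is roughly this (a simplified sketch; the real class also tracks the internal read/write positions, and the parameter types here are approximated):

// Simplified sketch of the VoipBuffer interface used below.
class VoipBuffer
{
public:
    void  setBufferSize(size_t size);                        // size of each sub-allocated space
    void  writeDataToBuffer(const void* data, size_t size);  // create a new space if needed, append data
    void* readDataFromBuffer();                              // current reading space, or nullptr if empty
};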
AudioUnit:
OSStatus status;
m_sAudioDescription.componentType = kAudioUnitType_Output;
m_sAudioDescription.componentSubType = kAudioUnitSubType_VoiceProcessingIO/*kAudioUnitSubType_RemoteIO*/;
m_sAudioDescription.componentFlags = 0;
m_sAudioDescription.componentFlagsMask = 0;
m_sAudioDescription.componentManufacturer = kAudioUnitManufacturer_Apple;
AudioComponent inputComponent = AudioComponentFindNext(NULL, &m_sAudioDescription);
status = AudioComponentInstanceNew(inputComponent, &m_audioUnit);
// Enable IO for recording
UInt32 flag = 1;
status = AudioUnitSetProperty(m_audioUnit,
kAudioOutputUnitProperty_EnableIO,
kAudioUnitScope_Input,
VOIP_AUDIO_INPUT_ELEMENT,
&flag,
sizeof(flag));
// Enable IO for playback
status = AudioUnitSetProperty(m_audioUnit,
kAudioOutputUnitProperty_EnableIO,
kAudioUnitScope_Output,
VOIP_AUDIO_OUTPUT_ELEMENT,
&flag,
sizeof(flag));
// Describe format
m_sAudioFormat.mSampleRate = 48000.00;//48000.00;/*44100.00*/;
m_sAudioFormat.mFormatID = kAudioFormatLinearPCM;
m_sAudioFormat.mFormatFlags = kAudioFormatFlagIsSignedInteger | kAudioFormatFlagIsPacked/* | kAudioFormatFlagsCanonical*/;
m_sAudioFormat.mFramesPerPacket = 1;
m_sAudioFormat.mChannelsPerFrame = 1;
m_sAudioFormat.mBitsPerChannel = 16; //8 * bytesPerSample
m_sAudioFormat.mBytesPerFrame = /*(UInt32)bytesPerSample;*/2; //bitsPerChannel / 8 * channelsPerFrame
m_sAudioFormat.mBytesPerPacket = 2; //bytesPerFrame * framesPerPacket
// Apply format
status = AudioUnitSetProperty(m_audioUnit,
kAudioUnitProperty_StreamFormat,
kAudioUnitScope_Output,
VOIP_AUDIO_INPUT_ELEMENT,
&m_sAudioFormat,
sizeof(m_sAudioFormat));
status = AudioUnitSetProperty(m_audioUnit,
kAudioUnitProperty_StreamFormat,
kAudioUnitScope_Input,
VOIP_AUDIO_OUTPUT_ELEMENT,
&m_sAudioFormat,
sizeof(m_sAudioFormat));
// Set input callback
AURenderCallbackStruct callbackStruct;
callbackStruct.inputProc = inputRenderCallback;
callbackStruct.inputProcRefCon = this;
status = AudioUnitSetProperty(m_audioUnit,
kAudioOutputUnitProperty_SetInputCallback,
kAudioUnitScope_Global,
VOIP_AUDIO_INPUT_ELEMENT,
&callbackStruct,
sizeof(callbackStruct));
// Set output callback
callbackStruct.inputProc = outputRenderCallback;
callbackStruct.inputProcRefCon = this;
status = AudioUnitSetProperty(m_audioUnit,
kAudioUnitProperty_SetRenderCallback,
kAudioUnitScope_Global,
VOIP_AUDIO_OUTPUT_ELEMENT,
&callbackStruct,
sizeof(callbackStruct));
//Enable Echo cancelation:
this->_setEchoCancelation(true);
//Enable Automatic Gain control:
this->_setAGC(false);
// Initialise
status = AudioUnitInitialize(m_audioUnit);
return noErr;
Input buffer allocation and setting the size of storing buffers:
void VoipAudio::_allocBuffer()
{
UInt32 numFramesPerBuffer;
UInt32 size = sizeof(/*VoipUInt32*/VoipInt16);
AudioUnitGetProperty(m_audioUnit,
kAudioUnitProperty_MaximumFramesPerSlice,
kAudioUnitScope_Global,
VOIP_AUDIO_OUTPUT_ELEMENT, &numFramesPerBuffer, &size);
UInt32 inputBufferListSize = offsetof(AudioBufferList, mBuffers[0]) + (sizeof(AudioBuffer) * m_sAudioFormat.mChannelsPerFrame);
inputBuffer = (AudioBufferList *)malloc(inputBufferListSize);
inputBuffer->mNumberBuffers = m_sAudioFormat.mChannelsPerFrame;
//pre-malloc buffers for AudioBufferLists
for(VoipUInt32 tmp_int1 = 0; tmp_int1 < inputBuffer->mNumberBuffers; tmp_int1++)
{
inputBuffer->mBuffers[tmp_int1].mNumberChannels = 1;
inputBuffer->mBuffers[tmp_int1].mDataByteSize = 2048;
inputBuffer->mBuffers[tmp_int1].mData = malloc(2048);
memset(inputBuffer->mBuffers[tmp_int1].mData, 0, 2048);
}
this->m_oAudioBuffer = new VoipBuffer();
this->m_oAudioBuffer->setBufferSize(2048);
this->m_oAudioReadBuffer = new VoipBuffer();
this->m_oAudioReadBuffer->setBufferSize(2880);
}
Record callback:
this->m_oAudioReadBuffer->writeDataToBuffer(samples, samplesSize);
void* tmp_buffer = this->m_oAudioReadBuffer->readDataFromBuffer();
if (tmp_buffer != nullptr)
{
sVoipAudioCodecOpusEncodedResult* encodedSamples = VoipAudioCodecs::Opus_Encode((VoipInt16*)tmp_buffer, 2880);
sVoipAudioCodecOpusDecodedResult* decodedSamples = VoipAudioCodecs::Opus_Decode(encodedSamples->m_data, encodedSamples->m_dataSize);
this->m_oAudioBuffer->writeDataToBuffer(decodedSamples->m_data, decodedSamples->m_dataSize);
free(encodedSamples->m_data);
free(encodedSamples);
free(decodedSamples->m_data);
free(decodedSamples);
}
Playing callback:
void* tmp_buffer = this->m_oAudioBuffer->readDataFromBuffer();
if (tmp_buffer != nullptr)
{
memset(buffer->mBuffers[0].mData, 0, 2048);
memcpy(buffer->mBuffers[0].mData, tmp_buffer, 2048);
buffer->mBuffers[0].mDataByteSize = 2048;
} else {
memset(buffer->mBuffers[0].mData, 0, 2048);
buffer->mBuffers[0].mDataByteSize = 2048;
}
Opus Init Code:
int _error = 0;
VoipAudioCodecs::m_oEncoder = opus_encoder_create(SAMPLE_RATE, CHANNELS, APPLICATION, &_error);
if (_error < 0)
{
fprintf(stderr, "VoipAudioCodecs error: failed to create an encoder: %s\n", opus_strerror(_error));
return;
}
_error = opus_encoder_ctl(VoipAudioCodecs::m_oEncoder, OPUS_SET_BITRATE(BITRATE/*OPUS_BITRATE_MAX*/));
if (_error < 0)
{
fprintf(stderr, "VoipAudioCodecs error: failed to set bitrate: %s\n", opus_strerror(_error));
return;
}
VoipAudioCodecs::m_oDecoder = opus_decoder_create(SAMPLE_RATE, CHANNELS, &_error);
if (_error < 0)
{
fprintf(stderr, "VoipAudioCodecs error: failed to create decoder: %s\n", opus_strerror(_error));
return;
}
Opus encode/decode:
sVoipAudioCodecOpusEncodedResult* VoipAudioCodecs::Opus_Encode(VoipInt16* number, int samplesCount)
{
unsigned char cbits[MAX_PACKET_SIZE];
VoipInt32 nbBytes;
nbBytes = opus_encode(VoipAudioCodecs::m_oEncoder, number, FRAME_SIZE, cbits, MAX_PACKET_SIZE);
if (nbBytes < 0)
{
fprintf(stderr, "VoipAudioCodecs error: encode failed: %s\n", opus_strerror(nbBytes));
return nullptr;
}
sVoipAudioCodecOpusEncodedResult* result = (sVoipAudioCodecOpusEncodedResult* )malloc(sizeof(sVoipAudioCodecOpusEncodedResult));
result->m_data = (unsigned char*)malloc(nbBytes);
memcpy(result->m_data, cbits, nbBytes);
result->m_dataSize = nbBytes;
return result;
}
sVoipAudioCodecOpusDecodedResult* VoipAudioCodecs::Opus_Decode(void* encoded, VoipInt32 nbBytes)
{
VoipInt16 decodedPacket[MAX_FRAME_SIZE];
int frame_size = opus_decode(VoipAudioCodecs::m_oDecoder, (const unsigned char*)encoded, nbBytes, decodedPacket, MAX_FRAME_SIZE, 0);
if (frame_size < 0)
{
fprintf(stderr, "VoipAudioCodecs error: decoder failed: %s\n", opus_strerror(frame_size));
return nullptr;
}
sVoipAudioCodecOpusDecodedResult* result = (sVoipAudioCodecOpusDecodedResult* )malloc(sizeof(sVoipAudioCodecOpusDecodedResult));
result->m_data = (VoipInt16*)malloc(frame_size / sizeof(VoipInt16));
memcpy(result->m_data, decodedPacket, (frame_size / sizeof(VoipInt16)));
result->m_dataSize = frame_size / sizeof(VoipInt16);
return result;
}
Here are some constants i use:
#define FRAME_SIZE 2880 //120, 240, 480, 960, 1920, 2880
#define SAMPLE_RATE 48000
#define CHANNELS 1
#define APPLICATION OPUS_APPLICATION_VOIP//OPUS_APPLICATION_AUDIO
#define BITRATE 64000
#define MAX_FRAME_SIZE 4096
#define MAX_PACKET_SIZE (3*1276)
Can you help me please?
Your audio callback time may need to be increased. Try increasing your session's setPreferredIOBufferDuration time. I have used Opus on iOS and have measured the decoding time: it takes 2 to 3 ms to decode about 240 frames of data. There is a good chance you are missing your subsequent callbacks because it is taking too long to decode the audio.
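For example, something along these lines early in the audio setup (a minimal sketch using AVAudioSession; the 20 ms figure is just an illustration, and the system treats it only as a preference):

#import <AVFoundation/AVFoundation.h>

// Request a larger I/O buffer so each render callback has more time budget
// for the Opus encode/decode work.
NSError *error = nil;
if (![[AVAudioSession sharedInstance] setPreferredIOBufferDuration:0.02 error:&error]) {
    NSLog(@"Could not set preferred IO buffer duration: %@", error);
}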
I had the same problem in my project: iOS was giving me unstable frame sizes. I used both Audio Queue Services and Audio Units, and they gave the same result (crackled voice).
All you have to do is save the samples into a ring buffer in the audio callback.
Then, in a separate thread, do the audio processing so that you build a fixed-size frame on each round.
For example:
the audio unit gives you frames or samples like this: [2048 .. 2048 .. 2048]
and the Opus codec needs 2880 frames for each packet, so you need to take 2048 frames from the first buffer and the remaining 832 frames from the next buffer to get a fixed frame size to send to the Opus encoder.
This is the function I used in my project:
func audioProcessing(){
DispatchQueue.global(qos: .default).async {
// this holds the remaining data from the ring buffer
var remainData:NSMutableData = NSMutableData()
var remainDataSize = 0
while self.room_oppened{
// here we define the fixed frame we want to use in our opus encoder
var packetOffset = 0
let fixedFrameSize:Int = 5760
var dataToGetFullFrame:Int = 5760
let packetData:NSMutableData = NSMutableData(length: fixedFrameSize)!// this needs to be filled with data
if remainDataSize > 0 {
if remainDataSize < fixedFrameSize{
memcpy(packetData.mutableBytes.advanced(by: packetOffset), remainData.mutableBytes.advanced(by: 0), remainDataSize)// add the remain data
dataToGetFullFrame = dataToGetFullFrame - remainDataSize
packetOffset = packetOffset + remainDataSize// - 1
}else{
memcpy(packetData.mutableBytes.advanced(by: packetOffset), remainData.mutableBytes.advanced(by: 0), fixedFrameSize)// add the remain data
dataToGetFullFrame = 0
}
remainDataSize = 0
}
// if the packet is not yet full, we need to get more data from the ring buffer
if dataToGetFullFrame > 0 {
while dataToGetFullFrame > 0 {
let bufferData = self.ringBufferEncodedAudio.read()// read a chunk of data from the buffer
if bufferData != nil{
var chunkOffset = 0
if dataToGetFullFrame > bufferData!.length{
memcpy(packetData.mutableBytes.advanced(by: packetOffset) , bufferData!.mutableBytes , bufferData!.length)
chunkOffset = bufferData!.length// this how much data we read
dataToGetFullFrame = dataToGetFullFrame - bufferData!.length // how much of data we need to fill packet
packetOffset = packetOffset + bufferData!.length// + 1
}else{
memcpy(packetData.mutableBytes.advanced(by: packetOffset) , bufferData!.mutableBytes , dataToGetFullFrame)
chunkOffset = dataToGetFullFrame// this how much data we read
packetOffset = packetOffset + dataToGetFullFrame// + 1
dataToGetFullFrame = dataToGetFullFrame - dataToGetFullFrame // how much of data we need to fill packet
}
if dataToGetFullFrame <= 0 {
var size = bufferData!.length - chunkOffset
remainData = NSMutableData(bytes: bufferData?.mutableBytes.advanced(by: chunkOffset), length: size)
remainDataSize = size
}
}
usleep(useconds_t(8 * 1000))
}
}
// send packet to encoder
if self.enable_streaming {
let dataToEncode:Data = packetData as Data
let packet = OpusSwiftPort.shared.encodeData(dataToEncode)
if packet != nil{
self.sendAudioPacket(packet: packet!)// <--- send this to the network
}
}
}
}
}
After I did this audio processing I got very clear audio.
I hope this is helpful for you.
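In an Objective-C project like the one in the question, the same accumulation idea boils down to something like the rough C sketch below (hypothetical names; it assumes 16-bit mono samples and a 2880-sample Opus frame, and in a real app it should run on a worker thread, not inside the audio callback):

#include <stdint.h>
#include <string.h>

// 2880 samples * 2 bytes = one fixed-size frame for the Opus encoder.
#define kOpusFrameBytes (2880 * sizeof(int16_t))

static uint8_t accumulator[8 * kOpusFrameBytes];
static size_t  accumulated = 0;

static void AccumulateAndEncode(const int16_t *samples, size_t sizeInBytes)
{
    if (accumulated + sizeInBytes > sizeof(accumulator))
        return;                                   // overflow guard for the sketch only
    // Append the new chunk (e.g. 2048 samples) to whatever is left over.
    memcpy(accumulator + accumulated, samples, sizeInBytes);
    accumulated += sizeInBytes;

    // Encode one full frame at a time; keep any remainder for the next round.
    while (accumulated >= kOpusFrameBytes) {
        // e.g. VoipAudioCodecs::Opus_Encode((VoipInt16 *)accumulator, 2880); then queue/send the packet
        accumulated -= kOpusFrameBytes;
        memmove(accumulator, accumulator + kOpusFrameBytes, accumulated);
    }
}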
Related
I am using AudioQueueStart in order to start recording on an iOS device and I want all the recording data streamed to me in buffers so that I can process them and send them to a server.
Basic functionality works great, however in my BufferFilled function I usually get < 10 bytes of data on every call. This feels very inefficient, especially since I have tried to set the buffer size to 16384 bytes (see the beginning of the startRecording method).
How can I make it fill up the buffer more before calling BufferFilled? Or do I need to add a second phase of buffering before sending to the server to achieve what I want?
OSStatus BufferFilled(void *aqData, SInt64 inPosition, UInt32 requestCount, const void *inBuffer, UInt32 *actualCount) {
AQRecorderState *pAqData = (AQRecorderState*)aqData;
NSData *audioData = [NSData dataWithBytes:inBuffer length:requestCount];
*actualCount = inBuffer + requestCount;
//audioData is usually < 10 bytes, sometimes 100 bytes but never close to 16384 bytes
return 0;
}
void HandleInputBuffer(void *aqData, AudioQueueRef inAQ, AudioQueueBufferRef inBuffer, const AudioTimeStamp *inStartTime, UInt32 inNumPackets, const AudioStreamPacketDescription *inPacketDesc) {
AQRecorderState *pAqData = (AQRecorderState*)aqData;
if (inNumPackets == 0 && pAqData->mDataFormat.mBytesPerPacket != 0)
inNumPackets = inBuffer->mAudioDataByteSize / pAqData->mDataFormat.mBytesPerPacket;
if(AudioFileWritePackets(pAqData->mAudioFile, false, inBuffer->mAudioDataByteSize, inPacketDesc, pAqData->mCurrentPacket, &inNumPackets, inBuffer->mAudioData) == noErr) {
pAqData->mCurrentPacket += inNumPackets;
}
if (pAqData->mIsRunning == 0)
return;
OSStatus error = AudioQueueEnqueueBuffer(pAqData->mQueue, inBuffer, 0, NULL);
}
void DeriveBufferSize(AudioQueueRef audioQueue, AudioStreamBasicDescription *ASBDescription, Float64 seconds, UInt32 *outBufferSize) {
static const int maxBufferSize = 0x50000;
int maxPacketSize = ASBDescription->mBytesPerPacket;
if (maxPacketSize == 0) {
UInt32 maxVBRPacketSize = sizeof(maxPacketSize);
AudioQueueGetProperty(audioQueue, kAudioQueueProperty_MaximumOutputPacketSize, &maxPacketSize, &maxVBRPacketSize);
}
Float64 numBytesForTime = ASBDescription->mSampleRate * maxPacketSize * seconds;
*outBufferSize = (UInt32)(numBytesForTime < maxBufferSize ? numBytesForTime : maxBufferSize);
}
OSStatus SetMagicCookieForFile (AudioQueueRef inQueue, AudioFileID inFile) {
OSStatus result = noErr;
UInt32 cookieSize;
if (AudioQueueGetPropertySize (inQueue, kAudioQueueProperty_MagicCookie, &cookieSize) == noErr) {
char* magicCookie =
(char *) malloc (cookieSize);
if (AudioQueueGetProperty (inQueue, kAudioQueueProperty_MagicCookie, magicCookie, &cookieSize) == noErr)
result = AudioFileSetProperty (inFile, kAudioFilePropertyMagicCookieData, cookieSize, magicCookie);
free(magicCookie);
}
return result;
}
- (void)startRecording {
aqData.mDataFormat.mFormatID = kAudioFormatMPEG4AAC;
aqData.mDataFormat.mSampleRate = 22050.0;
aqData.mDataFormat.mChannelsPerFrame = 1;
aqData.mDataFormat.mBitsPerChannel = 0;
aqData.mDataFormat.mBytesPerPacket = 0;
aqData.mDataFormat.mBytesPerFrame = 0;
aqData.mDataFormat.mFramesPerPacket = 1024;
aqData.mDataFormat.mFormatFlags = kMPEG4Object_AAC_Main;
AudioFileTypeID fileType = kAudioFileAAC_ADTSType;
aqData.bufferByteSize = 16384;
UInt32 defaultToSpeaker = TRUE;
AudioSessionSetProperty(kAudioSessionProperty_OverrideCategoryDefaultToSpeaker, sizeof(defaultToSpeaker), &defaultToSpeaker);
OSStatus status = AudioQueueNewInput(&aqData.mDataFormat, HandleInputBuffer, &aqData, NULL, kCFRunLoopCommonModes, 0, &aqData.mQueue);
UInt32 dataFormatSize = sizeof (aqData.mDataFormat);
status = AudioQueueGetProperty(aqData.mQueue, kAudioQueueProperty_StreamDescription, &aqData.mDataFormat, &dataFormatSize);
status = AudioFileInitializeWithCallbacks(&aqData, nil, BufferFilled, nil, nil, fileType, &aqData.mDataFormat, 0, &aqData.mAudioFile);
for (int i = 0; i < kNumberBuffers; ++i) {
status = AudioQueueAllocateBuffer (aqData.mQueue, aqData.bufferByteSize, &aqData.mBuffers[i]);
status = AudioQueueEnqueueBuffer (aqData.mQueue, aqData.mBuffers[i], 0, NULL);
}
aqData.mCurrentPacket = 0;
aqData.mIsRunning = true;
status = AudioQueueStart(aqData.mQueue, NULL);
}
UPDATE: I have logged the data that I receive and it is quite interesting; it almost seems like half of the "packets" are some kind of header and half are sound data. Could I assume this is just how the AAC encoding on iOS works? It writes a header in one buffer, then data in the next one, and so on. And it never wants more than around 170-180 bytes for each data chunk; is that why it ignores my large buffer?
I solved this eventually. It turns out that yes, the AAC encoding on iOS produces alternating small and large chunks of data. I added a second-phase buffer myself using NSMutableData and it worked perfectly.
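Roughly along these lines (a simplified sketch, assuming ARC; the threshold and helper names are just placeholders):

#import <Foundation/Foundation.h>

static NSMutableData *pendingData;                 // second-phase accumulation buffer
static const NSUInteger kSendThreshold = 16384;    // flush once we have this many bytes

static void AppendAndMaybeSend(const void *bytes, UInt32 length)
{
    if (pendingData == nil) {
        pendingData = [NSMutableData dataWithCapacity:kSendThreshold];
    }
    // Append whatever small chunk BufferFilled just received.
    [pendingData appendBytes:bytes length:length];

    if (pendingData.length >= kSendThreshold) {
        // sendToServer(pendingData);               // placeholder for the actual upload
        pendingData = [NSMutableData dataWithCapacity:kSendThreshold];
    }
}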
I'm looking to pan a mono signal using MTAudioProcessingTap and a Multichannel Mixer audio unit, but am getting a mono output instead of a panned, stereo output. The documentation states:
"The Multichannel Mixer unit (subtype
kAudioUnitSubType_MultiChannelMixer) takes any number of mono or
stereo streams and combines them into a single stereo output."
So, the mono output was unexpected. Any way around this? I ran a stereo signal through the exact same code and everything worked great: stereo output, panned as expected. Here's the code from my tap's prepare callback:
static void tap_PrepareCallback(MTAudioProcessingTapRef tap,
CMItemCount maxFrames,
const AudioStreamBasicDescription *processingFormat) {
AVAudioTapProcessorContext *context = (AVAudioTapProcessorContext *)MTAudioProcessingTapGetStorage(tap);
// Store sample rate for -setCenterFrequency:.
context->sampleRate = processingFormat->mSampleRate;
/* Verify processing format (this is not needed for Audio Unit, but for RMS calculation). */
context->supportedTapProcessingFormat = true;
if (processingFormat->mFormatID != kAudioFormatLinearPCM) {
NSLog(#"Unsupported audio format ID for audioProcessingTap. LinearPCM only.");
context->supportedTapProcessingFormat = false;
}
if (!(processingFormat->mFormatFlags & kAudioFormatFlagIsFloat)) {
NSLog(#"Unsupported audio format flag for audioProcessingTap. Float only.");
context->supportedTapProcessingFormat = false;
}
if (processingFormat->mFormatFlags & kAudioFormatFlagIsNonInterleaved) {
context->isNonInterleaved = true;
}
AudioUnit audioUnit;
AudioComponentDescription audioComponentDescription;
audioComponentDescription.componentType = kAudioUnitType_Mixer;
audioComponentDescription.componentSubType = kAudioUnitSubType_MultiChannelMixer;
audioComponentDescription.componentManufacturer = kAudioUnitManufacturer_Apple;
audioComponentDescription.componentFlags = 0;
audioComponentDescription.componentFlagsMask = 0;
AudioComponent audioComponent = AudioComponentFindNext(NULL, &audioComponentDescription);
if (audioComponent) {
if (noErr == AudioComponentInstanceNew(audioComponent, &audioUnit)) {
OSStatus status = noErr;
// Set audio unit input/output stream format to processing format.
if (noErr == status) {
status = AudioUnitSetProperty(audioUnit,
kAudioUnitProperty_StreamFormat,
kAudioUnitScope_Input,
0,
processingFormat,
sizeof(AudioStreamBasicDescription));
}
if (noErr == status) {
status = AudioUnitSetProperty(audioUnit,
kAudioUnitProperty_StreamFormat,
kAudioUnitScope_Output,
0,
processingFormat,
sizeof(AudioStreamBasicDescription));
}
// Set audio unit render callback.
if (noErr == status) {
AURenderCallbackStruct renderCallbackStruct;
renderCallbackStruct.inputProc = AU_RenderCallback;
renderCallbackStruct.inputProcRefCon = (void *)tap;
status = AudioUnitSetProperty(audioUnit,
kAudioUnitProperty_SetRenderCallback,
kAudioUnitScope_Input,
0,
&renderCallbackStruct,
sizeof(AURenderCallbackStruct));
}
// Set audio unit maximum frames per slice to max frames.
if (noErr == status) {
UInt32 maximumFramesPerSlice = (UInt32)maxFrames;
status = AudioUnitSetProperty(audioUnit,
kAudioUnitProperty_MaximumFramesPerSlice,
kAudioUnitScope_Global,
0,
&maximumFramesPerSlice,
(UInt32)sizeof(UInt32));
}
// Initialize audio unit.
if (noErr == status) {
status = AudioUnitInitialize(audioUnit);
}
if (noErr != status) {
AudioComponentInstanceDispose(audioUnit);
audioUnit = NULL;
}
context->audioUnit = audioUnit;
}
}
NSLog(#"Tap channels: %d",processingFormat->mChannelsPerFrame); // = 1 for mono source file
}
I've tried a few different options for the output stream format, e.g., AVAudioFormat *outFormat = [[AVAudioFormat alloc] initStandardFormatWithSampleRate:processingFormat->mSampleRate channels:2];, but get this error each time: "Client did not see 20 I/O cycles; giving up." Here's the code that creates the exact same ASBD as the input format except for 2 channels instead of one, and this gives the same "20 I/O cycles" error too:
AudioStreamBasicDescription asbd;
asbd.mFormatID = kAudioFormatLinearPCM;
asbd.mFormatFlags = 0x29;
asbd.mSampleRate = 44100;
asbd.mBitsPerChannel = 32;
asbd.mChannelsPerFrame = 2;
asbd.mBytesPerFrame = 4;
asbd.mFramesPerPacket = 1;
asbd.mBytesPerPacket = 4;
asbd.mReserved = 0;
I am trying to take a video file, read it in using AVAssetReader, and pass the audio off to Core Audio for processing (adding effects and stuff) before saving it back out to disk using AVAssetWriter. I would like to point out that if I set the componentSubType on the AudioComponentDescription of my output node to RemoteIO, things play correctly through the speakers. This makes me confident that my AUGraph is properly set up, since I can hear things working. I am setting the subType to GenericOutput, though, so I can do the rendering myself and get back the adjusted audio.
I read in the audio and pass the CMSampleBufferRef off to copyBuffer. This puts the audio into a circular buffer that will be read later.
- (void)copyBuffer:(CMSampleBufferRef)buf {
if (_readyForMoreBytes == NO)
{
return;
}
AudioBufferList abl;
CMBlockBufferRef blockBuffer;
CMSampleBufferGetAudioBufferListWithRetainedBlockBuffer(buf, NULL, &abl, sizeof(abl), NULL, NULL, kCMSampleBufferFlag_AudioBufferList_Assure16ByteAlignment, &blockBuffer);
UInt32 size = (unsigned int)CMSampleBufferGetTotalSampleSize(buf);
BOOL bytesCopied = TPCircularBufferProduceBytes(&circularBuffer, abl.mBuffers[0].mData, size);
if (!bytesCopied){
// Could not fit the data into the circular buffer; stash it in the rescue buffer instead
_readyForMoreBytes = NO;
if (size > kRescueBufferSize){
NSLog(#"Unable to allocate enought space for rescue buffer, dropping audio frame");
} else {
if (rescueBuffer == nil) {
rescueBuffer = malloc(kRescueBufferSize);
}
rescueBufferSize = size;
memcpy(rescueBuffer, abl.mBuffers[0].mData, size);
}
}
CFRelease(blockBuffer);
if (!self.hasBuffer && bytesCopied > 0)
{
self.hasBuffer = YES;
}
}
Next I call processOutput. This will do a manual render on the outputUnit. When AudioUnitRender is called it invokes the playbackCallback below, which is what is hooked up as the input callback on my first node. playbackCallback pulls the data off the circular buffer and feeds it into the audioBufferList passed in. Like I said before, if the output is set to RemoteIO this causes the audio to be played correctly on the speakers. When AudioUnitRender finishes, it returns noErr and the bufferList object contains valid data. When I call CMSampleBufferSetDataBufferFromAudioBufferList, though, I get kCMSampleBufferError_RequiredParameterMissing (-12731).
-(CMSampleBufferRef)processOutput
{
if(self.offline == NO)
{
return NULL;
}
AudioUnitRenderActionFlags flags = 0;
AudioTimeStamp inTimeStamp;
memset(&inTimeStamp, 0, sizeof(AudioTimeStamp));
inTimeStamp.mFlags = kAudioTimeStampSampleTimeValid;
UInt32 busNumber = 0;
UInt32 numberFrames = 512;
inTimeStamp.mSampleTime = 0;
UInt32 channelCount = 2;
AudioBufferList *bufferList = (AudioBufferList*)malloc(sizeof(AudioBufferList)+sizeof(AudioBuffer)*(channelCount-1));
bufferList->mNumberBuffers = channelCount;
for (int j=0; j<channelCount; j++)
{
AudioBuffer buffer = {0};
buffer.mNumberChannels = 1;
buffer.mDataByteSize = numberFrames*sizeof(SInt32);
buffer.mData = calloc(numberFrames,sizeof(SInt32));
bufferList->mBuffers[j] = buffer;
}
CheckError(AudioUnitRender(outputUnit, &flags, &inTimeStamp, busNumber, numberFrames, bufferList), @"AudioUnitRender outputUnit");
CMSampleBufferRef sampleBufferRef = NULL;
CMFormatDescriptionRef format = NULL;
CMSampleTimingInfo timing = { CMTimeMake(1, 44100), kCMTimeZero, kCMTimeInvalid };
AudioStreamBasicDescription audioFormat = self.audioFormat;
CheckError(CMAudioFormatDescriptionCreate(kCFAllocatorDefault, &audioFormat, 0, NULL, 0, NULL, NULL, &format), @"CMAudioFormatDescriptionCreate");
CheckError(CMSampleBufferCreate(kCFAllocatorDefault, NULL, false, NULL, NULL, format, numberFrames, 1, &timing, 0, NULL, &sampleBufferRef), @"CMSampleBufferCreate");
CheckError(CMSampleBufferSetDataBufferFromAudioBufferList(sampleBufferRef, kCFAllocatorDefault, kCFAllocatorDefault, 0, bufferList), @"CMSampleBufferSetDataBufferFromAudioBufferList");
return sampleBufferRef;
}
static OSStatus playbackCallback(void *inRefCon,
AudioUnitRenderActionFlags *ioActionFlags,
const AudioTimeStamp *inTimeStamp,
UInt32 inBusNumber,
UInt32 inNumberFrames,
AudioBufferList *ioData)
{
int numberOfChannels = ioData->mBuffers[0].mNumberChannels;
SInt16 *outSample = (SInt16 *)ioData->mBuffers[0].mData;
// Zero the output buffer first
memset(outSample, 0, ioData->mBuffers[0].mDataByteSize);
MyAudioPlayer *p = (__bridge MyAudioPlayer *)inRefCon;
if (p.hasBuffer){
int32_t availableBytes;
SInt16 *bufferTail = TPCircularBufferTail([p getBuffer], &availableBytes);
int32_t requestedBytesSize = inNumberFrames * kUnitSize * numberOfChannels;
int bytesToRead = MIN(availableBytes, requestedBytesSize);
memcpy(outSample, bufferTail, bytesToRead);
TPCircularBufferConsume([p getBuffer], bytesToRead);
if (availableBytes <= requestedBytesSize*2){
[p setReadyForMoreBytes];
}
if (availableBytes <= requestedBytesSize) {
p.hasBuffer = NO;
}
}
return noErr;
}
The CMSampleBufferRef I pass in looks valid (below is a dump of the object from the debugger)
CMSampleBuffer 0x7f87d2a03120 retainCount: 1 allocator: 0x103333180
invalid = NO
dataReady = NO
makeDataReadyCallback = 0x0
makeDataReadyRefcon = 0x0
formatDescription = <CMAudioFormatDescription 0x7f87d2a02b20 [0x103333180]> {
mediaType:'soun'
mediaSubType:'lpcm'
mediaSpecific: {
ASBD: {
mSampleRate: 44100.000000
mFormatID: 'lpcm'
mFormatFlags: 0xc2c
mBytesPerPacket: 2
mFramesPerPacket: 1
mBytesPerFrame: 2
mChannelsPerFrame: 1
mBitsPerChannel: 16 }
cookie: {(null)}
ACL: {(null)}
}
extensions: {(null)}
}
sbufToTrackReadiness = 0x0
numSamples = 512
sampleTimingArray[1] = {
{PTS = {0/1 = 0.000}, DTS = {INVALID}, duration = {1/44100 = 0.000}},
}
dataBuffer = 0x0
The buffer list looks like this
Printing description of bufferList:
(AudioBufferList *) bufferList = 0x00007f87d280b0a0
Printing description of bufferList->mNumberBuffers:
(UInt32) mNumberBuffers = 2
Printing description of bufferList->mBuffers:
(AudioBuffer [1]) mBuffers = {
[0] = (mNumberChannels = 1, mDataByteSize = 2048, mData = 0x00007f87d3008c00)
}
Really at a loss here, hoping someone can help. Thanks.
In case it matters, I am debugging this in the iOS 8.3 simulator and the audio is coming from an mp4 that I shot on my iPhone 6 and then saved to my laptop.
I have read the following questions, but still to no avail; things are not working:
How to convert AudioBufferList to CMSampleBuffer?
Converting an AudioBufferList to a CMSampleBuffer Produces Unexpected Results
CMSampleBufferSetDataBufferFromAudioBufferList returning error 12731
core audio offline rendering GenericOutput
UPDATE
I poked around some more and noticed that my AudioBufferList, right before AudioUnitRender runs, looks like this:
bufferList->mNumberBuffers = 2,
bufferList->mBuffers[0].mNumberChannels = 1,
bufferList->mBuffers[0].mDataByteSize = 2048
mDataByteSize is numberFrames*sizeof(SInt32), which is 512 * 4. When I look at the AudioBufferList passed in playbackCallback, the list looks like this:
bufferList->mNumberBuffers = 1,
bufferList->mBuffers[0].mNumberChannels = 1,
bufferList->mBuffers[0].mDataByteSize = 1024
I'm not really sure where that other buffer is going, or where the other 1024 bytes went...
If, when I finish calling Render, I do something like this
AudioBufferList newbuff;
newbuff.mNumberBuffers = 1;
newbuff.mBuffers[0] = bufferList->mBuffers[0];
newbuff.mBuffers[0].mDataByteSize = 1024;
and pass newbuff off to CMSampleBufferSetDataBufferFromAudioBufferList, the error goes away.
If I try setting the size of BufferList to have 1 mNumberBuffers or its mDataByteSize to be numberFrames*sizeof(SInt16) I get a -50 when calling AudioUnitRender
UPDATE 2
I hooked up a render callback so I can inspect the output when I play the sound over the speakers. I noticed that the output that goes to the speakers also has an AudioBufferList with 2 buffers, and that the mDataByteSize during the input callback is 1024 while in the render callback it's 2048, which is the same as I have been seeing when manually calling AudioUnitRender. When I inspect the data in the rendered AudioBufferList, I notice that the bytes in the 2 buffers are the same, which means I can just ignore the second buffer. But I am not sure how to handle the fact that the data is 2048 bytes in size after being rendered instead of the 1024 it's being taken in as. Any ideas on why that could be happening? Is it in a more raw form after going through the audio graph, and is that why the size is doubling?
Sounds like the issue you're dealing with is because of a discrepancy in the number of channels. The reason you're seeing data in blocks of 2048 instead of 1024 is because it is feeding you back two channels (stereo). Check to make sure all of your audio units are properly configured to use mono throughout the entire audio graph, including the Pitch Unit and any audio format descriptions.
One thing to especially beware of is that calls to AudioUnitSetProperty can fail - so be sure to wrap those in CheckError() as well.
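For example, something along these lines when setting the client format on each unit in the graph (a sketch only; `someUnit` is a placeholder, and `CheckError` is assumed to be the same OSStatus-checking helper already used above):

#import <AudioToolbox/AudioToolbox.h>

// Force a mono, 16-bit client format on the unit's input bus and verify the call succeeded.
AudioStreamBasicDescription monoFormat = {0};
monoFormat.mSampleRate       = 44100.0;
monoFormat.mFormatID         = kAudioFormatLinearPCM;
monoFormat.mFormatFlags      = kAudioFormatFlagIsSignedInteger | kAudioFormatFlagIsPacked;
monoFormat.mChannelsPerFrame = 1;
monoFormat.mBitsPerChannel   = 16;
monoFormat.mFramesPerPacket  = 1;
monoFormat.mBytesPerFrame    = 2;   // 16-bit mono
monoFormat.mBytesPerPacket   = 2;
CheckError(AudioUnitSetProperty(someUnit,
                                kAudioUnitProperty_StreamFormat,
                                kAudioUnitScope_Input,
                                0,
                                &monoFormat,
                                sizeof(monoFormat)),
           @"Couldn't set a mono stream format on the unit");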
I'm using kxmovie (an ffmpeg-based video player) as a base for an app, and I'm trying to figure out how the RemoteI/O unit works on iOS when the only thing connected to the device is headphones and we're playing a track with more than 2 channels (say a 6-channel surround track). It seems like it goes with the output channel setting, and the buffer only has 2 channels. Is this because of Core Audio's pull structure? And if so, what is happening to the other channels in the track? Are they being downmixed or simply ignored?
The code for the render callback connected to the remoteio unit is here:
- (BOOL) renderFrames: (UInt32) numFrames
ioData: (AudioBufferList *) ioData
{
NSLog(#"Number of channels in buffer: %lu",ioData->mNumberBuffers);
for (int iBuffer=0; iBuffer < ioData->mNumberBuffers; ++iBuffer) {
memset(ioData->mBuffers[iBuffer].mData, 0, ioData->mBuffers[iBuffer].mDataByteSize);
}
if (_playing && _outputBlock ) {
// Collect data to render from the callbacks
_outputBlock(_outData, numFrames, _numOutputChannels);
// Put the rendered data into the output buffer
if (_numBytesPerSample == 4) // then we've already got floats
{
float zero = 0.0;
for (int iBuffer=0; iBuffer < ioData->mNumberBuffers; ++iBuffer) {
int thisNumChannels = ioData->mBuffers[iBuffer].mNumberChannels;
for (int iChannel = 0; iChannel < thisNumChannels; ++iChannel) {
vDSP_vsadd(_outData+iChannel, _numOutputChannels, &zero, (float *)ioData->mBuffers[iBuffer].mData, thisNumChannels, numFrames);
}
}
}
else if (_numBytesPerSample == 2) // then we need to convert SInt16 -> Float (and also scale)
{
float scale = (float)INT16_MAX;
vDSP_vsmul(_outData, 1, &scale, _outData, 1, numFrames*_numOutputChannels);
for (int iBuffer=0; iBuffer < ioData->mNumberBuffers; ++iBuffer) {
int thisNumChannels = ioData->mBuffers[iBuffer].mNumberChannels;
for (int iChannel = 0; iChannel < thisNumChannels; ++iChannel) {
vDSP_vfix16(_outData+iChannel, _numOutputChannels, (SInt16 *)ioData->mBuffers[iBuffer].mData+iChannel, thisNumChannels, numFrames);
}
}
}
}
return noErr;
}
Thanks!
edit: Here's the code for the ASBD (_outputFormat). It's getting its values straight from the RemoteIO unit. You can also check the whole method file here.
if (checkError(AudioUnitGetProperty(_audioUnit,
kAudioUnitProperty_StreamFormat,
kAudioUnitScope_Input,
0,
&_outputFormat,
&size),
"Couldn't get the hardware output stream format"))
return NO;
_outputFormat.mSampleRate = _samplingRate;
if (checkError(AudioUnitSetProperty(_audioUnit,
kAudioUnitProperty_StreamFormat,
kAudioUnitScope_Input,
0,
&_outputFormat,
size),
"Couldn't set the hardware output stream format")) {
// just warning
}
_numBytesPerSample = _outputFormat.mBitsPerChannel / 8;
_numOutputChannels = _outputFormat.mChannelsPerFrame;
NSLog(#"Current output bytes per sample: %ld", _numBytesPerSample);
NSLog(#"Current output num channels: %ld", _numOutputChannels);
// Slap a render callback on the unit
AURenderCallbackStruct callbackStruct;
callbackStruct.inputProc = renderCallback;
callbackStruct.inputProcRefCon = (__bridge void *)(self);
if (checkError(AudioUnitSetProperty(_audioUnit,
kAudioUnitProperty_SetRenderCallback,
kAudioUnitScope_Input,
0,
&callbackStruct,
sizeof(callbackStruct)),
"Couldn't set the render callback on the audio unit"))
return NO;
I finally found the piece of code that's making it remix channels to stereo. It sets a property in KxAudioManager using the ASBD of the RIO. And then, in KxMovieDecoder.m, it sets ffmpeg options using that same variable. Here's the code:
id<KxAudioManager> audioManager = [KxAudioManager audioManager];
swrContext = swr_alloc_set_opts(NULL,
av_get_default_channel_layout(audioManager.numOutputChannels),
AV_SAMPLE_FMT_S16,
audioManager.samplingRate,
av_get_default_channel_layout(codecCtx->channels),
codecCtx->sample_fmt,
codecCtx->sample_rate,
0,
NULL);
Now it's off to do some reading on how ffmpeg is doing the decoding. Fun times.
I'm using Remote IO to get the audio buffer from PCM, and I want to send the data in real time to a Darwin server over the cellular network (3G). I chose the AAC format because of an article from Fraunhofer called "AAC-ELD based Audio Communication on iOS: A Developer's Guide". The sample code works great: the audio is recorded in LPCM, encoded to AAC-ELD, decoded back to LPCM, and played back immediately, but that is the AAC-ELD (Enhanced Low Delay) format. When I change the format from "kAudioFormatMPEG4AAC_ELD" to "kAudioFormatMPEG4AAC", I can hear the audio for 1 second, then it is stuck for the next 1 second, and the pattern continues. The audio is also twice as fast as reality, meaning sound that lasts 1 second in the real world lasts only 0.5 seconds on playback. I then changed the sample frame size from 512 to 1024; the speed is then normal, but I can hear the audio for 2 seconds and it is stuck for the next 2 seconds, and the pattern continues. I figured out that the AudioConverterFillComplexBuffer function fails for 2 seconds and then works well for the next 2 seconds. I don't know why. Please help. Thanks in advance.
I really didn't change much of the code; I just changed the formatID and the sample frame size from 512 to 1024.
The article is here: http://www.full-hd-voice.com/content/dam/fullhdvoice/documents/iOS-ACE-AP-v2.pdf
1. Global variables
static AudioBuffer g_inputBuffer;
static AudioBuffer g_outputBuffer;
static AudioComponentInstance g_audioUnit;
static AudioUnitElement g_outputBus = 0;
static AudioUnitElement g_inputBus = 1;
static UInt32 g_outChannels = 2;
static UInt32 g_inChannels = 1;
static UInt32 g_frameSize = 1024;
static UInt32 g_inputByteSize = 0;
static UInt32 g_outputByteSize = 0;
static unsigned int g_initialized = 0;
static AACELDEncoder *g_encoder = NULL;
static AACELDDecoder *g_decoder = NULL;
static MagicCookie g_cookie;
/* Structure to keep the encoder configuration */
typedef struct EncoderProperties_
{
Float64 samplingRate;
UInt32 inChannels;
UInt32 outChannels;
UInt32 frameSize;
UInt32 bitrate;
} EncoderProperties;
/* Structure to keep the magic cookie */
typedef struct MagicCookie_
{
void *data;
int byteSize;
} MagicCookie;
/* Structure to keep one encoded AU */
typedef struct EncodedAudioBuffer_
{
UInt32 mChannels;
UInt32 mDataBytesSize;
void *data;
} EncodedAudioBuffer;
typedef struct DecoderProperties_
{
Float64 samplingRate;
UInt32 inChannels;
UInt32 outChannels;
UInt32 frameSize;
} DecoderProperties;
2. Initialise the audio session, audio unit, and encoder & decoder
void InitAudioUnit()
{
/* Calculate the required input and output buffer sizes */
g_inputByteSize = g_frameSize * g_inChannels * sizeof(AudioSampleType);
g_outputByteSize = g_frameSize * g_outChannels * sizeof(AudioSampleType);
/* Initialize the I/O buffers */
g_inputBuffer.mNumberChannels = g_inChannels;
g_inputBuffer.mDataByteSize = g_inputByteSize;
if (g_initialized)
free(g_inputBuffer.mData);
g_inputBuffer.mData = malloc(sizeof(unsigned char)*g_inputByteSize);
memset(g_inputBuffer.mData, 0, g_inputByteSize);
g_outputBuffer.mNumberChannels = g_outChannels;
g_outputBuffer.mDataByteSize = g_outputByteSize;
if (g_initialized)
free(g_outputBuffer.mData);
g_outputBuffer.mData = malloc(sizeof(unsigned char)*g_outputByteSize);
memset(g_outputBuffer.mData, 0, g_outputByteSize);
g_initialized = 1;
/* Initialize the audio session */
AudioSessionInitialize(NULL, NULL, interruptionListener, NULL);
/* Activate the audio session */
AudioSessionSetActive(TRUE);
/* Enable recording for full-duplex I/O */
UInt32 audioCategory = kAudioSessionCategory_PlayAndRecord;
AudioSessionSetProperty(kAudioSessionProperty_AudioCategory,
sizeof(audioCategory),
&audioCategory);
/* Set the route change listener */
AudioSessionAddPropertyListener(kAudioSessionProperty_AudioRouteChange,
routeChangeListener,
NULL);
/* Set the preferred buffer time */
Float32 preferredBufferTime = 1024.0 / 44100.0;
AudioSessionSetProperty(kAudioSessionProperty_PreferredHardwareIOBufferDuration,
sizeof(preferredBufferTime),
&preferredBufferTime);
/* Setup the audio component for I/O */
AudioComponentDescription componentDesc;
memset(&componentDesc, 0, sizeof(componentDesc));
componentDesc.componentType = kAudioUnitType_Output;
componentDesc.componentSubType = kAudioUnitSubType_RemoteIO;
componentDesc.componentManufacturer = kAudioUnitManufacturer_Apple;
/* Find and create the audio component */
AudioComponent auComponent = AudioComponentFindNext(NULL, &componentDesc);
AudioComponentInstanceNew(auComponent, &g_audioUnit);
/* Enable the audio input */
UInt32 enableAudioInput = 1;
AudioUnitSetProperty(g_audioUnit,
kAudioOutputUnitProperty_EnableIO,
kAudioUnitScope_Input,
g_inputBus,
&enableAudioInput,
sizeof(enableAudioInput));
/* Setup the render callback */
AURenderCallbackStruct renderCallbackInfo;
renderCallbackInfo.inputProc = audioUnitRenderCallback;
renderCallbackInfo.inputProcRefCon = NULL;
AudioUnitSetProperty(g_audioUnit,
kAudioUnitProperty_SetRenderCallback,
kAudioUnitScope_Input,
g_outputBus,
&renderCallbackInfo,
sizeof(renderCallbackInfo));
/* Set the input and output audio stream formats */
AudioStreamBasicDescription audioFormat;
audioFormat.mSampleRate = 44100;
audioFormat.mFormatID = kAudioFormatLinearPCM;
audioFormat.mFormatFlags = kAudioFormatFlagIsSignedInteger | kAudioFormatFlagIsPacked;
audioFormat.mFramesPerPacket = 1;
audioFormat.mBitsPerChannel = 8 * sizeof(AudioSampleType);
audioFormat.mChannelsPerFrame = g_inChannels;
audioFormat.mBytesPerFrame = audioFormat.mChannelsPerFrame * sizeof(AudioSampleType);
audioFormat.mBytesPerPacket = audioFormat.mBytesPerFrame;
AudioUnitSetProperty(g_audioUnit,
kAudioUnitProperty_StreamFormat,
kAudioUnitScope_Output,
g_inputBus,
&audioFormat,
sizeof(audioFormat));
audioFormat.mChannelsPerFrame = g_outChannels;
audioFormat.mBytesPerFrame = audioFormat.mChannelsPerFrame * sizeof(AudioSampleType);
audioFormat.mBytesPerPacket = audioFormat.mBytesPerFrame;
AudioUnitSetProperty(g_audioUnit,
kAudioUnitProperty_StreamFormat,
kAudioUnitScope_Input,
g_outputBus,
&audioFormat,
sizeof(audioFormat));
/* Initialize the ELD codec */
InitAACELD();
}
void InitAACELD()
{
EncoderProperties p;
p.samplingRate = 44100.0;
p.inChannels = 1;
p.outChannels = 1;
p.frameSize = 1024;
p.bitrate = 32000;
g_encoder = CreateAACELDEncoder();
InitAACELDEncoder(g_encoder, p, &g_cookie);
DecoderProperties dp;
dp.samplingRate = 44100.0;
dp.inChannels = 1;
dp.outChannels = 2;
dp.frameSize = p.frameSize;
g_decoder = CreateAACELDDecoder();
InitAACELDDecoder(g_decoder, dp, &g_cookie);
}
int InitAACELDEncoder(AACELDEncoder *encoder, EncoderProperties props, MagicCookie *outCookie)
{
/* Copy the provided encoder properties */
encoder->inChannels = props.inChannels;
encoder->outChannels = props.outChannels;
encoder->samplingRate = props.samplingRate;
encoder->frameSize = props.frameSize;
encoder->bitrate = props.bitrate;
/* Convenience macro to fill out the ASBD structure.
Available only when __cplusplus is defined! */
FillOutASBDForLPCM(encoder->sourceFormat,
encoder->samplingRate,
encoder->inChannels,
8*sizeof(AudioSampleType),
8*sizeof(AudioSampleType),
false,
false);
/* Set the format parameters for AAC-ELD encoding. */
encoder->destinationFormat.mFormatID = kAudioFormatMPEG4AAC;
encoder->destinationFormat.mChannelsPerFrame = encoder->outChannels;
encoder->destinationFormat.mSampleRate = encoder->samplingRate;
/* Get the size of the formatinfo structure */
UInt32 dataSize = sizeof(encoder->destinationFormat);
/* Request the property from CoreAudio */
AudioFormatGetProperty(kAudioFormatProperty_FormatInfo,
0,
NULL,
&dataSize,
&(encoder->destinationFormat));
/* Create a new audio converter */
AudioConverterNew(&(encoder->sourceFormat),
&(encoder->destinationFormat),
&(encoder->audioConverter));
if (!encoder->audioConverter)
{
return -1;
}
/* Try to set the desired output bitrate */
UInt32 outputBitrate = encoder->bitrate;
dataSize = sizeof(outputBitrate);
AudioConverterSetProperty(encoder->audioConverter,
kAudioConverterEncodeBitRate,
dataSize,
&outputBitrate);
/* Query the maximum possible output packet size */
if (encoder->destinationFormat.mBytesPerPacket == 0)
{
UInt32 maxOutputSizePerPacket = 0;
dataSize = sizeof(maxOutputSizePerPacket);
AudioConverterGetProperty(encoder->audioConverter,
kAudioConverterPropertyMaximumOutputPacketSize,
&dataSize,
&maxOutputSizePerPacket);
encoder->maxOutputPacketSize = maxOutputSizePerPacket;
}
else
{
encoder->maxOutputPacketSize = encoder->destinationFormat.mBytesPerPacket;
}
/* Fetch the Magic Cookie from the ELD implementation */
UInt32 cookieSize = 0;
AudioConverterGetPropertyInfo(encoder->audioConverter,
kAudioConverterCompressionMagicCookie,
&cookieSize,
NULL);
char* cookie = (char*)malloc(cookieSize*sizeof(char));
AudioConverterGetProperty(encoder->audioConverter,
kAudioConverterCompressionMagicCookie,
&cookieSize,
cookie);
outCookie->data = cookie;
outCookie->byteSize = cookieSize;
/* Prepare the temporary AU buffer for encoding */
encoder->encoderBuffer = malloc(encoder->maxOutputPacketSize);
return 0;
}
int InitAACELDDecoder(AACELDDecoder* decoder, DecoderProperties props, const MagicCookie *cookie)
{
/* Copy the provided decoder properties */
decoder->inChannels = props.inChannels;
decoder->outChannels = props.outChannels;
decoder->samplingRate = props.samplingRate;
decoder->frameSize = props.frameSize;
/* We will decode to LPCM */
FillOutASBDForLPCM(decoder->destinationFormat,
decoder->samplingRate,
decoder->outChannels,
8*sizeof(AudioSampleType),
8*sizeof(AudioSampleType),
false,
false);
/* We will decode from AAC-ELD, with the same sampling rate but possibly a different channel configuration */
decoder->sourceFormat.mFormatID = kAudioFormatMPEG4AAC;
decoder->sourceFormat.mChannelsPerFrame = decoder->inChannels;
decoder->sourceFormat.mSampleRate = decoder->samplingRate;
/* Get the rest of the format info */
UInt32 dataSize = sizeof(decoder->sourceFormat);
AudioFormatGetProperty(kAudioFormatProperty_FormatInfo,
0,
NULL,
&dataSize,
&(decoder->sourceFormat));
/* Create a new AudioConverter instance for the conversion AAC-ELD -> LPCM */
AudioConverterNew(&(decoder->sourceFormat),
&(decoder->destinationFormat),
&(decoder->audioConverter));
if (!decoder->audioConverter)
{
return -1;
}
/* Check for variable output packet size */
if (decoder->destinationFormat.mBytesPerPacket == 0)
{
UInt32 maxOutputSizePerPacket = 0;
dataSize = sizeof(maxOutputSizePerPacket);
AudioConverterGetProperty(decoder->audioConverter,
kAudioConverterPropertyMaximumOutputPacketSize,
&dataSize,
&maxOutputSizePerPacket);
decoder->maxOutputPacketSize = maxOutputSizePerPacket;
}
else
{
decoder->maxOutputPacketSize = decoder->destinationFormat.mBytesPerPacket;
}
/* Set the corresponding encoder cookie */
AudioConverterSetProperty(decoder->audioConverter,
kAudioConverterDecompressionMagicCookie,
cookie->byteSize,
cookie->data);
return 0;
}
3. Render callback and encoder & decoder
static OSStatus audioUnitRenderCallback(void *inRefCon,
AudioUnitRenderActionFlags *ioActionFlags,
const AudioTimeStamp *inTimeStamp,
UInt32 inBusNumber,
UInt32 inNumberOfFrames,
AudioBufferList *ioData)
{
/* Get the input samples */
AudioUnitRender(g_audioUnit,
ioActionFlags,
inTimeStamp,
g_inputBus,
inNumberOfFrames,
ioData);
/* Copy to global input buffer */
memcpy(g_inputBuffer.mData, ioData->mBuffers[0].mData, g_inputBuffer.mDataByteSize);
/* Encode with AudioConverter */
EncodedAudioBuffer encodedAU;
EncodeAACELD(g_encoder, &g_inputBuffer, &encodedAU);
/* Decode with AudioConverter */
g_outputBuffer.mDataByteSize = g_outputByteSize;
DecodeAACELD(g_decoder, &encodedAU, &g_outputBuffer);
/* Copy output samples to Audio Units' IO buffer */
ioData->mBuffers[0].mNumberChannels = g_outputBuffer.mNumberChannels;
ioData->mBuffers[0].mDataByteSize = g_outputBuffer.mDataByteSize;
memcpy(ioData->mBuffers[0].mData, g_outputBuffer.mData, g_outputBuffer.mDataByteSize);
return noErr;
}
static OSStatus encodeProc(AudioConverterRef inAudioConverter,
UInt32 *ioNumberDataPackets,
AudioBufferList *ioData,
AudioStreamPacketDescription **outDataPacketDescription,
void *inUserData)
{
/* Get the current encoder state from the inUserData parameter */
AACELDEncoder *encoder = (AACELDEncoder*) inUserData;
/* Compute the maximum number of output packets */
UInt32 maxPackets = encoder->bytesToEncode / encoder->sourceFormat.mBytesPerPacket;
if (*ioNumberDataPackets > maxPackets)
{
/* If requested number of packets is bigger, adjust */
*ioNumberDataPackets = maxPackets;
}
/* Check to make sure we have only one audio buffer */
if (ioData->mNumberBuffers != 1)
{
return 1;
}
/* Set the data to be encoded */
ioData->mBuffers[0].mDataByteSize = encoder->currentSampleBuffer->mDataByteSize;
ioData->mBuffers[0].mData = encoder->currentSampleBuffer->mData;
ioData->mBuffers[0].mNumberChannels = encoder->currentSampleBuffer->mNumberChannels;
if (outDataPacketDescription)
{
*outDataPacketDescription = NULL;
}
if (encoder->bytesToEncode == 0)
{
// We are currently out of data but want to keep on processing
// See Apple Technical Q&A QA1317
return 1;
}
encoder->bytesToEncode = 0;
return noErr;
}
int EncodeAACELD(AACELDEncoder *encoder, AudioBuffer *inSamples, EncodedAudioBuffer *outData)
{
/* Clear the encoder buffer */
memset(encoder->encoderBuffer, 0, sizeof(encoder->maxOutputPacketSize));
/* Keep a reference to the samples that should be encoded */
encoder->currentSampleBuffer = inSamples;
encoder->bytesToEncode = inSamples->mDataByteSize;
UInt32 numOutputDataPackets = 1;
AudioStreamPacketDescription outPacketDesc[1];
/* Create the output buffer list */
AudioBufferList outBufferList;
outBufferList.mNumberBuffers = 1;
outBufferList.mBuffers[0].mNumberChannels = encoder->outChannels;
outBufferList.mBuffers[0].mDataByteSize = encoder->maxOutputPacketSize;
outBufferList.mBuffers[0].mData = encoder->encoderBuffer;
/* Start the encoding process */
OSStatus status = AudioConverterFillComplexBuffer(encoder->audioConverter,
encodeProc,
encoder,
&numOutputDataPackets,
&outBufferList,
outPacketDesc);
if (status != noErr)
{
return -1;
}
/* Set the output data */
outData->mChannels = encoder->outChannels;
outData->data = encoder->encoderBuffer;
outData->mDataBytesSize = outPacketDesc[0].mDataByteSize;
return 0;
}
static OSStatus decodeProc(AudioConverterRef inAudioConverter,
UInt32 *ioNumberDataPackets,
AudioBufferList *ioData,
AudioStreamPacketDescription **outDataPacketDescription,
void *inUserData)
{
/* Get the current decoder state from the inUserData parameter */
AACELDDecoder *decoder = (AACELDDecoder*)inUserData;
/* Compute the maximum number of output packets */
UInt32 maxPackets = decoder->bytesToDecode / decoder->maxOutputPacketSize;
if (*ioNumberDataPackets > maxPackets)
{
/* If requested number of packets is bigger, adjust */
*ioNumberDataPackets = maxPackets;
}
/* If there is data to be decoded, set it accordingly */
if (decoder->bytesToDecode)
{
ioData->mBuffers[0].mData = decoder->decodeBuffer;
ioData->mBuffers[0].mDataByteSize = decoder->bytesToDecode;
ioData->mBuffers[0].mNumberChannels = decoder->inChannels;
}
/* And set the packet description */
if (outDataPacketDescription)
{
decoder->packetDesc[0].mStartOffset = 0;
decoder->packetDesc[0].mVariableFramesInPacket = 0;
decoder->packetDesc[0].mDataByteSize = decoder->bytesToDecode;
(*outDataPacketDescription) = decoder->packetDesc;
}
if (decoder->bytesToDecode == 0)
{
// We are currently out of data but want to keep on processing
// See Apple Technical Q&A QA1317
return 1;
}
decoder->bytesToDecode = 0;
return noErr;
}
int DecodeAACELD(AACELDDecoder* decoder, EncodedAudioBuffer *inData, AudioBuffer *outSamples)
{
OSStatus status = noErr;
/* Keep a reference to the samples that should be decoded */
decoder->decodeBuffer = inData->data;
decoder->bytesToDecode = inData->mDataBytesSize;
UInt32 outBufferMaxSizeBytes = decoder->frameSize * decoder->outChannels * sizeof(AudioSampleType);
assert(outSamples->mDataByteSize <= outBufferMaxSizeBytes);
UInt32 numOutputDataPackets = outBufferMaxSizeBytes / decoder->maxOutputPacketSize;
/* Output packet stream are 512 LPCM samples */
AudioStreamPacketDescription outputPacketDesc[1024];
/* Create the output buffer list */
AudioBufferList outBufferList;
outBufferList.mNumberBuffers = 1;
outBufferList.mBuffers[0].mNumberChannels = decoder->outChannels;
outBufferList.mBuffers[0].mDataByteSize = outSamples->mDataByteSize;
outBufferList.mBuffers[0].mData = outSamples->mData;
/* Start the decoding process */
status = AudioConverterFillComplexBuffer(decoder->audioConverter,
decodeProc,
decoder,
&numOutputDataPackets,
&outBufferList,
outputPacketDesc);
if (noErr != status)
{
return -1;
}
return 0;
}