I was trying to implement a function to stretch the sound speed, without changing it's pitch and time scale.
I try the method to set the frequency of channel to slow of fast the speed.
Then use FMOD_DSP_PITCHSHIFT to correct the pitch sounds as default.
I was using wav format sound file for test and build function.
I'm trying to intergrate product resource which sound file was encoded as MP3.
PITCHSHIFT DSP doesn't work at MP3 sound channel. console log looks fine with no exception & error.
Same project and setting everything works fine in iOS Simulator.
After some research and experiments, results indicates even m4a works fine at iOS.
I wonder is this some kind of bug? or I missed something at configuration.
sample code was based on FMOD Sample project Play stream.
`/*==============================================================================
Play Stream Example
Copyright (c), Firelight Technologies Pty, Ltd 2004-2015.
This example shows how to simply play a stream such as an MP3 or WAV. The stream
behaviour is achieved by specifying FMOD_CREATESTREAM in the call to
System::createSound. This makes FMOD decode the file in realtime as it plays,
instead of loading it all at once which uses far less memory in exchange for a
small runtime CPU hit.
==============================================================================*/
#include "fmod.hpp"
#include "common.h"
int FMOD_Main()
{
FMOD::System *system;
FMOD::Sound *sound, *sound_to_play;
FMOD::Channel *channel = 0;
FMOD_RESULT result;
FMOD::DSP * pitch_shift;
unsigned int version;
void *extradriverdata = 0;
int numsubsounds;
Common_Init(&extradriverdata);
/*
Create a System object and initialize.
*/
result = FMOD::System_Create(&system);
ERRCHECK(result);
result = system->getVersion(&version);
ERRCHECK(result);
if (version < FMOD_VERSION)
{
Common_Fatal("FMOD lib version %08x doesn't match header version %08x", version, FMOD_VERSION);
}
result = system->init(32, FMOD_INIT_NORMAL, extradriverdata);
ERRCHECK(result);
result = system->createDSPByType(FMOD_DSP_TYPE_PITCHSHIFT, &pitch_shift);
ERRCHECK(result);
/*
This example uses an FSB file, which is a preferred pack format for fmod containing multiple sounds.
This could just as easily be exchanged with a wav/mp3/ogg file for example, but in this case you wouldnt need to call getSubSound.
Because getNumSubSounds is called here the example would work with both types of sound file (packed vs single).
*/
result = system->createSound(Common_MediaPath("aaa.m4a"), FMOD_LOOP_NORMAL | FMOD_2D, 0, &sound);
ERRCHECK(result);
result = sound->getNumSubSounds(&numsubsounds);
ERRCHECK(result);
if (numsubsounds)
{
sound->getSubSound(0, &sound_to_play);
ERRCHECK(result);
}
else
{
sound_to_play = sound;
}
/*
Play the sound.
*/
result = system->playSound(sound_to_play, 0, false, &channel);
ERRCHECK(result);
result = channel->addDSP(0, pitch_shift);
ERRCHECK(result);
float pitch = 1.f;
result = pitch_shift->setParameterFloat(FMOD_DSP_PITCHSHIFT_PITCH, pitch);
ERRCHECK(result);
pitch_shift->setActive(true);
ERRCHECK(result);
float defaultFrequency;
result = channel->getFrequency(&defaultFrequency);
ERRCHECK(result);
/*
Main loop.
*/
do
{
Common_Update();
if (Common_BtnPress(BTN_ACTION1))
{
bool paused;
result = channel->getPaused(&paused);
ERRCHECK(result);
result = channel->setPaused(!paused);
ERRCHECK(result);
}
if (Common_BtnPress(BTN_DOWN)) {
char valuestr;
int valuestrlen;
pitch_shift->getParameterFloat(FMOD_DSP_PITCHSHIFT_PITCH, &pitch, &valuestr, valuestrlen);
pitch+=0.1f;
pitch = pitch>2.0f?2.0f:pitch;
pitch_shift->setParameterFloat(FMOD_DSP_PITCHSHIFT_PITCH, pitch);
channel->setFrequency(defaultFrequency/pitch);
}
if (Common_BtnPress(BTN_UP)) {
char valuestr;
int valuestrlen;
pitch_shift->getParameterFloat(FMOD_DSP_PITCHSHIFT_PITCH, &pitch, &valuestr, valuestrlen);
pitch-=0.1f;
pitch = pitch<0.5f?0.5f:pitch;
pitch_shift->setParameterFloat(FMOD_DSP_PITCHSHIFT_PITCH, pitch);
channel->setFrequency(defaultFrequency/pitch);
}
result = system->update();
ERRCHECK(result);
{
unsigned int ms = 0;
unsigned int lenms = 0;
bool playing = false;
bool paused = false;
if (channel)
{
result = channel->isPlaying(&playing);
if ((result != FMOD_OK) && (result != FMOD_ERR_INVALID_HANDLE))
{
ERRCHECK(result);
}
result = channel->getPaused(&paused);
if ((result != FMOD_OK) && (result != FMOD_ERR_INVALID_HANDLE))
{
ERRCHECK(result);
}
result = channel->getPosition(&ms, FMOD_TIMEUNIT_MS);
if ((result != FMOD_OK) && (result != FMOD_ERR_INVALID_HANDLE))
{
ERRCHECK(result);
}
result = sound_to_play->getLength(&lenms, FMOD_TIMEUNIT_MS);
if ((result != FMOD_OK) && (result != FMOD_ERR_INVALID_HANDLE))
{
ERRCHECK(result);
}
}
Common_Draw("==================================================");
Common_Draw("Play Stream Example.");
Common_Draw("Copyright (c) Firelight Technologies 2004-2015.");
Common_Draw("==================================================");
Common_Draw("");
Common_Draw("Press %s to toggle pause", Common_BtnStr(BTN_ACTION1));
Common_Draw("Press %s to quit", Common_BtnStr(BTN_QUIT));
Common_Draw("");
Common_Draw("Time %02d:%02d:%02d/%02d:%02d:%02d : %s", ms / 1000 / 60, ms / 1000 % 60, ms / 10 % 100, lenms / 1000 / 60, lenms / 1000 % 60, lenms / 10 % 100, paused ? "Paused " : playing ? "Playing" : "Stopped");
Common_Draw("Pitch %02f",pitch);
}
Common_Sleep(50);
} while (!Common_BtnPress(BTN_QUIT));
/*
Shut down
*/
result = sound->release(); /* Release the parent, not the sound that was retrieved with getSubSound. */
ERRCHECK(result);
result = system->close();
ERRCHECK(result);
result = system->release();
ERRCHECK(result);
Common_Close();
return 0;
}
`
After some more experiments , i can approach the destination through switch sound file format to m4a on iOS Device.
MP3 still not working .
Related
I am trying to understand one 4 channel mic-array code provided by ST(AMicArray_Microphones_Streaming).
In the code, the PCM buffer data is sent on the USB via this function. In this code, I actually want to do some processing on the received data and then I want to send it on USB. My question is how to extract the raw data and process them?
This topic seems very broad to a beginner person if someone can help me with some start material or guidelines would be appreciated.
//PCMSamples = AUDIO_IN_SAMPLING_FREQUENCY/1000*AUDIO_IN_CHANNELS;
uint8_t USBD_AUDIO_Data_Transfer(USBD_HandleTypeDef *pdev, int16_t * audioData, uint16_t PCMSamples)
{
USBD_AUDIO_HandleTypeDef *haudio;
haudio = (USBD_AUDIO_HandleTypeDef *)pdev->pClassData;
if(haudioInstance.state==STATE_USB_WAITING_FOR_INIT){
return USBD_BUSY;
}
uint16_t dataAmount = PCMSamples * 2; /*Bytes*/
uint16_t true_dim = haudio->buffer_length;
uint16_t current_data_Amount = haudio->dataAmount;
uint16_t packet_dim = haudio->paketDimension;
if(haudio->state==STATE_USB_REQUESTS_STARTED || current_data_Amount!=dataAmount){
/*USB parameters definition, based on the amount of data passed*/
haudio->dataAmount=dataAmount;
uint16_t wr_rd_offset = (AUDIO_IN_PACKET_NUM/2) * dataAmount / packet_dim;
haudio->wr_ptr=wr_rd_offset * packet_dim;
haudio->rd_ptr = 0;
haudio->upper_treshold = wr_rd_offset + 1;
haudio->lower_treshold = wr_rd_offset - 1;
haudio->buffer_length = (packet_dim * (dataAmount / packet_dim) * AUDIO_IN_PACKET_NUM);
/*Memory allocation for data buffer, depending (also) on data amount passed to the transfer function*/
if(haudio->buffer != NULL)
{
USBD_free(haudio->buffer);
}
haudio->buffer = USBD_malloc(haudio->buffer_length + haudio->dataAmount);
if(haudio->buffer == NULL)
{
return USBD_FAIL;
}
memset(haudio->buffer,0,(haudio->buffer_length + haudio->dataAmount));
haudio->state=STATE_USB_BUFFER_WRITE_STARTED;
}else if(haudio->state==STATE_USB_BUFFER_WRITE_STARTED){
if(haudio->timeout++==TIMEOUT_VALUE){
haudio->state=STATE_USB_IDLE;
((USBD_AUDIO_ItfTypeDef *)pdev->pUserData)->Stop();
haudio->timeout=0;
}
memcpy((uint8_t * )&haudio->buffer[haudio->wr_ptr], (uint8_t *)(audioData), dataAmount);
haudio->wr_ptr += dataAmount;
haudio->wr_ptr = haudio->wr_ptr % (true_dim);
if((haudio->wr_ptr-dataAmount) == 0){
memcpy((uint8_t *)(((uint8_t *)haudio->buffer)+true_dim),(uint8_t *)haudio->buffer, dataAmount);
}
}
return USBD_OK;
}
I am guessing you are using the X-Cube Audio library from ST, if so the
AudioProcess()
function in the Core/Src/audio_application.c file matches your needs.
void AudioProcess(void)
{
if (CCA02M2_AUDIO_IN_PDMToPCM(CCA02M2_AUDIO_INSTANCE,(uint16_t * )PDM_Buffer,(uint16_t *)PCM_Buffer) != BSP_ERROR_NONE)
{
Error_Handler();
}
Send_Audio_to_USB((int16_t *)PCM_Buffer, (AUDIO_IN_SAMPLING_FREQUENCY/1000)*AUDIO_IN_CHANNELS * N_MS);
}
The PDM and PCM data are available here for processing.
ST provides video tutorial on Audio acquisition here. You can refer their channel for more information.
My app does huge data processing on audio coming from the mic input.
In order to get a "demo mode", I want to do the same thing based on a local .caf audio file.
I managed to get the audio file.
Now I am trying to use ExtAudioFileRead to read the .caf file and then do the data processing.
void readFile()
{
OSStatus err = noErr;
// Audio file
NSURL *path = [[NSBundle mainBundle] URLForResource:#"output" withExtension:#"caf"];
ExtAudioFileOpenURL((__bridge CFURLRef)path, &audio->audiofile);
assert(audio->audiofile);
// File's format.
AudioStreamBasicDescription fileFormat;
UInt32 size = sizeof(fileFormat);
err = ExtAudioFileGetProperty(audio->audiofile, kExtAudioFileProperty_FileDataFormat, &size, &fileFormat);
// tell the ExtAudioFile API what format we want samples back in
//bzero(&audio->clientFormat, sizeof(audio->clientFormat));
audio->clientFormat.mSampleRate = SampleRate;
audio->clientFormat.mFormatID = kAudioFormatLinearPCM;
audio->clientFormat.mFormatFlags = kAudioFormatFlagIsSignedInteger | kAudioFormatFlagIsPacked;
audio->clientFormat.mFramesPerPacket = 1;
audio->clientFormat.mChannelsPerFrame = 1;
audio->clientFormat.mBitsPerChannel = 16;//sizeof(AudioSampleType) * 8;
audio->clientFormat.mBytesPerPacket = 2 * audio->clientFormat.mChannelsPerFrame;
audio->clientFormat.mBytesPerFrame = 2 * audio->clientFormat.mChannelsPerFrame;
err = ExtAudioFileSetProperty(audio->audiofile, kExtAudioFileProperty_ClientDataFormat, sizeof(audio->clientFormat), &audio->clientFormat);
// find out how many frames we need to read
SInt64 numFrames = 0;
size = sizeof(numFrames);
err = ExtAudioFileGetProperty(audio->audiofile, kExtAudioFileProperty_FileLengthFrames, &size, &numFrames);
// create the buffers for reading in data
AudioBufferList *bufferList = malloc(sizeof(AudioBufferList) + sizeof(AudioBuffer) * (audio->clientFormat.mChannelsPerFrame - 1));
bufferList->mNumberBuffers = audio->clientFormat.mChannelsPerFrame;
for (int ii=0; ii < bufferList->mNumberBuffers; ++ii)
{
bufferList->mBuffers[ii].mDataByteSize = sizeof(float) * (int)numFrames;
bufferList->mBuffers[ii].mNumberChannels = 1;
bufferList->mBuffers[ii].mData = malloc(bufferList->mBuffers[ii].mDataByteSize);
}
UInt32 maxReadFrames = 1024;
UInt32 rFrames = (UInt32)numFrames;
while(rFrames > 0)
{
UInt32 framesToRead = (maxReadFrames > rFrames) ? rFrames : maxReadFrames;
err = ExtAudioFileRead(audio->audiofile, &framesToRead, bufferList);
[audio processAudio:bufferList];
if (rFrames % SampleRate == 0)
[audio realtimeUpdate:nil];
rFrames = rFrames - maxReadFrames;
}
// Close the file
ExtAudioFileDispose(audio->audiofile);
// destroy the buffers
for (int ii=0; ii < bufferList->mNumberBuffers; ++ii)
{
free(bufferList->mBuffers[ii].mData);
}
free(bufferList);
bufferList = NULL;
}
There is clearly something that i did not understand or that I am doing wrong with ExtAudioFileRead because this code does not work at all. I have two main problems :
The file is played instantaneously. I mean that 44'100 samples are clearly not equal to 1 second. My 3 minutes audio file processing is done in a few seconds...
During the processing, I need to update the UI. So I have a few dispatch_sync in processaudio and realtimeUpdate. This seems to be really not appreciated by ExtAudioFileRead and it freezes.
Thanks for you help.
The code you wrote is just reading samples from the file and then calling processAudio. This will be done as fast as possible. As soon as processAudio is finished the next batch of samples is read and processAudio is called again. You shouldn't assume that reading from an audio file (which is a low level and non blocking os call) takes the same time the audio read would take to play.
If you want to process the audio in the file according to the sample rate you should probably use an AUFilePlayer audio unit. This can play back the file at the right speed and you can use a callback to process the samples in real audio time instead of "as fast as possible".
I'm strongly following this Xamarin sample (based on this Apple sample) to convert a LinearPCM file to an AAC file.
The sample works great, but implemented in my project, the FillComplexBuffer method returns error -50 and the InputData event is not triggered once, thus nothing is converted.
The error only appears when testing on a device. When testing on the emulator, everything goes great and I get a good encoded AAC file at the end.
I tried a lot of things today, and I don't see any difference between my code and the sample code. Do you have any idea where this may come from?
I don't know if this is in anyway related to Xamarin, it doesn't seem so since the Xamarin sample works great.
Here's the relevant part of my code:
protected void Encode(string path)
{
// In class setup. File at TempWavFilePath has DecodedFormat as format.
//
// DecodedFormat = AudioStreamBasicDescription.CreateLinearPCM();
// AudioStreamBasicDescription encodedFormat = new AudioStreamBasicDescription()
// {
// Format = AudioFormatType.MPEG4AAC,
// SampleRate = DecodedFormat.SampleRate,
// ChannelsPerFrame = DecodedFormat.ChannelsPerFrame,
// };
// AudioStreamBasicDescription.GetFormatInfo (ref encodedFormat);
// EncodedFormat = encodedFormat;
// Setup converter
AudioStreamBasicDescription inputFormat = DecodedFormat;
AudioStreamBasicDescription outputFormat = EncodedFormat;
AudioConverterError converterCreateError;
AudioConverter converter = AudioConverter.Create(inputFormat, outputFormat, out converterCreateError);
if (converterCreateError != AudioConverterError.None)
{
Console.WriteLine("Converter creation error: " + converterCreateError);
}
converter.EncodeBitRate = 192000; // AAC 192kbps
// get the actual formats back from the Audio Converter
inputFormat = converter.CurrentInputStreamDescription;
outputFormat = converter.CurrentOutputStreamDescription;
/*** INPUT ***/
AudioFile inputFile = AudioFile.OpenRead(NSUrl.FromFilename(TempWavFilePath));
// init buffer
const int inputBufferBytesSize = 32768;
IntPtr inputBufferPtr = Marshal.AllocHGlobal(inputBufferBytesSize);
// calc number of packets per read
int inputSizePerPacket = inputFormat.BytesPerPacket;
int inputBufferPacketSize = inputBufferBytesSize / inputSizePerPacket;
AudioStreamPacketDescription[] inputPacketDescriptions = null;
// init position
long inputFilePosition = 0;
// define input delegate
converter.InputData += delegate(ref int numberDataPackets, AudioBuffers data, ref AudioStreamPacketDescription[] dataPacketDescription)
{
// how much to read
if (numberDataPackets > inputBufferPacketSize)
{
numberDataPackets = inputBufferPacketSize;
}
// read from the file
int outNumBytes;
AudioFileError readError = inputFile.ReadPackets(false, out outNumBytes, inputPacketDescriptions, inputFilePosition, ref numberDataPackets, inputBufferPtr);
if (readError != 0)
{
Console.WriteLine("Read error: " + readError);
}
// advance input file packet position
inputFilePosition += numberDataPackets;
// put the data pointer into the buffer list
data.SetData(0, inputBufferPtr, outNumBytes);
// add packet descriptions if required
if (dataPacketDescription != null)
{
if (inputPacketDescriptions != null)
{
dataPacketDescription = inputPacketDescriptions;
}
else
{
dataPacketDescription = null;
}
}
return AudioConverterError.None;
};
/*** OUTPUT ***/
// create the destination file
var outputFile = AudioFile.Create (NSUrl.FromFilename(path), AudioFileType.M4A, outputFormat, AudioFileFlags.EraseFlags);
// init buffer
const int outputBufferBytesSize = 32768;
IntPtr outputBufferPtr = Marshal.AllocHGlobal(outputBufferBytesSize);
AudioBuffers buffers = new AudioBuffers(1);
// calc number of packet per write
int outputSizePerPacket = outputFormat.BytesPerPacket;
AudioStreamPacketDescription[] outputPacketDescriptions = null;
if (outputSizePerPacket == 0) {
// if the destination format is VBR, we need to get max size per packet from the converter
outputSizePerPacket = (int)converter.MaximumOutputPacketSize;
// allocate memory for the PacketDescription structures describing the layout of each packet
outputPacketDescriptions = new AudioStreamPacketDescription [outputBufferBytesSize / outputSizePerPacket];
}
int outputBufferPacketSize = outputBufferBytesSize / outputSizePerPacket;
// init position
long outputFilePosition = 0;
long totalOutputFrames = 0; // used for debugging
// write magic cookie if necessary
if (converter.CompressionMagicCookie != null && converter.CompressionMagicCookie.Length != 0)
{
outputFile.MagicCookie = converter.CompressionMagicCookie;
}
// loop to convert data
Console.WriteLine ("Converting...");
while (true)
{
// create buffer
buffers[0] = new AudioBuffer()
{
NumberChannels = outputFormat.ChannelsPerFrame,
DataByteSize = outputBufferBytesSize,
Data = outputBufferPtr
};
int writtenPackets = outputBufferPacketSize;
// LET'S CONVERT (it's about time...)
AudioConverterError converterFillError = converter.FillComplexBuffer(ref writtenPackets, buffers, outputPacketDescriptions);
if (converterFillError != AudioConverterError.None)
{
Console.WriteLine("FillComplexBuffer error: " + converterFillError);
}
if (writtenPackets == 0) // EOF
{
break;
}
// write to output file
int inNumBytes = buffers[0].DataByteSize;
AudioFileError writeError = outputFile.WritePackets(false, inNumBytes, outputPacketDescriptions, outputFilePosition, ref writtenPackets, outputBufferPtr);
if (writeError != 0)
{
Console.WriteLine("WritePackets error: {0}", writeError);
}
// advance output file packet position
outputFilePosition += writtenPackets;
if (FlowFormat.FramesPerPacket != 0) {
// the format has constant frames per packet
totalOutputFrames += (writtenPackets * FlowFormat.FramesPerPacket);
} else {
// variable frames per packet require doing this for each packet (adding up the number of sample frames of data in each packet)
for (var i = 0; i < writtenPackets; ++i)
{
totalOutputFrames += outputPacketDescriptions[i].VariableFramesInPacket;
}
}
}
// write out any of the leading and trailing frames for compressed formats only
if (outputFormat.BitsPerChannel == 0)
{
Console.WriteLine("Total number of output frames counted: {0}", totalOutputFrames);
WritePacketTableInfo(converter, outputFile);
}
// write the cookie again - sometimes codecs will update cookies at the end of a conversion
if (converter.CompressionMagicCookie != null && converter.CompressionMagicCookie.Length != 0)
{
outputFile.MagicCookie = converter.CompressionMagicCookie;
}
// Clean everything
Marshal.FreeHGlobal(inputBufferPtr);
Marshal.FreeHGlobal(outputBufferPtr);
converter.Dispose();
outputFile.Dispose();
// Remove temp file
File.Delete(TempWavFilePath);
}
I already saw this SO question, but the not-detailed C++/Obj-C related answer doesn't seem to fit with my problem.
Thanks !
I finally found the solution!
I just had to declare AVAudioSession category before converting the file.
AVAudioSession.SharedInstance().SetCategory(AVAudioSessionCategory.AudioProcessing);
AVAudioSession.SharedInstance().SetActive(true);
Since I also use an AudioQueue to RenderOffline, I must in fact set the category to AVAudioSessionCategory.PlayAndRecord so both the offline rendering and the audio converting work.
I am trying to capture video from webcam using Opencv and transmit it over TCP. In addition, I wanted to encrypt the video using AES. But whenever run the AES decrpt function the video is unstable.
I am using the opencv over tcp example and AES example
Whenever I run this function:
img->imageData = aes_decrypt(&de, img->imageData, &imgsize);
my video gets unstable.
I have attached the code segment where I wrote the function.
/* start receiving images*/
while(1)
{
/* get raw data */
for (i = 0; i < imgsize; i += bytes) {
if ((bytes = recv(sock, sockdata + i, imgsize - i, 0)) == -1) {
quit("recv failed", 1);
}
}
pthread_mutex_lock(&mutex);
for (i = 0, k = 0; i < img->height; i++) {
for (j = 0; j < img->width; j++) {
((uchar*)(img->imageData + i * img->widthStep))[j] = sockdata[k++];
}
}
img->imageData = aes_decrypt(&de, img->imageData, &imgsize);
is_data_ready = 1;
pthread_mutex_unlock(&mutex);
/* have we terminated yet? */
pthread_testcancel();
/* no, take a rest for a while */
usleep(1000);
}
This is my first post, sorry for my bad English and format of the post.
I'm writing an iOS app that streams video and audio over the network.
I am using AVCaptureSession to grab raw video frames using AVCaptureVideoDataOutput and encode them in software using x264. This works great.
I wanted to do the same for audio, only that I don't need that much control on the audio side so I wanted to use the built in hardware encoder to produce an AAC stream. This meant using Audio Converter from the Audio Toolbox layer. In order to do so I put in a handler for AVCaptudeAudioDataOutput's audio frames:
- (void)captureOutput:(AVCaptureOutput *)captureOutput
didOutputSampleBuffer:(CMSampleBufferRef)sampleBuffer
fromConnection:(AVCaptureConnection *)connection
{
// get the audio samples into a common buffer _pcmBuffer
CMBlockBufferRef blockBuffer = CMSampleBufferGetDataBuffer(sampleBuffer);
CMBlockBufferGetDataPointer(blockBuffer, 0, NULL, &_pcmBufferSize, &_pcmBuffer);
// use AudioConverter to
UInt32 ouputPacketsCount = 1;
AudioBufferList bufferList;
bufferList.mNumberBuffers = 1;
bufferList.mBuffers[0].mNumberChannels = 1;
bufferList.mBuffers[0].mDataByteSize = sizeof(_aacBuffer);
bufferList.mBuffers[0].mData = _aacBuffer;
OSStatus st = AudioConverterFillComplexBuffer(_converter, converter_callback, (__bridge void *) self, &ouputPacketsCount, &bufferList, NULL);
if (0 == st) {
// ... send bufferList.mBuffers[0].mDataByteSize bytes from _aacBuffer...
}
}
In this case the callback function for the audio converter is pretty simple (assuming packet sizes and counts are setup properly):
- (void) putPcmSamplesInBufferList:(AudioBufferList *)bufferList withCount:(UInt32 *)count
{
bufferList->mBuffers[0].mData = _pcmBuffer;
bufferList->mBuffers[0].mDataByteSize = _pcmBufferSize;
}
And the setup for the audio converter looks like this:
{
// ...
AudioStreamBasicDescription pcmASBD = {0};
pcmASBD.mSampleRate = ((AVAudioSession *) [AVAudioSession sharedInstance]).currentHardwareSampleRate;
pcmASBD.mFormatID = kAudioFormatLinearPCM;
pcmASBD.mFormatFlags = kAudioFormatFlagsCanonical;
pcmASBD.mChannelsPerFrame = 1;
pcmASBD.mBytesPerFrame = sizeof(AudioSampleType);
pcmASBD.mFramesPerPacket = 1;
pcmASBD.mBytesPerPacket = pcmASBD.mBytesPerFrame * pcmASBD.mFramesPerPacket;
pcmASBD.mBitsPerChannel = 8 * pcmASBD.mBytesPerFrame;
AudioStreamBasicDescription aacASBD = {0};
aacASBD.mFormatID = kAudioFormatMPEG4AAC;
aacASBD.mSampleRate = pcmASBD.mSampleRate;
aacASBD.mChannelsPerFrame = pcmASBD.mChannelsPerFrame;
size = sizeof(aacASBD);
AudioFormatGetProperty(kAudioFormatProperty_FormatInfo, 0, NULL, &size, &aacASBD);
AudioConverterNew(&pcmASBD, &aacASBD, &_converter);
// ...
}
This seems pretty straight forward only the IT DOES NOT WORK. Once the AVCaptureSession is running, the audio converter (specifically AudioConverterFillComplexBuffer) returns an 'hwiu' (hardware in use) error. Conversion works fine if the session is stopped but then I can't capture anything...
I was wondering if there was a way to get an AAC stream out of AVCaptureSession. The options I'm considering are:
Somehow using AVAssetWriterInput to encode audio samples into AAC and then get the encoded packets somehow (not through AVAssetWriter, which would only write to a file).
Reorganizing my app so that it uses AVCaptureSession only on the video side and uses Audio Queues on the audio side. This will make flow control (starting and stopping recording, responding to interruptions) more complicated and I'm afraid that it might cause synching problems between the audio and video. Also, it just doesn't seem like a good design.
Does anyone know if getting the AAC out of AVCaptureSession is possible? Do I have to use Audio Queues here? Could this get me into synching or control problems?
I ended up asking Apple for advice (it turns out you can do that if you have a paid developer account).
It seems that AVCaptureSession grabs a hold of the AAC hardware encoder but only lets you use it to write directly to file.
You can use the software encoder but you have to ask for it specifically instead of using AudioConverterNew:
AudioClassDescription *description = [self
getAudioClassDescriptionWithType:kAudioFormatMPEG4AAC
fromManufacturer:kAppleSoftwareAudioCodecManufacturer];
if (!description) {
return false;
}
// see the question as for setting up pcmASBD and arc ASBD
OSStatus st = AudioConverterNewSpecific(&pcmASBD, &aacASBD, 1, description, &_converter);
if (st) {
NSLog(#"error creating audio converter: %s", OSSTATUS(st));
return false;
}
with
- (AudioClassDescription *)getAudioClassDescriptionWithType:(UInt32)type
fromManufacturer:(UInt32)manufacturer
{
static AudioClassDescription desc;
UInt32 encoderSpecifier = type;
OSStatus st;
UInt32 size;
st = AudioFormatGetPropertyInfo(kAudioFormatProperty_Encoders,
sizeof(encoderSpecifier),
&encoderSpecifier,
&size);
if (st) {
NSLog(#"error getting audio format propery info: %s", OSSTATUS(st));
return nil;
}
unsigned int count = size / sizeof(AudioClassDescription);
AudioClassDescription descriptions[count];
st = AudioFormatGetProperty(kAudioFormatProperty_Encoders,
sizeof(encoderSpecifier),
&encoderSpecifier,
&size,
descriptions);
if (st) {
NSLog(#"error getting audio format propery: %s", OSSTATUS(st));
return nil;
}
for (unsigned int i = 0; i < count; i++) {
if ((type == descriptions[i].mSubType) &&
(manufacturer == descriptions[i].mManufacturer)) {
memcpy(&desc, &(descriptions[i]), sizeof(desc));
return &desc;
}
}
return nil;
}
The software encoder will take up CPU resources, of course, but will get the job done.