It is easy in iOS to get the number of bytes in a loaded .wav file:
// Query the audio-data byte count of the file identified by fileId.
UInt64 dataSize = 0; // receives the byte count; initialized to 0
UInt32 ps = sizeof(UInt64); // property size, passed by address to the query
// A non-zero result from AudioFileGetProperty is treated as failure and only logged;
// execution still falls through to the return below.
if( AudioFileGetProperty(fileId, kAudioFilePropertyAudioDataByteCount, &ps, &dataSize) )
puts( "error retriving data chunk size" );
return dataSize ;
But in the documentation I cannot seem to find any information on how to determine the sampling rate of a PCM wave file.
I found the answer using AudioStreamBasicDescription. All you have to do is:
// Returns the sampling rate of the audio file identified by fileId, truncated to
// an integer, or 0 if the file's data format could not be queried.
UInt32 getAudioDataSamplingRate( AudioFileID fileId )
{
    // Zero-initialize so that a failed query can never expose indeterminate stack contents.
    AudioStreamBasicDescription bsd = {0};
    UInt32 ps = sizeof(AudioStreamBasicDescription) ;
    // A non-zero result is treated as failure: log it and report 0 instead of
    // reading a description that was never filled in.
    if( AudioFileGetProperty(fileId,
                             kAudioFilePropertyDataFormat, &ps, &bsd) )
    {
        puts( "error retriving af basic description" );
        return 0 ;
    }
    // mSampleRate is a floating-point field; make the narrowing to the integral return type explicit.
    return (UInt32)bsd.mSampleRate ;
}
Related
I need to transmit sound over the network, and for this I chose the libraries "PortAudio" and "Opus". I am new to working with sound and therefore I don't know many things. I read the documentation and looked at some examples, but I still have some problems with encoding/decoding with Opus: I do not understand how to correctly restore the original PCM from the encoded data. I have the following sequence of actions:
Some consts
// Capture / codec configuration used by the snippets that follow.
const int FRAMES_PER_BUFFER = 960; // 960 frames at 48000 Hz = 20 ms
const int SAMPLE_RATE = 48000;
int NUM_CHANNELS = 2;
int totalFrames = 2 * SAMPLE_RATE; /* Record for a few seconds. */
int numSamples = totalFrames * 2; // NOTE(review): the literal 2 presumably stands for NUM_CHANNELS — confirm
int numBytes = numSamples * sizeof(float);
float *sampleBlock = nullptr; // NOTE(review): no allocation is visible in this excerpt — confirm it points at numBytes of storage before use
int bytesOfPacket = 0;
unsigned char *packet = nullptr;
I read PCM into sampleBlock
// Read totalFrames frames from the PortAudio stream into sampleBlock.
// NOTE(review): `**&stream` appears to be just `*stream` written redundantly — confirm.
paError = Pa_ReadStream(**&stream, sampleBlock, totalFrames);
if (paError != paNoError) {
// The failure is only reported; nothing here stops the code that follows from running.
cout << "PortAudio error : " << Pa_GetErrorText(paError) << endl;
std::system("pause");
}
Encoding sampleBlock
OpusEncoder *encoder;
int error;
int size;
encoder = opus_encoder_create(SAMPLE_RATE, NUM_CHANNELS, OPUS_APPLICATION_VOIP, &error);
size = opus_encoder_get_size(NUM_CHANNELS);
encoder = (OpusEncoder *)malloc(size);
packet = new unsigned char[480];
error = opus_encoder_init(encoder, SAMPLE_RATE, NUM_CHANNELS, OPUS_APPLICATION_VOIP);
if (error == -1) {
return -1;
}
bytesOfPacket = opus_encode_float(encoder, sampleBlock, FRAMES_PER_BUFFER, packet, 480);
opus_encoder_destroy(encoder);
Ok, i received a encoded packet to Opus
Decoding
OpusDecoder *decoder;
int error;
int size;
decoder = opus_decoder_create(SAMPLE_RATE, NUM_CHANNELS, &error);
size = opus_decoder_get_size(NUM_CHANNELS);
decoder = (OpusDecoder *)malloc(size);
error = opus_decoder_init(decoder, SAMPLE_RATE, NUM_CHANNELS);
opus_decode_float(decoder, packet, bytesOfPacket, sampleBlock, 480, 0);
opus_decoder_destroy(decoder);
Here i am trying to decode the Opus back to the PCM and save the result to the sampleBlock
Playing the sound
// Hand totalFrames frames from sampleBlock to the PortAudio stream for playback.
// NOTE(review): totalFrames is the full recording length; confirm sampleBlock holds that much decoded audio at this point.
paError = Pa_WriteStream(**&stream, sampleBlock, totalFrames);
if (paError != paNoError) {
// The failure is only reported; execution continues afterwards.
cout << "PortAudio error : " << Pa_GetErrorText(paError) << endl;
std::system("pause");
}
I get silence. I don't really understand the subtleties in working with sound since i am new to this business. Help please understand what is wrong.
As for your settings you're encoding 20ms of audio per opus_encode_float call. I don't see any iteration over this call so I suppose you don't hear anything because you encode only 20ms of audio. You should pass to opus_encode_float 20ms worth of samples with your sampleBlock pointer incrementing it through the whole buffer x times.
Try to encode more audio and remember that you have to add some sort of framing to decode it. You cannot just feed the whole buffer to the decoder. You should feed the decoder one time for each encoder call with the same data that each encoder call outputs.
Damiano
I write a voip app that uses "novocaine" library for recording and playback of sound. I set sample rate as 8kHz. This sample rate is set in novocaine in AudioStreamBasicDescription of audio unit and as audio session property kAudioSessionProperty_PreferredHardwareSampleRate. I understand that setting preferred hardware sample rate has no guarantee that actual hardware sample rate will be changed but it worked for all devices except iPhone6s and iPhone6s+ (when route is changed to speaker). With iPhone6s(+) and speaker route I receive 48kHz sound from microphone. So I need to somehow convert this 48 kHz sound to 8kHz. In documentation I found that AudioConverterRef can be used in this case but I have troubles with using it.
I use AudioConverterFillComplexBuffer for sample rate conversion but it always returns -50 OSStatus (one or more parameters passed to the function were not valid). This is how I use audio converter:
// Setup AudioStreamBasicDescription for input:
// 48 kHz, one channel, packed native-endian float linear PCM, one frame per packet
inputFormat.mSampleRate = 48000.0;
inputFormat.mFormatID = kAudioFormatLinearPCM;
inputFormat.mFormatFlags = kAudioFormatFlagIsFloat | kAudioFormatFlagsNativeEndian | kAudioFormatFlagIsPacked;
inputFormat.mChannelsPerFrame = 1;
inputFormat.mBitsPerChannel = 8 * sizeof(float);
inputFormat.mFramesPerPacket = 1;
inputFormat.mBytesPerFrame = sizeof(float) * inputFormat.mChannelsPerFrame;
inputFormat.mBytesPerPacket = inputFormat.mBytesPerFrame * inputFormat.mFramesPerPacket;
// Setup AudioStreamBasicDescription for output:
// same sample layout as the input, but at 8 kHz
outputFormat.mSampleRate = 8000.0;
outputFormat.mFormatID = kAudioFormatLinearPCM;
outputFormat.mFormatFlags = kAudioFormatFlagIsFloat | kAudioFormatFlagsNativeEndian | kAudioFormatFlagIsPacked;
outputFormat.mChannelsPerFrame = 1;
outputFormat.mBitsPerChannel = 8 * sizeof(float);
outputFormat.mFramesPerPacket = 1;
outputFormat.mBytesPerFrame = sizeof(float) * outputFormat.mChannelsPerFrame;
outputFormat.mBytesPerPacket = outputFormat.mBytesPerFrame * outputFormat.mFramesPerPacket;
// Create new instance of audio converter
// NOTE(review): the value returned by AudioConverterNew is discarded, so a failed creation goes unnoticed here.
AudioConverterNew(&inputFormat, &outputFormat, &converter);
// Set conversion quality
// The same value (tmp) is written to both quality properties; neither call's result is checked.
UInt32 tmp = kAudioConverterQuality_Medium;
AudioConverterSetProperty( converter, kAudioConverterCodecQuality,
sizeof( tmp ), &tmp );
AudioConverterSetProperty( converter, kAudioConverterSampleRateConverterQuality, sizeof( tmp ), &tmp );
// Get the size of the IO buffer(s)
UInt32 bufferSizeFrames = 0;
size = sizeof(UInt32);
// NOTE(review): the result of this call is not checked. If it fails, bufferSizeFrames
// presumably keeps its initial 0, and every size derived from it below is 0 as well.
AudioUnitGetProperty(self.inputUnit,
kAudioDevicePropertyBufferFrameSize,
kAudioUnitScope_Global,
0,
&bufferSizeFrames,
&size);
// One Float32 per frame (the formats above are single-channel).
UInt32 bufferSizeBytes = bufferSizeFrames * sizeof(Float32);
// Allocate an AudioBufferList plus enough space for array of AudioBuffers
UInt32 propsize = offsetof(AudioBufferList, mBuffers[0]) + (sizeof(AudioBuffer) * outputFormat.mChannelsPerFrame);
// Malloc buffer lists
convertedInputBuffer = (AudioBufferList *)malloc(propsize);
convertedInputBuffer->mNumberBuffers = 1;
// Pre-malloc buffers for AudioBufferLists
convertedInputBuffer->mBuffers[0].mNumberChannels = outputFormat.mChannelsPerFrame;
convertedInputBuffer->mBuffers[0].mDataByteSize = bufferSizeBytes;
convertedInputBuffer->mBuffers[0].mData = malloc(bufferSizeBytes);
// Start from silence: zero the freshly allocated sample storage.
memset(convertedInputBuffer->mBuffers[0].mData, 0, bufferSizeBytes);
// Setup callback for converter
// This function is passed to AudioConverterFillComplexBuffer as its input procedure.
// Its body is elided in this excerpt.
static OSStatus inputProcPtr(AudioConverterRef inAudioConverter,
UInt32* ioNumberDataPackets,
AudioBufferList* ioData,
AudioStreamPacketDescription* __nullable* __nullable outDataPacketDescription,
void* __nullable inUserData)
{
// Read data from buffer
// NOTE(review): no return statement is shown although the function is declared to return OSStatus.
}
// Perform actual sample rate conversion
// NOTE(review): numberOfFrames is not declared in this excerpt, and the returned OSStatus is not captured on this line.
AudioConverterFillComplexBuffer(converter, inputProcPtr, NULL, &numberOfFrames, convertedInputBuffer, NULL)
inputProcPtr callback is never called. I tried to set different number of frames but still receive OSStatus -50.
1) Is using AudioConverterRef the correct way to do sample rate conversion, or can it be done in a different way?
2) What is wrong with my conversion implementation?
Thank you all in advance
One problem is this:
// Excerpt under discussion: queries kAudioDevicePropertyBufferFrameSize into bufferSizeFrames; the call's result is not checked.
AudioUnitGetProperty(self.inputUnit,
kAudioDevicePropertyBufferFrameSize,
kAudioUnitScope_Global,
0,
&bufferSizeFrames,
&size);
kAudioDevicePropertyBufferFrameSize is an OSX property, and doesn't exist on iOS. How is this code even compiling?
If you've somehow made it compile, check the return code from this function! I've got a feeling that it's failing, and bufferSizeFrames is zero. That would make AudioConverterFillComplexBuffer return -50 (kAudio_ParamError).
So on iOS, either pick a bufferSizeFrames yourself or base it on AVAudioSession's IOBufferDuration if you must.
Another problem: check your return codes. All of them!
e.g.
// Excerpt under discussion: writes kAudioConverterQuality_Medium to kAudioConverterCodecQuality; the call's result is not checked.
UInt32 tmp = kAudioConverterQuality_Medium;
AudioConverterSetProperty( converter, kAudioConverterCodecQuality,
sizeof( tmp ), &tmp );
I'm pretty sure there's no codec to speak of in LPCM->LPCM conversions, and that kAudioConverterQuality_Medium is not the right value to use with kAudioConverterCodecQuality in any case. I don't see how this call can succeed.
I'm using ExtAudioFileWriteAsync to write an audio file while recording from the device, but once the recording is finished I try to read it back with the ExtAudioFileRead function, and the samples I get are not the same samples I wrote... Does anyone know why this could happen?
For writing:
// Input callback: prints every incoming sample, then queues the frames for
// asynchronous writing to outputFile.
self.audioManager.inputBlock = ^(float *data, UInt32 numFrames, UInt32 numChannels) {
    for (int i = 0; i < numFrames*numChannels; i++) {
        printf("write*%f\n", data[i]);
    }
    UInt32 numIncomingBytes = numFrames*numChannels*sizeof(float);
    // NOTE(review): this local scratch copy is filled from recordedData (not from the
    // `data` argument) and is never handed to the writer, which is given
    // self.outputBuffer below — confirm which buffer is meant to be written.
    UInt32 *outputBuffer =(UInt32*)malloc(numIncomingBytes);
    memcpy(outputBuffer, recordedData, numIncomingBytes);
    AudioBufferList outgoingAudio;
    outgoingAudio.mNumberBuffers = 1;
    outgoingAudio.mBuffers[0].mNumberChannels = numChannels;
    outgoingAudio.mBuffers[0].mDataByteSize = numIncomingBytes;
    outgoingAudio.mBuffers[0].mData = self.outputBuffer;
    if( 0 == pthread_mutex_trylock( &outputAudioFileLock ) )
    {
        ExtAudioFileWriteAsync(outputFile, numFrames, &outgoingAudio);
        // Unlock only on the path where trylock succeeded: unlocking a mutex
        // this thread does not hold is an error.
        pthread_mutex_unlock( &outputAudioFileLock );
    }
    // The scratch copy never leaves this block invocation; release it so that
    // each callback does not leak numIncomingBytes.
    free(outputBuffer);
};
[self.audioManager play];
For reading:
// Read the file back through inputFile and print the samples of every chunk.
UInt32 *outputBuffer = (UInt32 *)malloc(numFrames*numChannels*sizeof(float));
// Single-buffer list sized for numFrames frames of numChannels float samples.
AudioBufferList convertedData;
convertedData.mNumberBuffers = 1;
convertedData.mBuffers[0].mNumberChannels = numChannels;
convertedData.mBuffers[0].mDataByteSize = numFrames*numChannels*sizeof(float);
convertedData.mBuffers[0].mData = outputBuffer;
NSMutableArray *samplesArray = [[NSMutableArray alloc]init]; // not used within this excerpt
// numFrames is passed by address to the read, so each call can change it;
// the loop ends once it is 0.
while (numFrames > 0) {
ExtAudioFileRead(inputFile, &numFrames, &convertedData);
if (numFrames > 0) {
AudioBuffer audioBuffer = convertedData.mBuffers[0];
float *samples = (float *)audioBuffer.mData;
// NOTE(review): the bound uses frameCount, which is not defined in this excerpt,
// rather than the numFrames value just updated by the read — confirm.
for (int i = 0; i < frameCount*numChannels; i++) {
printf("read*%f\n", samples[i]);
}
}
}
By the way I'm using Novocaine project in order to get device audio. I can reproduce saved audio with Novocaine code or with any other player.
When writing ExtAudioFileRef output :
// Create the file as an M4A described by outputFileDesc, passing kAudioFileFlags_EraseFile; the call's result is not checked here.
ExtAudioFileCreateWithURL(audioFileRef, kAudioFileM4AType, &outputFileDesc, NULL, kAudioFileFlags_EraseFile, &outputFile);
Where outputFileDesc is
// On-disk format: positional initializer with 44100.0, kAudioFormatMPEG4AAC, 1024 and thisNumChannels.
// NOTE(review): presumably these land in mSampleRate, mFormatID, mFramesPerPacket and mChannelsPerFrame — confirm against the struct's field order.
AudioStreamBasicDescription outputFileDesc = {44100.0, kAudioFormatMPEG4AAC, 0, 0, 1024, 0, thisNumChannels, 0, 0};
// NOTE(review): these flag names describe integer PCM sample layout, yet the format ID above is MPEG-4 AAC — confirm they are intended.
outputFileDesc.mFormatFlags = kAudioFormatFlagIsBigEndian | kAudioFormatFlagIsSignedInteger | kAudioFormatFlagIsPacked;
When reading ExtAudioFileRef inputFile:
// Open the previously written file for reading through inputFile.
ExtAudioFileOpenURL(audioFileRef, &inputFile);
And in both cases (writing and reading) it is applied same format:
// Client (in-memory) format shared by the writer and the reader:
// linear PCM float, 32 bits per channel, 4 bytes per channel in every frame, one frame per packet.
AudioStreamBasicDescription outputFormat; // NOTE(review): the assignments below target _outputFormat, not this local — confirm which is intended
_outputFormat.mSampleRate = self.samplingRate;
_outputFormat.mFormatID = kAudioFormatLinearPCM;
_outputFormat.mFormatFlags = kAudioFormatFlagIsFloat;
_outputFormat.mBytesPerPacket = 4*self.numChannels;
_outputFormat.mFramesPerPacket = 1;
_outputFormat.mBytesPerFrame = 4*self.numChannels;
_outputFormat.mChannelsPerFrame = self.numChannels;
_outputFormat.mBitsPerChannel = 32;
// Apply the same client data format to the writing file (outputFile) and the reading file (inputFile).
ExtAudioFileSetProperty(outputFile, kExtAudioFileProperty_ClientDataFormat, sizeof(AudioStreamBasicDescription), &_outputFormat);
ExtAudioFileSetProperty(inputFile, kExtAudioFileProperty_ClientDataFormat, sizeof(AudioStreamBasicDescription), &_outputFormat);
And by the way, even though the samples read are not equal to the samples written, the mean values of both signals are quite similar. But I do not fully understand why they are not exactly equal!
Any idea what I'm doing wrong?
It sounds like there is an implicit format conversion from one or both of the ExtaudioFileRefs and you are seeing different samples as a result of the conversion. You have three formats: audio_in_format, file_format, and audio_out_format. If audio_in_format is different than file_format, The writing ExtAudioFileRef will create an audio converter for you to convert the input audio to file_format before writing to disk. And the reading ExtAudioFileRef will also create a converter if file_format is different than audio_out_format.
Opinion:
It's confusing that you named your writing ExtAudioFileRef "outputFile", and your reading ExtAudioFileRef "inputFile". I would use something like audioWriter and audioReader.
I use the ExtAudioFileRead function to load an audio file into memory, but I always get an error with code -50. That means I am passing a wrong parameter to the function, but I have no idea which parameter is wrong.
The audio file's data format is ALAC, its sample rate is 44100 Hz, and it has 2 channels.
My code is shown below:
// Open `file` and read all of its frames into the per-channel SInt16 buffers of _sound.
ExtAudioFileRef recordFile;
OSStatus error = noErr;
error = ExtAudioFileOpenURL((CFURLRef)file, &recordFile);
checkError(error, "open file");
// Total length of the file, in frames.
SInt64 frameCount;
UInt32 size = sizeof(frameCount);
error = ExtAudioFileGetProperty(recordFile, kExtAudioFileProperty_FileLengthFrames, &size, &frameCount);
checkError(error, "get frameTotlal");
soundStruct *sound = &_sound;
sound->frameCount = frameCount;
sound->isStereo = true; // NOTE(review): set unconditionally, before the file's channel count is known — confirm for mono files
sound->audioDataLeft = (SInt16 *)calloc(frameCount, sizeof(SInt16));
sound->audioDataRight = (SInt16 *)calloc(frameCount, sizeof(SInt16));
// Format of the data as stored in the file; only its channel count is used below.
AudioStreamBasicDescription desc;
UInt32 descSize = sizeof(desc);
error = ExtAudioFileGetProperty(recordFile, kExtAudioFileProperty_FileDataFormat, &descSize, &desc);
checkError(error, "get file data format");
[self printASBD:desc];
UInt32 channels = desc.mChannelsPerFrame;
// inFormat (defined outside this excerpt) is the format the samples are converted to on read.
error = ExtAudioFileSetProperty(recordFile, kExtAudioFileProperty_ClientDataFormat, sizeof(inFormat), &inFormat);
checkError(error, "set client data format");
// One AudioBuffer per channel: AudioBufferList already contains room for the first one.
AudioBufferList *bufferList;
bufferList = (AudioBufferList *)malloc(sizeof(AudioBufferList) + sizeof(AudioBuffer) * (channels - 1));
AudioBuffer emptyBuff = {0};
size_t arrayIndex;
for (arrayIndex = 0; arrayIndex < channels; arrayIndex ++) {
    bufferList->mBuffers[arrayIndex] = emptyBuff;
}
// malloc() leaves mNumberBuffers indeterminate, so it must be set on every path,
// not only in the stereo branch below.
bufferList->mNumberBuffers = 1;
bufferList->mBuffers[0].mData = sound->audioDataLeft;
bufferList->mBuffers[0].mNumberChannels = 1;
bufferList->mBuffers[0].mDataByteSize = frameCount * sizeof(SInt16);
if (channels == 2) {
    bufferList->mBuffers[1].mData = sound->audioDataRight;
    bufferList->mBuffers[1].mNumberChannels = 1;
    bufferList->mBuffers[1].mDataByteSize = frameCount * sizeof(SInt16);
    bufferList->mNumberBuffers = 2;
}
// Request the whole file in a single read.
UInt32 count = (UInt32)frameCount;
error = ExtAudioFileRead(recordFile, &count, bufferList);
checkError(error, "reading"); // Get a -50 error
// Only the list header is released; the sample storage stays owned by _sound.
free(bufferList);
ExtAudioFileDispose(recordFile);
Good question.
This error happened to me when I called ExtAudioFileRead on a MONO file while using a STEREO client data format in the call to ExtAudioFileSetProperty.
I don't think ExtAudioFileRead automatically upconverts mono files to stereo files, if there is a mismatch there I think it fails with this -50 error.
Either make the mono file stereo, or set inFormat.mChannelsPerFrame=1 for the mono files.
Remember, if you don't upconvert, you must account for the mono files in your audiorenderfunction by writing L/R channels from the single mono channel of data.
So ,I need to reverse some audio *.caf file,
I have seen that the way to do it should be:
You cannot just reverse the byte data. I have achieved the same
effect using CoreAudio and AudioUnits. Use ExtFileReader C API to read
the file into lPCM buffers and then you can reverse the buffers as
needed.
But I cannot find any documentation of the use of
ExtFileReader C API
So if I have a *.caf file, how can I read it into linear PCM? I have checked the Core Audio overview but can't find how to accomplish this.
How can i then, read my caf file to linear PCM?
thanks!
ExtendedAudioFile is in the AudioToolbox framework. It's pretty straightforward to read in a file to whatever format you'd like. Here's a quick (compiles, but not tested) example of reading in to 32-bit float non-interleaved Linear PCM:
#import <AudioToolbox/AudioToolbox.h>
...
// Read an audio file, chunk by chunk, as 32-bit float non-interleaved linear PCM.
ExtAudioFileRef audioFile = NULL;
CFURLRef url = NULL; // placeholder: must refer to the file to read
OSStatus err = ExtAudioFileOpenURL(url, &audioFile);
AudioStreamBasicDescription asbd;
UInt32 dataSize = sizeof(asbd);
// get the audio file's format
err = ExtAudioFileGetProperty(audioFile, kExtAudioFileProperty_FileDataFormat, &dataSize, &asbd);
// now set the client format to what we want on read (LPCM, 32-bit floating point)
// sample rate and channel count are inherited from the file's own format
AudioStreamBasicDescription clientFormat = asbd;
clientFormat.mFormatID = kAudioFormatLinearPCM;
clientFormat.mFormatFlags = kAudioFormatFlagIsFloat | kAudioFormatFlagIsNonInterleaved | kAudioFormatFlagIsPacked;
clientFormat.mBitsPerChannel = 32;
// non-interleaved: each buffer carries one channel, so a frame/packet is a single 4-byte sample
clientFormat.mBytesPerPacket = 4;
clientFormat.mFramesPerPacket = 1;
clientFormat.mBytesPerFrame = 4;
err = ExtAudioFileSetProperty(audioFile, kExtAudioFileProperty_ClientDataFormat, sizeof(clientFormat), &clientFormat);
// okay, now the ext audio file is set up to convert samples to LPCM on read
// get the total number of frames
SInt64 numFrames = 0;
dataSize = sizeof(numFrames);
err = ExtAudioFileGetProperty(audioFile, kExtAudioFileProperty_FileLengthFrames, &dataSize, &numFrames);
// prepare an audio buffer list to hold the data when we read it from the file
UInt32 maxReadFrames = 4096; // how many frames will we read in at a time?
// AudioBufferList already has room for one AudioBuffer; add one more per extra channel
AudioBufferList *bufferList = (AudioBufferList *)malloc(sizeof(AudioBufferList) + sizeof(AudioBuffer) * (asbd.mChannelsPerFrame - 1));
bufferList->mNumberBuffers = asbd.mChannelsPerFrame;
for (UInt32 ii = 0; ii < bufferList->mNumberBuffers; ++ii) {
    bufferList->mBuffers[ii].mDataByteSize = maxReadFrames * sizeof(float);
    bufferList->mBuffers[ii].mData = malloc(bufferList->mBuffers[ii].mDataByteSize);
    bzero(bufferList->mBuffers[ii].mData, bufferList->mBuffers[ii].mDataByteSize);
    bufferList->mBuffers[ii].mNumberChannels = 1;
}
while(numFrames > 0) {
    // a read may shrink mDataByteSize to the number of bytes it produced, so
    // restore the full capacity before asking for the next chunk
    for (UInt32 ii = 0; ii < bufferList->mNumberBuffers; ++ii) {
        bufferList->mBuffers[ii].mDataByteSize = maxReadFrames * sizeof(float);
    }
    UInt32 framesToRead = (maxReadFrames > numFrames) ? (UInt32)numFrames : maxReadFrames;
    // on return, framesToRead holds the number of frames actually read
    err = ExtAudioFileRead(audioFile, &framesToRead, bufferList);
    // stop on failure or at end of file (zero frames) instead of spinning forever
    if (err != noErr || framesToRead == 0) {
        break;
    }
    // okay, `framesToRead` frames of LPCM audio data are in `bufferList` -- do whatever processing you'd like!
    // account for what was consumed so that the loop terminates
    numFrames -= framesToRead;
}
// clean up
for (UInt32 ii = 0; ii < bufferList->mNumberBuffers; ++ii) {
    free(bufferList->mBuffers[ii].mData);
}
free(bufferList);
ExtAudioFileDispose(audioFile);