I'm trying to build a VoIP application on iOS with echo cancellation. As I understand it, I need to use Audio Units (the Voice-Processing I/O unit) to get AEC. The main problem is how to use AVAudioConverter to encode the microphone data to Opus.
opusASBD = AudioStreamBasicDescription(mSampleRate: 48000.0,
mFormatID: kAudioFormatOpus,
mFormatFlags: 0,
mBytesPerPacket: 0,
mFramesPerPacket: 2880,
mBytesPerFrame: 0,
mChannelsPerFrame: 1,
mBitsPerChannel: 0,
mReserved: 0)
decoderOutputASBD = AudioStreamBasicDescription(mSampleRate: 48000.0,
mFormatID: kAudioFormatLinearPCM,
mFormatFlags: kLinearPCMFormatFlagIsSignedInteger | kAudioFormatFlagIsPacked | kAudioFormatFlagIsNonInterleaved,
mBytesPerPacket: 2,
mFramesPerPacket: 1,
mBytesPerFrame: 2,
mChannelsPerFrame: 1,
mBitsPerChannel: 16,
mReserved: 0)
self.converterSpeaker = AVAudioConverter(from: AVAudioFormat(streamDescription: &opusASBD)!,
to: AVAudioFormat(streamDescription: &decoderOutputASBD)!)
self.converterMic = AVAudioConverter(from: AVAudioFormat(streamDescription: &decoderOutputASBD)!,
to: AVAudioFormat(streamDescription: &opusASBD)!)
self.converterMic?.bitRate = 48000
var inDesc = AudioComponentDescription(componentType: kAudioUnitType_Output,
componentSubType: kAudioUnitSubType_VoiceProcessingIO,
componentManufacturer: kAudioUnitManufacturer_Apple,
componentFlags: 0,
componentFlagsMask: 0)
if let inputComponent = AudioComponentFindNext(nil, &inDesc) {
let status = AudioComponentInstanceNew(inputComponent, &self.audioUnit)
if status == noErr {
var flag = UInt32(1)
AudioUnitSetProperty(self.audioUnit,
kAudioOutputUnitProperty_EnableIO,
kAudioUnitScope_Input,
1,
&flag,
UInt32(MemoryLayout<UInt32>.size))
AudioUnitSetProperty(self.audioUnit,
kAudioOutputUnitProperty_EnableIO,
kAudioUnitScope_Output,
0,
&flag,
UInt32(MemoryLayout<UInt32>.size))
AudioUnitSetProperty(self.audioUnit,
kAUVoiceIOProperty_VoiceProcessingEnableAGC,
kAudioUnitScope_Global,
0,
&flag,
UInt32(MemoryLayout<UInt32>.size))
AudioUnitSetProperty(self.audioUnit,
kAudioUnitProperty_StreamFormat,
kAudioUnitScope_Input,
0,
&decoderOutputASBD,
UInt32(MemoryLayout<AudioStreamBasicDescription>.size))
AudioUnitSetProperty(self.audioUnit,
kAudioUnitProperty_StreamFormat,
kAudioUnitScope_Output,
1,
&decoderOutputASBD,
UInt32(MemoryLayout<AudioStreamBasicDescription>.size))
var iCallback = AURenderCallbackStruct(inputProc: inputCallback,
inputProcRefCon: UnsafeMutableRawPointer(Unmanaged.passUnretained(self).toOpaque()))
AudioUnitSetProperty(self.audioUnit,
kAudioOutputUnitProperty_SetInputCallback,
kAudioUnitScope_Global,
1,
&iCallback,
UInt32(MemoryLayout<AURenderCallbackStruct>.size))
var rCallback = AURenderCallbackStruct(inputProc: renderCallback,
inputProcRefCon: UnsafeMutableRawPointer(Unmanaged.passUnretained(self).toOpaque()))
AudioUnitSetProperty(self.audioUnit,
kAudioUnitProperty_SetRenderCallback,
kAudioUnitScope_Global,
0,
&rCallback,
UInt32(MemoryLayout<AURenderCallbackStruct>.size))
AudioUnitInitialize(self.audioUnit)
AudioOutputUnitStart(self.audioUnit)
}
I'm using a ring buffer for the audio data, taken from
https://github.com/michaeltyson/TPCircularBuffer
func inputCallback(_ inRefCon: UnsafeMutableRawPointer,
_ ioActionFlags: UnsafeMutablePointer<AudioUnitRenderActionFlags>,
_ inTimeStamp: UnsafePointer<AudioTimeStamp>,
_ inOutputBusNumber: UInt32,
_ inNumberFrames: UInt32,
_ ioData: UnsafeMutablePointer<AudioBufferList>?) -> OSStatus {
let wSelf: AudioUnits = Unmanaged.fromOpaque(inRefCon).takeUnretainedValue()
var buffers = AudioBufferList(mNumberBuffers: 1, mBuffers: AudioBuffer())
AudioUnitRender(wSelf.audioUnit,
ioActionFlags,
inTimeStamp,
inOutputBusNumber,
inNumberFrames,
&buffers)
TPCircularBufferCopyAudioBufferList(&wSelf.ringBufferMic,
&buffers,
inTimeStamp,
inNumberFrames,
&wSelf.decoderOutputASBD)
wSelf.handleMic(inNumberFrames, inTimeStamp: inTimeStamp.pointee)
return noErr
}
func renderCallback(_ inRefCon: UnsafeMutableRawPointer,
_ ioActionFlags: UnsafeMutablePointer<AudioUnitRenderActionFlags>,
_ inTimeStamp: UnsafePointer<AudioTimeStamp>,
_ inOutputBusNumber: UInt32,
_ inNumberFrames: UInt32,
_ ioData: UnsafeMutablePointer<AudioBufferList>?) -> OSStatus {
let wSelf: AudioUnits = Unmanaged.fromOpaque(inRefCon).takeUnretainedValue()
if let data = ioData {
let audioBufferList = UnsafeMutableAudioBufferListPointer(data)
if let buffer = audioBufferList.first {
buffer.mData?.assumingMemoryBound(to: Float32.self).assign(repeating: 0, count: Int(inNumberFrames))
}
var ioLengthInFrames = inNumberFrames
TPCircularBufferDequeueBufferListFrames(&wSelf.ringBufferSpeaker,
&ioLengthInFrames,
ioData!,
nil,
&wSelf.decoderOutputASBD)
}
return noErr
}
In the microphone handler I just encode to Opus, then decode it again and try to render the decoded audio (for debugging). But my voice comes out corrupted:
func handleMic(_ frames: UInt32, inTimeStamp: AudioTimeStamp) {
var ioLengthInFrames = frames
var its = inTimeStamp
self.inputBufferMic = AVAudioPCMBuffer(pcmFormat: AVAudioFormat(streamDescription: &self.decoderOutputASBD)!,
frameCapacity: ioLengthInFrames)!
self.inputBufferMic.frameLength = self.inputBufferMic.frameCapacity
TPCircularBufferDequeueBufferListFrames(&self.ringBufferMic,
&ioLengthInFrames,
self.inputBufferMic.mutableAudioBufferList,
&its,
&self.decoderOutputASBD)
self.outputBufferMic = AVAudioCompressedBuffer(format: AVAudioFormat(streamDescription: &self.opusASBD)!,
packetCapacity: 1,
maximumPacketSize: 960)
var error: NSError?
self.converterMic?.convert(to: self.outputBufferMic,
error: &error,
withInputFrom: { [weak self] (packetCount, outputStatus) -> AVAudioBuffer? in
outputStatus.pointee = .haveData
return self?.inputBufferMic
})
if let e = error {
LoggerManager.sharedInstance.log("<AudioUnits>: OPUS encoding error:\n \(e)")
return
}
let mData = NSData(bytes: self.outputBufferMic.data,
length: Int(self.outputBufferMic.byteLength))
self.inputBufferSpeaker = AVAudioCompressedBuffer(format: AVAudioFormat(streamDescription: &self.opusASBD)!,
packetCapacity: 1,
maximumPacketSize: Int(AudioUnits.frameSize))
self.outputBufferSpeaker = AVAudioPCMBuffer(pcmFormat: AVAudioFormat(streamDescription: &self.decoderOutputASBD)!,
frameCapacity: AVAudioFrameCount(AudioUnits.frameSize))!
self.outputBufferSpeaker.frameLength = self.outputBufferSpeaker.frameCapacity
memcpy(self.inputBufferSpeaker.data, mData.bytes.bindMemory(to: UInt8.self, capacity: 1), mData.length)
self.inputBufferSpeaker.byteLength = UInt32(mData.length)
self.inputBufferSpeaker.packetCount = AVAudioPacketCount(1)
self.inputBufferSpeaker.packetDescriptions![0].mDataByteSize = self.inputBufferSpeaker.byteLength
self.converterSpeaker?.convert(to: self.outputBufferSpeaker,
error: &error,
withInputFrom: { [weak self] (packetCount, outputStatus) -> AVAudioBuffer? in
outputStatus.pointee = .haveData
return self?.inputBufferSpeaker
})
if let e = error {
LoggerManager.sharedInstance.log("<AudioUnits>: OPUS decoding error:\n \(e)")
return
}
TPCircularBufferCopyAudioBufferList(&self.ringBufferSpeaker,
&self.outputBufferSpeaker.mutableAudioBufferList.pointee,
nil,
AudioUnits.frameSize,
&self.decoderOutputASBD)
}
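For reference, independent of the Audio Unit plumbing above, here is a minimal standalone sketch of the PCM-to-Opus step with AVAudioConverter, assuming 20 ms packets (960 frames at 48 kHz) and Int16 mono input; the names (encode, pcmBuffer) are illustrative, and this is not the fix for the corruption, just the conversion path in isolation:

import AVFoundation

// Input: 48 kHz, mono, interleaved Int16 PCM.
let pcmFormat = AVAudioFormat(commonFormat: .pcmFormatInt16,
                              sampleRate: 48000,
                              channels: 1,
                              interleaved: true)!

// Output: Opus, 960 frames (20 ms) per packet.
var opusDesc = AudioStreamBasicDescription(mSampleRate: 48000,
                                           mFormatID: kAudioFormatOpus,
                                           mFormatFlags: 0,
                                           mBytesPerPacket: 0,
                                           mFramesPerPacket: 960,
                                           mBytesPerFrame: 0,
                                           mChannelsPerFrame: 1,
                                           mBitsPerChannel: 0,
                                           mReserved: 0)
let opusFormat = AVAudioFormat(streamDescription: &opusDesc)!
let encoder = AVAudioConverter(from: pcmFormat, to: opusFormat)!

func encode(_ pcmBuffer: AVAudioPCMBuffer) -> Data? {
    // One Opus packet out per 960-frame buffer in; 1500 bytes is a generous upper bound.
    let packet = AVAudioCompressedBuffer(format: opusFormat, packetCapacity: 1, maximumPacketSize: 1500)
    var consumed = false
    var error: NSError?
    let status = encoder.convert(to: packet, error: &error) { _, outStatus in
        if consumed {
            outStatus.pointee = .noDataNow   // feed exactly one input buffer per call
            return nil
        }
        consumed = true
        outStatus.pointee = .haveData
        return pcmBuffer
    }
    guard status == .haveData, error == nil, packet.byteLength > 0 else { return nil }
    return Data(bytes: packet.data, count: Int(packet.byteLength))
}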
Related
I am trying to convert PCM (microphone input) to G.711 µLaw and am having trouble getting it to work.
With my current implementation the conversion itself succeeds, but it is as if no data ends up in the converted buffer.
I have done a lot of research but can't find the cause of the problem, so I would appreciate your advice.
var audioEngine = AVAudioEngine()
var convertFormatBasicDescription = AudioStreamBasicDescription(
mSampleRate: 8000,
mFormatID: kAudioFormatULaw,
mFormatFlags: AudioFormatFlags(kAudioFormatULaw),
mBytesPerPacket: 1,
mFramesPerPacket: 1,
mBytesPerFrame: 1,
mChannelsPerFrame: 1,
mBitsPerChannel: 8,
mReserved: 0
)
func convert () {
let inputNode = audioEngine.inputNode
let micInputFormat = AVAudioFormat(commonFormat: .pcmFormatFloat32, sampleRate: 44100, channels: 1, interleaved: true)!
let convertFormat = AVAudioFormat(streamDescription: &convertFormatBasicDescription)!
let converter = AVAudioConverter(from: micInputFormat, to: convertFormat)!
inputNode.installTap(onBus: 0, bufferSize: 1024, format: micInputFormat) { [weak self] (buffer, time) in
var newBufferAvailable = true
let inputCallback: AVAudioConverterInputBlock = { inNumPackets, outStatus in
if newBufferAvailable {
outStatus.pointee = .haveData
newBufferAvailable = false
return buffer
} else {
outStatus.pointee = .noDataNow
return nil
}
}
let convertedBuffer = AVAudioPCMBuffer(pcmFormat: convertFormat, frameCapacity: AVAudioFrameCount(convertFormat.sampleRate) * buffer.frameLength / AVAudioFrameCount(buffer.format.sampleRate))!
convertedBuffer.frameLength = convertedBuffer.frameCapacity
var error: NSError?
let status = converter.convert(to: convertedBuffer, error: &error, withInputFrom: inputCallback)
print(convertedBuffer.floatChannelData) // <- prints nil
}
audioEngine.prepare()
try? audioEngine.start()
}
Thank you.
It turned out that the converted G.711 data had to be read from audioBufferList.pointee.mBuffers.mData, not from floatChannelData.
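For reference, a minimal sketch of reading the converted bytes inside the tap, under the same converter/convertedBuffer setup as above; sendG711Data is a hypothetical placeholder for whatever transport is used:

// After converter.convert(to: convertedBuffer, ...) reports success:
// µLaw output is not a standard float/int16 PCM layout, so the samples live in the
// underlying AudioBufferList rather than in floatChannelData/int16ChannelData.
let abl = convertedBuffer.audioBufferList.pointee
if let rawBytes = abl.mBuffers.mData {
    let byteCount = Int(abl.mBuffers.mDataByteSize)
    let g711Data = Data(bytes: rawBytes, count: byteCount)
    // sendG711Data(g711Data)  // hypothetical: hand the encoded bytes to the transport
    print("Converted \(byteCount) bytes of G.711 µLaw")
}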
I'm developing an application that should record the user's voice and stream it to a custom device via the MQTT protocol.
The audio specification for the custom device is little-endian, unsigned, 16-bit LPCM at an 8 kHz sample rate. Packets should be 1000 bytes each.
I'm not familiar with AVAudioEngine, but I found this code sample, which I believe fits my case:
func startRecord() {
audioEngine = AVAudioEngine()
let bus = 0
let inputNode = audioEngine.inputNode
let inputFormat = inputNode.outputFormat(forBus: bus)
var streamDescription = AudioStreamBasicDescription()
streamDescription.mFormatID = kAudioFormatLinearPCM.littleEndian
streamDescription.mSampleRate = 8000.0
streamDescription.mChannelsPerFrame = 1
streamDescription.mBitsPerChannel = 16
streamDescription.mBytesPerPacket = 1000
let outputFormat = AVAudioFormat(streamDescription: &streamDescription)!
guard let converter: AVAudioConverter = AVAudioConverter(from: inputFormat, to: outputFormat) else {
print("Can't convert in to this format")
return
}
inputNode.installTap(onBus: 0, bufferSize: 1024, format: inputFormat) { (buffer, time) in
print("Buffer format: \(buffer.format)")
var newBufferAvailable = true
let inputCallback: AVAudioConverterInputBlock = { inNumPackets, outStatus in
if newBufferAvailable {
outStatus.pointee = .haveData
newBufferAvailable = false
return buffer
} else {
outStatus.pointee = .noDataNow
return nil
}
}
let convertedBuffer = AVAudioPCMBuffer(pcmFormat: outputFormat, frameCapacity: AVAudioFrameCount(outputFormat.sampleRate) * buffer.frameLength / AVAudioFrameCount(buffer.format.sampleRate))!
var error: NSError?
let status = converter.convert(to: convertedBuffer, error: &error, withInputFrom: inputCallback)
assert(status != .error)
print("Converted buffer format:", convertedBuffer.format)
}
audioEngine.prepare()
do {
try audioEngine.start()
} catch {
print("Can't start the engine: \(error)")
}
}
But currently the converter can't convert the input format to my output format, and I don't understand why.
If I change my output format to something like this:
let outputFormat = AVAudioFormat(commonFormat: .pcmFormatInt16, sampleRate: 8000.0, channels: 1, interleaved: false)!
Then it works.
Your streamDescription is wrong: you hadn't filled in all the fields, and mBytesPerPacket was wrong - this is not the same kind of packet your protocol calls for. For uncompressed audio such as LPCM, a packet in an AudioStreamBasicDescription is a single frame, so mFramesPerPacket must be 1 and mBytesPerPacket must equal mBytesPerFrame (2 bytes for 16-bit mono). If your protocol requires the samples to be grouped into 1000-byte packets, you will have to do that yourself after conversion (see the sketch after the code below).
Try this
var streamDescription = AudioStreamBasicDescription()
streamDescription.mSampleRate = 8000.0
streamDescription.mFormatID = kAudioFormatLinearPCM
streamDescription.mFormatFlags = kAudioFormatFlagIsSignedInteger // no endian flag means little endian
streamDescription.mBytesPerPacket = 2
streamDescription.mFramesPerPacket = 1
streamDescription.mBytesPerFrame = 2
streamDescription.mChannelsPerFrame = 1
streamDescription.mBitsPerChannel = 16
streamDescription.mReserved = 0
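Once the tap delivers converted Int16 buffers, the 1000-byte grouping the device expects is just byte-level framing on top of that. A minimal sketch, assuming the convertedBuffer from the tap above and a hypothetical sendPacket(_:) that publishes over MQTT:

var pendingBytes = Data()

func enqueue(_ convertedBuffer: AVAudioPCMBuffer) {
    // 16-bit mono: 2 bytes per frame.
    if let samples = convertedBuffer.int16ChannelData {
        let byteCount = Int(convertedBuffer.frameLength) * MemoryLayout<Int16>.size
        pendingBytes.append(Data(bytes: samples[0], count: byteCount))
    }
    // Emit fixed-size 1000-byte packets; keep any remainder for the next buffer.
    while pendingBytes.count >= 1000 {
        let packet = Data(pendingBytes.prefix(1000))
        pendingBytes.removeFirst(1000)
        sendPacket(packet)  // hypothetical MQTT publish
    }
}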
I'm trying to record audio using AUAudioUnit. I successfully get audio buffers, but I also hear the recorded sound through the speaker while recording. The question is how to get just the buffers without passing the sound to the speaker.
func startRecording() {
setupAudioSessionForRecording()
do {
let audioComponentDescription = AudioComponentDescription(
componentType: kAudioUnitType_Output,
componentSubType: kAudioUnitSubType_RemoteIO,
componentManufacturer: kAudioUnitManufacturer_Apple,
componentFlags: 0,
componentFlagsMask: 0 )
try auAudioUnit = AUAudioUnit(componentDescription: audioComponentDescription)
let audioFormat = AVAudioFormat(commonFormat: .pcmFormatInt16,
sampleRate: sampleRate,
interleaved: true,
channelLayout: AVAudioChannelLayout(layoutTag: kAudioChannelLayoutTag_Mono)!)
try auAudioUnit.inputBusses[0].setFormat(audioFormat)
try auAudioUnit.outputBusses[1].setFormat(audioFormat)
} catch {
print(error)
}
auAudioUnit.isInputEnabled = true
auAudioUnit.outputProvider = {(actionFlags, timestamp, frameCount, inputBusNumber, inputData) -> AUAudioUnitStatus in
let err : OSStatus = self.auAudioUnit.renderBlock(actionFlags,
timestamp,
frameCount,
1,
inputData,
.none)
if err == noErr {
self.processMicrophoneBuffer(inputDataList: inputData,
frameCount: UInt32(frameCount) )
} else {
print(err)
}
return err
}
do {
try auAudioUnit.allocateRenderResources()
try auAudioUnit.startHardware()
} catch {
print(error)
}
}
SOLUTION:
The solution was found here: https://gist.github.com/leonid-s-usov/dcd674b0a8baf96123cac6c4e08e3e0c
The idea is to call the render block inside inputHandler instead of outputProvider:
auAudioUnit.inputHandler = { (actionFlags, timestamp, frameCount, inputBusNumber) in
    var bufferList = AudioBufferList(mNumberBuffers: 1,
                                     mBuffers: AudioBuffer(
                                        mNumberChannels: audioFormat!.channelCount,
                                        mDataByteSize: 0,
                                        mData: nil)) // nil mData lets the render block supply its own buffer
    let err: OSStatus = self.auAudioUnit.renderBlock(actionFlags,
                                                     timestamp,
                                                     frameCount,
                                                     inputBusNumber,
                                                     &bufferList,
                                                     .none)
    if err == noErr {
        self.processMicrophoneBuffer(inputDataList: &bufferList,
                                     frameCount: UInt32(frameCount))
    } else {
        print(err)
    }
}
One way to silence RemoteIO output is to zero the contents (frameCount samples) of the audio buffers in your recorded input data after you process (copy) each buffer.
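A minimal sketch of that zeroing approach inside the original outputProvider, assuming the same processMicrophoneBuffer as in the question:

auAudioUnit.outputProvider = { (actionFlags, timestamp, frameCount, inputBusNumber, inputData) -> AUAudioUnitStatus in
    let err = self.auAudioUnit.renderBlock(actionFlags, timestamp, frameCount, 1, inputData, .none)
    if err == noErr {
        self.processMicrophoneBuffer(inputDataList: inputData, frameCount: UInt32(frameCount))
        // Zero the rendered samples so RemoteIO plays silence instead of the mic input.
        for buffer in UnsafeMutableAudioBufferListPointer(inputData) {
            if let samples = buffer.mData {
                memset(samples, 0, Int(buffer.mDataByteSize))
            }
        }
    }
    return err
}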
We are working with Audio Units on iOS as part of a VoIP application. We have successfully played audio, but now we would like to control the playback speed and pitch in real time. We receive the real-time audio bytes from a UDP socket.
This is the audio unit initialization code for playback:
init(_ client: UDPClient, _ tcpClient: TCPClient, _ opusHelper: OpusHelper, _ tvTemp: UILabel) {
super.init()
let success = initCircularBuffer(&circularBuffer, 4096)
if success {
print("Circular buffer init was successful")
} else {
print("Circular buffer init not successful")
}
self.tvTemp = tvTemp
self.opusHelper = opusHelper
monotonicTimer = MonotonicTimer()
udpClient = client
self.tcpClient = tcpClient
var desc = AudioComponentDescription(
componentType: OSType(kAudioUnitType_Output),
componentSubType: OSType(kAudioUnitSubType_VoiceProcessingIO),
componentManufacturer: OSType(kAudioUnitManufacturer_Apple),
componentFlags: 0,
componentFlagsMask: 0
)
let inputComponent = AudioComponentFindNext(nil, &desc)
status = AudioComponentInstanceNew(inputComponent!, &audioUnit)
if status != noErr {
print("Audio component instance new error \(status!)")
}
// Enable IO for recording
var flag: UInt32 = 1
// Enable IO for playback
status = AudioUnitSetProperty(
audioUnit!,
kAudioOutputUnitProperty_EnableIO,
kAudioUnitScope_Output,
kOutputBus,
&flag,
MemoryLayoutStride.SizeOf32(flag)
)
if status != noErr {
print("Enable IO for playback error \(status!)")
}
var ioFormat = CAStreamBasicDescription(
sampleRate: 48000.0,
numChannels: 1,
pcmf: .int16,
isInterleaved: false
)
status = AudioUnitSetProperty(
audioUnit!,
AudioUnitPropertyID(kAudioUnitProperty_StreamFormat),
AudioUnitScope(kAudioUnitScope_Input),
0,
&ioFormat!,
MemoryLayoutStride.SizeOf32(ioFormat)
)
if status != noErr {
print("Unable to set stream format input to output \(status!)")
}
var playbackCallback = AURenderCallbackStruct(
inputProc: AudioController_PlaybackCallback,
inputProcRefCon: UnsafeMutableRawPointer(Unmanaged.passUnretained(self).toOpaque())
)
status = AudioUnitSetProperty(
audioUnit!,
AudioUnitPropertyID(kAudioUnitProperty_SetRenderCallback),
AudioUnitScope(kAudioUnitScope_Input),
kOutputBus,
&playbackCallback,
MemoryLayout<AURenderCallbackStruct>.size.ui
)
if status != noErr {
print("Failed to set recording render callback \(status!)")
}
status = AudioUnitInitialize(audioUnit!)
if status != noErr {
print("Failed to initialize audio unit \(status!)")
}
}
We put the decoded audio data from UDP into a TPCircularBuffer:
let decodedData = self.opusHelper?.decodeStream(of: self.jitterGet.buffer)
let _ = TPCircularBufferProduceBytes(&self.circularBuffer, decodedData, UInt32(decodedData!.count * 2))
This is how we are playing the audio:
func performPlayback(
_ ioActionFlags: UnsafeMutablePointer<AudioUnitRenderActionFlags>,
inTimeStamp: UnsafePointer<AudioTimeStamp>,
inBufNumber: UInt32,
inNumberFrames: UInt32,
ioData: UnsafeMutablePointer<AudioBufferList>
) -> OSStatus {
let buffer = ioData[0].mBuffers
let bytesToCopy = ioData[0].mBuffers.mDataByteSize
var bufferTail: UnsafeMutableRawPointer?
// print("BYTES TO COPY: \(bytesToCopy)")
self.availableBytes = 0
bufferTail = TPCircularBufferTail(&self.circularBuffer, &self.availableBytes)
bytesToWrite = min(bytesToCopy, self.availableBytes)
print("BYTES TO WRITE: \(bytesToWrite)")
if bytesToWrite >= 3840 {
memcpy(buffer.mData, bufferTail, Int(bytesToWrite))
TPCircularBufferConsume(&self.circularBuffer, bytesToWrite)
} else {
let silence = [Int16](repeating: 0, count: Int(bytesToCopy))
memcpy(buffer.mData, silence, Int(bytesToCopy))
}
return noErr
}
Now we want to change the SPEED and PITCH of the playing audio. Can you please guide us on how to integrate the Varispeed and TimePitch units into our current configuration? We found out that these units may help us.
https://stackoverflow.com/a/59061396/12020007 @hotpaw2
Your answer pointed us down the right path. Now we are looking to change the speed and pitch.
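One way to prototype the speed/pitch control is through the AVAudioEngine wrappers for those effects, AVAudioUnitVarispeed and AVAudioUnitTimePitch, feeding the decoded PCM to a player node. This is a minimal sketch under that assumption; it is not integrated with the AURenderCallback pipeline above:

import AVFoundation

// Decoded Opus output: 48 kHz mono PCM.
let engine = AVAudioEngine()
let player = AVAudioPlayerNode()
let timePitch = AVAudioUnitTimePitch()   // rate and pitch can be changed independently
timePitch.rate = 1.25                    // playback speed multiplier
timePitch.pitch = 300                    // pitch shift in cents

let format = AVAudioFormat(standardFormatWithSampleRate: 48000, channels: 1)!
engine.attach(player)
engine.attach(timePitch)
engine.connect(player, to: timePitch, format: format)
engine.connect(timePitch, to: engine.mainMixerNode, format: format)

do {
    try engine.start()
} catch {
    print("Can't start the engine: \(error)")
}

// Wrap each decoded packet in an AVAudioPCMBuffer and schedule it on the player:
// player.scheduleBuffer(pcmBuffer, completionHandler: nil)
player.play()

// AVAudioUnitVarispeed works the same way, but it couples speed and pitch in a single rate control.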
I am getting 16-bit mono PCM audio data from UDP packets like this:
- (void)udpSocket:(GCDAsyncUdpSocket *)sock didReceiveData:(NSData *)data
fromAddress:(NSData *)address
withFilterContext:(id)filterContext
{
...
}
I am converting this data into a PCM buffer by calling a Swift function, as below:
func toPCMBuffer(data: NSData) -> AVAudioPCMBuffer {
    // Format of the incoming NSData audio
    let audioFormat = AVAudioFormat(commonFormat: .pcmFormatFloat32, sampleRate: 8000, channels: 1, interleaved: false)!
    let PCMBuffer = AVAudioPCMBuffer(pcmFormat: audioFormat, frameCapacity: 1024 * 10)!
    PCMBuffer.frameLength = PCMBuffer.frameCapacity
    let channels = UnsafeBufferPointer(start: PCMBuffer.floatChannelData, count: Int(PCMBuffer.format.channelCount))
    data.getBytes(UnsafeMutableRawPointer(channels[0]), length: data.length)
    return PCMBuffer
}
The data is converted to a PCM buffer and I can see its length in the logs.
But when I try to play the buffer I hear no sound.
Here is the code for receiving:
func toPCMBuffer(data: NSData) -> AVAudioPCMBuffer {
    // Format of the incoming NSData audio
    let audioFormat = AVAudioFormat(commonFormat: .pcmFormatFloat32, sampleRate: 8000, channels: 1, interleaved: false)!
    let PCMBuffer = AVAudioPCMBuffer(pcmFormat: audioFormat, frameCapacity: 1024 * 10)!
    PCMBuffer.frameLength = PCMBuffer.frameCapacity
    let channels = UnsafeBufferPointer(start: PCMBuffer.floatChannelData, count: Int(PCMBuffer.format.channelCount))
    data.getBytes(UnsafeMutableRawPointer(channels[0]), length: data.length)
    let mainMixer = audioEngine.mainMixerNode
    audioEngine.attach(audioFilePlayer)
    audioEngine.connect(audioFilePlayer, to: mainMixer, format: PCMBuffer.format)
    try? audioEngine.start()
    audioFilePlayer.play()
    audioFilePlayer.scheduleBuffer(PCMBuffer, completionHandler: nil)
    return PCMBuffer
}
I ended up using an Objective-C function; the data is getting converted fine:
-(AudioBufferList *) getBufferListFromData: (NSData *) data
{
if (data.length > 0)
{
NSUInteger len = [data length];
//NSData *d2 = [data subdataWithRange:NSMakeRange(4, 1028)];
//I guess you can use Byte*, void* or Float32*. I am not sure if that makes any difference.
Byte* byteData = (Byte*) malloc (len);
memcpy (byteData, [data bytes], len);
if (byteData)
{
AudioBufferList * theDataBuffer =(AudioBufferList*)malloc(sizeof(AudioBufferList) * 1);
theDataBuffer->mNumberBuffers = 1;
theDataBuffer->mBuffers[0].mDataByteSize =(UInt32) len;
theDataBuffer->mBuffers[0].mNumberChannels = 1;
theDataBuffer->mBuffers[0].mData = byteData;
// Read the data into an AudioBufferList
return theDataBuffer;
}
}
return nil;
}
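One common reason for the silence described above is that the network bytes are 16-bit integer samples while the AVAudioPCMBuffer is declared as Float32, so copying the raw bytes into floatChannelData produces garbage or near-silence. A minimal Swift sketch that converts the samples explicitly, assuming the incoming data really is 16-bit signed little-endian mono PCM at 8 kHz:

func pcmBuffer(from data: Data) -> AVAudioPCMBuffer? {
    let format = AVAudioFormat(commonFormat: .pcmFormatFloat32,
                               sampleRate: 8000,
                               channels: 1,
                               interleaved: false)!
    let frameCount = AVAudioFrameCount(data.count / MemoryLayout<Int16>.size)
    guard let buffer = AVAudioPCMBuffer(pcmFormat: format, frameCapacity: frameCount) else { return nil }
    buffer.frameLength = frameCount
    data.withUnsafeBytes { (raw: UnsafeRawBufferPointer) in
        let samples = raw.bindMemory(to: Int16.self)
        let channel = buffer.floatChannelData![0]
        for i in 0..<Int(frameCount) {
            // Scale Int16 [-32768, 32767] into Float32 [-1.0, 1.0).
            channel[i] = Float(Int16(littleEndian: samples[i])) / 32768.0
        }
    }
    return buffer
}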