I am trying to read the frequencies values from a CMSampleBuffer returned by captureOutput of AVCaptureAudioDataOutputSampleBufferDelegate.
The idea is to create a AVAudioPCMBuffer so that then I can read its floatChannelData. But I am not sure how to pass the buffer to it.
I guess I could create it with:
public func captureOutput(_ output: AVCaptureOutput,
didOutput sampleBuffer: CMSampleBuffer,
from connection: AVCaptureConnection) {
guard let blockBuffer = CMSampleBufferGetDataBuffer(sampleBuffer) else {
return
}
let length = CMBlockBufferGetDataLength(blockBuffer)
let audioFormat = AVAudioFormat(commonFormat: .pcmFormatFloat32, sampleRate: 44100, channels: 1, interleaved: false)
let pcmBuffer = AVAudioPCMBuffer(pcmFormat: audioFormat!, frameCapacity: AVAudioFrameCount(length))
pcmBuffer?.frameLength = pcmBuffer!.frameCapacity
But how could I fill its data?
Something along these lines should help:
var asbd = CMSampleBufferGetFormatDescription(sampleBuffer)!.audioStreamBasicDescription!
var audioBufferList = AudioBufferList()
var blockBuffer : CMBlockBuffer?
CMSampleBufferGetAudioBufferListWithRetainedBlockBuffer(
sampleBuffer,
bufferListSizeNeededOut: nil,
bufferListOut: &audioBufferList,
bufferListSize: MemoryLayout<AudioBufferList>.size,
blockBufferAllocator: nil,
blockBufferMemoryAllocator: nil,
flags: 0,
blockBufferOut: &blockBuffer
)
let mBuffers = audioBufferList.mBuffers
let frameLength = AVAudioFrameCount(Int(mBuffers.mDataByteSize) / MemoryLayout<Float>.size)
let pcmBuffer = AVAudioPCMBuffer(pcmFormat: AVAudioFormat(streamDescription: &asbd)!, frameCapacity: frameLength)!
pcmBuffer.frameLength = frameLength
pcmBuffer.mutableAudioBufferList.pointee.mBuffers = mBuffers
pcmBuffer.mutableAudioBufferList.pointee.mNumberBuffers = 1
This seems to create a valid AVAudioPCMBuffer at the end of it inside a capture session. But it's at the wrong frame length for my use case right now, so need to do some further buffering.
Related
I am trying PCM (microphone input)->G711 and am having trouble getting it to work.
With the current implementation, the conversion itself succeeds, but it is as if there is no data stored in the buffer.
I have been doing a lot of research, but I can't seem to find the cause of the problem, so I would appreciate your advice.
var audioEngine = AVAudioEngine()
var convertFormatBasicDescription = AudioStreamBasicDescription(
mSampleRate: 8000,
mFormatID: kAudioFormatULaw,
mFormatFlags: AudioFormatFlags(kAudioFormatULaw),
mBytesPerPacket: 1,
mFramesPerPacket: 1,
mBytesPerFrame: 1,
mChannelsPerFrame: 1,
mBitsPerChannel: 8,
mReserved: 0
)
func convert () {
let inputNode = audioEngine.inputNode
let micInputFormat = AVAudioFormat(commonFormat: .pcmFormatFloat32, sampleRate: 44100, channels: 1, interleaved: true)!
let convertFormat = AVAudioFormat(streamDescription: &convertFormatBasicDescription)!
let converter = AVAudioConverter(from: micInputFormat, to: convertFormat)!
inputNode.installTap(onBus: 0, bufferSize: 1024, format: micInputFormat) { [weak self] (buffer, time) in
var newBufferAvailable = true
let inputCallback: AVAudioConverterInputBlock = { inNumPackets, outStatus in
if newBufferAvailable {
outStatus.pointee = .haveData
newBufferAvailable = false
return buffer
} else {
outStatus.pointee = .noDataNow
return nil
}
}
let convertedBuffer = AVAudioPCMBuffer(pcmFormat: convertFormat, frameCapacity: AVAudioFrameCount(convertFormat.sampleRate) * buffer.frameLength / AVAudioFrameCount(buffer.format.sampleRate))!
convertedBuffer.frameLength = convertedBuffer.frameCapacity
var error: NSError?
let status = converter.convert(to: convertedBuffer, error: &error, withInputFrom: inputCallback)
print(convertedBuffer.floatChannelData) <- nil
}
audioEngine.prepare()
try audioEngine.start()
}
Thank you.
It was audioBufferList.pointee.mBuffers.mData, not floatChannelData, that was needed to get the data converted to G711.
I'm developing an application that should record a user's voice and stream it to a custom device via the MQTT protocol.
The audio specification for the custom device: little-endian, unsigned, 16-bit LPCM at 8khz sample rate. Packets should be 1000 bytes each.
I'm not familiar with AudioEngine and I found this sample of code which I believe fits my case:
func startRecord() {
audioEngine = AVAudioEngine()
let bus = 0
let inputNode = audioEngine.inputNode
let inputFormat = inputNode.outputFormat(forBus: bus)
var streamDescription = AudioStreamBasicDescription()
streamDescription.mFormatID = kAudioFormatLinearPCM.littleEndian
streamDescription.mSampleRate = 8000.0
streamDescription.mChannelsPerFrame = 1
streamDescription.mBitsPerChannel = 16
streamDescription.mBytesPerPacket = 1000
let outputFormat = AVAudioFormat(streamDescription: &streamDescription)!
guard let converter: AVAudioConverter = AVAudioConverter(from: inputFormat, to: outputFormat) else {
print("Can't convert in to this format")
return
}
inputNode.installTap(onBus: 0, bufferSize: 1024, format: inputFormat) { (buffer, time) in
print("Buffer format: \(buffer.format)")
var newBufferAvailable = true
let inputCallback: AVAudioConverterInputBlock = { inNumPackets, outStatus in
if newBufferAvailable {
outStatus.pointee = .haveData
newBufferAvailable = false
return buffer
} else {
outStatus.pointee = .noDataNow
return nil
}
}
let convertedBuffer = AVAudioPCMBuffer(pcmFormat: outputFormat, frameCapacity: AVAudioFrameCount(outputFormat.sampleRate) * buffer.frameLength / AVAudioFrameCount(buffer.format.sampleRate))!
var error: NSError?
let status = converter.convert(to: convertedBuffer, error: &error, withInputFrom: inputCallback)
assert(status != .error)
print("Converted buffer format:", convertedBuffer.format)
}
audioEngine.prepare()
do {
try audioEngine.start()
} catch {
print("Can't start the engine: \(error)")
}
}
But currently, the converter can't convert the input format to my output format and I don't understand why.
If I change my output format to something like that:
let outputFormat = AVAudioFormat(commonFormat: .pcmFormatInt16, sampleRate: 8000.0, channels: 1, interleaved: false)!
Then it works.
Your streamDescription is wrong, you hadn't filled in all the fields, and mBytesPerPacket was wrong - this is not the same kind of packet your protocol calls for. For uncompressed audio (like LPCM) AudioStreamBasicDescription requires this field to be 1. If your protocol requires samples to be in groups of 1000, then you will have to do that.
Try this
var streamDescription = AudioStreamBasicDescription()
streamDescription.mSampleRate = 8000.0
streamDescription.mFormatID = kAudioFormatLinearPCM
streamDescription.mFormatFlags = kAudioFormatFlagIsSignedInteger // no endian flag means little endian
streamDescription.mBytesPerPacket = 2
streamDescription.mFramesPerPacket = 1
streamDescription.mBytesPerFrame = 2
streamDescription.mChannelsPerFrame = 1
streamDescription.mBitsPerChannel = 16
streamDescription.mReserved = 0
I have, for the past week, been trying to take audio from the microphone (on iOS), down sample it and write that to a '.aac' file.
I've finally gotten to point where it's almost working
let inputNode = audioEngine.inputNode
let inputFormat = inputNode.outputFormat(forBus: 0)
let bufferSize = UInt32(4096)
//let sampleRate = 44100.0
let sampleRate = 8000
let bitRate = sampleRate * 16
let fileUrl = url(appending: "NewRecording.aac")
print("Write to \(fileUrl)")
do {
outputFile = try AVAudioFile(forWriting: fileUrl,
settings: [
AVFormatIDKey: kAudioFormatMPEG4AAC,
AVSampleRateKey: sampleRate,
AVEncoderBitRateKey: bitRate,
AVEncoderAudioQualityKey: AVAudioQuality.high.rawValue,
AVNumberOfChannelsKey: 1],
commonFormat: .pcmFormatFloat32,
interleaved: false)
} catch let error {
print("Failed to create audio file for \(fileUrl): \(error)")
return
}
recordButton.setImage(RecordingStyleKit.imageOfMicrophone(fill: .red), for: [])
// Down sample the audio to 8kHz
let fmt = AVAudioFormat(commonFormat: .pcmFormatFloat32, sampleRate: Double(sampleRate), channels: 1, interleaved: false)!
let converter = AVAudioConverter(from: inputFormat, to: fmt)!
inputNode.installTap(onBus: 0, bufferSize: AVAudioFrameCount(bufferSize), format: inputFormat) { (buffer, time) in
let inputCallback: AVAudioConverterInputBlock = { inNumPackets, outStatus in
outStatus.pointee = AVAudioConverterInputStatus.haveData
return buffer
}
let convertedBuffer = AVAudioPCMBuffer(pcmFormat: fmt,
frameCapacity: AVAudioFrameCount(fmt.sampleRate) * buffer.frameLength / AVAudioFrameCount(buffer.format.sampleRate))!
var error: NSError? = nil
let status = converter.convert(to: convertedBuffer, error: &error, withInputFrom: inputCallback)
assert(status != .error)
if let outputFile = self.outputFile {
do {
try outputFile.write(from: convertedBuffer)
}
catch let error {
print("Write failed: \(error)")
}
}
}
audioEngine.prepare()
do {
try audioEngine.start()
}
catch {
print(error.localizedDescription)
}
The problem is, the resulting file MUST be in MPEG ADTS, AAC, v4 LC, 8 kHz, monaural format, but the code above only generates MPEG ADTS, AAC, v2 LC, 8 kHz, monaural
That is, it MUST be v4, not v2 (I have no choice)
(This result is generated by using file {name} on the command line to dump it's properties. I also use MediaInfo to provide additional information)
I've been trying to figure out if there is someway to provide a hint or setting to AVAudioFile which will change the LC (Low Complexity) version from 2 to 4?
I've been scanning through the docs and examples but can't seem to find any suggestions
I've been using the following methods to convert an AVAudioPCMBuffer to [UInt8] and then [UInt8] back to an AVAudioPCMBuffer. The problem is that every single conversion is a total of 17640 bytes, which to stream over MultipeerConnectivity is a lot. In fact, I think my stream ends up reading data slower than data is coming in, since if I end the stream on one device I can see that the other continues to read data until it realizes that the stream has ended.
Here is my conversion of AVAudioPCMBuffer to [UInt8]. Credit for this answer goes to Rhythmic Fistman from this answer.
func audioBufferToBytes(audioBuffer: AVAudioPCMBuffer) -> [UInt8] {
let srcLeft = audioBuffer.floatChannelData![0]
let bytesPerFrame = audioBuffer.format.streamDescription.pointee.mBytesPerFrame
let numBytes = Int(bytesPerFrame * audioBuffer.frameLength)
// initialize bytes by 0
var audioByteArray = [UInt8](repeating: 0, count: numBytes)
srcLeft.withMemoryRebound(to: UInt8.self, capacity: numBytes) { srcByteData in
audioByteArray.withUnsafeMutableBufferPointer {
$0.baseAddress!.initialize(from: srcByteData, count: numBytes)
}
}
return audioByteArray
}
And here is [UInt8] to AVAudioPCMBuffer
func bytesToAudioBuffer(_ buf: [UInt8]) -> AVAudioPCMBuffer {
let fmt = AVAudioFormat(commonFormat: .pcmFormatFloat32, sampleRate: 8000, channels: 1, interleaved: false)
let frameLength = UInt32(buf.count) / fmt.streamDescription.pointee.mBytesPerFrame
let audioBuffer = AVAudioPCMBuffer(pcmFormat: fmt, frameCapacity: frameLength)
audioBuffer.frameLength = frameLength
let dstLeft = audioBuffer.floatChannelData![0]
buf.withUnsafeBufferPointer {
let src = UnsafeRawPointer($0.baseAddress!).bindMemory(to: Float.self, capacity: Int(frameLength))
dstLeft.initialize(from: src, count: Int(frameLength))
}
return audioBuffer
}
Can anyone help me compress this data so that it is easier to send over a stream, and then decompress it so it can be played?
I'm trying to live volume levels using AVCaptureDevice etc it compiles and runs but the values just seem to be random and I keep getting overflow errors as well.
EDIT:
also is it normal for the RMS range to be 0 to about 20000?
if let audioCaptureDevice : AVCaptureDevice = AVCaptureDevice.defaultDeviceWithMediaType(AVMediaTypeAudio){
try audioCaptureDevice.lockForConfiguration()
let audioInput = try AVCaptureDeviceInput(device: audioCaptureDevice)
audioCaptureDevice.unlockForConfiguration()
if(captureSession.canAddInput(audioInput)){
captureSession.addInput(audioInput)
print("added input")
}
let audioOutput = AVCaptureAudioDataOutput()
audioOutput.setSampleBufferDelegate(self, queue: GlobalUserInitiatedQueue)
if(captureSession.canAddOutput(audioOutput)){
captureSession.addOutput(audioOutput)
print("added output")
}
//supposed to start session not on UI queue coz it takes a while
dispatch_async(GlobalUserInitiatedQueue) {
print("starting captureSession")
self.captureSession.startRunning()
}
}
...
func captureOutput(captureOutput: AVCaptureOutput!, let didOutputSampleBuffer sampleBuffer: CMSampleBuffer!, fromConnection connection: AVCaptureConnection!) {
// Needs to be initialized somehow, even if we take only the address
var audioBufferList = AudioBufferList(mNumberBuffers: 1,
mBuffers: AudioBuffer(mNumberChannels: 1, mDataByteSize: 0, mData: nil))
//this needs to be in method otherwise only runs 125 times?
var blockBuffer: CMBlockBuffer?
CMSampleBufferGetAudioBufferListWithRetainedBlockBuffer(
sampleBuffer,
nil,
&audioBufferList,
sizeof(audioBufferList.dynamicType),
nil,
nil,
UInt32(kCMSampleBufferFlag_AudioBufferList_Assure16ByteAlignment),
&buffer
)
let abl = UnsafeMutableAudioBufferListPointer(&audioBufferList)
for buffer in abl{
let samples = UnsafeMutableBufferPointer<Int16>(start: UnsafeMutablePointer(buffer.mData),
count: Int(buffer.mDataByteSize)/sizeof(Int16))
var sum:Int = 0
for sample in samples {
sum = sum + Int(sample*sample)
}
let rms = sqrt(Double(sum)/count)
}
Use AVCaptureAudioDataOutputSampleBufferDelegate's method
captureOutput(captureOutput: AVCaptureOutput!, let didOutputSampleBuffer sampleBuffer: CMSampleBuffer!, fromConnection connection: AVCaptureConnection!)
to get AVCaptureConnection from last parameter.
Then get AVCaptureAudioChannel from connection.audioChannels
Then you can get volume levels from it:
audioChannel.averagePowerLevel
audioChannel.peakHoldLevel
Hey I don't understand much of it but here is a working Swift 5 version:
func captureOutput(_ output : AVCaptureOutput,
didOutput sampleBuffer: CMSampleBuffer,
from connection : AVCaptureConnection) {
var buffer: CMBlockBuffer? = nil
// Needs to be initialized somehow, even if we take only the address
let convenianceBuffer = AudioBuffer(mNumberChannels: 1, mDataByteSize: 0, mData: nil)
var audioBufferList = AudioBufferList(mNumberBuffers: 1,
mBuffers: convenianceBuffer)
CMSampleBufferGetAudioBufferListWithRetainedBlockBuffer(
sampleBuffer,
bufferListSizeNeededOut: nil,
bufferListOut: &audioBufferList,
bufferListSize: MemoryLayout<AudioBufferList>.size(ofValue: audioBufferList),
blockBufferAllocator: nil,
blockBufferMemoryAllocator: nil,
flags: UInt32(kCMSampleBufferFlag_AudioBufferList_Assure16ByteAlignment),
blockBufferOut: &buffer
)
let abl = UnsafeMutableAudioBufferListPointer(&audioBufferList)
for buffer in abl {
let originRawPtr = buffer.mData
let ptrDataSize = Int(buffer.mDataByteSize)
// From raw pointer to typed Int16 pointer
let buffPtrInt16 = originRawPtr?.bindMemory(to: Int16.self, capacity: ptrDataSize)
// From pointer typed Int16 to pointer of [Int16]
// So we can iterate on it simply
let unsafePtrByteSize = ptrDataSize/Int16.bitWidth
let samples = UnsafeMutableBufferPointer<Int16>(start: buffPtrInt16,
count: unsafePtrByteSize)
// Average of each sample squared, then root squared
let sumOfSquaredSamples = samples.map(Float.init).reduce(0) { $0 + $1*$1 }
let averageOfSomething = sqrt(sumOfSquaredSamples / Float(samples.count))
DispatchQueue.main.async {
print("Calulcus of something: \(String(averageOfSomething))" )
}
}
}
It appears I have it working. I casted sample to an Int64 before doing any manipulations.
for buffer in abl{
let samples = UnsafeMutableBufferPointer<Int16>(start: UnsafeMutablePointer(buffer.mData),
count: Int(buffer.mDataByteSize)/sizeof(Int16))
var sum:Int64 = 0
for sample in samples {
let s = Int64(sample)
sum +=s*s
}
dispatch_async(dispatch_get_main_queue()) {
self.volLevel.text = String(sqrt(Float(sum/Int64(samples.count))))
}
I've played with your example. This is a full working swift 2 code snippet:
// also define a variable in class scope, otherwise captureOutput will not be called
var session : AVCaptureSession!
func startCapture() {
if let device : AVCaptureDevice = AVCaptureDevice.defaultDeviceWithMediaType(AVMediaTypeAudio){
do {
self.session = AVCaptureSession()
try device.lockForConfiguration()
let audioInput = try AVCaptureDeviceInput(device: device)
device.unlockForConfiguration()
if(self.session.canAddInput(audioInput)){
self.session.addInput(audioInput)
print("added input")
}
let audioOutput = AVCaptureAudioDataOutput()
audioOutput.setSampleBufferDelegate(self, queue: dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0))
if(self.session.canAddOutput(audioOutput)){
self.session.addOutput(audioOutput)
print("added output")
}
//supposed to start session not on UI queue coz it takes a while
dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0)) {
print("starting captureSession")
self.session.startRunning()
}
} catch {
}
}
}
func captureOutput(captureOutput: AVCaptureOutput!, let didOutputSampleBuffer sampleBuffer: CMSampleBuffer!, fromConnection connection: AVCaptureConnection!) {
var buffer: CMBlockBuffer? = nil
// Needs to be initialized somehow, even if we take only the address
var audioBufferList = AudioBufferList(mNumberBuffers: 1,
mBuffers: AudioBuffer(mNumberChannels: 1, mDataByteSize: 0, mData: nil))
CMSampleBufferGetAudioBufferListWithRetainedBlockBuffer(
sampleBuffer,
nil,
&audioBufferList,
sizeof(audioBufferList.dynamicType),
nil,
nil,
UInt32(kCMSampleBufferFlag_AudioBufferList_Assure16ByteAlignment),
&buffer
)
let abl = UnsafeMutableAudioBufferListPointer(&audioBufferList)
for buffer in abl {
let samples = UnsafeMutableBufferPointer<Int16>(start: UnsafeMutablePointer(buffer.mData),
count: Int(buffer.mDataByteSize)/sizeof(Int16))
var sum:Int64 = 0
for sample in samples {
let s = Int64(sample)
sum = (sum + s*s)
}
dispatch_async(dispatch_get_main_queue()) {
print( String(sqrt(Float(sum/Int64(samples.count)))))
}
}
}