Swift AVFoundation timing info for audio measurements - iOS

I am creating an application that takes an audio measurement by playing some stimulus data and recording the microphone input, and then analysing the data.
I am having trouble accounting for the time taken to initialise and start the audio engine, as this varies with each run and is also dependent on the hardware used, etc.
So, I have an audio engine and have installed a tap on the hardware input, with input 1 being the microphone recording and input 2 being a reference input (also from the hardware). The output is physically Y-split and fed back into input 2.
The app initialises the engine, plays the stimulus audio plus 1 second of silence (to allow propagation time for the microphone to record the whole signal back), and then stops and closes the engine.
I write the two input buffers as a WAV file so that I can import it into an existing DAW and visually examine the signals. I can see that each time I take a measurement, the time difference between the two signals is different (despite the fact that the microphone has not moved and the hardware has stayed the same). I am assuming this is down to the latency of the hardware, the time taken to initialise the engine and the way the device distributes tasks.
I have tried capturing the absolute time of the first buffer callback in each installTap block using mach_absolute_time and subtracting the two, and I can see that the difference varies quite a lot with each call:
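(For reference, this is the mach-time conversion each tap performs, pulled out into a small helper; a sketch equivalent to the inline code in the class below:)
import Darwin

// Convert mach_absolute_time() host ticks into nanoseconds.
func hostTimeNanos() -> UInt64 {
    var info = mach_timebase_info()
    mach_timebase_info(&info)
    return mach_absolute_time() * UInt64(info.numer) / UInt64(info.denom)
}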
class newAVAudioEngine{
var engine = AVAudioEngine()
var audioBuffer = AVAudioPCMBuffer()
var running = true
var in1Buf:[Float]=Array(repeating:0, count:totalRecordSize)
var in2Buf:[Float]=Array(repeating:0, count:totalRecordSize)
var buf1current:Int = 0
var buf2current:Int = 0
var in1firstRun:Bool = false
var in2firstRun:Bool = false
var in1StartTime = 0
var in2startTime = 0
func measure(inputSweep:SweepFilter) -> measurement {
initializeEngine(inputSweep: inputSweep)
while running == true {
}
let measureResult = measurement.init(meas: meas,ref: ref)
return measureResult
}
func initializeEngine(inputSweep:SweepFilter) {
buf1current = 0
buf2current = 0
in1StartTime = 0
in2startTime = 0
in1firstRun = true
in2firstRun = true
in1Buf = Array(repeating:0, count:totalRecordSize)
in2Buf = Array(repeating:0, count:totalRecordSize)
engine.stop()
engine.reset()
engine = AVAudioEngine()
let srcNode = AVAudioSourceNode { _, _, frameCount, AudioBufferList -> OSStatus in
let ablPointer = UnsafeMutableAudioBufferListPointer(AudioBufferList)
if (Int(frameCount) + time) <= inputSweep.stimulus.count {
for frame in 0..<Int(frameCount) {
let value = inputSweep.stimulus[frame + time]
for buffer in ablPointer {
let buf: UnsafeMutableBufferPointer<Float> = UnsafeMutableBufferPointer(buffer)
buf[frame] = value
}
}
time += Int(frameCount)
return noErr
} else {
for frame in 0..<Int(frameCount) {
let value = 0
for buffer in ablPointer {
let buf: UnsafeMutableBufferPointer<Float> = UnsafeMutableBufferPointer(buffer)
buf[frame] = Float(value)
}
}
}
return noErr
}
let format = engine.outputNode.inputFormat(forBus: 0)
let stimulusFormat = AVAudioFormat(commonFormat: format.commonFormat,
sampleRate: Double(sampleRate),
channels: 1,
interleaved: format.isInterleaved)
do {
try AVAudioSession.sharedInstance().setCategory(.playAndRecord)
let ioBufferDuration = 128.0 / 44100.0
try AVAudioSession.sharedInstance().setPreferredIOBufferDuration(ioBufferDuration)
} catch {
assertionFailure("AVAudioSession setup failed")
}
let input = engine.inputNode
let inputFormat = input.inputFormat(forBus: 0)
print("InputNode Format is \(inputFormat)")
engine.attach(srcNode)
engine.connect(srcNode, to: engine.mainMixerNode, format: stimulusFormat)
if internalRefLoop == true {
srcNode.installTap(onBus: 0, bufferSize: 1024, format: stimulusFormat, block: {(buffer: AVAudioPCMBuffer!, time: AVAudioTime!) -> Void in
if self.in2firstRun == true {
var info = mach_timebase_info()
mach_timebase_info(&info)
let currentTime = mach_absolute_time()
let nanos = currentTime * UInt64(info.numer) / UInt64(info.denom)
self.in2startTime = Int(nanos)
self.in2firstRun = false
}
do {
let floatData = buffer.floatChannelData?.pointee
for frame in 0..<buffer.frameLength{
if (self.buf2current + Int(frame)) < totalRecordSize{
self.in2Buf[self.buf2current + Int(frame)] = floatData![Int(frame)]
}
}
self.buf2current += Int(buffer.frameLength)
if (self.numberOfSamples + Int(buffer.frameLength)) <= totalRecordSize{
try self.stimulusFile.write(from: buffer)
self.numberOfSamples += Int(buffer.frameLength) } else {
self.engine.stop()
self.running = false
}
} catch {
print(NSString(string: "write failed"))
}
})
}
let micAudioConverter = AVAudioConverter(from: inputFormat, to: stimulusFormat!)
var micChannelMap:[NSNumber] = [0,-1]
micAudioConverter?.channelMap = micChannelMap
let refAudioConverter = AVAudioConverter(from: inputFormat, to: stimulusFormat!)
var refChannelMap:[NSNumber] = [1,-1]
refAudioConverter?.channelMap = refChannelMap
//Measurement Tap
engine.inputNode.installTap(onBus: 0, bufferSize: 1024, format: inputFormat, block: {(buffer2: AVAudioPCMBuffer!, time: AVAudioTime!) -> Void in
//print(NSString(string:"writing"))
if self.in1firstRun == true {
var info = mach_timebase_info()
mach_timebase_info(&info)
let currentTime = mach_absolute_time()
let nanos = currentTime * UInt64(info.numer) / UInt64(info.denom)
self.in1StartTime = Int(nanos)
self.in1firstRun = false
}
do {
let micConvertedBuffer = AVAudioPCMBuffer(pcmFormat: stimulusFormat!, frameCapacity: buffer2.frameCapacity)
let micInputBlock: AVAudioConverterInputBlock = { inNumPackets, outStatus in
outStatus.pointee = AVAudioConverterInputStatus.haveData
return buffer2
}
var error: NSError? = nil
//let status = audioConverter.convert(to: convertedBuffer!, error: &error, withInputFrom: inputBlock)
let status = micAudioConverter?.convert(to: micConvertedBuffer!, error: &error, withInputFrom: micInputBlock)
//print(status)
let floatData = micConvertedBuffer?.floatChannelData?.pointee
for frame in 0..<micConvertedBuffer!.frameLength{
if (self.buf1current + Int(frame)) < totalRecordSize{
self.in1Buf[self.buf1current + Int(frame)] = floatData![Int(frame)]
}
if (self.buf1current + Int(frame)) >= totalRecordSize {
self.engine.stop()
self.running = false
}
}
self.buf1current += Int(micConvertedBuffer!.frameLength)
try self.measurementFile.write(from: micConvertedBuffer!)
} catch {
print(NSString(string: "write failed"))
}
if internalRefLoop == false {
if self.in2firstRun == true{
var info = mach_timebase_info()
mach_timebase_info(&info)
let currentTime = mach_absolute_time()
let nanos = currentTime * UInt64(info.numer) / UInt64(info.denom)
self.in2startTime = Int(nanos)
self.in2firstRun = false
}
do {
let refConvertedBuffer = AVAudioPCMBuffer(pcmFormat: stimulusFormat!, frameCapacity: buffer2.frameCapacity)
let refInputBlock: AVAudioConverterInputBlock = { inNumPackets, outStatus in
outStatus.pointee = AVAudioConverterInputStatus.haveData
return buffer2
}
var error: NSError? = nil
let status = refAudioConverter?.convert(to: refConvertedBuffer!, error: &error, withInputFrom: refInputBlock)
//print(status)
let floatData = refConvertedBuffer?.floatChannelData?.pointee
for frame in 0..<refConvertedBuffer!.frameLength{
if (self.buf2current + Int(frame)) < totalRecordSize{
self.in2Buf[self.buf2current + Int(frame)] = floatData![Int(frame)]
}
}
if (self.numberOfSamples + Int(buffer2.frameLength)) <= totalRecordSize{
self.buf2current += Int(refConvertedBuffer!.frameLength)
try self.stimulusFile.write(from: refConvertedBuffer!) } else {
self.engine.stop()
self.running = false
}
} catch {
print(NSString(string: "write failed"))
}
}
}
)
assert(engine.inputNode != nil)
running = true
try! engine.start()
So the above method is my entire class. Currently each buffer call on installTap writes the input directly to a WAV file. This is where I can see the two end results differing each time. I have tried adding the start-time variables and subtracting the two, but the results still vary.
Do I need to take into account that my output will have latency too, which may vary with each call? If so, how do I add this time into the equation? What I am looking for is for the two inputs and the output to all share a relative time base, so that I can compare them. The differing hardware latency will not matter too much, as long as I can identify the end call times.

If you are doing real-time measurements, you might want to use AVAudioSinkNode instead of a tap. The sink node is new and was introduced alongside the AVAudioSourceNode you are already using. With a tap installed you won't be able to get precise timing.
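A minimal sketch of the idea (my names, not tested against your setup): the sink node's receiver block is handed an AudioTimeStamp on every render call, and its mHostTime is in the same mach host-tick units as mach_absolute_time(), so you can timestamp the first input buffer precisely.
import AVFoundation

final class InputTimestampRecorder {
    let engine = AVAudioEngine()
    private(set) var firstInputHostTime: UInt64 = 0

    func start() throws {
        let input = engine.inputNode
        let format = input.outputFormat(forBus: 0)

        // The receiver block runs on the render thread with a precise AudioTimeStamp.
        let sink = AVAudioSinkNode { [weak self] timestamp, _, _ in
            if let self = self, self.firstInputHostTime == 0 {
                // mHostTime is in mach host ticks, directly comparable to mach_absolute_time().
                self.firstInputHostTime = timestamp.pointee.mHostTime
            }
            // ... copy samples out of the AudioBufferList here if needed ...
            return noErr
        }

        engine.attach(sink)
        engine.connect(input, to: sink, format: format)
        try engine.start()
    }
}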

Related

Set left and right headphone volume using two different sliders

I am generating a wave sound for different frequencies; the user should hear this sound through headphones only, and he/she will set the left and right headphone volumes using two different sliders. To generate the wave sound I wrote the code below, which works perfectly.
The problem is: for the last 5 days I have been trying to set the volume for the left and right headphones separately, but with no luck.
class Synth {
// MARK: Properties
public static let shared = Synth()
public var volume: Float {
set {
audioEngine.mainMixerNode.outputVolume = newValue
}
get {
audioEngine.mainMixerNode.outputVolume
}
}
public var frequencyRampValue: Float = 0
public var frequency: Float = 440 {
didSet {
if oldValue != 0 {
frequencyRampValue = frequency - oldValue
} else {
frequencyRampValue = 0
}
}
}
private var audioEngine: AVAudioEngine
private lazy var sourceNode = AVAudioSourceNode { _, _, frameCount, audioBufferList in
let ablPointer = UnsafeMutableAudioBufferListPointer(audioBufferList)
let localRampValue = self.frequencyRampValue
let localFrequency = self.frequency - localRampValue
let period = 1 / localFrequency
for frame in 0..<Int(frameCount) {
let percentComplete = self.time / period
let sampleVal = self.signal(localFrequency + localRampValue * percentComplete, self.time)
self.time += self.deltaTime
self.time = fmod(self.time, period)
for buffer in ablPointer {
let buf: UnsafeMutableBufferPointer<Float> = UnsafeMutableBufferPointer(buffer)
buf[frame] = sampleVal
}
}
self.frequencyRampValue = 0
return noErr
}
private var time: Float = 0
private let sampleRate: Double
private let deltaTime: Float
private var signal: Signal
// MARK: Init
init(signal: @escaping Signal = Oscillator.square) {
audioEngine = AVAudioEngine()
let mainMixer = audioEngine.mainMixerNode
let outputNode = audioEngine.outputNode
let format = outputNode.inputFormat(forBus: 0)
sampleRate = format.sampleRate
deltaTime = 1 / Float(sampleRate)
self.signal = signal
let inputFormat = AVAudioFormat(commonFormat: format.commonFormat,
sampleRate: format.sampleRate,
channels: 1,
interleaved: format.isInterleaved)
audioEngine.attach(sourceNode)
audioEngine.connect(sourceNode, to: mainMixer, format: inputFormat)
audioEngine.connect(mainMixer, to: outputNode, format: nil)
mainMixer.outputVolume = 0
audioEngine.mainMixerNode.pan = 100 // this does not work,
//audioEngine.mainMixerNode.pan = 1.0 // this also does not work
do {
try audioEngine.start()
} catch {
print("Could not start engine: \(error.localizedDescription)")
}
}
//This function will be called in view controller to generate sound
public func setWaveformTo(_ signal: @escaping Signal) {
self.signal = signal
}
}
With the above code I can hear the wave sound as normal in left and right headphone.
I tried to use audioEngine.mainMixerNode.pan with values of 100 and -100, and also -1.0 and 1.0, but this did not make any change.
The allowable range for the pan value is [-1.0, 1.0]. The values 100 and -100 are far outside that range, so it's not surprising that they had no effect. Try 0.75 or -0.75 instead.
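A minimal sketch of that suggestion (the helper below is mine, not from the answer): clamp whatever the slider hands you into the documented range before assigning it to the mixer's pan.
import AVFoundation

// -1.0 is full left, 0.0 is centre, 1.0 is full right.
func setPan(_ value: Float, on engine: AVAudioEngine) {
    engine.mainMixerNode.pan = max(-1.0, min(1.0, value))
}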

Swift AudioToolbox not playing back audio file nor filling audio buffer

I am trying to play back an audio file using AudioToolbox. I wrote this in Swift based on an old Objective-C example on the Apple website. It compiles and runs, but the callback function never gets triggered once CFRunLoop starts. (It gets called during setup, but I call it manually, so that doesn't count.)
My understanding of how this is supposed to work is that when this line is called:
status = AudioQueueNewOutput(&dataFormat, callback, &aqData, CFRunLoopGetCurrent(), commonModes, 0, &queue)
It is supposed to create an AudioQueue object, place it inside the CFRunLoop, register the callback function called "callback", and then give me back a reference to the queue object. Whether the callback lives inside the AudioQueue, which lives inside the CFRunLoop, or lives in the CFRunLoop directly, I'm not sure.
When I'm done setting up I call:
status = AudioQueueStart(aqData.mQueue!, nil)
which "starts" the queue.
Then I call:
repeat {
CFRunLoopRunInMode(CFRunLoopMode.defaultMode, 0.1, false)
}
My understanding is that this is supposed to drive the audio queue, which in turn calls my callback function. However, from this point on the callback function never gets hit. I am thinking there might be a way to inspect the audio queue, or perhaps inspect the CFRunLoop. I may have made a mistake on one of the pointers somewhere.
The result is that the app plays nothing but silence.
let kNumberBuffers = 3;
var aqData = AQPlayerState()
var bufferLength : Float64 = 0.1
func playAudioFileWithToolbox(){
let bundle = Bundle.main
let permissions : AudioFilePermissions = .readPermission
let filePath = bundle.path(forResource: "dreams", ofType: "wav")!
var filePathArray = Array(filePath.utf8)
let filePathSize = filePath.count
let audioFileUrl = CFURLCreateFromFileSystemRepresentation(nil, &filePathArray, filePathSize, false)
var status = AudioFileOpenURL(audioFileUrl!, permissions, kAudioFileWAVEType, &aqData.mAudioFile)
if status != noErr{
print("ErrorOpeningAudioFileUrl")
}
var dataFormatSize:UInt32 = UInt32(MemoryLayout<AudioStreamBasicDescription>.size)
status = AudioFileGetProperty(aqData.mAudioFile!, kAudioFilePropertyDataFormat, &dataFormatSize, &aqData.mDataFormat)
if status != noErr{
print("Error getting AudioStreamBasicDescription")
}
var queue : AudioQueueRef? = aqData.mQueue
var dataFormat = aqData.mDataFormat
let commonModes = CFRunLoopMode.commonModes.rawValue
status = AudioQueueNewOutput(&dataFormat, callback, &aqData, CFRunLoopGetCurrent(), commonModes, 0, &queue)
if status == noErr{
aqData.mQueue = queue
} else {
print("TroubleSettingUpOutputQueue")
}
var maxPacketSize:UInt32 = 0;
var propertySize:UInt32 = UInt32(MemoryLayout.size(ofValue: maxPacketSize))
AudioFileGetProperty(aqData.mAudioFile!, kAudioFilePropertyPacketSizeUpperBound, &propertySize, &maxPacketSize)
var bufferByteSize = aqData.bufferByteSize
DeriveBufferSize(ASBDesc: &dataFormat, maxPacketSize: maxPacketSize, seconds: bufferLength, outBufferSize: &bufferByteSize, outNumPacketsToRead: &aqData.mNumPacketsToRead)
aqData.bufferByteSize = bufferByteSize
let isFormatVBR = aqData.mDataFormat.mBytesPerPacket == 0 || aqData.mDataFormat.mFramesPerPacket == 0
if isFormatVBR{
aqData.mPacketDescs = UnsafeMutablePointer<AudioStreamPacketDescription>.allocate(capacity: Int(aqData.mNumPacketsToRead))
} else {
aqData.mPacketDescs = nil
}
var cookieSize = UInt32(MemoryLayout.size(ofValue: UInt32.self))
let couldNotGetProperty = AudioFileGetPropertyInfo(aqData.mAudioFile!, kAudioFilePropertyMagicCookieData, &cookieSize, nil)
if couldNotGetProperty == 0 && cookieSize > 0{
var magicCookie = UnsafeMutableRawPointer.allocate(byteCount: Int(cookieSize), alignment: MemoryLayout<UInt32>.alignment)
status = AudioFileGetProperty(aqData.mAudioFile!, kAudioFilePropertyMagicCookieData, &cookieSize, &magicCookie)
if status != noErr{
print("Error:Failed to get magic cookie.")
}
AudioQueueSetProperty(aqData.mQueue!, kAudioQueueProperty_MagicCookie, magicCookie, cookieSize)
magicCookie.deallocate()
}
aqData.mCurrentPacket = 0
for i in 0..<kNumberBuffers{
var pointer = aqData.mBuffers?.advanced(by: i)
status = AudioQueueAllocateBuffer(aqData.mQueue!, aqData.bufferByteSize, &pointer)
if status != noErr{
print("Error allocating audio buffer.")
continue
}
var buffer = aqData.mBuffers![i]
callback(&aqData, aqData.mQueue!, &buffer) //I can imagine how this does anything when it is not running
}
//Set Volume
AudioQueueSetParameter(aqData.mQueue!, kAudioQueueParam_Volume, 0.5)//I have way bigger problems
//Start Playing
aqData.mIsRunning = true
status = AudioQueueStart(aqData.mQueue!, nil)
if status != noErr{
print("Error:Failed to start audio queue.")
}
repeat {
CFRunLoopRunInMode(CFRunLoopMode.defaultMode, 0.1, false)
} while aqData.mIsRunning
CFRunLoopRunInMode(CFRunLoopMode.defaultMode, 1, false)
}
private let callback: AudioQueueOutputCallback = { userData, inAQ, inBuffer in
var aqData = userData!.load(as: AQPlayerState.self) // 255
if !aqData.mIsRunning{ return } // 2
var numBytesReadFromFile : UInt32 = 0
var numPackets = aqData.mNumPacketsToRead
AudioFileReadPacketData(aqData.mAudioFile!, false, &numBytesReadFromFile, aqData.mPacketDescs, aqData.mCurrentPacket, &numPackets, inBuffer)
if (numPackets > 0) {
inBuffer.pointee.mAudioDataByteSize = numBytesReadFromFile
let packetCount = aqData.mPacketDescs!.pointee.mVariableFramesInPacket
AudioQueueEnqueueBuffer (
aqData.mQueue!,
inBuffer,
packetCount,
aqData.mPacketDescs
);
aqData.mCurrentPacket += Int64(numPackets)
} else {
AudioQueueStop (aqData.mQueue!,false)
aqData.mIsRunning = false
}
}
func DeriveBufferSize (ASBDesc: inout AudioStreamBasicDescription, maxPacketSize:UInt32, seconds:Float64, outBufferSize: inout UInt32,outNumPacketsToRead: inout UInt32) {
let maxBufferSize = 0x50000
let minBufferSize = 0x4000
if ASBDesc.mFramesPerPacket != 0 {
let numPacketsForTime = ASBDesc.mSampleRate / Float64(ASBDesc.mFramesPerPacket) * seconds
outBufferSize = UInt32(numPacketsForTime) * maxPacketSize
} else { // 9
outBufferSize = max(UInt32(maxBufferSize), maxPacketSize)
}
if outBufferSize > maxBufferSize && outBufferSize > maxPacketSize{ //10
outBufferSize = UInt32(maxBufferSize)
} else if outBufferSize < minBufferSize {
outBufferSize = UInt32(minBufferSize)
}
outNumPacketsToRead = outBufferSize / UInt32(maxPacketSize) // 12
}
}
struct AQPlayerState{
var mDataFormat:AudioStreamBasicDescription = AudioStreamBasicDescription()
var mQueue:AudioQueueRef?
var mBuffers:AudioQueueBufferRef? = UnsafeMutablePointer<AudioQueueBuffer>.allocate(capacity: 3)
var mAudioFile: AudioFileID?
var bufferByteSize:UInt32 = 0
var mCurrentPacket:Int64 = 0
var mNumPacketsToRead:UInt32 = 0
var mPacketDescs : UnsafeMutablePointer<AudioStreamPacketDescription>?
var mIsRunning : Bool = false
init(){
}
}

Extract meter levels from audio file

I need to extract audio meter levels from a file so I can render the levels before playing the audio. I know AVAudioPlayer can get this information while playing the audio file through
func averagePower(forChannel channelNumber: Int) -> Float.
But in my case I would like to obtain a [Float] of meter levels beforehand.
Swift 4
On an iPhone it takes:
0.538s to process an 8 MB mp3 file with a 4min 47s duration and a 44,100 Hz sample rate
0.170s to process a 712 KB mp3 file with a 22s duration and a 44,100 Hz sample rate
0.089s to process the .caf file created by converting the file above with afconvert -f caff -d LEI16 audio.mp3 audio.caf in the terminal
Let's begin:
A) Declare this class that is going to hold the necessary information about the audio asset:
/// Holds audio information used for building waveforms
final class AudioContext {
/// The audio asset URL used to load the context
public let audioURL: URL
/// Total number of samples in loaded asset
public let totalSamples: Int
/// Loaded asset
public let asset: AVAsset
// Loaded assetTrack
public let assetTrack: AVAssetTrack
private init(audioURL: URL, totalSamples: Int, asset: AVAsset, assetTrack: AVAssetTrack) {
self.audioURL = audioURL
self.totalSamples = totalSamples
self.asset = asset
self.assetTrack = assetTrack
}
public static func load(fromAudioURL audioURL: URL, completionHandler: @escaping (_ audioContext: AudioContext?) -> ()) {
let asset = AVURLAsset(url: audioURL, options: [AVURLAssetPreferPreciseDurationAndTimingKey: NSNumber(value: true as Bool)])
guard let assetTrack = asset.tracks(withMediaType: AVMediaType.audio).first else {
fatalError("Couldn't load AVAssetTrack")
}
asset.loadValuesAsynchronously(forKeys: ["duration"]) {
var error: NSError?
let status = asset.statusOfValue(forKey: "duration", error: &error)
switch status {
case .loaded:
guard
let formatDescriptions = assetTrack.formatDescriptions as? [CMAudioFormatDescription],
let audioFormatDesc = formatDescriptions.first,
let asbd = CMAudioFormatDescriptionGetStreamBasicDescription(audioFormatDesc)
else { break }
let totalSamples = Int((asbd.pointee.mSampleRate) * Float64(asset.duration.value) / Float64(asset.duration.timescale))
let audioContext = AudioContext(audioURL: audioURL, totalSamples: totalSamples, asset: asset, assetTrack: assetTrack)
completionHandler(audioContext)
return
case .failed, .cancelled, .loading, .unknown:
print("Couldn't load asset: \(error?.localizedDescription ?? "Unknown error")")
}
completionHandler(nil)
}
}
}
We are going to use its asynchronous load function and hand its result to a completion handler.
B) Import AVFoundation and Accelerate in your view controller:
import AVFoundation
import Accelerate
C) Declare the noise level in your view controller (in dB):
let noiseFloor: Float = -80
For example, anything lower than -80 dB will be treated as silence.
D) The following function takes an audio context and produces the desired dB powers. targetSamples is by default set to 100, you can change that to suit your UI needs:
func render(audioContext: AudioContext?, targetSamples: Int = 100) -> [Float]{
guard let audioContext = audioContext else {
fatalError("Couldn't create the audioContext")
}
let sampleRange: CountableRange<Int> = 0..<audioContext.totalSamples
guard let reader = try? AVAssetReader(asset: audioContext.asset)
else {
fatalError("Couldn't initialize the AVAssetReader")
}
reader.timeRange = CMTimeRange(start: CMTime(value: Int64(sampleRange.lowerBound), timescale: audioContext.asset.duration.timescale),
duration: CMTime(value: Int64(sampleRange.count), timescale: audioContext.asset.duration.timescale))
let outputSettingsDict: [String : Any] = [
AVFormatIDKey: Int(kAudioFormatLinearPCM),
AVLinearPCMBitDepthKey: 16,
AVLinearPCMIsBigEndianKey: false,
AVLinearPCMIsFloatKey: false,
AVLinearPCMIsNonInterleaved: false
]
let readerOutput = AVAssetReaderTrackOutput(track: audioContext.assetTrack,
outputSettings: outputSettingsDict)
readerOutput.alwaysCopiesSampleData = false
reader.add(readerOutput)
var channelCount = 1
let formatDescriptions = audioContext.assetTrack.formatDescriptions as! [CMAudioFormatDescription]
for item in formatDescriptions {
guard let fmtDesc = CMAudioFormatDescriptionGetStreamBasicDescription(item) else {
fatalError("Couldn't get the format description")
}
channelCount = Int(fmtDesc.pointee.mChannelsPerFrame)
}
let samplesPerPixel = max(1, channelCount * sampleRange.count / targetSamples)
let filter = [Float](repeating: 1.0 / Float(samplesPerPixel), count: samplesPerPixel)
var outputSamples = [Float]()
var sampleBuffer = Data()
// 16-bit samples
reader.startReading()
defer { reader.cancelReading() }
while reader.status == .reading {
guard let readSampleBuffer = readerOutput.copyNextSampleBuffer(),
let readBuffer = CMSampleBufferGetDataBuffer(readSampleBuffer) else {
break
}
// Append audio sample buffer into our current sample buffer
var readBufferLength = 0
var readBufferPointer: UnsafeMutablePointer<Int8>?
CMBlockBufferGetDataPointer(readBuffer, 0, &readBufferLength, nil, &readBufferPointer)
sampleBuffer.append(UnsafeBufferPointer(start: readBufferPointer, count: readBufferLength))
CMSampleBufferInvalidate(readSampleBuffer)
let totalSamples = sampleBuffer.count / MemoryLayout<Int16>.size
let downSampledLength = totalSamples / samplesPerPixel
let samplesToProcess = downSampledLength * samplesPerPixel
guard samplesToProcess > 0 else { continue }
processSamples(fromData: &sampleBuffer,
outputSamples: &outputSamples,
samplesToProcess: samplesToProcess,
downSampledLength: downSampledLength,
samplesPerPixel: samplesPerPixel,
filter: filter)
//print("Status: \(reader.status)")
}
// Process the remaining samples at the end which didn't fit into samplesPerPixel
let samplesToProcess = sampleBuffer.count / MemoryLayout<Int16>.size
if samplesToProcess > 0 {
let downSampledLength = 1
let samplesPerPixel = samplesToProcess
let filter = [Float](repeating: 1.0 / Float(samplesPerPixel), count: samplesPerPixel)
processSamples(fromData: &sampleBuffer,
outputSamples: &outputSamples,
samplesToProcess: samplesToProcess,
downSampledLength: downSampledLength,
samplesPerPixel: samplesPerPixel,
filter: filter)
//print("Status: \(reader.status)")
}
// if (reader.status == AVAssetReaderStatusFailed || reader.status == AVAssetReaderStatusUnknown)
guard reader.status == .completed else {
fatalError("Couldn't read the audio file")
}
return outputSamples
}
E) render uses this function to down-sample the data from the audio file, and convert to decibels:
func processSamples(fromData sampleBuffer: inout Data,
outputSamples: inout [Float],
samplesToProcess: Int,
downSampledLength: Int,
samplesPerPixel: Int,
filter: [Float]) {
sampleBuffer.withUnsafeBytes { (samples: UnsafePointer<Int16>) in
var processingBuffer = [Float](repeating: 0.0, count: samplesToProcess)
let sampleCount = vDSP_Length(samplesToProcess)
//Convert 16bit int samples to floats
vDSP_vflt16(samples, 1, &processingBuffer, 1, sampleCount)
//Take the absolute values to get amplitude
vDSP_vabs(processingBuffer, 1, &processingBuffer, 1, sampleCount)
//get the corresponding dB, and clip the results
getdB(from: &processingBuffer)
//Downsample and average
var downSampledData = [Float](repeating: 0.0, count: downSampledLength)
vDSP_desamp(processingBuffer,
vDSP_Stride(samplesPerPixel),
filter, &downSampledData,
vDSP_Length(downSampledLength),
vDSP_Length(samplesPerPixel))
//Remove processed samples
sampleBuffer.removeFirst(samplesToProcess * MemoryLayout<Int16>.size)
outputSamples += downSampledData
}
}
F) Which in turn calls this function that gets the corresponding dB, and clips the results to [noiseFloor, 0]:
func getdB(from normalizedSamples: inout [Float]) {
// Convert samples to a log scale
var zero: Float = 32768.0
vDSP_vdbcon(normalizedSamples, 1, &zero, &normalizedSamples, 1, vDSP_Length(normalizedSamples.count), 1)
//Clip to [noiseFloor, 0]
var ceil: Float = 0.0
var noiseFloorMutable = noiseFloor
vDSP_vclip(normalizedSamples, 1, &noiseFloorMutable, &ceil, &normalizedSamples, 1, vDSP_Length(normalizedSamples.count))
}
G) Finally you can get the waveform of the audio like so:
guard let path = Bundle.main.path(forResource: "audio", ofType:"mp3") else {
fatalError("Couldn't find the file path")
}
let url = URL(fileURLWithPath: path)
var outputArray : [Float] = []
AudioContext.load(fromAudioURL: url, completionHandler: { audioContext in
guard let audioContext = audioContext else {
fatalError("Couldn't create the audioContext")
}
outputArray = self.render(audioContext: audioContext, targetSamples: 300)
})
Don't forget that AudioContext.load(fromAudioURL:) is asynchronous.
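A minimal sketch of wiring that up (waveformView and its update method are hypothetical placeholders for whatever UI you drive): since the loading and rendering happen off the main thread, hop back to the main queue before updating the UI.
AudioContext.load(fromAudioURL: url) { audioContext in
    guard let audioContext = audioContext else { return }
    let samples = self.render(audioContext: audioContext, targetSamples: 300)
    DispatchQueue.main.async {
        self.waveformView.update(with: samples) // hypothetical UI update
    }
}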
This solution is synthesized from this repo by William Entriken. All credit goes to him.
Swift 5
Here is the same code updated to Swift 5 syntax:
import AVFoundation
import Accelerate
/// Holds audio information used for building waveforms
final class AudioContext {
/// The audio asset URL used to load the context
public let audioURL: URL
/// Total number of samples in loaded asset
public let totalSamples: Int
/// Loaded asset
public let asset: AVAsset
// Loaded assetTrack
public let assetTrack: AVAssetTrack
private init(audioURL: URL, totalSamples: Int, asset: AVAsset, assetTrack: AVAssetTrack) {
self.audioURL = audioURL
self.totalSamples = totalSamples
self.asset = asset
self.assetTrack = assetTrack
}
public static func load(fromAudioURL audioURL: URL, completionHandler: @escaping (_ audioContext: AudioContext?) -> ()) {
let asset = AVURLAsset(url: audioURL, options: [AVURLAssetPreferPreciseDurationAndTimingKey: NSNumber(value: true as Bool)])
guard let assetTrack = asset.tracks(withMediaType: AVMediaType.audio).first else {
fatalError("Couldn't load AVAssetTrack")
}
asset.loadValuesAsynchronously(forKeys: ["duration"]) {
var error: NSError?
let status = asset.statusOfValue(forKey: "duration", error: &error)
switch status {
case .loaded:
guard
let formatDescriptions = assetTrack.formatDescriptions as? [CMAudioFormatDescription],
let audioFormatDesc = formatDescriptions.first,
let asbd = CMAudioFormatDescriptionGetStreamBasicDescription(audioFormatDesc)
else { break }
let totalSamples = Int((asbd.pointee.mSampleRate) * Float64(asset.duration.value) / Float64(asset.duration.timescale))
let audioContext = AudioContext(audioURL: audioURL, totalSamples: totalSamples, asset: asset, assetTrack: assetTrack)
completionHandler(audioContext)
return
case .failed, .cancelled, .loading, .unknown:
print("Couldn't load asset: \(error?.localizedDescription ?? "Unknown error")")
}
completionHandler(nil)
}
}
}
let noiseFloor: Float = -80
func render(audioContext: AudioContext?, targetSamples: Int = 100) -> [Float]{
guard let audioContext = audioContext else {
fatalError("Couldn't create the audioContext")
}
let sampleRange: CountableRange<Int> = 0..<audioContext.totalSamples
guard let reader = try? AVAssetReader(asset: audioContext.asset)
else {
fatalError("Couldn't initialize the AVAssetReader")
}
reader.timeRange = CMTimeRange(start: CMTime(value: Int64(sampleRange.lowerBound), timescale: audioContext.asset.duration.timescale),
duration: CMTime(value: Int64(sampleRange.count), timescale: audioContext.asset.duration.timescale))
let outputSettingsDict: [String : Any] = [
AVFormatIDKey: Int(kAudioFormatLinearPCM),
AVLinearPCMBitDepthKey: 16,
AVLinearPCMIsBigEndianKey: false,
AVLinearPCMIsFloatKey: false,
AVLinearPCMIsNonInterleaved: false
]
let readerOutput = AVAssetReaderTrackOutput(track: audioContext.assetTrack,
outputSettings: outputSettingsDict)
readerOutput.alwaysCopiesSampleData = false
reader.add(readerOutput)
var channelCount = 1
let formatDescriptions = audioContext.assetTrack.formatDescriptions as! [CMAudioFormatDescription]
for item in formatDescriptions {
guard let fmtDesc = CMAudioFormatDescriptionGetStreamBasicDescription(item) else {
fatalError("Couldn't get the format description")
}
channelCount = Int(fmtDesc.pointee.mChannelsPerFrame)
}
let samplesPerPixel = max(1, channelCount * sampleRange.count / targetSamples)
let filter = [Float](repeating: 1.0 / Float(samplesPerPixel), count: samplesPerPixel)
var outputSamples = [Float]()
var sampleBuffer = Data()
// 16-bit samples
reader.startReading()
defer { reader.cancelReading() }
while reader.status == .reading {
guard let readSampleBuffer = readerOutput.copyNextSampleBuffer(),
let readBuffer = CMSampleBufferGetDataBuffer(readSampleBuffer) else {
break
}
// Append audio sample buffer into our current sample buffer
var readBufferLength = 0
var readBufferPointer: UnsafeMutablePointer<Int8>?
CMBlockBufferGetDataPointer(readBuffer,
atOffset: 0,
lengthAtOffsetOut: &readBufferLength,
totalLengthOut: nil,
dataPointerOut: &readBufferPointer)
sampleBuffer.append(UnsafeBufferPointer(start: readBufferPointer, count: readBufferLength))
CMSampleBufferInvalidate(readSampleBuffer)
let totalSamples = sampleBuffer.count / MemoryLayout<Int16>.size
let downSampledLength = totalSamples / samplesPerPixel
let samplesToProcess = downSampledLength * samplesPerPixel
guard samplesToProcess > 0 else { continue }
processSamples(fromData: &sampleBuffer,
outputSamples: &outputSamples,
samplesToProcess: samplesToProcess,
downSampledLength: downSampledLength,
samplesPerPixel: samplesPerPixel,
filter: filter)
//print("Status: \(reader.status)")
}
// Process the remaining samples at the end which didn't fit into samplesPerPixel
let samplesToProcess = sampleBuffer.count / MemoryLayout<Int16>.size
if samplesToProcess > 0 {
let downSampledLength = 1
let samplesPerPixel = samplesToProcess
let filter = [Float](repeating: 1.0 / Float(samplesPerPixel), count: samplesPerPixel)
processSamples(fromData: &sampleBuffer,
outputSamples: &outputSamples,
samplesToProcess: samplesToProcess,
downSampledLength: downSampledLength,
samplesPerPixel: samplesPerPixel,
filter: filter)
//print("Status: \(reader.status)")
}
// if (reader.status == AVAssetReaderStatusFailed || reader.status == AVAssetReaderStatusUnknown)
guard reader.status == .completed else {
fatalError("Couldn't read the audio file")
}
return outputSamples
}
func processSamples(fromData sampleBuffer: inout Data,
outputSamples: inout [Float],
samplesToProcess: Int,
downSampledLength: Int,
samplesPerPixel: Int,
filter: [Float]) {
sampleBuffer.withUnsafeBytes { (samples: UnsafeRawBufferPointer) in
var processingBuffer = [Float](repeating: 0.0, count: samplesToProcess)
let sampleCount = vDSP_Length(samplesToProcess)
//Create an UnsafePointer<Int16> from samples
let unsafeBufferPointer = samples.bindMemory(to: Int16.self)
let unsafePointer = unsafeBufferPointer.baseAddress!
//Convert 16bit int samples to floats
vDSP_vflt16(unsafePointer, 1, &processingBuffer, 1, sampleCount)
//Take the absolute values to get amplitude
vDSP_vabs(processingBuffer, 1, &processingBuffer, 1, sampleCount)
//get the corresponding dB, and clip the results
getdB(from: &processingBuffer)
//Downsample and average
var downSampledData = [Float](repeating: 0.0, count: downSampledLength)
vDSP_desamp(processingBuffer,
vDSP_Stride(samplesPerPixel),
filter, &downSampledData,
vDSP_Length(downSampledLength),
vDSP_Length(samplesPerPixel))
//Remove processed samples
sampleBuffer.removeFirst(samplesToProcess * MemoryLayout<Int16>.size)
outputSamples += downSampledData
}
}
func getdB(from normalizedSamples: inout [Float]) {
// Convert samples to a log scale
var zero: Float = 32768.0
vDSP_vdbcon(normalizedSamples, 1, &zero, &normalizedSamples, 1, vDSP_Length(normalizedSamples.count), 1)
//Clip to [noiseFloor, 0]
var ceil: Float = 0.0
var noiseFloorMutable = noiseFloor
vDSP_vclip(normalizedSamples, 1, &noiseFloorMutable, &ceil, &normalizedSamples, 1, vDSP_Length(normalizedSamples.count))
}
Old solution
Here is a function you could use to pre-render the meter levels of an audio file without playing it:
func averagePowers(audioFileURL: URL, forChannel channelNumber: Int, completionHandler: @escaping(_ success: [Float]) -> ()) {
let audioFile = try! AVAudioFile(forReading: audioFileURL)
let audioFilePFormat = audioFile.processingFormat
let audioFileLength = audioFile.length
//Set the size of frames to read from the audio file, you can adjust this to your liking
let frameSizeToRead = Int(audioFilePFormat.sampleRate/20)
//This is to how many frames/portions we're going to divide the audio file
let numberOfFrames = Int(audioFileLength)/frameSizeToRead
//Create a pcm buffer the size of a frame
guard let audioBuffer = AVAudioPCMBuffer(pcmFormat: audioFilePFormat, frameCapacity: AVAudioFrameCount(frameSizeToRead)) else {
fatalError("Couldn't create the audio buffer")
}
//Do the calculations in a background thread, if you don't want to block the main thread for larger audio files
DispatchQueue.global(qos: .userInitiated).async {
//This is the array to be returned
var returnArray : [Float] = [Float]()
//We're going to read the audio file, frame by frame
for i in 0..<numberOfFrames {
//Change the position from which we are reading the audio file, since each frame starts from a different position in the audio file
audioFile.framePosition = AVAudioFramePosition(i * frameSizeToRead)
//Read the frame from the audio file
try! audioFile.read(into: audioBuffer, frameCount: AVAudioFrameCount(frameSizeToRead))
//Get the data from the chosen channel
let channelData = audioBuffer.floatChannelData![channelNumber]
//This is the array of floats
let arr = Array(UnsafeBufferPointer(start:channelData, count: frameSizeToRead))
//Calculate the mean value of the absolute values
let meanValue = arr.reduce(0, {$0 + abs($1)})/Float(arr.count)
//Calculate the dB power (You can adjust this), if average is less than 0.000_000_01 we limit it to -160.0
let dbPower: Float = meanValue > 0.000_000_01 ? 20 * log10(meanValue) : -160.0
//append the db power in the current frame to the returnArray
returnArray.append(dbPower)
}
//Return the dBPowers
completionHandler(returnArray)
}
}
And you can call it like so:
let path = Bundle.main.path(forResource: "audio.mp3", ofType:nil)!
let url = URL(fileURLWithPath: path)
averagePowers(audioFileURL: url, forChannel: 0, completionHandler: { array in
//Use the array
})
Profiled with Instruments, this solution produces high CPU usage for about 1.2 seconds, takes about 5 seconds to return to the main thread with the returnArray, and up to 10 seconds when in Low Power Mode.
First of all, this is a heavy operation, so it will take some OS time and resources to accomplish. In the example below I will use standard frame rates and sampling, but you should really sample far, far less if, for example, you only want to display bars as an indication.
OK, so you don't need to play a sound to analyse it, so I will not use AVAudioPlayer at all. I will assume that I take the track as a URL:
let path = Bundle.main.path(forResource: "example3.mp3", ofType:nil)!
let url = URL(fileURLWithPath: path)
Then I will use AVAudioFile to read the track into an AVAudioPCMBuffer. Once you have it in a buffer, you have all the information regarding your track:
func buffer(url: URL) {
do {
let track = try AVAudioFile(forReading: url)
let format = AVAudioFormat(commonFormat:.pcmFormatFloat32, sampleRate:track.fileFormat.sampleRate, channels: track.fileFormat.channelCount, interleaved: false)
let buffer = AVAudioPCMBuffer(pcmFormat: format!, frameCapacity: UInt32(track.length))!
try track.read(into : buffer, frameCount:UInt32(track.length))
self.analyze(buffer: buffer)
} catch {
print(error)
}
}
As you may notice, there is an analyze method in it. Take a close look at the floatChannelData property of your buffer: it is plain data, so you'll need to parse it. I will post the method and explain it below:
func analyze(buffer: AVAudioPCMBuffer) {
let channelCount = Int(buffer.format.channelCount)
let frameLength = Int(buffer.frameLength)
var result = Array(repeating: [Float](repeatElement(0, count: frameLength)), count: channelCount)
for channel in 0..<channelCount {
for sampleIndex in 0..<frameLength {
let sqrtV = sqrt(buffer.floatChannelData![channel][sampleIndex*buffer.stride]/Float(buffer.frameLength))
let dbPower = 20 * log10(sqrtV)
result[channel][sampleIndex] = dbPower
}
}
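// At this point `result` holds the per-channel dB value of every sample; return it or hand it to your UI from here.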
}
There are some calculations (heavy ones) involved in it. When I was working on a similar solution a couple of months ago I came across this tutorial: https://www.raywenderlich.com/5154-avaudioengine-tutorial-for-ios-getting-started — there is an excellent explanation of this calculation there, along with parts of the code that I pasted above and also use in my project, so I want to credit the author here: Scott McAlister 👏
Based on @Jakub's answer above, here's an Objective-C version.
If you want to increase the accuracy, change the deciblesCount variable, but beware of the performance hit. If you want to return more bars, you can increase the divisions variable when you call the function (with no additional performance hit). You should probably run it on a background thread in any case.
A 3:36 minute / 5.2 MB song takes about 1.2s. The images (not shown here) are of a shotgun firing, rendered with 30 and 100 divisions respectively.
-(NSArray *)returnWaveArrayForFile:(NSString *)filepath numberOfDivisions:(int)divisions{
//pull file
NSError * error;
NSURL * url = [NSURL URLWithString:filepath];
AVAudioFile * file = [[AVAudioFile alloc] initForReading:url error:&error];
//create av stuff
AVAudioFormat * format = [[AVAudioFormat alloc] initWithCommonFormat:AVAudioPCMFormatFloat32 sampleRate:file.fileFormat.sampleRate channels:file.fileFormat.channelCount interleaved:false];
AVAudioPCMBuffer * buffer = [[AVAudioPCMBuffer alloc] initWithPCMFormat:format frameCapacity:(int)file.length];
[file readIntoBuffer:buffer frameCount:(int)file.length error:&error];
//grab total number of decibles, 1000 seems to work
int deciblesCount = MIN(1000,buffer.frameLength);
NSMutableArray * channels = [NSMutableArray new];
float frameIncrement = buffer.frameLength / (float)deciblesCount;
//needed later
float maxDecible = 0;
float minDecible = HUGE_VALF;
NSMutableArray * sd = [NSMutableArray new]; //used for standard deviation
for (int n = 0; n < MIN(buffer.format.channelCount, 2); n++){ //go through channels
NSMutableArray * decibles = [NSMutableArray new]; //holds actual decible values
//go through pulling the decibles
for (int i = 0; i < deciblesCount; i++){
int offset = frameIncrement * i; //grab offset
//equation from stack, no idea the maths
float sqr = sqrtf(buffer.floatChannelData[n][offset * buffer.stride]/(float)buffer.frameLength);
float decible = 20 * log10f(sqr);
decible += 160; //make positive
decible = (isnan(decible) || decible < 0) ? 0 : decible; //if it's not a number or silent, make it zero
if (decible > 0){ //if it has volume
[sd addObject:@(decible)];
}
[decibles addObject:@(decible)];//add to decibles array
maxDecible = MAX(maxDecible, decible); //grab biggest
minDecible = MIN(minDecible, decible); //grab smallest
}
[channels addObject:decibles]; //add to channels array
}
//find standard deviation and then deducted the bottom slag
NSExpression * expression = [NSExpression expressionForFunction:@"stddev:" arguments:@[[NSExpression expressionForConstantValue:sd]]];
float standardDeviation = [[expression expressionValueWithObject:nil context:nil] floatValue];
float deviationDeduct = standardDeviation / (standardDeviation + (maxDecible - minDecible));
//go through calculating deviation percentage
NSMutableArray * deviations = [NSMutableArray new];
NSMutableArray * returning = [NSMutableArray new];
for (int c = 0; c < (int)channels.count; c++){
NSArray * channel = channels[c];
for (int n = 0; n < (int)channel.count; n++){
float decible = [channel[n] floatValue];
float remainder = (maxDecible - decible);
float deviation = standardDeviation / (standardDeviation + remainder) - deviationDeduct;
[deviations addObject:@(deviation)];
}
//go through creating percentage
float maxTotal = 0;
int catchCount = floorf(deciblesCount / divisions); //total decible values within a segment or division
NSMutableArray * totals = [NSMutableArray new];
for (int n = 0; n < divisions; n++){
float total = 0.0f;
for (int k = 0; k < catchCount; k++){ //go through each segment
int index = n * catchCount + k; //create the index
float deviation = [deviations[index] floatValue]; //grab value
total += deviation; //add to total
}
//max out maxTotal var -> used later to calc percentages
maxTotal = MAX(maxTotal, total);
[totals addObject:@(total)]; //add to totals array
}
//normalise percentages and return
NSMutableArray * percentages = [NSMutableArray new];
for (int n = 0; n < divisions; n++){
float total = [totals[n] floatValue]; //grab the total value for that segment
float percentage = total / maxTotal; //divide by the biggest value -> making it a percentage
[percentages addObject:@(percentage)]; //add to the array
}
//add to the returning array
[returning addObject:percentages];
}
//return channel data -> array of two arrays of percentages
return (NSArray *)returning;
}
Call like this:
int divisions = 30; //number of segments you want for your display
NSString * path = [[NSBundle mainBundle] pathForResource:@"satie" ofType:@"mp3"];
NSArray * channels = [_audioReader returnWaveArrayForFile:path numberOfDivisions:divisions];
You get the two channels back in that array, which you can use to update your UI. Values in each array are between 0 and 1 which you can use to build your bars.

Split AKAudioFile into chunks separated by silence

Given a single AKAudioFile that has been created from an AKNodeRecorder containing a series of spoken words, where each word is separated by at least 1 second, what is the best approach to ultimately create a series of files with each file containing one word?
I believe this can be accomplished if there is a way to iterate the file in, for example, 100 ms chunks, and measure the average amplitude of each chunk. "Silent chunks" could be those below some arbitrarily small amplitude. While iterating, if I encounter a chunk with non-silent amplitude, I can grab the starting timestamp of this "non-silent" chunk to create an audio file that starts here and ends at the start time of the next "silent" chunk.
Whether it'd be using a manual approach like the one above or a more built-in processing technique to AudioKit, any suggestions would be greatly appreciated.
I don't have a complete solution, but I've started working on something similar. This function could serve as a jumping-off point for what you need. Basically you want to read the file into a buffer and then analyse the buffer data. At that point you could dice it up into smaller buffers and write those to file; a sketch of that last step follows the function below.
public class func guessBoundaries(url: URL, sensitivity: Double = 1) -> [Double]? {
var out: [Double] = []
guard let audioFile = try? AVAudioFile(forReading: url) else { return nil }
let processingFormat = audioFile.processingFormat
let frameCount = AVAudioFrameCount(audioFile.length)
guard let pcmBuffer = AVAudioPCMBuffer(pcmFormat: processingFormat, frameCapacity: frameCount) else { return nil }
audioFile.framePosition = 0
do {
audioFile.framePosition = 0
try audioFile.read(into: pcmBuffer, frameCount: frameCount)
} catch let err as NSError {
AKLog("ERROR: Couldn't read data into buffer. \(err)")
return nil
}
let channelCount = Int(pcmBuffer.format.channelCount)
let bufferLength = 1024
let inThreshold: Double = 0.001 / sensitivity
let outThreshold: Double = 0.0001 * sensitivity
let minSegmentDuration: Double = 1
var counter = 0
var thresholdCrossed = false
var rmsBuffer = [Float](repeating: 0, count: bufferLength)
var lastTime: Double = 0
AKLog("inThreshold", inThreshold, "outThreshold", outThreshold)
for i in 0 ..< Int(pcmBuffer.frameLength) {
// n is the channel
for n in 0 ..< channelCount {
guard let sample: Float = pcmBuffer.floatChannelData?[n][i] else { continue }
if counter == rmsBuffer.count {
let time: Double = Double(i) / processingFormat.sampleRate
let avg = Double(rmsBuffer.reduce(0, +)) / Double(rmsBuffer.count)
// AKLog("Average Value at frame \(i):", avg)
if avg > inThreshold && !thresholdCrossed && time - lastTime > minSegmentDuration {
thresholdCrossed = true
out.append(time)
lastTime = time
} else if avg <= outThreshold && thresholdCrossed && time - lastTime > minSegmentDuration {
thresholdCrossed = false
out.append(time)
lastTime = time
}
counter = 0
}
rmsBuffer[counter] = abs(sample)
counter += 1
}
}
rmsBuffer.removeAll()
return out
}
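Once guessBoundaries gives you a list of times, the writing step could look something like the following sketch (assumptions: the returned boundaries alternate start/end times in seconds, and the .caf output format and file-name pattern are arbitrary choices of mine):
import AVFoundation

func exportSegments(from url: URL, boundaries: [Double], to directory: URL) throws {
    let source = try AVAudioFile(forReading: url)
    let format = source.processingFormat
    let sampleRate = format.sampleRate

    // Walk the boundaries in start/end pairs and copy each range into its own file.
    for (index, pair) in stride(from: 0, to: boundaries.count - 1, by: 2).enumerated() {
        let startFrame = AVAudioFramePosition(boundaries[pair] * sampleRate)
        let frameCount = AVAudioFrameCount((boundaries[pair + 1] - boundaries[pair]) * sampleRate)
        guard let buffer = AVAudioPCMBuffer(pcmFormat: format, frameCapacity: frameCount) else { continue }

        source.framePosition = startFrame
        try source.read(into: buffer, frameCount: frameCount)

        let segmentURL = directory.appendingPathComponent("word-\(index).caf")
        let segment = try AVAudioFile(forWriting: segmentURL, settings: format.settings)
        try segment.write(from: buffer)
    }
}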

Audio Queue Services Player in Swift isn't calling callback

I've been playing around with Audio Queue Services for about a week, and I've written a Swift version of the player from the Apple Audio Queue Services Programming Guide.
I'm recording in Linear PCM and saving to disk with this method:
AudioFileCreateWithURL(url, kAudioFileWAVEType, &format,
AudioFileFlags.dontPageAlignAudioData.union(.eraseFile), &audioFileID)
My AudioQueueOutputCallback isn't being called, even though I can verify that my bufferSize is seemingly large enough and that it's getting passed actual data. I'm not getting any OSStatus errors, and it seems like everything should work. There's very little in the way of Swift-written Audio Queue Services code, and should I get this working I'd be happy to open up the rest of my code.
Any and all suggestions welcome!
class SVNPlayer: SVNPlayback {
var state: PlayerState!
private let callback: AudioQueueOutputCallback = { aqData, inAQ, inBuffer in
guard let userData = aqData else { return }
let audioPlayer = Unmanaged<SVNPlayer>.fromOpaque(userData).takeUnretainedValue()
guard audioPlayer.state.isRunning,
let queue = audioPlayer.state.mQueue else { return }
var buffer = inBuffer.pointee // dereference pointers
var numBytesReadFromFile: UInt32 = 0
var numPackets = audioPlayer.state.mNumPacketsToRead
var mPacketDescIsNil = audioPlayer.state.mPacketDesc == nil // determine if the packetDesc
if mPacketDescIsNil {
audioPlayer.state.mPacketDesc = AudioStreamPacketDescription(mStartOffset: 0, mVariableFramesInPacket: 0, mDataByteSize: 0)
}
AudioFileReadPacketData(audioPlayer.state.mAudioFile, false, &numBytesReadFromFile, // read the packet at the saved file
&audioPlayer.state.mPacketDesc!, audioPlayer.state.mCurrentPacket,
&numPackets, buffer.mAudioData)
if numPackets > 0 {
buffer.mAudioDataByteSize = numBytesReadFromFile
AudioQueueEnqueueBuffer(queue, inBuffer, mPacketDescIsNil ? numPackets : 0,
&audioPlayer.state.mPacketDesc!)
audioPlayer.state.mCurrentPacket += Int64(numPackets)
} else {
AudioQueueStop(queue, false)
audioPlayer.state.isRunning = false
}
}
init(inputPath: String, audioFormat: AudioStreamBasicDescription, numberOfBuffers: Int) throws {
super.init()
var format = audioFormat
let pointer = UnsafeMutableRawPointer(Unmanaged.passUnretained(self).toOpaque()) // get an unmananged reference to self
guard let audioFileUrl = CFURLCreateFromFileSystemRepresentation(nil,
inputPath,
CFIndex(strlen(inputPath)), false) else {
throw MixerError.playerInputPath }
var audioFileID: AudioFileID?
try osStatus { AudioFileOpenURL(audioFileUrl, AudioFilePermissions.readPermission, 0, &audioFileID) }
guard audioFileID != nil else { throw MixerError.playerInputPath }
state = PlayerState(mDataFormat: audioFormat, // setup the player state with mostly initial values
mQueue: nil,
mAudioFile: audioFileID!,
bufferByteSize: 0,
mCurrentPacket: 0,
mNumPacketsToRead: 0,
isRunning: false,
mPacketDesc: nil,
onError: nil)
var dataFormatSize = UInt32(MemoryLayout<AudioStreamBasicDescription>.stride)
try osStatus { AudioFileGetProperty(audioFileID!, kAudioFilePropertyDataFormat, &dataFormatSize, &state.mDataFormat) }
var queue: AudioQueueRef?
try osStatus { AudioQueueNewOutput(&format, callback, pointer, CFRunLoopGetCurrent(), CFRunLoopMode.commonModes.rawValue, 0, &queue) } // setup output queue
guard queue != nil else { throw MixerError.playerOutputQueue }
state.mQueue = queue // add to playerState
var maxPacketSize = UInt32()
var propertySize = UInt32(MemoryLayout<UInt32>.stride)
try osStatus { AudioFileGetProperty(state.mAudioFile, kAudioFilePropertyPacketSizeUpperBound, &propertySize, &maxPacketSize) }
deriveBufferSize(maxPacketSize: maxPacketSize, seconds: 0.5, outBufferSize: &state.bufferByteSize, outNumPacketsToRead: &state.mNumPacketsToRead)
let isFormatVBR = state.mDataFormat.mBytesPerPacket == 0 || state.mDataFormat.mFramesPerPacket == 0
if isFormatVBR { //Allocating Memory for a Packet Descriptions Array
let size = UInt32(MemoryLayout<AudioStreamPacketDescription>.stride)
state.mPacketDesc = AudioStreamPacketDescription(mStartOffset: 0,
mVariableFramesInPacket: state.mNumPacketsToRead,
mDataByteSize: size)
} // if CBR it stays set to null
for _ in 0..<numberOfBuffers { // Allocate and Prime Audio Queue Buffers
let bufferRef = UnsafeMutablePointer<AudioQueueBufferRef?>.allocate(capacity: 1)
let foo = state.mDataFormat.mBytesPerPacket * 1024 / UInt32(numberOfBuffers)
try osStatus { AudioQueueAllocateBuffer(state.mQueue!, foo, bufferRef) } // allocate the buffer
if let buffer = bufferRef.pointee {
AudioQueueEnqueueBuffer(state.mQueue!, buffer, 0, nil)
}
}
let gain: Float32 = 1.0 // Set an Audio Queue’s Playback Gain
try osStatus { AudioQueueSetParameter(state.mQueue!, kAudioQueueParam_Volume, gain) }
}
func start() throws {
state.isRunning = true // Start and Run an Audio Queue
try osStatus { AudioQueueStart(state.mQueue!, nil) }
while state.isRunning {
CFRunLoopRunInMode(CFRunLoopMode.defaultMode, 0.25, false)
}
CFRunLoopRunInMode(CFRunLoopMode.defaultMode, 1.0, false)
state.isRunning = false
}
func stop() throws {
guard state.isRunning,
let queue = state.mQueue else { return }
try osStatus { AudioQueueStop(queue, true) }
try osStatus { AudioQueueDispose(queue, true) }
try osStatus { AudioFileClose(state.mAudioFile) }
state.isRunning = false
}
private func deriveBufferSize(maxPacketSize: UInt32, seconds: Float64, outBufferSize: inout UInt32, outNumPacketsToRead: inout UInt32){
let maxBufferSize = UInt32(0x50000)
let minBufferSize = UInt32(0x4000)
if state.mDataFormat.mFramesPerPacket != 0 {
let numPacketsForTime: Float64 = state.mDataFormat.mSampleRate / Float64(state.mDataFormat.mFramesPerPacket) * seconds
outBufferSize = UInt32(numPacketsForTime) * maxPacketSize
} else {
outBufferSize = maxBufferSize > maxPacketSize ? maxBufferSize : maxPacketSize
}
if outBufferSize > maxBufferSize && outBufferSize > maxPacketSize {
outBufferSize = maxBufferSize
} else if outBufferSize < minBufferSize {
outBufferSize = minBufferSize
}
outNumPacketsToRead = outBufferSize / maxPacketSize
}
}
My player state struct is :
struct PlayerState: PlaybackState {
var mDataFormat: AudioStreamBasicDescription
var mQueue: AudioQueueRef?
var mAudioFile: AudioFileID
var bufferByteSize: UInt32
var mCurrentPacket: Int64
var mNumPacketsToRead: UInt32
var isRunning: Bool
var mPacketDesc: AudioStreamPacketDescription?
var onError: ((Error) -> Void)?
}
Instead of enqueuing an empty buffer, try calling your callback so that it enqueues a (hopefully) full buffer. I'm unsure about the run loop stuff, but I'm sure you know what you're doing.
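For example, the priming loop in init could look something like this (an untested sketch reusing the names from the question's code; it also allocates state.bufferByteSize, the size deriveBufferSize computed, rather than the ad-hoc foo value):
for _ in 0..<numberOfBuffers {
    var bufferRef: AudioQueueBufferRef?
    try osStatus { AudioQueueAllocateBuffer(state.mQueue!, state.bufferByteSize, &bufferRef) }
    if let buffer = bufferRef {
        // Let the output callback fill and enqueue the buffer itself,
        // instead of enqueuing it empty with AudioQueueEnqueueBuffer.
        callback(pointer, state.mQueue!, buffer)
    }
}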
