I need to extract audio meter levels from a file so I can render the levels before playing the audio. I know AVAudioPlayer can get this information while playing the audio file through
func averagePower(forChannel channelNumber: Int) -> Float.
But in my case I would like to obtain an [Float] of meter levels beforehand.
Swift 4
It takes on an iPhone:
0.538s to process an 8MByte mp3 file with a 4min47s duration and a 44,100 Hz sampling rate
0.170s to process a 712KByte mp3 file with a 22s duration and a 44,100 Hz sampling rate
0.089s to process a caf file created by converting the file above using this command in the terminal: afconvert -f caff -d LEI16 audio.mp3 audio.caf
Let's begin:
A) Declare this class that is going to hold the necessary information about the audio asset:
/// Holds audio information used for building waveforms.
///
/// Instances are created only through `load(fromAudioURL:completionHandler:)`,
/// which resolves the asset's first audio track and derives the sample count.
final class AudioContext {

    /// The audio asset URL used to load the context
    public let audioURL: URL

    /// Total number of samples in the loaded asset, computed as
    /// sample rate × duration value / duration timescale
    public let totalSamples: Int

    /// Loaded asset
    public let asset: AVAsset

    /// First audio track of the loaded asset
    public let assetTrack: AVAssetTrack

    private init(audioURL: URL, totalSamples: Int, asset: AVAsset, assetTrack: AVAssetTrack) {
        self.audioURL = audioURL
        self.totalSamples = totalSamples
        self.asset = asset
        self.assetTrack = assetTrack
    }

    /// Asynchronously loads the asset at `audioURL` and builds an `AudioContext` for it.
    ///
    /// - Parameters:
    ///   - audioURL: Location of the audio file.
    ///   - completionHandler: Called exactly once, with the context on success or with
    ///     `nil` when the asset, its audio track, or its format description cannot be
    ///     obtained. It is invoked from the asset's loading callback, not necessarily
    ///     on the calling thread.
    public static func load(fromAudioURL audioURL: URL, completionHandler: @escaping (_ audioContext: AudioContext?) -> ()) {
        let asset = AVURLAsset(url: audioURL, options: [AVURLAssetPreferPreciseDurationAndTimingKey: NSNumber(value: true as Bool)])

        // "tracks" is loaded together with "duration" so that reading `asset.tracks`
        // below does not block while the file is being parsed.
        let requiredKeys = ["duration", "tracks"]

        asset.loadValuesAsynchronously(forKeys: requiredKeys) {
            for key in requiredKeys {
                var error: NSError?
                guard asset.statusOfValue(forKey: key, error: &error) == .loaded else {
                    print("Couldn't load asset: \(error?.localizedDescription ?? "Unknown error")")
                    completionHandler(nil)
                    return
                }
            }

            // A file without an audio track is a runtime condition, not a programmer
            // error, so it is reported through the handler instead of crashing.
            guard let assetTrack = asset.tracks(withMediaType: AVMediaType.audio).first else {
                print("Couldn't load AVAssetTrack")
                completionHandler(nil)
                return
            }

            guard
                let formatDescriptions = assetTrack.formatDescriptions as? [CMAudioFormatDescription],
                let audioFormatDesc = formatDescriptions.first,
                let asbd = CMAudioFormatDescriptionGetStreamBasicDescription(audioFormatDesc)
            else {
                completionHandler(nil)
                return
            }

            // A zero timescale would make the division below produce a non-finite
            // value, and converting that to Int traps.
            let duration = asset.duration
            guard duration.timescale > 0, duration.value >= 0 else {
                print("Couldn't load asset: invalid duration")
                completionHandler(nil)
                return
            }

            let totalSamples = Int((asbd.pointee.mSampleRate) * Float64(duration.value) / Float64(duration.timescale))
            let audioContext = AudioContext(audioURL: audioURL, totalSamples: totalSamples, asset: asset, assetTrack: assetTrack)
            completionHandler(audioContext)
        }
    }
}
We are going to use its asynchronous function load, which hands its result to a completion handler.
B) Import AVFoundation and Accelerate in your view controller:
import AVFoundation
import Accelerate
C) Declare the noise level in your view controller (in dB):
/// Lower clipping bound, in dB, that `getdB(from:)` applies to the converted samples.
let noiseFloor: Float = -80
For example, anything less than -80dB will be considered as silence.
D) The following function takes an audio context and produces the desired dB powers. targetSamples is by default set to 100, you can change that to suit your UI needs:
/// Reads the audio track of `audioContext` as 16-bit integer PCM and reduces it to
/// averaged, clipped decibel values by feeding the samples through `processSamples`.
///
/// - Parameters:
///   - audioContext: Context produced by `AudioContext.load`. `nil` yields an empty result.
///   - targetSamples: Desired number of output values. Values `<= 0` yield an empty result.
/// - Returns: The decibel values, or an empty array when the asset could not be read.
///   A trailing partial window adds one extra value, so the count may differ from
///   `targetSamples`.
func render(audioContext: AudioContext?, targetSamples: Int = 100) -> [Float] {
    guard let audioContext = audioContext else {
        print("Couldn't create the audioContext")
        return []
    }
    // Guards the integer division below against a zero divisor.
    guard targetSamples > 0 else {
        return []
    }
    guard let reader = try? AVAssetReader(asset: audioContext.asset) else {
        print("Couldn't initialize the AVAssetReader")
        return []
    }

    // Read the whole asset. The duration is taken from the asset itself: pairing the
    // sample count with the asset's timescale is only correct when that timescale
    // happens to equal the sample rate.
    let assetDuration = audioContext.asset.duration
    reader.timeRange = CMTimeRange(start: CMTime(value: 0, timescale: assetDuration.timescale),
                                   duration: assetDuration)

    let outputSettingsDict: [String : Any] = [
        AVFormatIDKey: Int(kAudioFormatLinearPCM),
        AVLinearPCMBitDepthKey: 16,
        AVLinearPCMIsBigEndianKey: false,
        AVLinearPCMIsFloatKey: false,
        AVLinearPCMIsNonInterleaved: false
    ]

    let readerOutput = AVAssetReaderTrackOutput(track: audioContext.assetTrack,
                                                outputSettings: outputSettingsDict)
    readerOutput.alwaysCopiesSampleData = false
    guard reader.canAdd(readerOutput) else {
        print("Couldn't add the track output to the AVAssetReader")
        return []
    }
    reader.add(readerOutput)

    // The channel count of the last format description wins, as before.
    var channelCount = 1
    guard let formatDescriptions = audioContext.assetTrack.formatDescriptions as? [CMAudioFormatDescription] else {
        print("Couldn't get the format description")
        return []
    }
    for item in formatDescriptions {
        guard let fmtDesc = CMAudioFormatDescriptionGetStreamBasicDescription(item) else {
            print("Couldn't get the format description")
            return []
        }
        channelCount = Int(fmtDesc.pointee.mChannelsPerFrame)
    }

    let totalSampleCount = max(0, audioContext.totalSamples)
    let samplesPerPixel = max(1, channelCount * totalSampleCount / targetSamples)
    let filter = [Float](repeating: 1.0 / Float(samplesPerPixel), count: samplesPerPixel)

    var outputSamples = [Float]()
    var sampleBuffer = Data()

    // 16-bit samples
    guard reader.startReading() else {
        print("Couldn't read the audio file")
        return []
    }
    defer { reader.cancelReading() }

    while reader.status == .reading {
        guard let readSampleBuffer = readerOutput.copyNextSampleBuffer(),
            let readBuffer = CMSampleBufferGetDataBuffer(readSampleBuffer) else {
                break
        }

        // Append audio sample buffer into our current sample buffer
        var readBufferLength = 0
        var readBufferPointer: UnsafeMutablePointer<Int8>?
        let pointerStatus = CMBlockBufferGetDataPointer(readBuffer, 0, &readBufferLength, nil, &readBufferPointer)
        guard pointerStatus == kCMBlockBufferNoErr, let readBytes = readBufferPointer else {
            CMSampleBufferInvalidate(readSampleBuffer)
            print("Couldn't read the audio file")
            return []
        }
        sampleBuffer.append(UnsafeBufferPointer(start: readBytes, count: readBufferLength))
        CMSampleBufferInvalidate(readSampleBuffer)

        // Only whole windows of `samplesPerPixel` samples are processed now;
        // the rest stays in `sampleBuffer` for the next iteration.
        let totalSamples = sampleBuffer.count / MemoryLayout<Int16>.size
        let downSampledLength = totalSamples / samplesPerPixel
        let samplesToProcess = downSampledLength * samplesPerPixel

        guard samplesToProcess > 0 else { continue }

        processSamples(fromData: &sampleBuffer,
                       outputSamples: &outputSamples,
                       samplesToProcess: samplesToProcess,
                       downSampledLength: downSampledLength,
                       samplesPerPixel: samplesPerPixel,
                       filter: filter)
    }

    // Process the remaining samples at the end which didn't fit into samplesPerPixel
    let remainingSamples = sampleBuffer.count / MemoryLayout<Int16>.size
    if remainingSamples > 0 {
        let remainderFilter = [Float](repeating: 1.0 / Float(remainingSamples), count: remainingSamples)
        processSamples(fromData: &sampleBuffer,
                       outputSamples: &outputSamples,
                       samplesToProcess: remainingSamples,
                       downSampledLength: 1,
                       samplesPerPixel: remainingSamples,
                       filter: remainderFilter)
    }

    guard reader.status == .completed else {
        print("Couldn't read the audio file")
        return []
    }

    return outputSamples
}
E) render uses this function to down-sample the data from the audio file, and convert to decibels:
/// Converts the first `samplesToProcess` 16-bit samples of `sampleBuffer` to clipped
/// decibel values (via `getdB`), averages them in windows of `samplesPerPixel`,
/// appends the averages to `outputSamples`, and removes the consumed bytes from
/// `sampleBuffer`.
///
/// The call is a no-op when the arguments are inconsistent: non-positive counts, a
/// filter shorter than `samplesPerPixel`, or fewer bytes in `sampleBuffer` than
/// `samplesToProcess` requires.
///
/// - Parameters:
///   - sampleBuffer: Raw 16-bit integer samples; the processed prefix is removed.
///   - outputSamples: Receives `downSampledLength` new values.
///   - samplesToProcess: Number of 16-bit samples to consume.
///   - downSampledLength: Number of averaged values to produce.
///   - samplesPerPixel: Window size (and stride) used for averaging.
///   - filter: Averaging weights; at least `samplesPerPixel` elements.
func processSamples(fromData sampleBuffer: inout Data,
                    outputSamples: inout [Float],
                    samplesToProcess: Int,
                    downSampledLength: Int,
                    samplesPerPixel: Int,
                    filter: [Float]) {
    let bytesToProcess = samplesToProcess * MemoryLayout<Int16>.size

    // vDSP reads exactly as many elements as it is told to, so every size is
    // validated up front instead of letting it run past the end of a buffer.
    guard samplesToProcess > 0,
        downSampledLength > 0,
        samplesPerPixel > 0,
        filter.count >= samplesPerPixel,
        downSampledLength * samplesPerPixel <= samplesToProcess,
        sampleBuffer.count >= bytesToProcess else {
            return
    }

    var processingBuffer = [Float](repeating: 0.0, count: samplesToProcess)
    let sampleCount = vDSP_Length(samplesToProcess)

    //Convert 16bit int samples to floats
    // Only this step needs the raw bytes; keeping the closure this small means
    // `sampleBuffer` is never mutated while a pointer into it is alive.
    sampleBuffer.withUnsafeBytes { (samples: UnsafePointer<Int16>) in
        vDSP_vflt16(samples, 1, &processingBuffer, 1, sampleCount)
    }

    //Take the absolute values to get amplitude
    vDSP_vabs(processingBuffer, 1, &processingBuffer, 1, sampleCount)

    //get the corresponding dB, and clip the results
    getdB(from: &processingBuffer)

    //Downsample and average
    var downSampledData = [Float](repeating: 0.0, count: downSampledLength)
    vDSP_desamp(processingBuffer,
                vDSP_Stride(samplesPerPixel),
                filter, &downSampledData,
                vDSP_Length(downSampledLength),
                vDSP_Length(samplesPerPixel))

    //Remove processed samples
    sampleBuffer.removeFirst(bytesToProcess)

    outputSamples += downSampledData
}
F) Which in turn calls this function that gets the corresponding dB, and clips the results to [noiseFloor, 0]:
/// Rewrites `normalizedSamples` in place: each value is converted to decibels
/// relative to 32768 and then clipped to the range `[noiseFloor, 0]`.
///
/// - Parameter normalizedSamples: Values to convert; overwritten with the result.
func getdB(from normalizedSamples: inout [Float]) {
    let elementCount = vDSP_Length(normalizedSamples.count)

    // 32768 is handed to vDSP_vdbcon as the zero-dB reference; the trailing flag
    // value 1 selects its amplitude form of the conversion.
    var zeroReference: Float = 32768.0
    vDSP_vdbcon(normalizedSamples, 1, &zeroReference, &normalizedSamples, 1, elementCount, 1)

    // Clamp everything into [noiseFloor, 0].
    var lowerBound = noiseFloor
    var upperBound: Float = 0.0
    vDSP_vclip(normalizedSamples, 1, &lowerBound, &upperBound, &normalizedSamples, 1, elementCount)
}
G) Finally you can get the waveform of the audio like so:
// Locate the bundled "audio.mp3"; a missing resource is treated as fatal here.
guard let path = Bundle.main.path(forResource: "audio", ofType:"mp3") else {
fatalError("Couldn't find the file path")
}
let url = URL(fileURLWithPath: path)
// Assigned from inside the completion handler below, so it stays empty until
// the asynchronous load has called back.
var outputArray : [Float] = []
AudioContext.load(fromAudioURL: url, completionHandler: { audioContext in
guard let audioContext = audioContext else {
fatalError("Couldn't create the audioContext")
}
// Ask for 300 values instead of render's default of 100.
outputArray = self.render(audioContext: audioContext, targetSamples: 300)
})
Don't forget that AudioContext.load(fromAudioURL:) is asynchronous.
This solution is synthesized from this repo by William Entriken. All credit goes to him.
Swift 5
Here is the same code updated to Swift 5 syntax:
import AVFoundation
import Accelerate
/// Holds audio information used for building waveforms.
///
/// Instances are created only through `load(fromAudioURL:completionHandler:)`,
/// which resolves the asset's first audio track and derives the sample count.
final class AudioContext {

    /// The audio asset URL used to load the context
    public let audioURL: URL

    /// Total number of samples in the loaded asset, computed as
    /// sample rate × duration value / duration timescale
    public let totalSamples: Int

    /// Loaded asset
    public let asset: AVAsset

    /// First audio track of the loaded asset
    public let assetTrack: AVAssetTrack

    private init(audioURL: URL, totalSamples: Int, asset: AVAsset, assetTrack: AVAssetTrack) {
        self.audioURL = audioURL
        self.totalSamples = totalSamples
        self.asset = asset
        self.assetTrack = assetTrack
    }

    /// Asynchronously loads the asset at `audioURL` and builds an `AudioContext` for it.
    ///
    /// - Parameters:
    ///   - audioURL: Location of the audio file.
    ///   - completionHandler: Called exactly once, with the context on success or with
    ///     `nil` when the asset, its audio track, or its format description cannot be
    ///     obtained. It is invoked from the asset's loading callback, not necessarily
    ///     on the calling thread.
    public static func load(fromAudioURL audioURL: URL, completionHandler: @escaping (_ audioContext: AudioContext?) -> ()) {
        let asset = AVURLAsset(url: audioURL, options: [AVURLAssetPreferPreciseDurationAndTimingKey: NSNumber(value: true as Bool)])

        // "tracks" is loaded together with "duration" so that reading `asset.tracks`
        // below does not block while the file is being parsed.
        let requiredKeys = ["duration", "tracks"]

        asset.loadValuesAsynchronously(forKeys: requiredKeys) {
            for key in requiredKeys {
                var error: NSError?
                guard asset.statusOfValue(forKey: key, error: &error) == .loaded else {
                    print("Couldn't load asset: \(error?.localizedDescription ?? "Unknown error")")
                    completionHandler(nil)
                    return
                }
            }

            // A file without an audio track is a runtime condition, not a programmer
            // error, so it is reported through the handler instead of crashing.
            guard let assetTrack = asset.tracks(withMediaType: AVMediaType.audio).first else {
                print("Couldn't load AVAssetTrack")
                completionHandler(nil)
                return
            }

            guard
                let formatDescriptions = assetTrack.formatDescriptions as? [CMAudioFormatDescription],
                let audioFormatDesc = formatDescriptions.first,
                let asbd = CMAudioFormatDescriptionGetStreamBasicDescription(audioFormatDesc)
            else {
                completionHandler(nil)
                return
            }

            // A zero timescale would make the division below produce a non-finite
            // value, and converting that to Int traps.
            let duration = asset.duration
            guard duration.timescale > 0, duration.value >= 0 else {
                print("Couldn't load asset: invalid duration")
                completionHandler(nil)
                return
            }

            let totalSamples = Int((asbd.pointee.mSampleRate) * Float64(duration.value) / Float64(duration.timescale))
            let audioContext = AudioContext(audioURL: audioURL, totalSamples: totalSamples, asset: asset, assetTrack: assetTrack)
            completionHandler(audioContext)
        }
    }
}
/// Lower clipping bound, in dB, that `getdB(from:)` applies to the converted samples.
let noiseFloor: Float = -80
/// Reads the audio track of `audioContext` as 16-bit integer PCM and reduces it to
/// averaged, clipped decibel values by feeding the samples through `processSamples`.
///
/// - Parameters:
///   - audioContext: Context produced by `AudioContext.load`. `nil` yields an empty result.
///   - targetSamples: Desired number of output values. Values `<= 0` yield an empty result.
/// - Returns: The decibel values, or an empty array when the asset could not be read.
///   A trailing partial window adds one extra value, so the count may differ from
///   `targetSamples`.
func render(audioContext: AudioContext?, targetSamples: Int = 100) -> [Float] {
    guard let audioContext = audioContext else {
        print("Couldn't create the audioContext")
        return []
    }
    // Guards the integer division below against a zero divisor.
    guard targetSamples > 0 else {
        return []
    }
    guard let reader = try? AVAssetReader(asset: audioContext.asset) else {
        print("Couldn't initialize the AVAssetReader")
        return []
    }

    // Read the whole asset. The duration is taken from the asset itself: pairing the
    // sample count with the asset's timescale is only correct when that timescale
    // happens to equal the sample rate.
    reader.timeRange = CMTimeRange(start: .zero, duration: audioContext.asset.duration)

    let outputSettingsDict: [String : Any] = [
        AVFormatIDKey: Int(kAudioFormatLinearPCM),
        AVLinearPCMBitDepthKey: 16,
        AVLinearPCMIsBigEndianKey: false,
        AVLinearPCMIsFloatKey: false,
        AVLinearPCMIsNonInterleaved: false
    ]

    let readerOutput = AVAssetReaderTrackOutput(track: audioContext.assetTrack,
                                                outputSettings: outputSettingsDict)
    readerOutput.alwaysCopiesSampleData = false
    guard reader.canAdd(readerOutput) else {
        print("Couldn't add the track output to the AVAssetReader")
        return []
    }
    reader.add(readerOutput)

    // The channel count of the last format description wins, as before.
    var channelCount = 1
    guard let formatDescriptions = audioContext.assetTrack.formatDescriptions as? [CMAudioFormatDescription] else {
        print("Couldn't get the format description")
        return []
    }
    for item in formatDescriptions {
        guard let fmtDesc = CMAudioFormatDescriptionGetStreamBasicDescription(item) else {
            print("Couldn't get the format description")
            return []
        }
        channelCount = Int(fmtDesc.pointee.mChannelsPerFrame)
    }

    let totalSampleCount = max(0, audioContext.totalSamples)
    let samplesPerPixel = max(1, channelCount * totalSampleCount / targetSamples)
    let filter = [Float](repeating: 1.0 / Float(samplesPerPixel), count: samplesPerPixel)

    var outputSamples = [Float]()
    var sampleBuffer = Data()

    // 16-bit samples
    guard reader.startReading() else {
        print("Couldn't read the audio file")
        return []
    }
    defer { reader.cancelReading() }

    while reader.status == .reading {
        guard let readSampleBuffer = readerOutput.copyNextSampleBuffer(),
            let readBuffer = CMSampleBufferGetDataBuffer(readSampleBuffer) else {
                break
        }

        // Append audio sample buffer into our current sample buffer
        var readBufferLength = 0
        var readBufferPointer: UnsafeMutablePointer<Int8>?
        let pointerStatus = CMBlockBufferGetDataPointer(readBuffer,
                                                        atOffset: 0,
                                                        lengthAtOffsetOut: &readBufferLength,
                                                        totalLengthOut: nil,
                                                        dataPointerOut: &readBufferPointer)
        guard pointerStatus == kCMBlockBufferNoErr, let readBytes = readBufferPointer else {
            CMSampleBufferInvalidate(readSampleBuffer)
            print("Couldn't read the audio file")
            return []
        }
        sampleBuffer.append(UnsafeBufferPointer(start: readBytes, count: readBufferLength))
        CMSampleBufferInvalidate(readSampleBuffer)

        // Only whole windows of `samplesPerPixel` samples are processed now;
        // the rest stays in `sampleBuffer` for the next iteration.
        let totalSamples = sampleBuffer.count / MemoryLayout<Int16>.size
        let downSampledLength = totalSamples / samplesPerPixel
        let samplesToProcess = downSampledLength * samplesPerPixel

        guard samplesToProcess > 0 else { continue }

        processSamples(fromData: &sampleBuffer,
                       outputSamples: &outputSamples,
                       samplesToProcess: samplesToProcess,
                       downSampledLength: downSampledLength,
                       samplesPerPixel: samplesPerPixel,
                       filter: filter)
    }

    // Process the remaining samples at the end which didn't fit into samplesPerPixel
    let remainingSamples = sampleBuffer.count / MemoryLayout<Int16>.size
    if remainingSamples > 0 {
        let remainderFilter = [Float](repeating: 1.0 / Float(remainingSamples), count: remainingSamples)
        processSamples(fromData: &sampleBuffer,
                       outputSamples: &outputSamples,
                       samplesToProcess: remainingSamples,
                       downSampledLength: 1,
                       samplesPerPixel: remainingSamples,
                       filter: remainderFilter)
    }

    guard reader.status == .completed else {
        print("Couldn't read the audio file")
        return []
    }

    return outputSamples
}
/// Converts the first `samplesToProcess` 16-bit samples of `sampleBuffer` to clipped
/// decibel values (via `getdB`), averages them in windows of `samplesPerPixel`,
/// appends the averages to `outputSamples`, and removes the consumed bytes from
/// `sampleBuffer`.
///
/// The call is a no-op when the arguments are inconsistent: non-positive counts, a
/// filter shorter than `samplesPerPixel`, or fewer bytes in `sampleBuffer` than
/// `samplesToProcess` requires.
///
/// - Parameters:
///   - sampleBuffer: Raw 16-bit integer samples; the processed prefix is removed.
///   - outputSamples: Receives `downSampledLength` new values.
///   - samplesToProcess: Number of 16-bit samples to consume.
///   - downSampledLength: Number of averaged values to produce.
///   - samplesPerPixel: Window size (and stride) used for averaging.
///   - filter: Averaging weights; at least `samplesPerPixel` elements.
func processSamples(fromData sampleBuffer: inout Data,
                    outputSamples: inout [Float],
                    samplesToProcess: Int,
                    downSampledLength: Int,
                    samplesPerPixel: Int,
                    filter: [Float]) {
    let bytesToProcess = samplesToProcess * MemoryLayout<Int16>.size

    // vDSP reads exactly as many elements as it is told to, so every size is
    // validated up front instead of letting it run past the end of a buffer.
    guard samplesToProcess > 0,
        downSampledLength > 0,
        samplesPerPixel > 0,
        filter.count >= samplesPerPixel,
        downSampledLength * samplesPerPixel <= samplesToProcess,
        sampleBuffer.count >= bytesToProcess else {
            return
    }

    var processingBuffer = [Float](repeating: 0.0, count: samplesToProcess)
    let sampleCount = vDSP_Length(samplesToProcess)

    //Convert 16bit int samples to floats
    // Only this step needs the raw bytes; keeping the closure this small means
    // `sampleBuffer` is never mutated while a pointer into it is alive.
    sampleBuffer.withUnsafeBytes { (samples: UnsafeRawBufferPointer) in
        guard let int16Samples = samples.bindMemory(to: Int16.self).baseAddress else { return }
        vDSP_vflt16(int16Samples, 1, &processingBuffer, 1, sampleCount)
    }

    //Take the absolute values to get amplitude
    vDSP_vabs(processingBuffer, 1, &processingBuffer, 1, sampleCount)

    //get the corresponding dB, and clip the results
    getdB(from: &processingBuffer)

    //Downsample and average
    var downSampledData = [Float](repeating: 0.0, count: downSampledLength)
    vDSP_desamp(processingBuffer,
                vDSP_Stride(samplesPerPixel),
                filter, &downSampledData,
                vDSP_Length(downSampledLength),
                vDSP_Length(samplesPerPixel))

    //Remove processed samples
    sampleBuffer.removeFirst(bytesToProcess)

    outputSamples += downSampledData
}
/// Rewrites `normalizedSamples` in place: each value is converted to decibels
/// relative to 32768 and then clipped to the range `[noiseFloor, 0]`.
///
/// - Parameter normalizedSamples: Values to convert; overwritten with the result.
func getdB(from normalizedSamples: inout [Float]) {
    let elementCount = vDSP_Length(normalizedSamples.count)

    // 32768 is handed to vDSP_vdbcon as the zero-dB reference; the trailing flag
    // value 1 selects its amplitude form of the conversion.
    var zeroReference: Float = 32768.0
    vDSP_vdbcon(normalizedSamples, 1, &zeroReference, &normalizedSamples, 1, elementCount, 1)

    // Clamp everything into [noiseFloor, 0].
    var lowerBound = noiseFloor
    var upperBound: Float = 0.0
    vDSP_vclip(normalizedSamples, 1, &lowerBound, &upperBound, &normalizedSamples, 1, elementCount)
}
Old solution
Here is a function you could use to pre-render the meter levels of an audio file without playing it:
/// Pre-renders the meter levels of one channel of an audio file without playing it.
///
/// The file is read in consecutive chunks of `sampleRate / 20` frames; each chunk
/// contributes one value: 20·log10 of the mean absolute sample value, or -160.0 when
/// that mean is not above 0.000_000_01. A trailing chunk shorter than the chunk size
/// is not included.
///
/// - Parameters:
///   - audioFileURL: Location of the audio file.
///   - channelNumber: Zero-based index of the channel to analyse.
///   - completionHandler: Called exactly once on a background queue with the dB
///     values, or with an empty array when the file cannot be opened or read or
///     `channelNumber` does not exist in the file.
func averagePowers(audioFileURL: URL, forChannel channelNumber: Int, completionHandler: @escaping(_ success: [Float]) -> ()) {
    //Do the calculations in a background thread, if you don't want to block the main thread for larger audio files
    DispatchQueue.global(qos: .userInitiated).async {
        let audioFile: AVAudioFile
        do {
            audioFile = try AVAudioFile(forReading: audioFileURL)
        } catch {
            print("Couldn't open the audio file: \(error)")
            completionHandler([])
            return
        }

        let audioFilePFormat = audioFile.processingFormat

        //Set the size of frames to read from the audio file, you can adjust this to your liking
        let frameSizeToRead = Int(audioFilePFormat.sampleRate/20)

        //Create a pcm buffer the size of a frame
        guard frameSizeToRead > 0,
            channelNumber >= 0,
            channelNumber < Int(audioFilePFormat.channelCount),
            let audioBuffer = AVAudioPCMBuffer(pcmFormat: audioFilePFormat, frameCapacity: AVAudioFrameCount(frameSizeToRead)) else {
                print("Couldn't create the audio buffer")
                completionHandler([])
                return
        }

        //This is to how many frames/portions we're going to divide the audio file
        let numberOfFrames = Int(audioFile.length)/frameSizeToRead

        //This is the array to be returned
        var returnArray = [Float]()
        returnArray.reserveCapacity(numberOfFrames)

        //We're going to read the audio file, frame by frame
        for i in 0..<numberOfFrames {
            //Change the position from which we are reading the audio file, since each frame starts from a different position in the audio file
            audioFile.framePosition = AVAudioFramePosition(i * frameSizeToRead)

            //Read the frame from the audio file
            do {
                try audioFile.read(into: audioBuffer, frameCount: AVAudioFrameCount(frameSizeToRead))
            } catch {
                print("Couldn't read the audio file: \(error)")
                completionHandler([])
                return
            }

            //Get the data from the chosen channel
            guard let channels = audioBuffer.floatChannelData else {
                print("Couldn't access the float channel data")
                completionHandler([])
                return
            }

            // Average over the frames that were actually read, not the requested count.
            let framesRead = Int(audioBuffer.frameLength)
            guard framesRead > 0 else { break }
            let samples = UnsafeBufferPointer(start: channels[channelNumber], count: framesRead)

            //Calculate the mean value of the absolute values
            let meanValue = samples.reduce(0, {$0 + abs($1)})/Float(framesRead)

            //Calculate the dB power (You can adjust this), if average is less than 0.000_000_01 we limit it to -160.0
            let dbPower: Float = meanValue > 0.000_000_01 ? 20 * log10(meanValue) : -160.0

            //append the db power in the current frame to the returnArray
            returnArray.append(dbPower)
        }

        //Return the dBPowers
        completionHandler(returnArray)
    }
}
And you can call it like so:
// The force unwrap crashes if "audio.mp3" is not present in the main bundle.
let path = Bundle.main.path(forResource: "audio.mp3", ofType:nil)!
let url = URL(fileURLWithPath: path)
// Analyse channel 0; the closure receives the resulting array of values.
averagePowers(audioFileURL: url, forChannel: 0, completionHandler: { array in
//Use the array
})
Using instruments, this solution makes high cpu usage during 1.2 seconds, takes about 5 seconds to return to the main thread with the returnArray, and up to 10 seconds when on low battery mode.
First of all, this is a heavy operation, so it will take some OS time and resources to accomplish. In the example below I will use standard frame rates and sampling, but you should really sample far, far less if, for example, you only want to display bars as an indication.
OK, so you don't need to play a sound to analyze it. So here I will not use AVAudioPlayer at all; I assume that I will take the track as a URL:
// The force unwrap crashes if "example3.mp3" is not present in the main bundle.
let path = Bundle.main.path(forResource: "example3.mp3", ofType:nil)!
let url = URL(fileURLWithPath: path)
Then I will use AVAudioFile to get track information into AVAudioPCMBuffer. Whenever you have it in buffer you have all information regarding your track:
/// Reads the whole audio file at `url` into one PCM buffer and passes it to `analyze(buffer:)`.
///
/// Failures (unreadable file, empty file, a length that does not fit a frame count,
/// or a buffer that cannot be allocated) are printed and the method returns
/// without calling `analyze(buffer:)`.
///
/// - Parameter url: Location of the audio file.
func buffer(url: URL) {
    do {
        let track = try AVAudioFile(forReading: url)

        // The buffer uses the file's own processing format, the format
        // `read(into:)` delivers, instead of a hand-built one that may not be constructible.
        let format = track.processingFormat

        guard track.length > 0,
            let frameCount = AVAudioFrameCount(exactly: track.length),
            let buffer = AVAudioPCMBuffer(pcmFormat: format, frameCapacity: frameCount) else {
                print("Couldn't create the audio buffer")
                return
        }

        try track.read(into: buffer, frameCount: frameCount)
        self.analyze(buffer: buffer)
    } catch {
        print(error)
    }
}
As you may notice, it calls an analyze method. The buffer exposes the samples through its floatChannelData property. It's plain data, so you'll need to parse it. I will post the method first and explain it below:
/// Computes one dB-style value per sample and channel of `buffer`.
///
/// Each value is `20 * log10(sqrt(|sample| / frameLength))`. The magnitude is taken
/// first because the square root of a negative sample is NaN; a sample of exactly
/// zero still yields `-infinity`.
/// NOTE(review): this is a per-sample figure, not an RMS over a window — confirm
/// that this is the quantity the caller wants to display.
///
/// - Parameter buffer: PCM buffer holding float channel data.
/// - Returns: One array per channel, each with `frameLength` values. Empty when the
///   buffer holds no frames or exposes no float channel data.
@discardableResult
func analyze(buffer: AVAudioPCMBuffer) -> [[Float]] {
    let channelCount = Int(buffer.format.channelCount)
    let frameLength = Int(buffer.frameLength)

    guard frameLength > 0, let channelData = buffer.floatChannelData else {
        return []
    }

    // Hoisted out of the loops: both are constant for the whole buffer.
    let frameLengthAsFloat = Float(frameLength)
    let sampleStride = buffer.stride

    var result = Array(repeating: [Float](repeatElement(0, count: frameLength)), count: channelCount)
    for channel in 0..<channelCount {
        let samples = channelData[channel]
        for sampleIndex in 0..<frameLength {
            let magnitude = abs(samples[sampleIndex * sampleStride])
            let sqrtV = sqrt(magnitude / frameLengthAsFloat)
            result[channel][sampleIndex] = 20 * log10(sqrtV)
        }
    }
    return result
}
There are some (heavy) calculations involved in it. When I was working on a similar solution a couple of months ago, I came across this tutorial: https://www.raywenderlich.com/5154-avaudioengine-tutorial-for-ios-getting-started There is an excellent explanation of this calculation there, as well as parts of the code that I pasted above and also use in my project, so I want to credit the author here: Scott McAlister 👏
Based on @Jakub's answer above, here's an Objective-C version.
If you want to increase the accuracy, change the deciblesCount variable, but beware of performance hit. If you want to return more bars, you can increase the divisions variable when you call the function (with no additional performance hit). You should probably put it on a background thread in any case.
A 3:36 minute / 5.2MB song takes about 1.2s. The above images are of a shotgun firing with 30 and 100 divisions respectively
// Builds normalised bar heights for the audio file at `filepath`.
//
// Up to 1000 evenly spaced samples are taken from each of the first two channels,
// converted to a positive decibel-like value, weighted by the standard deviation of
// the non-silent values, summed into `divisions` segments and scaled by the largest
// segment so every returned value lies between 0 and 1.
//
// filepath:  file system path of the audio file (not a URL string).
// divisions: number of segments per channel; must be greater than zero.
//
// Returns one array of `divisions` NSNumber floats per analysed channel, or an empty
// array when the arguments are invalid or the file cannot be read.
-(NSArray *)returnWaveArrayForFile:(NSString *)filepath numberOfDivisions:(int)divisions{

    //a non-positive division count would divide by zero further down
    if (filepath.length == 0 || divisions <= 0){
        return @[];
    }

    //pull file
    //the argument is a plain path, so it needs fileURLWithPath:; URLWithString:
    //yields nil or a scheme-less URL for such input
    NSError * error = nil;
    NSURL * url = [NSURL fileURLWithPath:filepath];
    AVAudioFile * file = [[AVAudioFile alloc] initForReading:url error:&error];
    if (file == nil || file.length <= 0){
        return @[];
    }

    //create av stuff
    AVAudioFormat * format = [[AVAudioFormat alloc] initWithCommonFormat:AVAudioPCMFormatFloat32 sampleRate:file.fileFormat.sampleRate channels:file.fileFormat.channelCount interleaved:false];
    if (format == nil){
        return @[];
    }
    AVAudioPCMBuffer * buffer = [[AVAudioPCMBuffer alloc] initWithPCMFormat:format frameCapacity:(AVAudioFrameCount)file.length];
    if (buffer == nil){
        return @[];
    }
    if (![file readIntoBuffer:buffer frameCount:(AVAudioFrameCount)file.length error:&error] || buffer.frameLength == 0){
        return @[];
    }

    //grab total number of decibles, 1000 seems to work
    int deciblesCount = (int)MIN(1000u, buffer.frameLength);
    NSMutableArray * channels = [NSMutableArray new];
    float frameIncrement = buffer.frameLength / (float)deciblesCount;

    //needed later
    float maxDecible = 0;
    float minDecible = HUGE_VALF;
    NSMutableArray * sd = [NSMutableArray new]; //used for standard deviation
    int channelLimit = (int)MIN(buffer.format.channelCount, 2u);
    for (int n = 0; n < channelLimit; n++){ //go through channels
        NSMutableArray * decibles = [NSMutableArray new]; //holds actual decible values

        //go through pulling the decibles
        for (int i = 0; i < deciblesCount; i++){

            int offset = frameIncrement * i; //grab offset
            //equation from stack, no idea the maths
            float sqr = sqrtf(buffer.floatChannelData[n][offset * buffer.stride]/(float)buffer.frameLength);
            float decible = 20 * log10f(sqr);

            decible += 160; //make positive
            decible = (isnan(decible) || decible < 0) ? 0 : decible; //if it's not a number or silent, make it zero
            if (decible > 0){ //if it has volume
                [sd addObject:@(decible)];
            }
            [decibles addObject:@(decible)];//add to decibles array

            maxDecible = MAX(maxDecible, decible); //grab biggest
            minDecible = MIN(minDecible, decible); //grab smallest
        }

        [channels addObject:decibles]; //add to channels array
    }

    //find standard deviation and then deducted the bottom slag
    //an all-silent file leaves sd empty; the deviation is then taken as zero
    float standardDeviation = 0;
    if (sd.count > 0){
        NSExpression * expression = [NSExpression expressionForFunction:@"stddev:" arguments:@[[NSExpression expressionForConstantValue:sd]]];
        standardDeviation = [[expression expressionValueWithObject:nil context:nil] floatValue];
    }
    float deviationRange = standardDeviation + (maxDecible - minDecible);
    float deviationDeduct = deviationRange > 0 ? standardDeviation / deviationRange : 0;

    //total decible values within a segment or division (integer division)
    int catchCount = deciblesCount / divisions;

    NSMutableArray * returning = [NSMutableArray new];
    for (NSArray * channel in channels){

        //go through calculating deviation percentage
        //created per channel: a shared array would make every channel after the
        //first read the first channel's values back
        NSMutableArray * deviations = [NSMutableArray new];
        for (NSNumber * value in channel){
            float decible = [value floatValue];
            float remainder = (maxDecible - decible);
            float denominator = standardDeviation + remainder;
            //a zero denominator is treated as "no deviation" instead of producing NaN
            float ratio = denominator > 0 ? standardDeviation / denominator : 0;
            [deviations addObject:@(ratio - deviationDeduct)];
        }

        //go through creating percentage
        float maxTotal = 0;
        NSMutableArray * totals = [NSMutableArray new];
        for (int n = 0; n < divisions; n++){

            float total = 0.0f;
            for (int k = 0; k < catchCount; k++){ //go through each segment
                int index = n * catchCount + k; //create the index
                total += [deviations[index] floatValue]; //add to total
            }

            //max out maxTotal var -> used later to calc percentages
            maxTotal = MAX(maxTotal, total);
            [totals addObject:@(total)]; //add to totals array
        }

        //normalise percentages and return
        NSMutableArray * percentages = [NSMutableArray new];
        for (NSNumber * totalValue in totals){
            //divide by the biggest value -> making it a percentage; 0 when every segment is empty
            float percentage = maxTotal > 0 ? [totalValue floatValue] / maxTotal : 0;
            [percentages addObject:@(percentage)]; //add to the array
        }

        //add to the returning array
        [returning addObject:percentages];
    }

    //return channel data -> array of two arrays of percentages
    return (NSArray *)returning;
}
Call like this:
int divisions = 30; //number of segments you want for your display
//pathForResource:ofType: returns a file system path (nil when the resource is missing)
NSString * path = [[NSBundle mainBundle] pathForResource:@"satie" ofType:@"mp3"];
NSArray * channels = [_audioReader returnWaveArrayForFile:path numberOfDivisions:divisions];
You get the two channels back in that array, which you can use to update your UI. Values in each array are between 0 and 1 which you can use to build your bars.
Given a single AKAudioFile that has been created from an AKNodeRecorder containing a series of spoken words, where each word is separated by at least 1 second, what is the best approach to ultimately create a series of files with each file containing one word?
I believe this can be accomplished if there is a way to iterate the file in, for example, 100 ms chunks, and measure the average amplitude of each chunk. "Silent chunks" could be those below some arbitrarily small amplitude. While iterating, if I encounter a chunk with non-silent amplitude, I can grab the starting timestamp of this "non-silent" chunk to create an audio file that starts here and ends at the start time of the next "silent" chunk.
Whether it'd be using a manual approach like the one above or a more built-in processing technique to AudioKit, any suggestions would be greatly appreciated.
I don't have a complete solution, but I've started working on something similar to this. This function could serve as a jumping off point for what you need. Basically you want to read the file into a buffer then analyze the buffer data. At that point you could dice it up into smaller buffers and write those to file.
/// Scans the audio file at `url` and guesses the times at which sound starts and stops.
///
/// Absolute sample values are collected into a window of 1024 values (channels share
/// the window). Each time the window is full its mean is compared with two
/// thresholds: rising above `0.001 / sensitivity` while "outside" a segment, or
/// falling to `0.0001 * sensitivity` or below while "inside" one, records a boundary,
/// provided more than one second has passed since the previous boundary.
///
/// - Parameters:
///   - url: Location of the audio file.
///   - sensitivity: Scales the two thresholds as described above.
/// - Returns: Boundary times in seconds, in ascending order, or `nil` when the file
///   cannot be opened, buffered or read.
public class func guessBoundaries(url: URL, sensitivity: Double = 1) -> [Double]? {
    var out: [Double] = []

    guard let audioFile = try? AVAudioFile(forReading: url) else { return nil }
    let processingFormat = audioFile.processingFormat

    // `exactly:` avoids a trap for lengths that do not fit the frame-count type.
    guard let frameCount = AVAudioFrameCount(exactly: audioFile.length),
        let pcmBuffer = AVAudioPCMBuffer(pcmFormat: processingFormat, frameCapacity: frameCount) else { return nil }

    do {
        audioFile.framePosition = 0
        try audioFile.read(into: pcmBuffer, frameCount: frameCount)
    } catch let err as NSError {
        AKLog("ERROR: Couldn't read data into buffer. \(err)")
        return nil
    }

    let channelCount = Int(pcmBuffer.format.channelCount)
    let bufferLength = 1024
    let inThreshold: Double = 0.001 / sensitivity
    let outThreshold: Double = 0.0001 * sensitivity
    let minSegmentDuration: Double = 1

    var counter = 0
    var thresholdCrossed = false
    var rmsBuffer = [Float](repeating: 0, count: bufferLength)
    var lastTime: Double = 0

    AKLog("inThreshold", inThreshold, "outThreshold", outThreshold)

    // Without float data there is nothing to scan; looked up once instead of per sample.
    guard let channelData = pcmBuffer.floatChannelData else { return out }

    for i in 0 ..< Int(pcmBuffer.frameLength) {
        // n is the channel
        for n in 0 ..< channelCount {
            let sample: Float = channelData[n][i]

            if counter == rmsBuffer.count {
                let time: Double = Double(i) / processingFormat.sampleRate
                // Computed in Double so it can be compared with the Double thresholds.
                let avg = Double(rmsBuffer.reduce(0, +)) / Double(rmsBuffer.count)

                if avg > inThreshold && !thresholdCrossed && time - lastTime > minSegmentDuration {
                    thresholdCrossed = true
                    out.append(time)
                    lastTime = time
                } else if avg <= outThreshold && thresholdCrossed && time - lastTime > minSegmentDuration {
                    thresholdCrossed = false
                    out.append(time)
                    lastTime = time
                }
                counter = 0
            }

            rmsBuffer[counter] = abs(sample)
            counter += 1
        }
    }
    return out
}
I'm using apple's HealthKit sample however resting energy value shown in Health app in iPhone doesn't match with the value fetched in sample app.
As per apple docs, HKQuantityTypeIdentifierBasalEnergyBurned is representing resting energy so I fetched this value from the HealthKit but the value I received doesn't match with the resting energy shown in Health App.
So I came across the apple's HealthKit sample where they are calculating resting energy based on formula:
// Calculates the user's total basal (resting) energy burn based off of their height, weight, age,
// and biological sex. If there is not enough information, return an error.
//
// `completion` is called exactly once: with the computed quantity, or with `nil` plus
// the error reported by the failing step. When no health store (or quantity type) is
// available it is called with `nil, nil` instead of never being called.
private func fetchTotalBasalBurn(completion: @escaping (HKQuantity?, Error?) -> Void)
{
    guard let healthStore = self.healthStore,
        let weightType = HKQuantityType.quantityType(forIdentifier: HKQuantityTypeIdentifier.bodyMass),
        let heightType = HKQuantityType.quantityType(forIdentifier: HKQuantityTypeIdentifier.height) else {
            completion(nil, nil)
            return
    }

    let todayPredicate: NSPredicate = self.predicateForSamplesToday()

    let queryWeigth: HKCompletionHandle = {
        (weight, error) -> Void in

        guard let weight = weight else {
            completion(nil, error)
            return
        }

        let queryHeigth: HKCompletionHandle = {
            (height, error) -> Void in

            guard let height = height else {
                completion(nil, error)
                return
            }

            let dateOfBirth: Date
            let biologicalSexObjet: HKBiologicalSexObject
            do {
                dateOfBirth = try healthStore.dateOfBirth()
                biologicalSexObjet = try healthStore.biologicalSex()
            } catch {
                completion(nil, error)
                return
            }

            // Once we have pulled all of the information without errors, calculate the user's total basal energy burn
            let basalEnergyButn: HKQuantity? = self.calculateBasalBurnTodayFromWeight(weight, height: height, dateOfBirth: dateOfBirth, biologicalSex: biologicalSexObjet)
            completion(basalEnergyButn, nil)
        }

        // NOTE(review): height is restricted to today's samples while weight is not —
        // confirm that this asymmetry is intended.
        healthStore.mostRecentQuantitySample(ofType: heightType, predicate: todayPredicate, completion: queryHeigth)
    }

    healthStore.mostRecentQuantitySample(ofType: weightType, predicate: nil, completion: queryWeigth)
}
// Estimates the basal energy burned so far today: the daily BMR from
// `calculateBMRFromWeight` scaled by the fraction of today that has elapsed.
// Returns nil when weight or height is missing.
private func calculateBasalBurnTodayFromWeight(_ weight: HKQuantity?, height: HKQuantity?, dateOfBirth: Date, biologicalSex: HKBiologicalSexObject) -> HKQuantity?
{
    // BMR can only be computed when both measurements are available.
    guard let bodyMass = weight, let bodyHeight = height else { return nil }

    // Height is read in centimeters from a unit string, weight in kilograms from a
    // prefixed gram unit; both ways of building a unit are valid.
    let centimeters = bodyHeight.doubleValue(for: HKUnit(from:"cm"))
    let kilograms = bodyMass.doubleValue(for: HKUnit.gramUnit(with: .kilo))

    // Whole years between the date of birth and now.
    let now = Date()
    let age: Int = Calendar.current.dateComponents([.year], from: dateOfBirth, to: now).year!

    // Kilocalories per day.
    let dailyKilocalories = self.calculateBMRFromWeight(weightInKilograms: kilograms, height: centimeters, age: age, biologicalSex: biologicalSex.biologicalSex)

    // Fraction of today that has already passed.
    let (dayStart, dayEnd): (Date, Date) = self.datesFromToday()
    let elapsedFraction = now.timeIntervalSince(dayStart) / dayEnd.timeIntervalSince(dayStart)

    return HKQuantity(unit: HKUnit.kilocalorie(), doubleValue: dailyKilocalories * elapsedFraction)
}
/// Estimates the basal metabolic rate in kilocalories per day from weight, height,
/// age and biological sex. There are several ways of calculating BMR; this example
/// uses one arbitrary pair of linear formulas: one for `.male`, the other for every
/// other value of `biologicalSex`.
private func calculateBMRFromWeight(weightInKilograms: Double, height heightInCentimeters: Double, age ageInYears: Int, biologicalSex: HKBiologicalSex) -> Double
{
    let age = Double(ageInYears)

    guard biologicalSex == .male else {
        return 655 + (9.6 * weightInKilograms) + (1.8 * heightInCentimeters) - (4.7 * age)
    }

    return 66.0 + (13.8 * weightInKilograms) + (5.0 * heightInCentimeters) - (6.8 * age)
}
I tried the sample app to fetch resting energy; however, the resting energy value shown in the Health app still doesn't match the value shown in the sample app.
Could anybody tell me how to fetch resting energy, or what calculation the Health app uses to find resting energy?
It would be great if someone can give me some pointers on it, I'm pretty new to HealthKit.
Thanks.
It seems that Apple's sample is outdated. As of iOS 8 and watchOS 2 there's a call to retrieve this information in the same way that active calories are retrieved; simply change the identifier. Apple Documentation
// Looks up the quantity type for the basalEnergyBurned identifier.
HKObjectType.quantityType(forIdentifier: HKQuantityTypeIdentifier.basalEnergyBurned)
Don't forget to include the additional permission to read this data as well.