How to encode self-delimited opus in iOS - ios

I can record opus using AVAudioRecorder as following:
// Records straight to Opus with AVAudioRecorder (16 kHz, mono).
let opusRecordingSettings: [String: Any] = [
    AVFormatIDKey: kAudioFormatOpus,
    AVSampleRateKey: 16000.0,
    AVNumberOfChannelsKey: 1
]
do {
    try audioRecordingSession.setCategory(.playAndRecord, mode: .default)
    try audioRecordingSession.setActive(true)
    audioRecorder = try AVAudioRecorder(url: fileUrl(), settings: opusRecordingSettings)
    audioRecorder.delegate = self
    audioRecorder.prepareToRecord()
    audioRecorder.record()
} catch {
    // Report session/recorder failures instead of silently discarding them.
    print("failed to start opus recording: \(error.localizedDescription)")
}
// ... ... ...
Now I need to encode opus as following:
| header | encoded opus data | header | encoded opus data | ... | ... |
Each header indicates the size of opus data (in bytes)
I am guessing (correct me if I am wrong):
AVAudioRecorder puts opus packets continuously side by side (since by
default opus packets are non-delimited) in a .opus file after
recording is finished
default frame duration is 20 ms and therefore frameSize = 20 * 16 = 320 ( since I am using 16 kHz sample rate)
each packet contains exactly one frame and therefore packetSize == frameSize
I understand that if I can somehow loop over the opus packets, then I can calculate size of each packet and append it as header (dataChunk = header + encodedOpusData)
I got the concept of self delimited opus from: ietf.org/Self-Delimiting-Framing but don't know the following:
how to create delimited opus audio file (delimited by header where header indicates size of opus data in bytes)
how and when to append header (while recording or after recording is completed?)

I was able to encode self-delimited opus using AVAudioRecorder and AVAssetReader (complete solution: hovermind.com/ios/libopus-kit)
FYI, this is probably not an elegant solution, but the other methods I tried
(i.e. tapping the InputNode of AVAudioEngine, or using AudioQueue) did not work
for me. If anyone finds a more elegant solution, please suggest an edit to this
answer.
Here is the summary (code below):
record Linear PCM using AVAudioRecorder and save recorded audio file
i.e. temp.wav
read recorded audio file using AVAssetReader and extract PCM (don't load audio file using Data(contentsOf: ...), there is metadata/header in the audio file that would cause some noise)
split PCM data into x byte chunks and loop over it (in my case x = 640)
encode x byte PCM chunk to opus using OpusKit pod
calculate size of encoded opus and append as header
Code (Swift 5, iOS 13, Xcode 11.3)
import UIKit
import MapKit
import MessageKit
import AVFoundation
import OpusKit
import os
/// Chat screen that owns the audio recorder and toggles recording from a button tap.
class BasicChatViewController: ChatViewController {

    // MARK: - Recording state

    /// `true` from the moment a recording is requested until it is stopped.
    var isRecording = false
    var avAudioPlayer: AVAudioPlayer!
    var audioRecorder: AVAudioRecorder!

    // MARK: - Lifecycle

    override func viewDidLoad() {
        super.viewDidLoad()
        Logger.logIt(#function)
        Logger.logIt("Initilizing opus lib kit")
        // The Opus codec is configured once, before any recording is encoded.
        OpusKit.shared.initialize(sampleRate: Opus.SAMPLE_RATE_DEFAULT,
                                  numberOfChannels: Opus.CHANNEL_COUNT_DEFAULT,
                                  packetSize: Opus.OPUS_ENCODER_BUFFER_SIZE,
                                  encodeBlockSize: Opus.FRAME_SIZE_DEFAULT)
        // configure record button here
    }

    // MARK: - Recording actions

    /// Target-action entry point for the record button.
    /// - Parameter sender: the tapped button (unused).
    @objc
    func onTapRecordButton(sender: UIButton) {
        Logger.logIt(#function)
        toggleRecording()
    }

    /// Flips `isRecording` and starts or stops the recorder accordingly.
    private func toggleRecording() {
        Logger.logIt(#function)
        Logger.logIt("isRecording: \(isRecording)")
        if isRecording {
            isRecording = false
            stopRecording()
        } else {
            isRecording = true
            checkPermissionAndStartRecording()
        }
    }
}
//
// Audio recording related extensions
//
// MARK: - Recording pipeline (record Linear PCM -> extract PCM -> encode to self-delimited Opus)
extension BasicChatViewController: AVAudioRecorderDelegate {

    /// Asks `AudioUtil` for the microphone permission and starts recording when it is granted.
    private func checkPermissionAndStartRecording() {
        Logger.logIt(#function)
        AudioUtil.checkRecordingPermission() { isPermissionGranted in
            Logger.logIt("isPermissionGranted: \(isPermissionGranted)")
            if isPermissionGranted {
                self.recordUsingAVAudioRecorder()
            } else {
                Logger.logIt("don't have permission to record")
                // Nothing was started, so the next tap must try to start again.
                self.isRecording = false
            }
        }
    }

    /// Activates the audio session and creates a recorder that writes Linear PCM to the temp WAV file.
    /// On failure the error is logged and `audioRecorder` is left unchanged.
    private func setupRecorder() {
        Logger.logIt(#function)
        let tempAudioFileUrl = AudioUtil.TEMP_WAV_FILE
        Logger.logIt("tempAudioFileUrl: \(tempAudioFileUrl)")
        let linearPcmRecordingSettings = LinearPCMRecording.LINEAR_PCM_RECODING_SETTINGS_DEFAULT
        Logger.logIt("RecordingSettings: \(linearPcmRecordingSettings)")
        do {
            startRecordingSession()
            let recorder = try AVAudioRecorder(url: tempAudioFileUrl, settings: linearPcmRecordingSettings)
            recorder.delegate = self
            //recorder.isMeteringEnabled = true
            recorder.prepareToRecord()
            audioRecorder = recorder
        } catch {
            Logger.logIt("\(error.localizedDescription)")
        }
    }

    /// Starts recording, creating the recorder first when none exists yet.
    private func startRecording() {
        Logger.logIt(#function)
        if audioRecorder == nil {
            setupRecorder()
        }
        // setupRecorder() may have failed; do not touch a nil implicitly-unwrapped recorder.
        guard let recorder = audioRecorder else {
            Logger.logIt("recorder is not available - can not start recording")
            isRecording = false
            return
        }
        recorder.record()
    }

    /// Stops the recorder if there is one; the delegate callback then triggers encoding.
    private func stopRecording() {
        Logger.logIt(#function)
        guard let recorder = audioRecorder else {
            return
        }
        recorder.stop()
    }

    /// Deletes the temporary recording, unless a recording is still in progress.
    private func deleteTempAudioFile() {
        Logger.logIt(#function)
        guard let recorder = audioRecorder, !recorder.isRecording else {
            return
        }
        // delete temporary audio file
        if recorder.deleteRecording() {
            Logger.logIt("temp (recorded) audio file deleted")
        } else {
            Logger.logIt("failed to delete temp (recorded) audio file")
        }
    }

    /// Configures the shared audio session for spoken-audio recording and activates it.
    private func startRecordingSession() {
        Logger.logIt(#function)
        do {
            try AVAudioSession.sharedInstance().setCategory(.record, mode: .spokenAudio)
            try AVAudioSession.sharedInstance().setActive(true)
        } catch {
            Logger.logIt("Failed to activate recording session")
        }
    }

    /// Deactivates the shared audio session.
    private func stopRecordingSession() {
        Logger.logIt(#function)
        do {
            try AVAudioSession.sharedInstance().setActive(false)
        } catch {
            Logger.logIt("Failed to deactivate recording session")
        }
    }

    /// Creates a fresh recorder and starts recording with it.
    private func recordUsingAVAudioRecorder() {
        Logger.logIt(#function)
        setupRecorder()
        startRecording()
    }

    /// Extracts the PCM from the temp WAV file, encodes it to self-delimited Opus, saves the result,
    /// then removes the temp file and deactivates the session.
    private func encodeRecordedAudio() {
        Logger.logIt(#function)
        let pcmData = AudioUtil.extractPcmOnly(from: AudioUtil.TEMP_WAV_FILE)
        if pcmData.count > 1 {
            Logger.logIt("encoding pcm to self-delimited opus")
            let encodedOpusData = AudioUtil.encodeToSelfDelimitedOpus(pcmData: pcmData, splitSize: PCM.SPLIT_CHUNK_SIZE_DEFAULT)
            Logger.logIt("encoded opus: \(encodedOpusData)")
            Logger.logIt("save encoded opus")
            AudioUtil.saveAudio(to: AudioUtil.ENCODED_OPUS_FILE, audioData: encodedOpusData)
        } else {
            Logger.logIt("no data to encode")
        }
        deleteTempAudioFile()
        stopRecordingSession()
    }

    /// `AVAudioRecorderDelegate` callback: encodes the recording when it finished successfully.
    func audioRecorderDidFinishRecording(_ recorder: AVAudioRecorder, successfully flag: Bool) {
        Logger.logIt(#function)
        // The recorder has stopped regardless of how it got here.
        isRecording = false
        if flag {
            Logger.logIt("finished recording successfully")
            encodeRecordedAudio()
        } else {
            Logger.logIt("recording failed - audio encoding error")
        }
    }
}
AudioUtil
import Foundation
import AVFoundation
import OpusKit
//
// Opus audio info.
//
/// Parameters describing a self-delimited Opus stream.
///
/// `packetSize` is derived from `sampleRate` (frame duration in ms * samples per ms)
/// and is recomputed whenever `sampleRate` is reassigned after initialization.
public class OpusAudioInfo {
    public static let `default` = OpusAudioInfo()

    var channels: opus_int32
    var headerSize: Int // bytes
    var packetSize: opus_int32
    var sampleRate: opus_int32 {
        didSet {
            packetSize = OpusAudioInfo.samplesPerFrame(at: sampleRate)
        }
    }

    /// - Parameters:
    ///   - sampleRate: samples per second
    ///   - channels: number of channels
    ///   - headerSize: size in bytes of the length header that precedes each packet
    public init(sampleRate: opus_int32 = Opus.SAMPLE_RATE_16_KHZ,
                channels: opus_int32 = Opus.CHANNEL_COUNT_DEFAULT,
                headerSize: Int = 1) {
        self.channels = channels
        self.headerSize = headerSize
        self.sampleRate = sampleRate
        // Property observers do not run during init, so derive the value explicitly.
        self.packetSize = OpusAudioInfo.samplesPerFrame(at: sampleRate)
    }

    /// Samples in one frame of the default duration at the given sample rate.
    private static func samplesPerFrame(at sampleRate: opus_int32) -> opus_int32 {
        return Int32(Opus.FRAME_DURATION_DEFAULT) * (sampleRate / 1000)
    }
}
//
// RAW PCM info.
//
/// Format description of raw Linear PCM: sample rate, channel count and bits per sample.
public class PCMInfo {
    public static let `default` = PCMInfo()

    var sampleRate: Int32
    var channels: Int16
    var bitDepth: Int16

    /// - Parameters:
    ///   - sampleRate: samples per second
    ///   - channels: number of channels
    ///   - bitDepth: bits per sample
    public init(
        sampleRate: Int32 = PCM.SAMPLE_RATE_16_KHZ,
        channels: Int16 = Int16(PCM.MONO),
        bitDepth: Int16 = Int16(PCM.BIT_DEPTH_DEFAULT)
    ) {
        self.bitDepth = bitDepth
        self.channels = channels
        self.sampleRate = sampleRate
    }
}
//
// Utility class for audio related operations
//
public class AudioUtil {
private init(){}
//
// Default audio files url in document directory
//
public static let RAW_PCM_FILE = FileUtil.createFileUrl(for: "pcm.raw", in: FileUtil.DOCUMENTS_DIR)
public static let TEMP_WAV_FILE = FileUtil.createFileUrl(for: "wav.wav", in: FileUtil.DOCUMENTS_DIR)
public static let ENCODED_OPUS_FILE = FileUtil.createFileUrl(for: "encoded_opus_ios.opus", in: FileUtil.DOCUMENTS_DIR)
public static let DECODED_WAV_WITH_HEADER_FILE = FileUtil.createFileUrl(for: "decoded_wav_with_header.wav", in: FileUtil.DOCUMENTS_DIR)
/**
 Creates a 44-byte RIFF/WAVE header describing Linear PCM data.
 The header is meant to be prepended to raw PCM so that the result is a playable wav file.
 - parameter sampleRate: samples per second
 - parameter channelCount: number of channels
 - parameter bitDepth: bits per sample
 - parameter pcmDataSizeInBytes: PCM data size in bytes
 - returns : Data - wav header data (all multi-byte fields little-endian)
 */
public static func createWavHeader(sampleRate: Int32, channelCount: Int16, bitDepth: Int16, pcmDataSizeInBytes dataSize: Int32) -> Data {
    /*
     WAV header details: http://www.topherlee.com/software/pcm-tut-wavformat.html
     Bytes   Field
     1 - 4   "RIFF"
     5 - 8   size of the overall file minus 8 bytes (32-bit integer)
     9 -12   "WAVE"
     13-16   "fmt " (format chunk marker, with trailing space)
     17-20   length of the format data that follows (16)
     21-22   format type (1 is PCM) - 2 byte integer
     23-24   number of channels - 2 byte integer
     25-28   sample rate - 32-bit integer
     29-32   byte rate = (sample rate * bits per sample * channels) / 8
     33-34   block align = (bits per sample * channels) / 8
     35-36   bits per sample
     37-40   "data" (data chunk marker)
     41-44   size of the data section
     */
    let WAV_HEADER_SIZE: Int32 = 44
    // The "RIFF" tag and the size field itself are not counted in the RIFF chunk size.
    let RIFF_PREAMBLE_SIZE: Int32 = 8
    let FORMAT_CODE_PCM: Int16 = 1
    let riffChunkSize: Int32 = dataSize + WAV_HEADER_SIZE - RIFF_PREAMBLE_SIZE
    let subChunkSize: Int32 = 16
    let blockAlign: Int16 = (bitDepth * channelCount) / 8
    let byteRate: Int32 = sampleRate * Int32(blockAlign)

    var header = Data(capacity: Int(WAV_HEADER_SIZE))
    header.append(contentsOf: Array("RIFF".utf8))
    header.append(contentsOf: byteArray(from: riffChunkSize))
    //WAVE
    header.append(contentsOf: Array("WAVE".utf8))
    //FMT
    header.append(contentsOf: Array("fmt ".utf8))
    header.append(contentsOf: byteArray(from: subChunkSize))
    header.append(contentsOf: byteArray(from: FORMAT_CODE_PCM))
    header.append(contentsOf: byteArray(from: channelCount))
    header.append(contentsOf: byteArray(from: sampleRate))
    header.append(contentsOf: byteArray(from: byteRate))
    header.append(contentsOf: byteArray(from: blockAlign))
    header.append(contentsOf: byteArray(from: bitDepth))
    //DATA
    header.append(contentsOf: Array("data".utf8))
    header.append(contentsOf: byteArray(from: dataSize))
    return header
}
/**
 Builds a wav header using the default PCM constants (sample rate, channel count, bit depth).
 - parameter dataSize: size of PCM data in bytes
 - returns : Data - wav header data
 */
public static func createDefaultWavHeader(dataSize: Int32) -> Data {
    let defaultChannelCount = Int16(PCM.CHANNEL_COUNT_DEFAULT)
    let defaultBitDepth = Int16(PCM.BIT_DEPTH_DEFAULT)
    return createWavHeader(sampleRate: PCM.SAMPLE_RATE_DEFAULT,
                           channelCount: defaultChannelCount,
                           bitDepth: defaultBitDepth,
                           pcmDataSizeInBytes: dataSize)
}
/**
 Returns the bytes of a fixed-width integer, least significant byte first.
 - parameter value:FixedWidthInteger type
 - returns: array of bytes (little-endian order, one element per byte of `T`)
 */
public static func byteArray<T>(from value: T) -> [UInt8] where T: FixedWidthInteger {
    // https://stackoverflow.com/a/56964191/4802664
    // Converting to little-endian first makes the byte order independent of the host.
    let littleEndianValue = value.littleEndian
    return withUnsafeBytes(of: littleEndianValue) { rawBytes in
        [UInt8](rawBytes)
    }
}
/**
 Concatenates a wav header and raw PCM into a single wav data buffer
 - parameter wavHeader: a RIFF WAV header (placed in front of the PCM)
 - parameter pcmData: Linear PCM data
 - returns: Data - header bytes followed by the PCM bytes
 */
public static func generateWav(header wavHeader: Data, pcmData: Data) -> Data {
    return wavHeader + pcmData
}
/**
 Checks permission for recording and invokes callback with the result.
 When the permission is still undetermined the user is prompted, and the callback is
 invoked (on the main queue) only after the user has answered.
 - parameter callback: closure to be invoked once the permission state is known
 */
public static func checkRecordingPermission(onPermissionChecked callback: @escaping (_ isPermissionGranted: Bool) -> Void) {
    Logger.logIt(#function)
    let session = AVAudioSession.sharedInstance()
    switch session.recordPermission {
    case .granted:
        callback(true)
    case .denied:
        callback(false)
    case .undetermined:
        // The answer arrives asynchronously; report it instead of a premature `false`.
        session.requestRecordPermission { allowed in
            DispatchQueue.main.async {
                callback(allowed)
            }
        }
    @unknown default:
        callback(false)
    }
}
/**
 Writes the given audio data to the specified url; a write failure is logged, not thrown
 - parameter fileUri: file url where audio data will be saved
 - parameter audioData: bytes to write
 */
public static func saveAudio(to fileUri: URL, audioData: Data) {
    Logger.logIt(#function)
    Logger.logIt("save to: \(fileUri)")
    do {
        try audioData.write(to: fileUri, options: [])
    } catch let writeError {
        Logger.logIt(writeError.localizedDescription)
    }
}
/**
 Encodes given PCM data into self delimited opus (`|header|data|header|data|...|`) using libopus

 The PCM is cut into `splitSize`-byte chunks (the last chunk may be shorter). Every chunk is
 encoded on its own and written as a one-byte header holding the encoded size, followed by the
 encoded bytes. Chunks that fail to encode, or whose encoded size does not fit the one-byte
 header (more than 255 bytes), are logged and skipped so the stream stays decodable.
 - parameter pcmData: Linear PCM data buffer (loaded from file or coming from AudioEngine tapping)
 - parameter splitSize: size of chunk to split the given pcmData; must be greater than 0
 - returns : encoded data (encoded as: `|header|data|header|data|...|`); empty when there is nothing to encode
 */
public static func encodeToSelfDelimitedOpus(pcmData: Data, splitSize: Int) -> Data {
    Logger.logIt(#function)
    var encodedData = Data()
    guard splitSize > 0, !pcmData.isEmpty else {
        Logger.logIt("nothing to encode (empty pcm data or invalid split size: \(splitSize))")
        return encodedData
    }
    Logger.logIt("split count: \(pcmData.count / splitSize)")

    // `pcmData` may be a slice whose indices do not start at 0.
    let base = pcmData.startIndex
    for offset in stride(from: 0, to: pcmData.count, by: splitSize) {
        let chunkLength = min(splitSize, pcmData.count - offset)
        let chunkStart = base + offset
        let pcmChunk = pcmData[chunkStart..<(chunkStart + chunkLength)]

        guard let encodedChunk = OpusKit.shared.encodeData(pcmChunk) else {
            print("failed to encode at index: \(offset)")
            continue
        }
        //
        // header is exactly one byte
        // header indicates size of the encoded opus data
        //
        guard let header = UInt8(exactly: encodedChunk.count) else {
            print("encoded chunk at index: \(offset) is \(encodedChunk.count) bytes and does not fit a one byte header - skipped")
            continue
        }
        encodedData.append(header)
        encodedData.append(encodedChunk)
    }
    return encodedData
}
/**
 Decodes given self delimited opus data to PCM
 Custom opus is encoded as `|header|data|header|data|...|`
 Loops over the data, reads the chunk size from the first byte of each header, slices that many
 bytes after the header and decodes the slice using libopus. Decoding stops (keeping what was
 decoded so far) when the remaining bytes are too short for the header or for the announced chunk.
 - parameter opusData: Encoded opus data buffer
 - parameter headerSizeInBytes: size of header in bytes (default is 1); only the first header byte carries the size
 - returns : decoded pcm data
 */
public static func decodeSelfDelimitedOpusToPcm(opusData: Data, headerSizeInBytes headerSize: Int = 1) -> Data {
    var decodedData = Data()
    guard headerSize >= 1 else {
        print("invalid header size: \(headerSize)")
        return decodedData
    }

    // `opusData` may be a slice whose indices do not start at 0.
    let base = opusData.startIndex
    let end = opusData.endIndex
    var readIndex = base
    while readIndex < end {
        let chunkStart = readIndex + headerSize
        guard chunkStart <= end else {
            print("truncated header at index: \(readIndex - base)")
            break
        }
        let chunkSizeFromHeader = Int(opusData[readIndex])
        let chunkEnd = chunkStart + chunkSizeFromHeader
        guard chunkEnd <= end else {
            print("truncated opus chunk at index: \(chunkStart - base)")
            break
        }

        let opusChunk = opusData[chunkStart..<chunkEnd]
        if let decodedChunk = OpusKit.shared.decodeData(opusChunk) {
            decodedData.append(decodedChunk)
        } else {
            print("failed to decode at index: \(chunkStart - base)")
        }
        // headerSize >= 1 guarantees progress even for an announced size of 0.
        readIndex = chunkEnd
    }
    return decodedData
}
/**
 Extracts PCM only from an audio file using AVAssetReader
 The audio file carries a container header/metadata in addition to the samples, therefore
 AVAssetReader is used to obtain the sample bytes only.
 - parameter fileUrl : audio file url
 - returns: PCM Data; empty when the file has no audio track or can not be read
 */
public static func extractPcmOnly(from fileUrl: URL) -> Data {
    var pcmOnly = Data()
    let asset = AVAsset(url: fileUrl)
    guard let track = asset.tracks(withMediaType: AVMediaType.audio).first else {
        Logger.logIt("no audio track found in: \(fileUrl)")
        return pcmOnly
    }
    do {
        let assetReader = try AVAssetReader(asset: asset)
        let outputSettings = LinearPCMRecording.LINEAR_PCM_RECODING_SETTINGS_DEFAULT
        let trackOutput = AVAssetReaderTrackOutput(track: track, outputSettings: outputSettings)
        assetReader.add(trackOutput)
        guard assetReader.startReading() else {
            Logger.logIt("failed to start reading: \(assetReader.error?.localizedDescription ?? "unknown error")")
            return pcmOnly
        }
        Logger.logIt("reading data with AVAssetReader")
        while assetReader.status == AVAssetReader.Status.reading {
            // nil means there is nothing more to read (end of track or failure).
            guard let sampleBuffer = trackOutput.copyNextSampleBuffer() else {
                break
            }
            defer { CMSampleBufferInvalidate(sampleBuffer) }
            guard let blockBuffer = CMSampleBufferGetDataBuffer(sampleBuffer) else {
                continue
            }
            let bufferLength = CMBlockBufferGetDataLength(blockBuffer)
            guard bufferLength > 0 else {
                continue
            }
            var bytes = [UInt8](repeating: 0, count: bufferLength)
            let copyStatus = CMBlockBufferCopyDataBytes(blockBuffer, atOffset: 0, dataLength: bufferLength, destination: &bytes)
            if copyStatus == kCMBlockBufferNoErr {
                pcmOnly.append(contentsOf: bytes)
            } else {
                Logger.logIt("failed to copy sample bytes, status: \(copyStatus)")
            }
        }
        if assetReader.status == AVAssetReader.Status.failed {
            Logger.logIt("reading failed: \(assetReader.error?.localizedDescription ?? "unknown error")")
        }
    } catch {
        Logger.logIt(error.localizedDescription)
    }
    return pcmOnly
}
}
Additional classes
import Foundation
import AVFoundation
import OpusKit
/// Shared audio constants (sample rates, channel count, bit depth, frame timing).
/// `PCM`, `WAV` and `Opus` subclass this type and inherit these constants.
public class Audio {
// Sample rates in Hz.
public static let SAMPLE_RATE_16_KHZ: opus_int32 = 16_000
public static let SAMPLE_RATE_8_KHZ: opus_int32 = 8_000
public static let SAMPLE_RATE_DEFAULT = SAMPLE_RATE_16_KHZ
// Channel counts.
public static let MONO:Int32 = 1
public static let CHANNEL_COUNT_DEFAULT:Int32 = MONO
// Bits per sample.
public static let BIT_DEPTH_DEFAULT:Int32 = 16
public static let FRAME_DURATION_DEFAULT = 20 // milliseconds
// FRAME_SIZE (samples per frame) = samples per millisecond * frame duration in milliseconds
// (16 * 20 = 320 with the defaults above)
public static let FRAME_SIZE_DEFAULT:Int32 = (SAMPLE_RATE_DEFAULT / 1000) * Int32(FRAME_DURATION_DEFAULT)
}
/// PCM-specific constants on top of the inherited `Audio` constants.
public class PCM: Audio {
// Bytes of PCM in one frame: samples per frame * bytes per sample
// (320 * 2 = 640 with the inherited defaults).
public static let SPLIT_CHUNK_SIZE_DEFAULT:Int = Int(FRAME_SIZE_DEFAULT * (BIT_DEPTH_DEFAULT / 8))
}
/// WAV header constants on top of the inherited `Audio` constants.
public class WAV: Audio {
public static let HEADER_SIZE:Int32 = 44 // always 44 bytes
// Format code written into the header for Linear PCM.
public static let WAV_HEADER_FORMAT_PCM:Int16 = 1
public static let WAV_HEADER_SUB_CHUNK_SIZE:Int32 = 16 // always 16
}
/// Opus encoder constants on top of the inherited `Audio` constants.
public class Opus: Audio {
public static let ENCODED_OUTPUT_MEMORY_SIZE_LIMIT:Int32 = 255 // Size of the allocated memory for the output payload
// Passed as `packetSize` when OpusKit is initialized.
// NOTE(review): presumably the maximum size of one encoded packet - confirm against the linked answer.
public static let OPUS_ENCODER_BUFFER_SIZE:Int32 = 1275 // ref: https://stackoverflow.com/a/55707654/4802664
}
/// Linear PCM recording configuration for `AVAudioRecorder`.
///
/// `bitRate` and `recordingSettings` are derived from `sampleRate`, `channelCount` and
/// `bitDepth`, and are refreshed every time one of those three is reassigned.
public class PCMRecordingSetting {
    private static let SAMPLE_RATE_16_KHZ = 16_000
    private static let BIT_DEPTH_16 = 16
    private static let CHANNEL_MONO = 1

    // `didSet` (not `willSet`): the derived values must be computed from the NEW value,
    // which is only stored once `willSet` has returned.
    public var sampleRate: Int = SAMPLE_RATE_16_KHZ {
        didSet {
            updateBitRate()
            updateLinearPCMRecordingSettings()
        }
    }
    public var channelCount: Int = CHANNEL_MONO {
        didSet {
            updateBitRate()
            updateLinearPCMRecordingSettings()
        }
    }
    public var bitDepth: Int = BIT_DEPTH_16 {
        didSet {
            updateBitRate()
            updateLinearPCMRecordingSettings()
        }
    }

    /// Bits per second: sample rate * bit depth * channel count.
    public private(set) var bitRate = SAMPLE_RATE_16_KHZ * BIT_DEPTH_16 * CHANNEL_MONO

    private func updateBitRate() {
        bitRate = sampleRate * bitDepth * channelCount
    }

    /// Settings dictionary for 16 kHz, mono, 16-bit integer Linear PCM.
    public static let LINEAR_PCM_DEFAULT = [
        AVFormatIDKey: kAudioFormatLinearPCM,
        AVSampleRateKey: SAMPLE_RATE_16_KHZ,
        AVNumberOfChannelsKey: CHANNEL_MONO,
        AVLinearPCMBitDepthKey: BIT_DEPTH_16,
        AVLinearPCMIsFloatKey: false
    ] as [String : Any]

    /// Settings dictionary matching the current property values.
    public var recordingSettings = LINEAR_PCM_DEFAULT

    private func updateLinearPCMRecordingSettings() {
        Logger.debug(#function)
        recordingSettings = [
            AVFormatIDKey: kAudioFormatLinearPCM,
            AVSampleRateKey: sampleRate,
            AVNumberOfChannelsKey: channelCount,
            AVLinearPCMBitDepthKey: bitDepth,
            AVLinearPCMIsFloatKey: false
        ] as [String : Any]
    }

    /// - Parameters:
    ///   - sampleRate: samples per second
    ///   - channelCount: number of channels
    ///   - bitDepth: bits per sample
    public init(sampleRate: Int, channelCount: Int, bitDepth: Int) {
        self.sampleRate = sampleRate
        self.channelCount = channelCount
        self.bitDepth = bitDepth
        // Property observers do not run during init, so refresh the derived values explicitly.
        updateBitRate()
        updateLinearPCMRecordingSettings()
    }

    public static let `default` = PCMRecordingSetting(sampleRate: SAMPLE_RATE_16_KHZ, channelCount: CHANNEL_MONO, bitDepth: BIT_DEPTH_16)
}

Related

Converting bytes to floats in Swift when receiving bluetooth communications [duplicate]

This is my code to convert byte data to float. I tried every answers given in this site. I am getting exponential value for this "<44fa0000>" byte data
// Question code (Swift 2 syntax), kept exactly as posted.
// It takes the first 4 bytes, turns them into their textual description ("<44fa0000>"),
// strips the angle brackets and parses the remaining hex digits as an integer.
// NOTE: the bytes are therefore read as an integer magnitude (then divided by 10), not
// reinterpreted as a floating point bit pattern - which is why the result is not 2000.0.
static func returnFloatValue(mutableData:NSMutableData)->Float
{
let qtyRange = mutableData.subdataWithRange(NSMakeRange(0, 4))
let qtyString = String(qtyRange)
let qtyTrimString = qtyString.stringByTrimmingCharactersInSet(NSCharacterSet(charactersInString: "<>"))
// strtoul with base 16 yields an unsigned integer; the division happens before the Float conversion.
let qtyValue = Float(strtoul(qtyTrimString, nil, 16)/10)
return qtyValue
}
Thanks
<44fa0000> is the big-endian memory representation of the
binary floating point number 2000.0. To get the number back from
the data, you have to read it into an UInt32 first, convert from
big-endian to host byteorder, and then cast the result to
a Float.
In Swift 2 that would be
// Swift 2: reads a UInt32 from the start of `data`, converts it from big-endian to host
// byte order, and reinterprets the resulting bit pattern as a Float.
// NOTE(review): no length check is visible here - presumably `data` must hold at least 4 bytes.
func floatValueFromData(data: NSData) -> Float {
return unsafeBitCast(UInt32(bigEndian: UnsafePointer(data.bytes).memory), Float.self)
}
Example:
// Usage example for floatValueFromData (Swift 2): the four bytes below are the
// big-endian representation of 2000.0.
let bytes: [UInt8] = [0x44, 0xFA, 0x00, 0x00]
let data = NSData(bytes: bytes, length: 4)
print(data) // <44fa0000>
let f = floatValueFromData(data)
print(f) // 2000.0
In Swift 3 you would use Data instead of NSData, and the
unsafeBitCast can be replaced by the Float(bitPattern:)
initializer:
// Swift 3: reads a UInt32 through the typed-pointer form of withUnsafeBytes, converts it
// from big-endian to host byte order, and builds the Float from that bit pattern.
// NOTE(review): no length check is visible here - presumably `data` must hold at least 4 bytes.
func floatValue(data: Data) -> Float {
return Float(bitPattern: UInt32(bigEndian: data.withUnsafeBytes { $0.pointee } ))
}
In Swift 5 the withUnsafeBytes() method of Data calls the closure with an (untyped) UnsafeRawBufferPointer, and you can load() the value from the raw memory:
/// Decodes the first four bytes of `data` as a big-endian IEEE 754 single-precision value.
///
/// The bit pattern is assembled byte by byte, so the result does not depend on the
/// alignment of the underlying storage (e.g. when `data` is a slice).
/// - Parameter data: at least 4 bytes, most significant byte first.
/// - Returns: the decoded `Float`.
/// - Precondition: `data.count >= 4`.
func floatValue(data: Data) -> Float {
    precondition(data.count >= MemoryLayout<UInt32>.size, "floatValue(data:) needs at least 4 bytes")
    let bitPattern = data.prefix(4).reduce(UInt32(0)) { ($0 << 8) | UInt32($1) }
    return Float(bitPattern: bitPattern)
}
Here is some swift 5:
// The two `floatNb` lines are alternatives - use one or the other, not both in the same scope.
let data = Data([0x44, 0xfa, 0x00, 0x00]) // 0x44fa0000
// Alternative 1: load the four bytes exactly as they are laid out in memory.
let floatNb:Float = data.withUnsafeBytes { $0.load(as: Float.self) }
// note that depending on the input endianess, you could add .reversed() to data
// Alternative 2: reverse the byte order first, then load.
let floatNb:Float = data.reversed().withUnsafeBytes { $0.load(as: Float.self) }
WARNING: this sample crashes (it reads out of bounds) if your Data is under 4 bytes.
.
Safe Data extension:
extension Data {
    /// Byte-order selector for `toFloat(endianess:)`.
    ///
    /// NOTE(review): the labels are the reverse of the usual meaning of little/big endian
    /// (see `toFloat`); they are kept as they are so existing callers keep their results.
    enum Endianess {
        case little
        case big
    }

    /// Interprets up to four bytes as an IEEE 754 single-precision bit pattern.
    ///
    /// - `.little` (default): the bytes are zero-padded at the END to four bytes and read
    ///   most significant byte first, e.g. `[0x42, 0x28]` -> `0x42280000` -> `42.0`.
    /// - `.big`: the bytes are zero-padded at the FRONT to four bytes and read least
    ///   significant byte first, e.g. `[0x28, 0x42]` -> `0x42280000` -> `42.0`.
    ///
    /// The bit pattern is assembled arithmetically, so the result does not depend on the
    /// byte order of the host.
    /// - Parameter endianess: how the bytes are padded and ordered (see above).
    /// - Returns: the decoded value, or `nil` when there are more than four bytes.
    func toFloat(endianess: Endianess = .little) -> Float? {
        guard count <= 4 else { return nil }
        let padding = [UInt8](repeating: 0x00, count: 4 - count)
        let bitPattern: UInt32
        switch endianess {
        case .big:
            let leastSignificantFirst = padding + [UInt8](self)
            bitPattern = leastSignificantFirst.reversed().reduce(UInt32(0)) { ($0 << 8) | UInt32($1) }
        case .little:
            let mostSignificantFirst = [UInt8](self) + padding
            bitPattern = mostSignificantFirst.reduce(UInt32(0)) { ($0 << 8) | UInt32($1) }
        }
        return Float(bitPattern: bitPattern)
    }
}
Tests:
// Sample inputs for Data.toFloat(endianess:). Expected values below assume a little-endian host.
let opData = Data([0x44, 0xFA, 0x00, 0x00])
let nb42 = Data([0x42, 0x28])
let nb42bigEndian = Data([0x28, 0x42])
let tooBig = Data([0x44, 0xFA, 0x00, 0x00, 0x00])
// toFloat() returns an optional, so the values print wrapped in Optional(...).
print("opData: \(opData.toFloat())") // 2000.0
print("nb42: \(nb42.toFloat())") // 42.0
print("nb42bigEndian: \(nb42bigEndian.toFloat(endianess: .big))") // 42.0
print("tooBig: \(tooBig.toFloat())") // nil (more than 4 bytes)
you may find a faster way but this was good enough for my needs
Use this function:
/// Reinterprets the first four bytes of `data`, in host byte order, as a `Float`.
///
/// When `data` holds fewer than four bytes only the available bytes are copied and the
/// remaining bytes of the result stay zero, instead of reading past the end of the buffer.
/// - Parameter data: raw bytes of the value.
/// - Returns: the decoded `Float`.
static func returnFloatValue(data: NSMutableData) -> Float {
    let bytes = [UInt8](data as Data)
    var f: Float = 0
    // Never copy more bytes than the source actually holds.
    memcpy(&f, bytes, min(bytes.count, MemoryLayout<Float>.size))
    return f
}
And you can see it in action here:
// Round trip: store a Float's raw bytes in NSMutableData, then read them back.
var initialValue: Float = 19.200
let data = NSMutableData(bytes: &initialValue, length: 4)

/// Reinterprets the first four bytes of `data`, in host byte order, as a `Float`.
/// Fewer than four bytes are tolerated: only the available bytes are copied.
func returnFloatValue(data: NSMutableData) -> Float {
    let bytes = [UInt8](data as Data)
    var f: Float = 0
    // Never copy more bytes than the source actually holds.
    memcpy(&f, bytes, min(bytes.count, MemoryLayout<Float>.size))
    return f
}

var result: Float = returnFloatValue(data: data)
print("f=\(result)")// f=19.2
For 64 bit values the code is:
/// Reinterprets the 64 bits of `x` as an IEEE 754 double-precision value.
///
/// `UInt64(bitPattern:)` is used so that values with the sign bit set (negative `Int64`,
/// i.e. negative doubles) are converted bit-for-bit instead of trapping.
/// - Parameter x: the raw bits of the double.
/// - Returns: the `Float64` with the same bit pattern.
static func longBitsToDouble(x: Int64) -> Float64 {
    return Float64(bitPattern: UInt64(bitPattern: x))
}
Swift 4+.

Set left and right headphone volume using two different sliders

I am generating a wave sound for different frequencies and user should hear this wave sound using headphones only and he/she will set left and right headphone volumes using two different sliders. To achieve wave sound I wrote below code which works perfect.
But problem is: From last 5 days I am trying to set volume for left and right headphones separately, but no luck.
/// Tone generator: renders `signal` at `frequency` through an `AVAudioSourceNode`
/// connected to the engine's main mixer.
class Synth {
    // MARK: Properties
    public static let shared = Synth()

    /// Output volume of the main mixer.
    public var volume: Float {
        set {
            audioEngine.mainMixerNode.outputVolume = newValue
        }
        get {
            audioEngine.mainMixerNode.outputVolume
        }
    }

    /// Stereo position of the tone, from -1.0 (left) through 0.0 (center) to 1.0 (right).
    /// Values outside that range are clamped. The pan is set on the source node, i.e. on
    /// the input that feeds the mixer, rather than on the main mixer itself.
    public var pan: Float {
        set {
            sourceNode.pan = min(max(newValue, -1.0), 1.0)
        }
        get {
            sourceNode.pan
        }
    }

    public var frequencyRampValue: Float = 0

    /// Tone frequency in Hz; changing it records the delta so the render block can ramp to it.
    public var frequency: Float = 440 {
        didSet {
            if oldValue != 0 {
                frequencyRampValue = frequency - oldValue
            } else {
                frequencyRampValue = 0
            }
        }
    }

    private var audioEngine: AVAudioEngine

    // Render block: fills every buffer of the buffer list with the same sample per frame.
    private lazy var sourceNode = AVAudioSourceNode { _, _, frameCount, audioBufferList in
        let ablPointer = UnsafeMutableAudioBufferListPointer(audioBufferList)
        let localRampValue = self.frequencyRampValue
        let localFrequency = self.frequency - localRampValue
        let period = 1 / localFrequency
        for frame in 0..<Int(frameCount) {
            let percentComplete = self.time / period
            let sampleVal = self.signal(localFrequency + localRampValue * percentComplete, self.time)
            self.time += self.deltaTime
            // Keep `time` within one period of the waveform.
            self.time = fmod(self.time, period)
            for buffer in ablPointer {
                let buf: UnsafeMutableBufferPointer<Float> = UnsafeMutableBufferPointer(buffer)
                buf[frame] = sampleVal
            }
        }
        // The ramp has been applied for this render cycle.
        self.frequencyRampValue = 0
        return noErr
    }

    private var time: Float = 0
    private let sampleRate: Double
    private let deltaTime: Float
    private var signal: Signal

    // MARK: Init
    init(signal: @escaping Signal = Oscillator.square) {
        audioEngine = AVAudioEngine()
        let mainMixer = audioEngine.mainMixerNode
        let outputNode = audioEngine.outputNode
        let format = outputNode.inputFormat(forBus: 0)
        sampleRate = format.sampleRate
        deltaTime = 1 / Float(sampleRate)
        self.signal = signal
        // The source is mono; the mixer places it in the stereo field according to `pan`.
        let inputFormat = AVAudioFormat(commonFormat: format.commonFormat,
                                        sampleRate: format.sampleRate,
                                        channels: 1,
                                        interleaved: format.isInterleaved)
        audioEngine.attach(sourceNode)
        audioEngine.connect(sourceNode, to: mainMixer, format: inputFormat)
        audioEngine.connect(mainMixer, to: outputNode, format: nil)
        mainMixer.outputVolume = 0
        do {
            try audioEngine.start()
        } catch {
            print("Could not start engine: \(error.localizedDescription)")
        }
    }

    //This function will be called in view controller to generate sound
    public func setWaveformTo(_ signal: @escaping Signal) {
        self.signal = signal
    }
}
With the above code I can hear the wave sound as normal in left and right headphone.
I tried to use audioEngine.mainMixerNode.pan for value 100 and -100 also -1.0 and 1.0 but this did not make any change.
I tried to use audioEngine.mainMixerNode.pan for value 100 and -100 but this did not make any change.
The allowable range for the pan value is -1.0 to 1.0 inclusive, i.e. [-1.0, 1.0]. The values 100 and -100 that you tried are outside that range, so it's not surprising that they had no effect. Try 0.75 or -0.75 instead.

Extract meter levels from audio file

I need to extract audio meter levels from a file so I can render the levels before playing the audio. I know AVAudioPlayer can get this information while playing the audio file through
func averagePower(forChannel channelNumber: Int) -> Float.
But in my case I would like to obtain an [Float] of meter levels beforehand.
Swift 4
It takes on an iPhone:
0.538s to process an 8 MB mp3 file with a 4min47s duration and a 44,100 Hz sampling rate
0.170s to process a 712 KB mp3 file with a 22s duration and a 44,100 Hz sampling rate
0.089s to process caffile created by converting the file above using this command afconvert -f caff -d LEI16 audio.mp3 audio.caf in the terminal.
Let's begin:
A) Declare this class that is going to hold the necessary information about the audio asset:
/// Holds audio information used for building waveforms
/// Holds audio information used for building waveforms
final class AudioContext {
    /// The audio asset URL used to load the context
    public let audioURL: URL
    /// Total number of samples in loaded asset
    public let totalSamples: Int
    /// Loaded asset
    public let asset: AVAsset
    /// Loaded assetTrack
    public let assetTrack: AVAssetTrack

    private init(audioURL: URL, totalSamples: Int, asset: AVAsset, assetTrack: AVAssetTrack) {
        self.audioURL = audioURL
        self.totalSamples = totalSamples
        self.asset = asset
        self.assetTrack = assetTrack
    }

    /// Loads the asset at `audioURL` and reports an `AudioContext` for its first audio track.
    ///
    /// - Parameters:
    ///   - audioURL: location of the audio file.
    ///   - completionHandler: called exactly once, with the context on success or `nil` when
    ///     the file has no audio track, the duration can not be loaded, or the track has no
    ///     usable format description.
    public static func load(fromAudioURL audioURL: URL, completionHandler: @escaping (_ audioContext: AudioContext?) -> ()) {
        let asset = AVURLAsset(url: audioURL, options: [AVURLAssetPreferPreciseDurationAndTimingKey: NSNumber(value: true as Bool)])
        // A file without an audio track is a load failure, not a programmer error.
        guard let assetTrack = asset.tracks(withMediaType: AVMediaType.audio).first else {
            print("Couldn't load AVAssetTrack")
            completionHandler(nil)
            return
        }
        asset.loadValuesAsynchronously(forKeys: ["duration"]) {
            var error: NSError?
            let status = asset.statusOfValue(forKey: "duration", error: &error)
            switch status {
            case .loaded:
                guard
                    let formatDescriptions = assetTrack.formatDescriptions as? [CMAudioFormatDescription],
                    let audioFormatDesc = formatDescriptions.first,
                    let asbd = CMAudioFormatDescriptionGetStreamBasicDescription(audioFormatDesc)
                else { break }
                // samples = sample rate * duration in seconds
                let totalSamples = Int((asbd.pointee.mSampleRate) * Float64(asset.duration.value) / Float64(asset.duration.timescale))
                let audioContext = AudioContext(audioURL: audioURL, totalSamples: totalSamples, asset: asset, assetTrack: assetTrack)
                completionHandler(audioContext)
                return
            case .failed, .cancelled, .loading, .unknown:
                print("Couldn't load asset: \(error?.localizedDescription ?? "Unknown error")")
            @unknown default:
                print("Couldn't load asset: \(error?.localizedDescription ?? "Unknown error")")
            }
            // Every path that did not return a context above ends here.
            completionHandler(nil)
        }
    }
}
We are going to use its asynchronous function load, and handle its result to a completion handler.
B) Import AVFoundation and Accelerate in your view controller:
import AVFoundation
import Accelerate
C) Declare the noise level in your view controller (in dB):
let noiseFloor: Float = -80
For example, anything less than -80dB will be considered as silence.
D) The following function takes an audio context and produces the desired dB powers. targetSamples is by default set to 100, you can change that to suit your UI needs:
/// Reads the audio track of `audioContext` as 16-bit integer linear PCM and returns
/// the down-sampled, dB-converted values produced by `processSamples`.
///
/// - Parameters:
///   - audioContext: The loaded asset context; `nil` triggers a `fatalError`.
///   - targetSamples: Divisor used to derive how many source samples are averaged
///     into one output value (`samplesPerPixel`). Defaults to 100.
/// - Returns: The accumulated output values, one per processed group of samples.
func render(audioContext: AudioContext?, targetSamples: Int = 100) -> [Float]{
guard let audioContext = audioContext else {
fatalError("Couldn't create the audioContext")
}
let sampleRange: CountableRange<Int> = 0..<audioContext.totalSamples
guard let reader = try? AVAssetReader(asset: audioContext.asset)
else {
fatalError("Couldn't initialize the AVAssetReader")
}
// NOTE(review): the sample counts are expressed with the asset duration's timescale;
// this presumably assumes that timescale equals the sample rate — confirm.
reader.timeRange = CMTimeRange(start: CMTime(value: Int64(sampleRange.lowerBound), timescale: audioContext.asset.duration.timescale),
duration: CMTime(value: Int64(sampleRange.count), timescale: audioContext.asset.duration.timescale))
// Ask the reader for 16-bit, little-endian, integer, interleaved linear PCM.
let outputSettingsDict: [String : Any] = [
AVFormatIDKey: Int(kAudioFormatLinearPCM),
AVLinearPCMBitDepthKey: 16,
AVLinearPCMIsBigEndianKey: false,
AVLinearPCMIsFloatKey: false,
AVLinearPCMIsNonInterleaved: false
]
let readerOutput = AVAssetReaderTrackOutput(track: audioContext.assetTrack,
outputSettings: outputSettingsDict)
readerOutput.alwaysCopiesSampleData = false
reader.add(readerOutput)
// Channel count is taken from the track's format descriptions (the last one wins).
var channelCount = 1
let formatDescriptions = audioContext.assetTrack.formatDescriptions as! [CMAudioFormatDescription]
for item in formatDescriptions {
guard let fmtDesc = CMAudioFormatDescriptionGetStreamBasicDescription(item) else {
fatalError("Couldn't get the format description")
}
channelCount = Int(fmtDesc.pointee.mChannelsPerFrame)
}
// Number of interleaved samples averaged into one output value (at least 1).
// NOTE(review): targetSamples == 0 divides by zero here.
let samplesPerPixel = max(1, channelCount * sampleRange.count / targetSamples)
// Averaging filter: samplesPerPixel equal weights that sum to 1.
let filter = [Float](repeating: 1.0 / Float(samplesPerPixel), count: samplesPerPixel)
var outputSamples = [Float]()
// Accumulates raw PCM bytes that have been read but not yet processed.
var sampleBuffer = Data()
// 16-bit samples
reader.startReading()
defer { reader.cancelReading() }
while reader.status == .reading {
guard let readSampleBuffer = readerOutput.copyNextSampleBuffer(),
let readBuffer = CMSampleBufferGetDataBuffer(readSampleBuffer) else {
break
}
// Append audio sample buffer into our current sample buffer
var readBufferLength = 0
var readBufferPointer: UnsafeMutablePointer<Int8>?
CMBlockBufferGetDataPointer(readBuffer, 0, &readBufferLength, nil, &readBufferPointer)
sampleBuffer.append(UnsafeBufferPointer(start: readBufferPointer, count: readBufferLength))
CMSampleBufferInvalidate(readSampleBuffer)
// Process only whole groups of samplesPerPixel; the remainder stays in sampleBuffer.
let totalSamples = sampleBuffer.count / MemoryLayout<Int16>.size
let downSampledLength = totalSamples / samplesPerPixel
let samplesToProcess = downSampledLength * samplesPerPixel
guard samplesToProcess > 0 else { continue }
processSamples(fromData: &sampleBuffer,
outputSamples: &outputSamples,
samplesToProcess: samplesToProcess,
downSampledLength: downSampledLength,
samplesPerPixel: samplesPerPixel,
filter: filter)
//print("Status: \(reader.status)")
}
// Process the remaining samples at the end which didn't fit into samplesPerPixel
let samplesToProcess = sampleBuffer.count / MemoryLayout<Int16>.size
if samplesToProcess > 0 {
// The leftover samples are averaged into a single output value.
let downSampledLength = 1
let samplesPerPixel = samplesToProcess
let filter = [Float](repeating: 1.0 / Float(samplesPerPixel), count: samplesPerPixel)
processSamples(fromData: &sampleBuffer,
outputSamples: &outputSamples,
samplesToProcess: samplesToProcess,
downSampledLength: downSampledLength,
samplesPerPixel: samplesPerPixel,
filter: filter)
//print("Status: \(reader.status)")
}
// if (reader.status == AVAssetReaderStatusFailed || reader.status == AVAssetReaderStatusUnknown)
// Any reader status other than .completed at this point is treated as fatal.
guard reader.status == .completed else {
fatalError("Couldn't read the audio file")
}
return outputSamples
}
E) render uses this function to down-sample the data from the audio file, and convert to decibels:
/// Converts the first `samplesToProcess` 16-bit samples of `sampleBuffer` to clipped dB
/// values, averages them down to `downSampledLength` values, appends those to
/// `outputSamples`, and removes the consumed bytes from `sampleBuffer`.
///
/// - Parameters:
///   - sampleBuffer: Raw 16-bit PCM bytes; the processed prefix is removed.
///   - outputSamples: Receives the down-sampled dB values.
///   - samplesToProcess: Number of Int16 samples to consume from the front of the buffer.
///   - downSampledLength: Number of output values to produce.
///   - samplesPerPixel: Decimation factor and filter length.
///   - filter: Averaging weights; must contain at least `samplesPerPixel` elements.
///
/// Does nothing when there is nothing to process or when the arguments would make
/// vDSP read past the end of the supplied buffers.
func processSamples(fromData sampleBuffer: inout Data,
                    outputSamples: inout [Float],
                    samplesToProcess: Int,
                    downSampledLength: Int,
                    samplesPerPixel: Int,
                    filter: [Float]) {
    let byteCount = samplesToProcess * MemoryLayout<Int16>.size
    guard samplesToProcess > 0,
        sampleBuffer.count >= byteCount,
        filter.count >= samplesPerPixel,
        downSampledLength * samplesPerPixel <= samplesToProcess else {
            return
    }

    var processingBuffer = [Float](repeating: 0.0, count: samplesToProcess)
    let sampleCount = vDSP_Length(samplesToProcess)

    // Only the conversion needs the raw bytes; keeping the closure this small means
    // `sampleBuffer` is never mutated while a pointer into its storage is alive.
    sampleBuffer.withUnsafeBytes { (samples: UnsafePointer<Int16>) in
        //Convert 16bit int samples to floats
        vDSP_vflt16(samples, 1, &processingBuffer, 1, sampleCount)
    }

    //Take the absolute values to get amplitude
    vDSP_vabs(processingBuffer, 1, &processingBuffer, 1, sampleCount)
    //get the corresponding dB, and clip the results
    getdB(from: &processingBuffer)

    //Downsample and average
    var downSampledData = [Float](repeating: 0.0, count: downSampledLength)
    vDSP_desamp(processingBuffer,
                vDSP_Stride(samplesPerPixel),
                filter, &downSampledData,
                vDSP_Length(downSampledLength),
                vDSP_Length(samplesPerPixel))

    //Remove processed samples
    sampleBuffer.removeFirst(byteCount)
    outputSamples += downSampledData
}
F) Which in turn calls this function that gets the corresponding dB, and clips the results to [noiseFloor, 0]:
/// Converts amplitudes to decibels relative to 32768 in place, then clips the
/// result to the range [noiseFloor, 0].
///
/// - Parameter normalizedSamples: Amplitude values; overwritten with clipped dB values.
func getdB(from normalizedSamples: inout [Float]) {
    let count = vDSP_Length(normalizedSamples.count)
    var fullScale: Float = 32768.0
    var upperBound: Float = 0.0
    var lowerBound = noiseFloor
    normalizedSamples.withUnsafeMutableBufferPointer { buffer in
        // An empty array has nothing to convert.
        guard let base = buffer.baseAddress else { return }
        // Log scale, using the amplitude flag (1) as the last argument.
        vDSP_vdbcon(base, 1, &fullScale, base, 1, count, 1)
        // Clamp every value into [lowerBound, upperBound].
        vDSP_vclip(base, 1, &lowerBound, &upperBound, base, 1, count)
    }
}
G) Finally you can get the waveform of the audio like so:
// Locate the bundled audio file; a missing resource stops the program.
guard let path = Bundle.main.path(forResource: "audio", ofType:"mp3") else {
    fatalError("Couldn't find the file path")
}
var outputArray : [Float] = []
let url = URL(fileURLWithPath: path)
// The completion closure runs later: outputArray is only filled once loading has finished.
AudioContext.load(fromAudioURL: url) { loadedContext in
    if let loadedContext = loadedContext {
        outputArray = self.render(audioContext: loadedContext, targetSamples: 300)
    } else {
        fatalError("Couldn't create the audioContext")
    }
}
Don't forget that AudioContext.load(fromAudioURL:) is asynchronous.
This solution is synthesized from this repo by William Entriken. All credit goes to him.
Swift 5
Here is the same code updated to Swift 5 syntax:
import AVFoundation
import Accelerate
/// Holds audio information used for building waveforms
final class AudioContext {
    /// The audio asset URL used to load the context
    public let audioURL: URL
    /// Total number of samples in loaded asset
    public let totalSamples: Int
    /// Loaded asset
    public let asset: AVAsset
    /// Loaded assetTrack
    public let assetTrack: AVAssetTrack

    private init(audioURL: URL, totalSamples: Int, asset: AVAsset, assetTrack: AVAssetTrack) {
        self.audioURL = audioURL
        self.totalSamples = totalSamples
        self.asset = asset
        self.assetTrack = assetTrack
    }

    /// Loads the asset at `audioURL` and builds an `AudioContext` for its first audio track.
    ///
    /// - Parameters:
    ///   - audioURL: Location of the audio file.
    ///   - completionHandler: Receives the context, or `nil` when the asset has no audio
    ///     track, its duration could not be loaded, or no stream description is available.
    ///     It is invoked from the asset's loading callback, except for the "no audio track"
    ///     case, where it is invoked before this method returns.
    public static func load(fromAudioURL audioURL: URL, completionHandler: @escaping (_ audioContext: AudioContext?) -> ()) {
        let asset = AVURLAsset(url: audioURL, options: [AVURLAssetPreferPreciseDurationAndTimingKey: NSNumber(value: true as Bool)])
        // A file without an audio track is a data problem, not a programmer error:
        // report it through the completion handler instead of crashing.
        guard let assetTrack = asset.tracks(withMediaType: AVMediaType.audio).first else {
            print("Couldn't load AVAssetTrack")
            completionHandler(nil)
            return
        }
        asset.loadValuesAsynchronously(forKeys: ["duration"]) {
            var error: NSError?
            let status = asset.statusOfValue(forKey: "duration", error: &error)
            switch status {
            case .loaded:
                guard
                    let formatDescriptions = assetTrack.formatDescriptions as? [CMAudioFormatDescription],
                    let audioFormatDesc = formatDescriptions.first,
                    let asbd = CMAudioFormatDescriptionGetStreamBasicDescription(audioFormatDesc)
                    else { break }
                // Total samples = sample rate * duration in seconds.
                let totalSamples = Int((asbd.pointee.mSampleRate) * Float64(asset.duration.value) / Float64(asset.duration.timescale))
                let audioContext = AudioContext(audioURL: audioURL, totalSamples: totalSamples, asset: asset, assetTrack: assetTrack)
                completionHandler(audioContext)
                return
            case .failed, .cancelled, .loading, .unknown:
                print("Couldn't load asset: \(error?.localizedDescription ?? "Unknown error")")
            @unknown default:
                print("Couldn't load asset: \(error?.localizedDescription ?? "Unknown error")")
            }
            completionHandler(nil)
        }
    }
}
/// Lower clipping bound, in dB, used by `getdB(from:)`: values are clipped to the range [noiseFloor, 0].
let noiseFloor: Float = -80
/// Reads the audio track of `audioContext` as 16-bit integer linear PCM and returns
/// the down-sampled, dB-converted values produced by `processSamples`.
///
/// - Parameters:
///   - audioContext: The loaded asset context; `nil` triggers a `fatalError`.
///   - targetSamples: Divisor used to derive how many source samples are averaged
///     into one output value. Values below 1 are treated as 1. Defaults to 100.
/// - Returns: The accumulated output values, one per processed group of samples.
func render(audioContext: AudioContext?, targetSamples: Int = 100) -> [Float] {
    guard let audioContext = audioContext else {
        fatalError("Couldn't create the audioContext")
    }
    let sampleRange: CountableRange<Int> = 0..<audioContext.totalSamples
    guard let reader = try? AVAssetReader(asset: audioContext.asset) else {
        fatalError("Couldn't initialize the AVAssetReader")
    }

    // NOTE(review): the sample counts are expressed with the asset duration's timescale;
    // this presumably assumes that timescale equals the sample rate — confirm.
    let timescale = audioContext.asset.duration.timescale
    reader.timeRange = CMTimeRange(start: CMTime(value: Int64(sampleRange.lowerBound), timescale: timescale),
                                   duration: CMTime(value: Int64(sampleRange.count), timescale: timescale))

    // Ask the reader for 16-bit, little-endian, integer, interleaved linear PCM.
    let outputSettingsDict: [String : Any] = [
        AVFormatIDKey: Int(kAudioFormatLinearPCM),
        AVLinearPCMBitDepthKey: 16,
        AVLinearPCMIsBigEndianKey: false,
        AVLinearPCMIsFloatKey: false,
        AVLinearPCMIsNonInterleaved: false
    ]
    let readerOutput = AVAssetReaderTrackOutput(track: audioContext.assetTrack,
                                                outputSettings: outputSettingsDict)
    readerOutput.alwaysCopiesSampleData = false
    reader.add(readerOutput)

    // Channel count comes from the track's format descriptions (the last one wins);
    // it stays at 1 when the descriptions are not audio format descriptions.
    var channelCount = 1
    if let formatDescriptions = audioContext.assetTrack.formatDescriptions as? [CMAudioFormatDescription] {
        for item in formatDescriptions {
            guard let fmtDesc = CMAudioFormatDescriptionGetStreamBasicDescription(item) else {
                fatalError("Couldn't get the format description")
            }
            channelCount = Int(fmtDesc.pointee.mChannelsPerFrame)
        }
    }

    // Clamp the divisor so targetSamples <= 0 cannot cause a division by zero.
    let samplesPerPixel = max(1, channelCount * sampleRange.count / max(1, targetSamples))
    // Averaging filter: samplesPerPixel equal weights that sum to 1.
    let filter = [Float](repeating: 1.0 / Float(samplesPerPixel), count: samplesPerPixel)
    var outputSamples = [Float]()
    // Accumulates raw PCM bytes that have been read but not yet processed.
    var sampleBuffer = Data()

    // 16-bit samples
    reader.startReading()
    defer { reader.cancelReading() }
    while reader.status == .reading {
        guard let readSampleBuffer = readerOutput.copyNextSampleBuffer(),
            let readBuffer = CMSampleBufferGetDataBuffer(readSampleBuffer) else {
                break
        }
        // Append audio sample buffer into our current sample buffer
        var readBufferLength = 0
        var readBufferPointer: UnsafeMutablePointer<Int8>?
        let pointerStatus = CMBlockBufferGetDataPointer(readBuffer,
                                                        atOffset: 0,
                                                        lengthAtOffsetOut: &readBufferLength,
                                                        totalLengthOut: nil,
                                                        dataPointerOut: &readBufferPointer)
        // Only trust the pointer and length when the call reports success.
        if pointerStatus == kCMBlockBufferNoErr, let pointer = readBufferPointer {
            sampleBuffer.append(UnsafeBufferPointer(start: pointer, count: readBufferLength))
        }
        CMSampleBufferInvalidate(readSampleBuffer)

        // Process only whole groups of samplesPerPixel; the remainder stays in sampleBuffer.
        let totalSamples = sampleBuffer.count / MemoryLayout<Int16>.size
        let downSampledLength = totalSamples / samplesPerPixel
        let samplesToProcess = downSampledLength * samplesPerPixel
        guard samplesToProcess > 0 else { continue }
        processSamples(fromData: &sampleBuffer,
                       outputSamples: &outputSamples,
                       samplesToProcess: samplesToProcess,
                       downSampledLength: downSampledLength,
                       samplesPerPixel: samplesPerPixel,
                       filter: filter)
    }

    // Process the remaining samples at the end which didn't fit into samplesPerPixel
    let samplesToProcess = sampleBuffer.count / MemoryLayout<Int16>.size
    if samplesToProcess > 0 {
        // The leftover samples are averaged into a single output value.
        let downSampledLength = 1
        let samplesPerPixel = samplesToProcess
        let filter = [Float](repeating: 1.0 / Float(samplesPerPixel), count: samplesPerPixel)
        processSamples(fromData: &sampleBuffer,
                       outputSamples: &outputSamples,
                       samplesToProcess: samplesToProcess,
                       downSampledLength: downSampledLength,
                       samplesPerPixel: samplesPerPixel,
                       filter: filter)
    }

    // Any reader status other than .completed at this point is treated as fatal.
    guard reader.status == .completed else {
        fatalError("Couldn't read the audio file")
    }
    return outputSamples
}
/// Converts the first `samplesToProcess` 16-bit samples of `sampleBuffer` to clipped dB
/// values, averages them down to `downSampledLength` values, appends those to
/// `outputSamples`, and removes the consumed bytes from `sampleBuffer`.
///
/// - Parameters:
///   - sampleBuffer: Raw 16-bit PCM bytes; the processed prefix is removed.
///   - outputSamples: Receives the down-sampled dB values.
///   - samplesToProcess: Number of Int16 samples to consume from the front of the buffer.
///   - downSampledLength: Number of output values to produce.
///   - samplesPerPixel: Decimation factor and filter length.
///   - filter: Averaging weights; must contain at least `samplesPerPixel` elements.
///
/// Does nothing when there is nothing to process or when the arguments would make
/// vDSP read past the end of the supplied buffers.
func processSamples(fromData sampleBuffer: inout Data,
                    outputSamples: inout [Float],
                    samplesToProcess: Int,
                    downSampledLength: Int,
                    samplesPerPixel: Int,
                    filter: [Float]) {
    let byteCount = samplesToProcess * MemoryLayout<Int16>.size
    guard samplesToProcess > 0,
        sampleBuffer.count >= byteCount,
        filter.count >= samplesPerPixel,
        downSampledLength * samplesPerPixel <= samplesToProcess else {
            return
    }

    var processingBuffer = [Float](repeating: 0.0, count: samplesToProcess)
    let sampleCount = vDSP_Length(samplesToProcess)

    // Only the conversion needs the raw bytes; keeping the closure this small means
    // `sampleBuffer` is never mutated while a pointer into its storage is alive.
    sampleBuffer.withUnsafeBytes { (rawSamples: UnsafeRawBufferPointer) in
        //Create an UnsafePointer<Int16> from samples
        guard let samples = rawSamples.bindMemory(to: Int16.self).baseAddress else { return }
        //Convert 16bit int samples to floats
        vDSP_vflt16(samples, 1, &processingBuffer, 1, sampleCount)
    }

    //Take the absolute values to get amplitude
    vDSP_vabs(processingBuffer, 1, &processingBuffer, 1, sampleCount)
    //get the corresponding dB, and clip the results
    getdB(from: &processingBuffer)

    //Downsample and average
    var downSampledData = [Float](repeating: 0.0, count: downSampledLength)
    vDSP_desamp(processingBuffer,
                vDSP_Stride(samplesPerPixel),
                filter, &downSampledData,
                vDSP_Length(downSampledLength),
                vDSP_Length(samplesPerPixel))

    //Remove processed samples
    sampleBuffer.removeFirst(byteCount)
    outputSamples += downSampledData
}
/// Converts amplitudes to decibels relative to 32768 in place, then clips the
/// result to the range [noiseFloor, 0].
///
/// - Parameter normalizedSamples: Amplitude values; overwritten with clipped dB values.
func getdB(from normalizedSamples: inout [Float]) {
    let count = vDSP_Length(normalizedSamples.count)
    var fullScale: Float = 32768.0
    var upperBound: Float = 0.0
    var lowerBound = noiseFloor
    normalizedSamples.withUnsafeMutableBufferPointer { buffer in
        // An empty array has nothing to convert.
        guard let base = buffer.baseAddress else { return }
        // Log scale, using the amplitude flag (1) as the last argument.
        vDSP_vdbcon(base, 1, &fullScale, base, 1, count, 1)
        // Clamp every value into [lowerBound, upperBound].
        vDSP_vclip(base, 1, &lowerBound, &upperBound, base, 1, count)
    }
}
Old solution
Here is a function you could use to pre-render the meter levels of an audio file without playing it:
/// Pre-renders the meter levels of an audio file without playing it.
///
/// The file is split into consecutive portions of 1/20 s; for each portion the mean of
/// the absolute sample values of one channel is converted to dB.
///
/// - Parameters:
///   - audioFileURL: The audio file to analyse.
///   - channelNumber: Zero-based index of the channel to read.
///   - completionHandler: Receives one dB value per portion. It receives an empty array
///     when the file cannot be opened, the channel index is out of range, or the read
///     buffer cannot be created; if a read fails midway it receives the values computed
///     so far. On success it is called on a background queue.
func averagePowers(audioFileURL: URL, forChannel channelNumber: Int, completionHandler: @escaping(_ success: [Float]) -> ()) {
    let audioFile: AVAudioFile
    do {
        audioFile = try AVAudioFile(forReading: audioFileURL)
    } catch {
        print("Couldn't open the audio file: \(error)")
        completionHandler([])
        return
    }
    let audioFilePFormat = audioFile.processingFormat
    let audioFileLength = audioFile.length

    //Set the size of frames to read from the audio file, you can adjust this to your liking
    let frameSizeToRead = Int(audioFilePFormat.sampleRate/20)
    // A zero portion size would divide by zero below; an invalid channel would index out of bounds.
    guard frameSizeToRead > 0,
        channelNumber >= 0,
        channelNumber < Int(audioFilePFormat.channelCount) else {
            completionHandler([])
            return
    }

    //This is to how many frames/portions we're going to divide the audio file
    let numberOfFrames = Int(audioFileLength)/frameSizeToRead

    //Create a pcm buffer the size of a frame
    guard let audioBuffer = AVAudioPCMBuffer(pcmFormat: audioFilePFormat, frameCapacity: AVAudioFrameCount(frameSizeToRead)) else {
        print("Couldn't create the audio buffer")
        completionHandler([])
        return
    }

    //Do the calculations in a background thread, if you don't want to block the main thread for larger audio files
    DispatchQueue.global(qos: .userInitiated).async {
        //This is the array to be returned
        var returnArray = [Float]()
        returnArray.reserveCapacity(numberOfFrames)

        //We're going to read the audio file, frame by frame
        for i in 0..<numberOfFrames {
            //Change the position from which we are reading the audio file, since each frame starts from a different position in the audio file
            audioFile.framePosition = AVAudioFramePosition(i * frameSizeToRead)

            //Read the frame from the audio file
            do {
                try audioFile.read(into: audioBuffer, frameCount: AVAudioFrameCount(frameSizeToRead))
            } catch {
                print("Couldn't read the audio file: \(error)")
                break
            }

            //Get the data from the chosen channel; only the frames actually read are valid
            let framesRead = Int(audioBuffer.frameLength)
            guard framesRead > 0, let channels = audioBuffer.floatChannelData else { break }
            let samples = UnsafeBufferPointer(start: channels[channelNumber], count: framesRead)

            //Calculate the mean value of the absolute values
            let meanValue = samples.reduce(0, {$0 + abs($1)})/Float(framesRead)

            //Calculate the dB power (You can adjust this), if average is less than 0.000_000_01 we limit it to -160.0
            let dbPower: Float = meanValue > 0.000_000_01 ? 20 * log10(meanValue) : -160.0

            //append the db power in the current frame to the returnArray
            returnArray.append(dbPower)
        }

        //Return the dBPowers
        completionHandler(returnArray)
    }
}
And you can call it like so:
// The resource ships with the app bundle, so its path is force-unwrapped.
let path = Bundle.main.path(forResource: "audio.mp3", ofType:nil)!
let url = URL(fileURLWithPath: path)
// Trailing-closure form of the same call; `array` holds one dB value per portion.
averagePowers(audioFileURL: url, forChannel: 0) { array in
    //Use the array
}
According to Instruments, this solution causes high CPU usage for about 1.2 seconds, takes about 5 seconds to return to the main thread with the returnArray, and up to 10 seconds when in low battery mode.
First of all, this is a heavy operation, so it will take some OS time and resources to accomplish. In the example below I use standard frame rates and sampling, but you should really sample far less if, for example, you only want to display bars as an indication.
OK, so you don't need to play the sound to analyze it. I will therefore not use AVAudioPlayer at all, and I assume the track is available as a URL:
// Build a file URL for the audio resource in the main bundle (the path is force-unwrapped).
let path = Bundle.main.path(forResource: "example3.mp3", ofType:nil)!
let url = URL(fileURLWithPath: path)
Then I will use AVAudioFile to get track information into AVAudioPCMBuffer. Whenever you have it in buffer you have all information regarding your track:
/// Reads the whole audio file at `url` into a Float32, non-interleaved PCM buffer
/// and passes that buffer to `analyze(buffer:)`.
///
/// Failures are printed instead of crashing: errors thrown while opening or reading
/// the file, a file length that does not fit an `AVAudioFrameCount`, or a format or
/// buffer that cannot be created.
///
/// - Parameter url: Location of the audio file to load.
func buffer(url: URL) {
    do {
        let track = try AVAudioFile(forReading: url)
        guard let frameCount = AVAudioFrameCount(exactly: track.length),
            let format = AVAudioFormat(commonFormat: .pcmFormatFloat32,
                                       sampleRate: track.fileFormat.sampleRate,
                                       channels: track.fileFormat.channelCount,
                                       interleaved: false),
            let buffer = AVAudioPCMBuffer(pcmFormat: format, frameCapacity: frameCount) else {
                print("Couldn't create a PCM buffer for \(url)")
                return
        }
        try track.read(into: buffer, frameCount: frameCount)
        self.analyze(buffer: buffer)
    } catch {
        print(error)
    }
}
As you may notice, there is an analyze method for it. The samples are available through the floatChannelData property of your buffer. It is plain data, so you will need to parse it. I will post the method and explain it below:
/// Computes a per-sample dB value for every channel of `buffer`.
///
/// Each value is `20 * log10(sqrt(sample / frameLength))`.
/// NOTE(review): `sqrt` of a negative sample yields NaN, so negative samples presumably
/// produce NaN entries — confirm whether the formula should square the sample first.
///
/// - Parameter buffer: PCM buffer holding Float32 channel data.
/// - Returns: One array of dB values per channel, or an empty array when the buffer
///   exposes no float channel data. The result may be ignored by existing callers.
@discardableResult
func analyze(buffer: AVAudioPCMBuffer) -> [[Float]] {
    guard let channelData = buffer.floatChannelData else { return [] }
    let channelCount = Int(buffer.format.channelCount)
    let frameLength = Int(buffer.frameLength)
    // Hoisted out of the loops: these do not change per sample.
    let sampleStride = buffer.stride
    let divisor = Float(buffer.frameLength)

    return (0..<channelCount).map { channel -> [Float] in
        let samples = channelData[channel]
        return (0..<frameLength).map { sampleIndex -> Float in
            let sqrtV = sqrt(samples[sampleIndex * sampleStride] / divisor)
            return 20 * log10(sqrtV)
        }
    }
}
There are some (heavy) calculations involved in it. When I was working on a similar solution a couple of months ago, I came across this tutorial: https://www.raywenderlich.com/5154-avaudioengine-tutorial-for-ios-getting-started. It contains an excellent explanation of this calculation, as well as parts of the code that I pasted above and also use in my project, so I want to credit the author here: Scott McAlister 👏
Based on @Jakub's answer above, here's an Objective-C version.
If you want to increase the accuracy, change the deciblesCount variable, but beware of performance hit. If you want to return more bars, you can increase the divisions variable when you call the function (with no additional performance hit). You should probably put it on a background thread in any case.
A 3:36 minute / 5.2MB song takes about 1.2s. The above images are of a shotgun firing with 30 and 100 divisions respectively
// Builds normalised bar values (0...1) for up to two channels of an audio file.
//
// filepath:  file-system path (or URL string with a scheme) of the audio file.
// divisions: number of bars to return per channel; must be greater than zero.
// Returns an array with one array of NSNumber percentages per channel, or an empty
// array when the arguments are invalid or the file cannot be read.
-(NSArray *)returnWaveArrayForFile:(NSString *)filepath numberOfDivisions:(int)divisions{

    if (filepath.length == 0 || divisions <= 0){
        return @[];
    }

    //pull file
    NSError * error = nil;
    //a plain path has no scheme (and is not parseable at all when it contains spaces), so treat it as a file path
    NSURL * url = [NSURL URLWithString:filepath];
    if (url == nil || url.scheme == nil){
        url = [NSURL fileURLWithPath:filepath];
    }
    AVAudioFile * file = [[AVAudioFile alloc] initForReading:url error:&error];
    if (file == nil || file.length <= 0){
        NSLog(@"Couldn't open audio file %@: %@", filepath, error);
        return @[];
    }

    //create av stuff
    AVAudioFormat * format = [[AVAudioFormat alloc] initWithCommonFormat:AVAudioPCMFormatFloat32 sampleRate:file.fileFormat.sampleRate channels:file.fileFormat.channelCount interleaved:false];
    AVAudioPCMBuffer * buffer = [[AVAudioPCMBuffer alloc] initWithPCMFormat:format frameCapacity:(AVAudioFrameCount)file.length];
    if (buffer == nil || ![file readIntoBuffer:buffer frameCount:(AVAudioFrameCount)file.length error:&error]){
        NSLog(@"Couldn't read audio file %@: %@", filepath, error);
        return @[];
    }
    if (buffer.frameLength == 0 || buffer.floatChannelData == NULL){
        return @[];
    }

    //grab total number of decibles, 1000 seems to work
    int deciblesCount = (int)MIN((AVAudioFrameCount)1000, buffer.frameLength);
    NSMutableArray * channels = [NSMutableArray new];
    float frameIncrement = buffer.frameLength / (float)deciblesCount;

    //needed later
    float maxDecible = 0;
    float minDecible = HUGE_VALF;
    NSMutableArray * sd = [NSMutableArray new]; //used for standard deviation
    AVAudioChannelCount channelLimit = MIN(buffer.format.channelCount, (AVAudioChannelCount)2);
    for (AVAudioChannelCount n = 0; n < channelLimit; n++){ //go through channels
        NSMutableArray * decibles = [NSMutableArray new]; //holds actual decible values

        //go through pulling the decibles
        for (int i = 0; i < deciblesCount; i++){

            int offset = frameIncrement * i; //grab offset

            //equation from stack, no idea the maths
            float sqr = sqrtf(buffer.floatChannelData[n][offset * buffer.stride]/(float)buffer.frameLength);
            float decible = 20 * log10f(sqr);

            decible += 160; //make positive
            decible = (isnan(decible) || decible < 0) ? 0 : decible; //if it's not a number or silent, make it zero
            if (decible > 0){ //if it has volume
                [sd addObject:@(decible)];
            }
            [decibles addObject:@(decible)];//add to decibles array

            maxDecible = MAX(maxDecible, decible); //grab biggest
            minDecible = MIN(minDecible, decible); //grab smallest
        }

        [channels addObject:decibles]; //add to channels array
    }

    //find standard deviation and then deducted the bottom slag
    //(skipped when every sample was silent, since there is nothing to evaluate)
    float standardDeviation = 0;
    if (sd.count > 0){
        NSExpression * expression = [NSExpression expressionForFunction:@"stddev:" arguments:@[[NSExpression expressionForConstantValue:sd]]];
        standardDeviation = [[expression expressionValueWithObject:nil context:nil] floatValue];
    }
    float deductDenominator = standardDeviation + (maxDecible - minDecible);
    float deviationDeduct = deductDenominator > 0 ? standardDeviation / deductDenominator : 0;

    NSMutableArray * returning = [NSMutableArray new];
    for (int c = 0; c < (int)channels.count; c++){

        //go through calculating deviation percentage
        //created per channel so that each channel's totals index into its own values
        NSMutableArray * deviations = [NSMutableArray new];
        NSArray * channel = channels[c];
        for (int n = 0; n < (int)channel.count; n++){

            float decible = [channel[n] floatValue];
            float remainder = (maxDecible - decible);
            float denominator = standardDeviation + remainder;
            float deviation = denominator > 0 ? standardDeviation / denominator - deviationDeduct : 0;
            [deviations addObject:@(deviation)];
        }

        //go through creating percentage
        float maxTotal = 0;
        int catchCount = deciblesCount / divisions; //total decible values within a segment or division
        NSMutableArray * totals = [NSMutableArray new];
        for (int n = 0; n < divisions; n++){

            float total = 0.0f;
            for (int k = 0; k < catchCount; k++){ //go through each segment

                int index = n * catchCount + k; //create the index
                float deviation = [deviations[index] floatValue]; //grab value
                total += deviation; //add to total
            }

            //max out maxTotal var -> used later to calc percentages
            maxTotal = MAX(maxTotal, total);
            [totals addObject:@(total)]; //add to totals array
        }

        //normalise percentages and return
        NSMutableArray * percentages = [NSMutableArray new];
        for (int n = 0; n < divisions; n++){

            float total = [totals[n] floatValue]; //grab the total value for that segment
            float percentage = maxTotal > 0 ? total / maxTotal : 0; //divide by the biggest value -> making it a percentage
            [percentages addObject:@(percentage)]; //add to the array
        }

        //add to the returning array
        [returning addObject:percentages];
    }

    //return channel data -> array of two arrays of percentages
    return (NSArray *)returning;
}
Call like this:
int divisions = 30; //number of segments you want for your display
//file-system path of the bundled audio resource
NSString * path = [[NSBundle mainBundle] pathForResource:@"satie" ofType:@"mp3"];
NSArray * channels = [_audioReader returnWaveArrayForFile:path numberOfDivisions:divisions];
You get the two channels back in that array, which you can use to update your UI. Values in each array are between 0 and 1 which you can use to build your bars.

Decrypt Media Files in chunks and play via AVPlayer

I have an mp4 video file which I encrypt before saving and decrypt in order to play it via AVPlayer. I am using the CryptoSwift library for encryption and decryption.
It works fine when I decrypt the whole file at once, but my file is quite big, and doing so takes 100% CPU and a lot of memory. So I need to decrypt the encrypted file in chunks.
I tried to decrypt the file in chunks, but the video does not play because AVPlayer does not recognize the decrypted chunk data; maybe the data is not stored sequentially when the file is encrypted. I have tried the ChaCha20, AES, AES.CTR and AES.CBC ciphers to encrypt and decrypt files, but to no avail.
// AVAssetResourceLoaderDelegate conformance that answers loading requests for the
// custom video scheme by reading bytes from the encrypted file and decrypting them.
extension PlayerController: AVAssetResourceLoaderDelegate {
// Handles one loading request. Requests whose URL has no path or does not use
// Constants.customVideoScheme are not served here. Always returns true.
func resourceLoader(resourceLoader: AVAssetResourceLoader, shouldWaitForLoadingOfRequestedResource loadingRequest: AVAssetResourceLoadingRequest) -> Bool {
let request = loadingRequest.request
guard let path = request.URL?.path where request.URL?.scheme == Constants.customVideoScheme else { return true }
// Content-information request: report length, type and byte-range support.
if let contentRequest = loadingRequest.contentInformationRequest {
do {
let fileAttributes = try NSFileManager.defaultManager().attributesOfItemAtPath(path)
if let fileSizeNumber = fileAttributes[NSFileSize] {
contentRequest.contentLength = fileSizeNumber.longLongValue
}
} catch { }
// Open the file handle once and keep it for later requests.
if fileHandle == nil {
fileHandle = NSFileHandle(forReadingAtPath: (request.URL?.path)!)!
}
contentRequest.contentType = "video/mp4"
contentRequest.byteRangeAccessSupported = true
}
// Respond with the decrypted bytes and finish the request when both are available.
if let data = decryptData(loadingRequest, path: path), dataRequest = loadingRequest.dataRequest {
dataRequest.respondWithData(data)
loadingRequest.finishLoading()
return true
}
return true
}
// Reads `requestedLength` bytes for the request from `fileHandle` and returns them
// after passing them through `decodeVideoData`.
// NOTE(review): each chunk is handed to decodeVideoData on its own; if that cipher
// depends on the position within the stream, a chunk read from a non-zero offset
// will presumably not decrypt correctly — confirm.
func decryptData(loadingRequest: AVAssetResourceLoadingRequest, path: String) -> NSData? {
print("Current OFFSET: \(loadingRequest.dataRequest?.currentOffset)")
print("requested OFFSET: \(loadingRequest.dataRequest?.requestedOffset)")
print("Current Length: \(loadingRequest.dataRequest?.requestedLength)")
if loadingRequest.contentInformationRequest != nil {
// Read from the handle's current position, then rewind the handle to offset 0.
var data = fileHandle!.readDataOfLength((loadingRequest.dataRequest?.requestedLength)!)
fileHandle!.seekToFileOffset(0)
data = decodeVideoData(data)!
return data
} else {
// Seek to the request's current offset before reading.
fileHandle?.seekToFileOffset(UInt64((loadingRequest.dataRequest?.currentOffset)!))
let data = fileHandle!.readDataOfLength((loadingRequest.dataRequest?.requestedLength)!)
// let data = fileHandle!.readDataOfLength(length!) ** When I use this its not playing video but play fine when try with requestedLength **
return decodeVideoData(data)
}
}
}
Decode code to decode nsdata :
/// Decrypts `data` with ChaCha20, using the key and IV from `Constants.Encryption`.
///
/// - Parameter data: The encrypted bytes.
/// - Returns: The decrypted bytes, or `nil` when the cipher cannot be created or
///   decryption throws (previously a thrown error crashed through `try!`).
func decodeVideoData(data: NSData) -> NSData? {
    guard let cha = ChaCha20(key: Constants.Encryption.SecretKey, iv: Constants.Encryption.IvKey) else {
        return nil
    }
    do {
        let decrypted: NSData = try data.decrypt(cha)
        return decrypted
    } catch {
        print("Couldn't decrypt video data: \(error)")
        return nil
    }
}
I need help regarding this issue, Kindly guide me to the right way to achieve this.
For in depth and a more complete CommonCrypto wrapper, check out my CommonCrypto wrapper. I've extracted bits and pieces for this answer.
First of all, we need to define some functions that will do the encryption/decryption. I'm assuming, for now, you use AES(256) CBC with PKCS#7 padding. Summarising the snippet below: we have an update function, that can be called repeatedly to consume the chunks. There's also a final function that will wrap up any left overs (usually deals with padding).
import CommonCrypto
import Foundation
/// Error thrown by the cryptor helpers below.
enum CryptoError: Error {
/// Carries the non-success `CCCryptorStatus` returned by a CommonCrypto call.
case generic(CCCryptorStatus)
}
/// Thin wrapper around `CCCryptorGetOutputLength`.
///
/// - Parameters:
///   - reference: The cryptor to query.
///   - inputLength: Number of input bytes about to be processed.
///   - final: Forwarded unchanged as the `final` argument.
/// - Returns: The output length reported by CommonCrypto.
func getOutputLength(_ reference: CCCryptorRef?, inputLength: Int, final: Bool) -> Int {
    let requiredLength = CCCryptorGetOutputLength(reference, inputLength, final)
    return requiredLength
}
/// Feeds one chunk of input to the cryptor and returns the bytes it produced.
///
/// - Parameters:
///   - reference: The cryptor created with `CCCryptorCreate`.
///   - data: The next chunk of input.
/// - Returns: Exactly the bytes written by `CCCryptorUpdate` for this chunk, which may
///   be fewer than the estimated output length (possibly none).
/// - Throws: `CryptoError.generic` carrying the status when the update fails.
func update(_ reference: CCCryptorRef?, data: Data) throws -> Data {
    var output = [UInt8](repeating: 0, count: getOutputLength(reference, inputLength: data.count, final: false))
    // Number of bytes actually written; the buffer size above is only an estimate.
    var moved = 0
    let status = data.withUnsafeBytes { dataPointer -> CCCryptorStatus in
        CCCryptorUpdate(reference, dataPointer.baseAddress, dataPointer.count, &output, output.count, &moved)
    }
    guard status == kCCSuccess else {
        throw CryptoError.generic(status)
    }
    // Drop the unwritten tail so callers never receive zero-filled filler bytes.
    output.removeSubrange(moved...)
    return Data(output)
}
/// Finishes the cryptor operation and returns whatever bytes `CCCryptorFinal` wrote.
///
/// - Parameter reference: The cryptor created with `CCCryptorCreate`.
/// - Returns: The bytes written by the final call.
/// - Throws: `CryptoError.generic` carrying the status when the final call fails.
func final(_ reference: CCCryptorRef?) throws -> Data {
    let capacity = getOutputLength(reference, inputLength: 0, final: true)
    var buffer = [UInt8](repeating: 0, count: capacity)
    var written = 0
    let result = CCCryptorFinal(reference, &buffer, buffer.count, &written)
    if result != kCCSuccess {
        throw CryptoError.generic(result)
    }
    // Keep only the bytes that were actually written.
    return Data(buffer[..<written])
}
Next up, for the purpose of demonstration, the encryption.
// Demonstration key and IV (fixed values, for the example only).
let key = Data(repeating: 0x0a, count: kCCKeySizeAES256)
let iv = Data(repeating: 0, count: kCCBlockSizeAES128)
// 0xffff blocks of one AES block each, every block filled with one random byte value.
let bigFile = (0 ..< 0xffff).map { _ in
    return Data(repeating: UInt8.random(in: 0 ... UInt8.max), count: kCCBlockSizeAES128)
}.reduce(Data(), +)
var encryptor: CCCryptorRef?
let encryptorStatus = CCCryptorCreate(CCOperation(kCCEncrypt), CCAlgorithm(kCCAlgorithmAES), CCOptions(kCCOptionPKCS7Padding), Array(key), key.count, Array(iv), &encryptor)
// Declared outside the `do` block so the decryption example that follows can read it.
var ciphertext = Data()
do {
    guard encryptorStatus == kCCSuccess else {
        throw CryptoError.generic(encryptorStatus)
    }
    ciphertext = try update(encryptor, data: bigFile) + final(encryptor)
    print(ciphertext) // 1048576 bytes
} catch {
    print(error)
}
// Release the cryptor once it is no longer needed.
if let encryptor = encryptor {
    CCCryptorRelease(encryptor)
}
That appears to me as quite a large file. Now decrypting, would be done in a similar fashion.
var decryptor: CCCryptorRef?
let decryptorStatus = CCCryptorCreate(CCOperation(kCCDecrypt), CCAlgorithm(kCCAlgorithmAES), CCOptions(kCCOptionPKCS7Padding), Array(key), key.count, Array(iv), &decryptor)
do {
    guard decryptorStatus == kCCSuccess else {
        throw CryptoError.generic(decryptorStatus)
    }
    var plaintext = Data()
    let chunkSize = kCCBlockSizeAES128
    // Walk the whole ciphertext chunk by chunk. The ciphertext is one block longer than
    // the plaintext (1048576 vs 1048560 bytes), so a fixed count of 0xffff blocks would
    // leave the last block unprocessed.
    for chunkStart in stride(from: ciphertext.startIndex, to: ciphertext.endIndex, by: chunkSize) {
        let chunkEnd = min(chunkStart + chunkSize, ciphertext.endIndex)
        plaintext += try update(decryptor, data: ciphertext[chunkStart ..< chunkEnd])
    }
    plaintext += try final(decryptor)
    print(plaintext == bigFile, plaintext) // true 1048560 bytes
} catch {
    print(error)
}
// Release the cryptor once it is no longer needed.
if let decryptor = decryptor {
    CCCryptorRelease(decryptor)
}
The encryptor can be altered for different modes and should also be released once it's done, and I'm not too sure how arbitrary output on the update function will behave, but this should be enough to give you an idea of how it can be done using CommonCrypto.

AudioFileReadBytes from a memory block, not a file

I'd like to cache CAF files before converting them to PCM whenever they play.
For example,
/* Illustrative snippet: load the whole compressed CAF file into memory up front. */
char *mybuffer = malloc(mysoundsize);
FILE *f = fopen("mysound.caf", "rb");
fread(mybuffer, mysoundsize, 1, f);
fclose(f);
/* Destination for the converted PCM data. */
char *pcmBuffer = malloc(pcmsoundsize);
// Convert to PCM for playing
/* NOTE: this call is the desired usage, not working code: it passes the in-memory
   block (mybuffer) where the surrounding question says an AudioFileID is needed. */
AudioFileReadBytes(mybuffer, false, 0, mysoundsize, &numbytes, pcmBuffer);
I have not done it myself, but from the documentation I would think that you have to use AudioFileOpenWithCallbacks and implement callback functions that read from your memory buffer.
You can accomplish this with AudioFileStreamOpen
/// Identifier of the audio file stream that `parse(data:)` feeds.
fileprivate var streamID: AudioFileStreamID?

/// Passes a chunk of in-memory audio file data to the audio file stream parser.
///
/// - Parameter data: The next bytes of the audio file; empty data is ignored.
/// - Throws: `ParserError.failedToParseBytes` carrying the status when parsing fails,
///   or carrying `kAudioFileStreamError_IllegalOperation` when no stream has been
///   opened yet (previously this case crashed on a force unwrap).
public func parse(data: Data) throws {
    guard let streamID = self.streamID else {
        throw ParserError.failedToParseBytes(kAudioFileStreamError_IllegalOperation)
    }
    guard !data.isEmpty else { return }
    // The raw-buffer closure replaces the deprecated typed-pointer variant.
    let result = data.withUnsafeBytes { (bytes: UnsafeRawBufferPointer) -> OSStatus in
        AudioFileStreamParseBytes(streamID, UInt32(bytes.count), bytes.baseAddress, [])
    }
    guard result == noErr else {
        throw ParserError.failedToParseBytes(result)
    }
}
you can store the data in memory within the callback
/// Packet callback: copies every parsed packet into `parser.packetsX`.
///
/// - Parameters:
///   - context: Opaque pointer to the `Parser` instance (unretained).
///   - byteCount: Number of bytes available at `data`.
///   - packetCount: Number of packets contained in `data`.
///   - data: The packet bytes.
///   - packetDescriptions: Per-packet offset and size. When present they are used to
///     slice `data`; when absent every packet is assumed to be `mBytesPerPacket` long.
func ParserPacketCallback(_ context: UnsafeMutableRawPointer, _ byteCount: UInt32, _ packetCount: UInt32, _ data: UnsafeRawPointer, _ packetDescriptions: Optional<UnsafeMutablePointer<AudioStreamPacketDescription>>) {
    let parser = Unmanaged<Parser>.fromOpaque(context).takeUnretainedValue()

    /// At this point we should definitely have a data format
    guard let dataFormat = parser.dataFormatD else {
        return
    }

    let availableBytes = Int(byteCount)

    if let packetDescriptions = packetDescriptions {
        // Each packet has its own offset and size, so the format's fixed packet size
        // (which may be 0) cannot be used here.
        for i in 0 ..< Int(packetCount) {
            let description = packetDescriptions[i]
            let packetStart = Int(description.mStartOffset)
            let packetSize = Int(description.mDataByteSize)
            // Skip descriptions that point outside the supplied bytes.
            guard packetStart >= 0, packetStart + packetSize <= availableBytes else { continue }
            let packetData = Data(bytes: data.advanced(by: packetStart), count: packetSize)
            parser.packetsX.append(packetData)
        }
    } else {
        let format = dataFormat.streamDescription.pointee
        let bytesPerPacket = Int(format.mBytesPerPacket)
        // Without a fixed packet size there is no way to split the data.
        guard bytesPerPacket > 0 else { return }
        for i in 0 ..< Int(packetCount) {
            let packetStart = i * bytesPerPacket
            // Stop before reading past the supplied bytes.
            guard packetStart + bytesPerPacket <= availableBytes else { break }
            let packetData = Data(bytes: data.advanced(by: packetStart), count: bytesPerPacket)
            parser.packetsX.append(packetData)
        }
    }
}
full code in github repo

Resources