Write an array of floats to a WAV audio file in Swift (iOS)

I have this flow now: I record audio with AudioEngine, send it to an audio processing library, and get an audio buffer back. Now I want to write it to a WAV file, but I'm totally confused about how to do that in Swift.
I've tried this snippet from another Stack Overflow answer (load a pcm into a AVAudioPCMBuffer), but it writes an empty, corrupted file.
// get data from the library
var len: CLong = 0
let res: UnsafePointer<Double> = getData(CLong(), &len)
let bufferPointer: UnsafeBufferPointer = UnsafeBufferPointer(start: res, count: len)

// transform it to Data
let arrayDouble = Array(bufferPointer)
let arrayFloats = arrayDouble.map { Float($0) }
let data = try Data(buffer: bufferPointer)

// attempt to write it to a file
do {
    let format = AVAudioFormat(commonFormat: .pcmFormatFloat32, sampleRate: 16000, channels: 2, interleaved: false)
    var buffer = AVAudioPCMBuffer(pcmFormat: format, frameCapacity: AVAudioFrameCount(data.count))
    buffer.floatChannelData!.pointee.withMemoryRebound(to: UInt8.self, capacity: data.count) {
        let stream = OutputStream(toBuffer: $0, capacity: data.count)
        stream.open()
        _ = data.withUnsafeBytes {
            stream.write($0, maxLength: data.count)
        }
        stream.close()
    }
    // settings are from AudioEngine.inputNode!.inputFormat(forBus: 0).settings
    var audioFile = try AVAudioFile(forWriting: url, settings: settings)
    try audioFile.write(from: buffer)
} catch let error as NSError {
    print("ERROR HERE", error.localizedDescription)
}
So, I guess I'm doing this transform of floatChannelData wrong, or everything wrong. Any suggestions or pointers on where to read about this would be great!

With a colleague's help we've managed to get it to work. Apparently, after filling an AVAudioPCMBuffer you also need to tell it its new size (frameLength).
I was also using the wrong formats.
Here is the code:
let SAMPLE_RATE = Float64(16000.0)

let outputFormatSettings = [
    AVFormatIDKey: kAudioFormatLinearPCM,
    AVLinearPCMBitDepthKey: 32,
    AVLinearPCMIsFloatKey: true,
    // AVLinearPCMIsBigEndianKey: false,
    AVSampleRateKey: SAMPLE_RATE,
    AVNumberOfChannelsKey: 1
] as [String: Any]

let audioFile = try? AVAudioFile(forWriting: url, settings: outputFormatSettings, commonFormat: AVAudioCommonFormat.pcmFormatFloat32, interleaved: true)

let bufferFormat = AVAudioFormat(settings: outputFormatSettings)
let outputBuffer = AVAudioPCMBuffer(pcmFormat: bufferFormat, frameCapacity: AVAudioFrameCount(buff.count))

// my samples were doubles, so convert them, then write
for i in 0..<buff.count {
    outputBuffer.floatChannelData!.pointee[i] = Float(buff[i])
}

// tell the buffer how many valid frames it now holds
outputBuffer.frameLength = AVAudioFrameCount(buff.count)

do {
    try audioFile?.write(from: outputBuffer)
} catch let error as NSError {
    print("error:", error.localizedDescription)
}
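
One caveat worth adding (mine, not part of the original answer): on older SDKs AVAudioFile has no explicit close(); the WAV header appears to be finalized when the object is deallocated. If you read the file back while the writer is still alive, it can look empty or truncated, so it seems safest to scope the writer tightly:

// Sketch: scope the AVAudioFile so it is deallocated (and, presumably,
// the header finalized) before anyone reads the file back.
do {
    let audioFile = try AVAudioFile(forWriting: url, settings: outputFormatSettings,
                                    commonFormat: .pcmFormatFloat32, interleaved: true)
    try audioFile.write(from: outputBuffer)
} catch {
    print("write error:", error)
}
// audioFile is released here; the file should now be safe to read back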

Update for Swift 5
This is an update for writing an array of floats to a WAV audio file in Swift 5. The function can be called like this: saveWav([channel1, channel2])
func saveWav(_ buf: [[Float]]) {
    if let format = AVAudioFormat(commonFormat: .pcmFormatFloat32, sampleRate: 44100, channels: 2, interleaved: false) {
        let pcmBuf = AVAudioPCMBuffer(pcmFormat: format, frameCapacity: AVAudioFrameCount(buf[0].count))
        memcpy(pcmBuf?.floatChannelData?[0], buf[0], 4 * buf[0].count)
        memcpy(pcmBuf?.floatChannelData?[1], buf[1], 4 * buf[1].count)
        pcmBuf?.frameLength = UInt32(buf[0].count)

        let fileManager = FileManager.default
        do {
            let documentDirectory = try fileManager.url(for: .documentDirectory, in: .userDomainMask, appropriateFor: nil, create: false)
            try FileManager.default.createDirectory(atPath: documentDirectory.path, withIntermediateDirectories: true, attributes: nil)
            let fileURL = documentDirectory.appendingPathComponent("out.wav")
            print(fileURL.path)
            let audioFile = try AVAudioFile(forWriting: fileURL, settings: format.settings)
            try audioFile.write(from: pcmBuf!)
        } catch {
            print(error)
        }
    }
}
To make sure the function above works properly, use the following snippet, which converts an audio file to arrays of floats and saves it back to an audio file with saveWav:
do {
    guard let url = Bundle.main.url(forResource: "audio_example", withExtension: "wav") else { return }
    let file = try AVAudioFile(forReading: url)
    if let format = AVAudioFormat(commonFormat: .pcmFormatFloat32, sampleRate: file.fileFormat.sampleRate, channels: file.fileFormat.channelCount, interleaved: false),
       let buf = AVAudioPCMBuffer(pcmFormat: format, frameCapacity: AVAudioFrameCount(file.length)) {
        try file.read(into: buf)
        guard let floatChannelData = buf.floatChannelData else { return }
        let frameLength = Int(buf.frameLength)
        // convert the PCM buffer into two arrays of floats, one per channel
        let channel1 = Array(UnsafeBufferPointer(start: floatChannelData[0], count: frameLength))
        let channel2 = Array(UnsafeBufferPointer(start: floatChannelData[1], count: frameLength))
        // save the audio back using the saveWav function
        saveWav([channel1, channel2])
    }
} catch {
    print("Audio Error: \(error)")
}
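
One caveat worth adding (mine, not the original poster's): the snippet indexes floatChannelData[1] unconditionally, so it will misbehave on mono files. A small guard avoids that, assuming you want to fall back to duplicating the single channel:

// Guard against mono input before indexing a second channel.
let channel1 = Array(UnsafeBufferPointer(start: floatChannelData[0], count: frameLength))
let channel2: [Float]
if buf.format.channelCount > 1 {
    channel2 = Array(UnsafeBufferPointer(start: floatChannelData[1], count: frameLength))
} else {
    channel2 = channel1 // duplicate mono into both channels
}
saveWav([channel1, channel2])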

Related

How To Read An Audio Buffer Of NSData And Convert To AVAudioPCMBuffer With Microsoft Azure Cognitive Services Speech SDK

I am currently using the Microsoft Azure Cognitive Services Speech SDK to play text to speech.
I am able to get the data from the stream, which is provided in the following format (reference):
.audio16Khz32KBitRateMonoMp3
This is set like this:
private let inputFormat = AVAudioFormat(
    commonFormat: .pcmFormatFloat32,
    sampleRate: 16000,
    channels: 1,
    interleaved: false
)!
I'm using AVAudioEngine & AVAudioPlayerNode:
let engine = AVAudioEngine()
let player = AVAudioPlayerNode()

override func viewDidLoad() {
    super.viewDidLoad()
    let mainMixer = engine.mainMixerNode
    engine.attach(player)
    engine.connect(player, to: mainMixer, format: inputFormat)
    try! engine.start()
}
I am able to play this back with some success using the following:
func playAudio(dialogue: String, audioPlayer: AVAudioPlayerNode, then completion: @escaping (() -> Void)) {
    audioAsset = nil
    try? FileManager.default.removeItem(at: recordingPath)
    FileManager.default.createFile(atPath: recordingPath.path, contents: nil, attributes: nil)
    do {
        let configuration = try SPXSpeechConfiguration(subscription: Microsoft.key, region: Microsoft.region)
        configuration.setSpeechSynthesisOutputFormat(.audio16Khz32KBitRateMonoMp3)
        let synthesizer = try SPXSpeechSynthesizer(speechConfiguration: configuration, audioConfiguration: nil)
        let speechResult = try synthesizer.startSpeakingSsml(dialogue)
        let stream = try SPXAudioDataStream(from: speechResult)
        guard
            let mutableFile = FileHandle(forWritingAtPath: recordingPath.path),
            let streamData = NSMutableData(capacity: Int(bufferCapacity))
        else {
            fatalError()
        }
        while stream.read(streamData, length: bufferCapacity) > 0 {
            mutableFile.write(streamData as Data)
            mutableFile.seekToEndOfFile()
            do {
                let buffer = try readFileIntoBuffer(audioUrl: recordingPath)
                audioPlayer.scheduleBuffer(buffer, at: currentBufferTime(buffer: buffer)) { [weak self] in
                    guard let self = self else { return }
                    if let audioAsset = self.audioAsset, audioPlayer.currentTime >= CMTimeGetSeconds(audioAsset.duration) {
                        DispatchQueue.main.async {
                            audioPlayer.stop()
                        }
                        completion()
                    }
                }
                audioPlayer.play()
            } catch {
                print("Unable To Play Azure Buffer Stream \(error)")
            }
        }
        print("Did Complete Azure Buffer Rendering To File")
        audioAsset = AVURLAsset.init(url: recordingPath, options: nil)
        mutableFile.closeFile()
    } catch {
        print("Unable To Run Azure Vocoder \(error)")
    }
}
With my buffer creation functions being as follows:
func currentBufferTime(buffer: AVAudioPCMBuffer) -> AVAudioTime {
    let framecount = Double(buffer.frameLength)
    let samplerate = buffer.format.sampleRate
    let position = TimeInterval(framecount / samplerate)
    return AVAudioTime(sampleTime: AVAudioFramePosition(position), atRate: 1)
}

func readFileIntoBuffer(audioUrl: URL) throws -> AVAudioPCMBuffer {
    let audioFile = try AVAudioFile(forReading: audioUrl)
    let audioFileFormat = audioFile.processingFormat
    let audioFileSize = UInt32(audioFile.length)
    let audioBuffer = AVAudioPCMBuffer(pcmFormat: audioFileFormat, frameCapacity: audioFileSize)!
    try audioFile.read(into: audioBuffer)
    return audioBuffer
}
The issue is that this is not performant: the CPU sits at around 100% for a significant amount of time while the function runs.
As such, my question is: what is a more optimal way of reading the data into a PCM buffer?
I have looked at many examples and there doesn't seem to be anything that works. For example:
func toPCMBuffer(format: AVAudioFormat, data: NSData) -> AVAudioPCMBuffer? {
    let buffer = AVAudioPCMBuffer(pcmFormat: format, frameCapacity: UInt32(data.count) / format.streamDescription.pointee.mBytesPerFrame)
    guard let buffer = buffer else { return nil }
    buffer.frameLength = buffer.frameCapacity
    let channels = UnsafeBufferPointer(start: buffer.int32ChannelData, count: Int(buffer.format.channelCount))
    data.getBytes(UnsafeMutableRawPointer(channels[0]), length: data.count)
    return buffer
}
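
For comparison, here is a hedged sketch of that helper that avoids the int32ChannelData mismatch. It assumes the incoming NSData already contains PCM samples in exactly the layout described by format (an assumption on my part; the thread never states the byte layout), and simply copies bytes through the buffer's AudioBufferList:

import AVFoundation

func toPCMBuffer(format: AVAudioFormat, data: NSData) -> AVAudioPCMBuffer? {
    let bytesPerFrame = format.streamDescription.pointee.mBytesPerFrame
    guard bytesPerFrame > 0 else { return nil }
    guard let buffer = AVAudioPCMBuffer(pcmFormat: format,
                                        frameCapacity: UInt32(data.count) / bytesPerFrame) else { return nil }
    buffer.frameLength = buffer.frameCapacity
    // Copy raw bytes into each AudioBuffer (one per channel when
    // non-interleaved, a single one when interleaved or mono).
    let dest = UnsafeMutableAudioBufferListPointer(buffer.mutableAudioBufferList)
    var offset = 0
    for audioBuffer in dest {
        let byteCount = min(Int(audioBuffer.mDataByteSize), data.count - offset)
        guard byteCount > 0, let mData = audioBuffer.mData else { break }
        memcpy(mData, data.bytes.advanced(by: offset), byteCount)
        offset += byteCount
    }
    return buffer
}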

How does one use AVAudioConverter to convert from mono to stereo?

I'm trying to use AVAudioEngine instead of AVAudioPlayer because I need to do some per-packet processing as the audio is playing, but before I can get that far, I need to convert the 16-bit 8kHz mono audio data to stereo so the AVAudioEngine will play it. This is my (incomplete) attempt. I'm currently stuck on how to make AVAudioConverter do the mono-to-stereo conversion. If I don't use the AVAudioConverter, the iOS runtime complains that the input format doesn't match the output format. If I do use it (as below), the runtime doesn't complain, but the audio does not play back properly (likely because I'm not doing the mono-to-stereo conversion correctly). Any assistance is appreciated!
private func loadAudioData(audioData: Data?) {
    // Load audio data into player
    guard let audio = audioData else { return }
    do {
        let inputAudioFormat = AVAudioFormat(commonFormat: .pcmFormatInt16, sampleRate: Double(sampleRate), channels: 1, interleaved: false)
        let outputAudioFormat = self.audioEngine.mainMixerNode.outputFormat(forBus: 0)
        if inputAudioFormat != nil {
            let inputStreamDescription = inputAudioFormat?.streamDescription.pointee
            let outputStreamDescription = outputAudioFormat.streamDescription.pointee
            let count = UInt32(audio.count)
            if inputStreamDescription != nil && count > 0 {
                if let ibpf = inputStreamDescription?.mBytesPerFrame {
                    let inputFrameCapacity = count / ibpf
                    let outputFrameCapacity = count / outputStreamDescription.mBytesPerFrame
                    self.pcmInputBuffer = AVAudioPCMBuffer(pcmFormat: inputAudioFormat!, frameCapacity: inputFrameCapacity)
                    self.pcmOutputBuffer = AVAudioPCMBuffer(pcmFormat: outputAudioFormat, frameCapacity: outputFrameCapacity)
                    if let input = self.pcmInputBuffer, let output = self.pcmOutputBuffer {
                        self.pcmConverter = AVAudioConverter(from: inputAudioFormat!, to: outputAudioFormat)
                        input.frameLength = input.frameCapacity
                        let b = UnsafeMutableBufferPointer(start: input.int16ChannelData?[0], count: input.stride * Int(inputFrameCapacity))
                        let bytesCopied = audio.copyBytes(to: b)
                        assert(bytesCopied == count)
                        audioEngine.attach(playerNode)
                        audioEngine.connect(playerNode, to: audioEngine.mainMixerNode, format: nil)
                        self.pcmConverter?.convert(to: output, error: nil) { packets, status in
                            status.pointee = .haveData
                            return self.pcmInputBuffer // I know this is wrong, but I'm not sure how to do it correctly
                        }
                        try audioEngine.start()
                    }
                }
            }
        }
    }
}
Speculative, incorrect answer
How about pcmConverter?.channelMap = [0, 0]?
Actual answer
You don't need to use the audio converter's channel map, because mono-to-stereo AVAudioConverters seem to duplicate the mono channel by default. The main problems were that outputFrameCapacity was wrong, and that you use mainMixer's outputFormat before calling audioEngine.prepare() or starting the engine.
Assuming sampleRate = 8000, an amended solution looks like this:
private func loadAudioData(audioData: Data?) throws {
    // Load audio data into player
    guard let audio = audioData else { return }
    do {
        audioEngine.attach(playerNode)
        audioEngine.connect(playerNode, to: audioEngine.mainMixerNode, format: nil)
        audioEngine.prepare() // https://stackoverflow.com/a/70392017/22147

        let outputAudioFormat = self.audioEngine.mainMixerNode.outputFormat(forBus: 0)
        guard let inputAudioFormat = AVAudioFormat(commonFormat: .pcmFormatInt16, sampleRate: Double(sampleRate), channels: 1, interleaved: false) else { return }

        let inputStreamDescription = inputAudioFormat.streamDescription.pointee
        let outputStreamDescription = outputAudioFormat.streamDescription.pointee
        let count = UInt32(audio.count)
        if count > 0 {
            let ibpf = inputStreamDescription.mBytesPerFrame
            let inputFrameCapacity = count / ibpf
            let outputFrameCapacity = Float64(inputFrameCapacity) * outputStreamDescription.mSampleRate / inputStreamDescription.mSampleRate

            self.pcmInputBuffer = AVAudioPCMBuffer(pcmFormat: inputAudioFormat, frameCapacity: inputFrameCapacity)
            self.pcmOutputBuffer = AVAudioPCMBuffer(pcmFormat: outputAudioFormat, frameCapacity: AVAudioFrameCount(outputFrameCapacity))

            if let input = self.pcmInputBuffer, let output = self.pcmOutputBuffer {
                self.pcmConverter = AVAudioConverter(from: inputAudioFormat, to: outputAudioFormat)
                input.frameLength = input.frameCapacity
                let b = UnsafeMutableBufferPointer(start: input.int16ChannelData?[0], count: input.stride * Int(inputFrameCapacity))
                let bytesCopied = audio.copyBytes(to: b)
                assert(bytesCopied == count)
                self.pcmConverter?.convert(to: output, error: nil) { packets, status in
                    status.pointee = .haveData
                    return self.pcmInputBuffer
                }
                try audioEngine.start()
                self.playerNode.scheduleBuffer(output, completionHandler: nil)
                self.playerNode.play()
            }
        }
    }
}

Change encoder format version

For the past week I have been trying to take audio from the microphone (on iOS), downsample it, and write it to a .aac file.
I've finally gotten to the point where it's almost working:
let inputNode = audioEngine.inputNode
let inputFormat = inputNode.outputFormat(forBus: 0)
let bufferSize = UInt32(4096)
//let sampleRate = 44100.0
let sampleRate = 8000
let bitRate = sampleRate * 16

let fileUrl = url(appending: "NewRecording.aac")
print("Write to \(fileUrl)")
do {
    outputFile = try AVAudioFile(forWriting: fileUrl,
                                 settings: [
                                     AVFormatIDKey: kAudioFormatMPEG4AAC,
                                     AVSampleRateKey: sampleRate,
                                     AVEncoderBitRateKey: bitRate,
                                     AVEncoderAudioQualityKey: AVAudioQuality.high.rawValue,
                                     AVNumberOfChannelsKey: 1],
                                 commonFormat: .pcmFormatFloat32,
                                 interleaved: false)
} catch let error {
    print("Failed to create audio file for \(fileUrl): \(error)")
    return
}

recordButton.setImage(RecordingStyleKit.imageOfMicrophone(fill: .red), for: [])

// Downsample the audio to 8kHz
let fmt = AVAudioFormat(commonFormat: .pcmFormatFloat32, sampleRate: Double(sampleRate), channels: 1, interleaved: false)!
let converter = AVAudioConverter(from: inputFormat, to: fmt)!

inputNode.installTap(onBus: 0, bufferSize: AVAudioFrameCount(bufferSize), format: inputFormat) { (buffer, time) in
    let inputCallback: AVAudioConverterInputBlock = { inNumPackets, outStatus in
        outStatus.pointee = AVAudioConverterInputStatus.haveData
        return buffer
    }

    let convertedBuffer = AVAudioPCMBuffer(pcmFormat: fmt,
                                           frameCapacity: AVAudioFrameCount(fmt.sampleRate) * buffer.frameLength / AVAudioFrameCount(buffer.format.sampleRate))!

    var error: NSError? = nil
    let status = converter.convert(to: convertedBuffer, error: &error, withInputFrom: inputCallback)
    assert(status != .error)

    if let outputFile = self.outputFile {
        do {
            try outputFile.write(from: convertedBuffer)
        } catch let error {
            print("Write failed: \(error)")
        }
    }
}

audioEngine.prepare()
do {
    try audioEngine.start()
} catch {
    print(error.localizedDescription)
}
The problem is that the resulting file MUST be in MPEG ADTS, AAC, v4 LC, 8 kHz, monaural format, but the code above only generates MPEG ADTS, AAC, v2 LC, 8 kHz, monaural.
That is, it MUST be v4, not v2 (I have no choice).
(This result is obtained by running file {name} on the command line to dump its properties. I also use MediaInfo for additional information.)
I've been trying to figure out if there is some way to provide a hint or setting to AVAudioFile which will change the LC (Low Complexity) version from 2 to 4.
I've been scanning through the docs and examples but can't seem to find any suggestions.
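Not an authoritative answer, but as far as I can tell AVAudioFile exposes no setting for this. If the v2/v4 distinction that file reports is simply the MPEG-version bit in each ADTS frame header (my assumption: in ADTS the second header byte is 0xF1 for MPEG-4 and 0xF9 for MPEG-2), one possible workaround is to rewrite that bit after encoding. A rough sketch, walking the file frame by frame via the 13-bit frame-length field:

import Foundation

// Hypothetical post-processing pass: clear the MPEG-version bit
// (0 = MPEG-4, 1 = MPEG-2) in every ADTS frame header. Assumes the
// file written above is raw ADTS AAC.
func relabelADTSAsMPEG4(at url: URL) throws {
    var data = try Data(contentsOf: url)
    var i = 0
    while i + 7 <= data.count {
        // ADTS syncword: 0xFFF in the first 12 bits.
        guard data[i] == 0xFF, (data[i + 1] & 0xF0) == 0xF0 else { break }
        data[i + 1] &= ~UInt8(0x08) // clearing the bit marks the frame as MPEG-4
        // The 13-bit frame length spans header bytes 3..5.
        let frameLength = (Int(data[i + 3] & 0x03) << 11)
            | (Int(data[i + 4]) << 3)
            | (Int(data[i + 5]) >> 5)
        guard frameLength > 0 else { break }
        i += frameLength
    }
    try data.write(to: url)
}

I haven't verified that downstream tools accept this; treat it as a starting point and check the result with file / MediaInfo afterwards.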

Generate Phase Inverse audio file from existing actual audio file to cancel its voice swift

I am recording sound through the audio engine into a file named my_file.caf, and I'm trying to create a second file that is its phase inverse, so that it cancels the voice when both are played together in mono.
But after my operations and calculations, the sine wave is reversed in time, so the sound plays backwards too.
do {
    let inFile: AVAudioFile = try AVAudioFile(forReading: URLFor(filename: "my_file.caf")!)
    let format: AVAudioFormat = inFile.processingFormat
    let frameCount: AVAudioFrameCount = UInt32(inFile.length)

    let outSettings = [AVNumberOfChannelsKey: format.channelCount,
                       AVSampleRateKey: format.sampleRate,
                       AVLinearPCMBitDepthKey: 16,
                       AVFormatIDKey: kAudioFormatMPEG4AAC] as [String: Any]

    let outFile: AVAudioFile = try AVAudioFile(forWriting: URLFor(filename: "my_file1.caf")!, settings: outSettings)
    let forwardBuffer: AVAudioPCMBuffer = AVAudioPCMBuffer(pcmFormat: format, frameCapacity: frameCount)!
    let reverseBuffer: AVAudioPCMBuffer = AVAudioPCMBuffer(pcmFormat: format, frameCapacity: frameCount)!

    try inFile.read(into: forwardBuffer)
    let frameLength = forwardBuffer.frameLength
    reverseBuffer.frameLength = frameLength
    let audioStride = forwardBuffer.stride

    for channelIdx in 0..<forwardBuffer.format.channelCount {
        let forwardChannelData = forwardBuffer.floatChannelData?.advanced(by: Int(channelIdx)).pointee
        let reverseChannelData = reverseBuffer.floatChannelData?.advanced(by: Int(channelIdx)).pointee
        var reverseIdx: Int = 0
        for idx in stride(from: frameLength, to: 0, by: -1) {
            memcpy(reverseChannelData?.advanced(by: reverseIdx * audioStride),
                   forwardChannelData?.advanced(by: Int(idx) * audioStride),
                   MemoryLayout<Float>.size)
            reverseIdx += 1
        }
    }

    try outFile.write(from: reverseBuffer)
} catch let error {
    print(error.localizedDescription)
}
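
The loop above copies samples in reverse order, which reverses the audio in time; that is why the sound plays backwards. Phase inversion keeps the original sample order and only negates each value. A minimal sketch of that inner loop, reusing the buffers from the code above (and note that writing through kAudioFormatMPEG4AAC is lossy, so exact cancellation would likely need a linear PCM output format instead):

// Negate every sample instead of reversing the frame order.
for channelIdx in 0..<Int(forwardBuffer.format.channelCount) {
    guard let src = forwardBuffer.floatChannelData?[channelIdx],
          let dst = reverseBuffer.floatChannelData?[channelIdx] else { continue }
    for frame in 0..<Int(frameLength) {
        dst[frame] = -src[frame] // 180-degree phase flip, same time order
    }
}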

Tap installed on audio engine only producing short files

I am working on an app that allows the user to record audio, play it back while changing the pitch, then record what they have done as a separate file.
The code seems to be working, but the new file has a duration of only 0.37 seconds (the original is 5 seconds).
I am guessing that when I write from the buffer it keeps saving over itself, leaving me with just the last segment. If that is my issue, how do I append to the file instead of overwriting it?
let recordSettings: [String: AnyObject] = [
    AVFormatIDKey: NSNumber(unsignedInt: kAudioFormatAppleLossless),
    AVEncoderAudioQualityKey: AVAudioQuality.Max.rawValue,
    AVEncoderBitRateKey: 320000,
    AVNumberOfChannelsKey: 2,
    AVSampleRateKey: 44100.0
]

var outputFile = AVAudioFile()
let format = NSDateFormatter()
format.dateFormat = "dd-HH-mm-ss"
let currentFileName = "recording-\(format.stringFromDate(NSDate())).m4a"
print(currentFileName)
let documentsDirectory = NSFileManager.defaultManager().URLsForDirectory(.DocumentDirectory, inDomains: .UserDomainMask)[0]
self.url2 = documentsDirectory.URLByAppendingPathComponent(currentFileName)

let inputNode = engine.inputNode
let bus = 0
engine.mainMixerNode.installTapOnBus(bus, bufferSize: 2048, format: self.engine.mainMixerNode.inputFormatForBus(0)) {
    (buffer: AVAudioPCMBuffer!, time: AVAudioTime!) -> Void in
    do {
        let outputFile = try AVAudioFile(forWriting: self.url2, settings: recordSettings, commonFormat: AVAudioCommonFormat.PCMFormatFloat32, interleaved: false)
        try outputFile.writeFromBuffer(buffer)
        outputFile.framePosition = outputFile.length
    } catch let error as NSError {
        NSLog("Error writing %@", error.localizedDescription)
    }
}
Updated code creating a file with a duration of 0.0:
func play() {
    let duration = CMTimeGetSeconds(AVAsset(URL: url).duration)
    print("Duration")
    print(duration)

    let file = try! AVAudioFile(forReading: url)
    let buffer = AVAudioPCMBuffer(PCMFormat: file.processingFormat, frameCapacity: AVAudioFrameCount(file.length))
    do {
        try file.readIntoBuffer(buffer)
    } catch _ {
    }

    engine = AVAudioEngine()
    player = AVAudioPlayerNode()
    pitch.pitch = 500
    engine.attachNode(player)
    engine.attachNode(pitch)
    engine.connect(player, to: pitch, format: buffer.format)
    engine.connect(pitch, to: engine.mainMixerNode, format: nil)

    let format = NSDateFormatter()
    format.dateFormat = "dd-HH-mm-ss"
    let currentFileName = "recording-\(format.stringFromDate(NSDate())).m4a"
    print(currentFileName)
    let documentsDirectory = NSFileManager.defaultManager().URLsForDirectory(.DocumentDirectory, inDomains: .UserDomainMask)[0]
    self.url2 = documentsDirectory.URLByAppendingPathComponent(currentFileName)

    let outputFile = try! AVAudioFile(forWriting: url2, settings: [
        AVFormatIDKey: NSNumber(unsignedInt: kAudioFormatAppleLossless),
        AVEncoderAudioQualityKey: AVAudioQuality.Max.rawValue,
        AVEncoderBitRateKey: 320000,
        AVNumberOfChannelsKey: 2,
        AVSampleRateKey: 44100.0
    ])

    done = false
    distortion.installTapOnBus(0, bufferSize: 2048, format: outputFile.processingFormat) {
        (buffer: AVAudioPCMBuffer!, time: AVAudioTime!) in
        let dataptrptr = buffer.floatChannelData
        let dataptr = dataptrptr.memory
        let datum = dataptr[Int(buffer.frameLength) - 1]
        // detect the end of playback by waiting for the signal to go silent
        if self.done && fabs(datum) < 0.000001 {
            print("stopping")
            self.engine.stop()
            return
        }
        do {
            try outputFile.writeFromBuffer(buffer)
        } catch let error as NSError {
            NSLog("Error writing %@", error.localizedDescription)
        }
    }

    player.scheduleBuffer(buffer, atTime: nil, options: AVAudioPlayerNodeBufferOptions.Loops, completionHandler: {
        dispatch_async(dispatch_get_main_queue(), {
            self.done = true
            self.player.stop()
            self.engine.stop()
            print("complete")
        })
    })

    engine.prepare()
    do {
        try engine.start()
        player.play()
    } catch _ {
        print("Play session Error")
    }
}
Keep in mind that the installTapOnBus handler will be called many times: every time the buffer fills up. Think of it as a loop. It therefore makes no sense to create the output file each time through that loop! You want to create the output file once and then write to it repeatedly. So your overall structure needs to look like this:
let outfile = try! AVAudioFile(forWriting: outurl, settings: // ...
node.installTapOnBus(bus, bufferSize: size, format: outfile.processingFormat) {
    (buffer: AVAudioPCMBuffer!, time: AVAudioTime!) in
    do {
        try outfile.writeFromBuffer(buffer)
    } catch {
        print(error)
    }
}
The other thing to remember is that your buffer will fill and be written only as long as the engine keeps running, so don't stop the engine prematurely (I don't know whether you're doing that, but it's important to keep in mind).
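
On that last point, here is a minimal shutdown sketch in the same Swift 2-era style (my own addition, not from the original answer): remove the tap before stopping the engine so no half-written buffer arrives mid-teardown, and release the AVAudioFile so its header is finalized.

func stopRecording() {
    engine.mainMixerNode.removeTapOnBus(0) // stop buffer delivery first
    player.stop()
    engine.stop()
    // If the AVAudioFile is held in an optional property, release it
    // (e.g. self.outputFile = nil) so the file length is finalized.
}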
