Change encoder format version - iOS

I have, for the past week, been trying to take audio from the microphone (on iOS), downsample it, and write it to a '.aac' file.
I've finally gotten to the point where it's almost working:
let inputNode = audioEngine.inputNode
let inputFormat = inputNode.outputFormat(forBus: 0)
let bufferSize = UInt32(4096)
//let sampleRate = 44100.0
let sampleRate = 8000
let bitRate = sampleRate * 16
let fileUrl = url(appending: "NewRecording.aac")
print("Write to \(fileUrl)")
do {
    outputFile = try AVAudioFile(forWriting: fileUrl,
                                 settings: [
                                     AVFormatIDKey: kAudioFormatMPEG4AAC,
                                     AVSampleRateKey: sampleRate,
                                     AVEncoderBitRateKey: bitRate,
                                     AVEncoderAudioQualityKey: AVAudioQuality.high.rawValue,
                                     AVNumberOfChannelsKey: 1],
                                 commonFormat: .pcmFormatFloat32,
                                 interleaved: false)
} catch let error {
    print("Failed to create audio file for \(fileUrl): \(error)")
    return
}
recordButton.setImage(RecordingStyleKit.imageOfMicrophone(fill: .red), for: [])
// Down sample the audio to 8kHz
let fmt = AVAudioFormat(commonFormat: .pcmFormatFloat32, sampleRate: Double(sampleRate), channels: 1, interleaved: false)!
let converter = AVAudioConverter(from: inputFormat, to: fmt)!
inputNode.installTap(onBus: 0, bufferSize: AVAudioFrameCount(bufferSize), format: inputFormat) { (buffer, time) in
    // Hand the tapped buffer to the converter exactly once per convert call;
    // returning .haveData unconditionally can feed the same buffer more than once.
    var consumed = false
    let inputCallback: AVAudioConverterInputBlock = { inNumPackets, outStatus in
        if consumed {
            outStatus.pointee = .noDataNow
            return nil
        }
        consumed = true
        outStatus.pointee = .haveData
        return buffer
    }
    let convertedBuffer = AVAudioPCMBuffer(pcmFormat: fmt,
                                           frameCapacity: AVAudioFrameCount(fmt.sampleRate) * buffer.frameLength / AVAudioFrameCount(buffer.format.sampleRate))!
    var error: NSError? = nil
    let status = converter.convert(to: convertedBuffer, error: &error, withInputFrom: inputCallback)
    assert(status != .error)
    if let outputFile = self.outputFile {
        do {
            try outputFile.write(from: convertedBuffer)
        } catch let error {
            print("Write failed: \(error)")
        }
    }
}
audioEngine.prepare()
do {
    try audioEngine.start()
} catch {
    print(error.localizedDescription)
}
The problem is that the resulting file MUST be in MPEG ADTS, AAC, v4 LC, 8 kHz, monaural format, but the code above only generates MPEG ADTS, AAC, v2 LC, 8 kHz, monaural.
That is, it MUST be v4, not v2 (I have no choice).
(This result comes from running file {name} on the command line to dump its properties. I also use MediaInfo for additional information.)
I've been trying to figure out if there is some way to provide a hint or setting to AVAudioFile that will change the MPEG version (the 2 versus 4 in "v2 LC"/"v4 LC") from 2 to 4.
I've been scanning through the docs and examples but can't find any suggestions.
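For what it's worth, AVAudioFile doesn't seem to expose that bit at all. The "v2"/"v4" that file reports is the MPEG version ID bit in each ADTS frame header (second header byte 0xF9 for MPEG-2 versus 0xF1 for MPEG-4), so one possible workaround, assuming the encoder's output really is raw ADTS as file suggests, is a post-processing pass that clears that bit in every frame. A minimal sketch, not production code:
import Foundation

// Sketch: walk the ADTS frames and clear the MPEG version ID bit so each
// header reads MPEG-4 ("v4") instead of MPEG-2 ("v2"). Assumes raw ADTS input.
func forceADTSv4(at url: URL) throws {
    var data = try Data(contentsOf: url)
    var i = 0
    while i + 7 <= data.count {
        // Every ADTS frame starts with a 12-bit sync word (0xFFF)
        guard data[i] == 0xFF, (data[i + 1] & 0xF0) == 0xF0 else { break }
        data[i + 1] &= ~0x08 // ID bit: 1 = MPEG-2, 0 = MPEG-4
        // The 13-bit frame length (header included) spans header bytes 3..5
        let frameLength = (Int(data[i + 3] & 0x03) << 11)
            | (Int(data[i + 4]) << 3)
            | (Int(data[i + 5]) >> 5)
        guard frameLength > 0 else { break }
        i += frameLength
    }
    try data.write(to: url)
}
Running forceADTSv4(at: fileUrl) after the recording is finished should flip every header in place; it's worth re-checking the result with file and MediaInfo afterwards.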

Related

Generate Phase Inverse audio file from existing actual audio file to cancel its voice swift

I am recording sound through the audio engine into a file named my_file.caf, and I'm trying to make another file that is its phase inverse, so that I can cancel its voice in mono.
But when I do some operations and calculations, it reverses the sine wave but also reverses the sound (it plays backwards).
do {
    let inFile: AVAudioFile = try AVAudioFile(forReading: URLFor(filename: "my_file.caf")!)
    let format: AVAudioFormat = inFile.processingFormat
    let frameCount: AVAudioFrameCount = UInt32(inFile.length)
    let outSettings = [AVNumberOfChannelsKey: format.channelCount,
                       AVSampleRateKey: format.sampleRate,
                       AVLinearPCMBitDepthKey: 16,
                       AVFormatIDKey: kAudioFormatMPEG4AAC] as [String : Any]
    let outFile: AVAudioFile = try AVAudioFile(forWriting: URLFor(filename: "my_file1.caf")!, settings: outSettings)
    let forwardBuffer: AVAudioPCMBuffer = AVAudioPCMBuffer(pcmFormat: format, frameCapacity: frameCount)!
    let reverseBuffer: AVAudioPCMBuffer = AVAudioPCMBuffer(pcmFormat: format, frameCapacity: frameCount)!
    try inFile.read(into: forwardBuffer)
    let frameLength = forwardBuffer.frameLength
    reverseBuffer.frameLength = frameLength
    let audioStride = forwardBuffer.stride
    for channelIdx in 0..<forwardBuffer.format.channelCount {
        let forwardChannelData = forwardBuffer.floatChannelData?.advanced(by: Int(channelIdx)).pointee
        let reverseChannelData = reverseBuffer.floatChannelData?.advanced(by: Int(channelIdx)).pointee
        var reverseIdx: Int = 0
        // Walk the source frames from the last (frameLength - 1) down to the first
        for idx in stride(from: Int(frameLength) - 1, through: 0, by: -1) {
            memcpy(reverseChannelData?.advanced(by: reverseIdx * audioStride),
                   forwardChannelData?.advanced(by: idx * audioStride),
                   MemoryLayout<Float>.size)
            reverseIdx += 1
        }
    }
    try outFile.write(from: reverseBuffer)
} catch let error {
    print(error.localizedDescription)
}
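Note that the loop above reverses the buffer in time, which is exactly why the output plays backwards. Phase inversion is just negating each sample in place; a minimal sketch of the inner loop, reusing the buffers above and assuming the deinterleaved Float32 processingFormat (stride 1):
// Phase inversion: negate every sample; the frame order stays the same.
for channelIdx in 0..<Int(forwardBuffer.format.channelCount) {
    guard let src = forwardBuffer.floatChannelData?[channelIdx],
          let dst = reverseBuffer.floatChannelData?[channelIdx] else { continue }
    for frame in 0..<Int(frameLength) {
        dst[frame] = -src[frame]
    }
}
Summing the inverted file with the original should then null out in mono, instead of playing in reverse.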

Connecting AVAudioMixerNode to AVAudioEngine

I use AVAudioMixerNode to change the audio format. This entry helped me a lot. The code below gives me the data I want, but I hear my own voice on the phone's speaker. How can I prevent it?
func startAudioEngine()
{
    engine = AVAudioEngine()
    guard let engine = engine, let input = engine.inputNode else {
        // #TODO: error out
        return
    }
    let downMixer = AVAudioMixerNode()
    //I think the engine's I/O nodes are already attached to it by default, so we attach only the downMixer here:
    engine.attach(downMixer)
    //You can tap the downMixer to intercept the audio and do something with it:
    downMixer.installTap(onBus: 0, bufferSize: 2048, format: downMixer.outputFormat(forBus: 0), block: //originally 1024
        { (buffer: AVAudioPCMBuffer!, time: AVAudioTime!) -> Void in
            //I get audio data here
        }
    )
    //let's get the input audio format right as it is
    let format = input.inputFormat(forBus: 0)
    //I initialize the low-rate mono format I need:
    let format16KHzMono = AVAudioFormat.init(commonFormat: AVAudioCommonFormat.pcmFormatInt16, sampleRate: 11025.0, channels: 1, interleaved: true)
    //connect the nodes inside the engine:
    //INPUT NODE --format-> downMixer --16Kformat--> mainMixer
    //as you can see I'm downsampling the default 44kHz we get from the input to the rate I want
    engine.connect(input, to: downMixer, format: format)//use default input format
    engine.connect(downMixer, to: engine.outputNode, format: format16KHzMono)//use new audio format
    engine.prepare()
    do {
        try engine.start()
    } catch {
        // #TODO: error out
    }
}
You can hear your microphone recording through your speakers because your microphone is connected to downMixer, which is connected to engine.outputNode. You could probably just mute the output for the downMixer if you aren't using it with other inputs:
downMixer.outputVolume = 0.0
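Putting that together, a minimal sketch of the whole muted-tap graph (per the answer above; the variable names are illustrative):
let engine = AVAudioEngine()
let input = engine.inputNode
let downMixer = AVAudioMixerNode()

engine.attach(downMixer)
engine.connect(input, to: downMixer, format: input.inputFormat(forBus: 0))
engine.connect(downMixer, to: engine.mainMixerNode, format: nil)

// Mute the monitoring path so the microphone is not heard on the speaker
downMixer.outputVolume = 0.0

downMixer.installTap(onBus: 0, bufferSize: 2048, format: downMixer.outputFormat(forBus: 0)) { buffer, time in
    // audio data arrives here without being played back
}

engine.prepare()
do {
    try engine.start()
} catch {
    print("Engine failed to start: \(error)")
}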
I did it like this to change the format to 48000 Hz / 16 bits per sample / 2 channels, and save it to a wave file:
let outputAudioFileFormat = [AVFormatIDKey: Int(kAudioFormatLinearPCM), AVSampleRateKey: 48000, AVNumberOfChannelsKey: 2, AVEncoderAudioQualityKey: AVAudioQuality.high.rawValue]
let audioRecordingFormat : AVAudioFormat = AVAudioFormat.init(commonFormat: AVAudioCommonFormat.pcmFormatInt16, sampleRate: 48000, channels: 2, interleaved: true)!
do {
    file = try AVAudioFile(forWriting: url, settings: outputAudioFileFormat, commonFormat: .pcmFormatInt16, interleaved: true)
    let recordingSession = AVAudioSession.sharedInstance()
    try recordingSession.setPreferredInput(input)
    try recordingSession.setPreferredSampleRate(audioRecordingFormat.sampleRate)
    engine.inputNode.installTap(onBus: 0, bufferSize: 1024, format: audioRecordingFormat, block: self.bufferAvailable)
    engine.connect(engine.inputNode, to: engine.outputNode, format: audioRecordingFormat) //configure graph
} catch {
    debugPrint("Could not initialize the audio file: \(error)")
}
And the callback function:
func bufferAvailable(buffer: AVAudioPCMBuffer, time: AVAudioTime)
{
    do {
        try self.file?.write(from: buffer)
        if self.onBufferAvailable != nil {
            DispatchQueue.main.async {
                self.onBufferAvailable!(buffer) // outside function used for analyzing and displaying a wave meter
            }
        }
    } catch {
        self.stopEngine()
        DispatchQueue.main.async {
            self.onRecordEnd(false)
        }
    }
}
The stopEngine function is shown below; you should also call it when you want to stop the recording:
private func stopEngine()
{
    self.engine.inputNode.removeTap(onBus: 0)
    self.engine.stop()
}

Capture audio samples with a specific sample rate like Android in iOS Swift

I am a beginner in sound processing and AVAudioEngine on iOS, and I'm developing an application that captures audio samples into a buffer and analyzes them. The sample rate must be 8000 Hz, encoded as 16-bit PCM, but the default inputNode in AVAudioEngine runs at 44.1 kHz.
In Android, the process is quite simple:
AudioRecord audioRecord = new AudioRecord(MediaRecorder.AudioSource.MIC,
        8000, AudioFormat.CHANNEL_IN_MONO,
        AudioFormat.ENCODING_PCM_16BIT, bufferSize);
and then start the reading function for the buffer.
I searched a lot, but I didn't find a similar example. All the examples I encountered capture the samples at the node's default sample rate (44.1 kHz), like:
let input = audioEngine.inputNode
let inputFormat = input.inputFormat(forBus: 0)
input.installTap(onBus: 0, bufferSize: 640, format: inputFormat) { (buffer, time) -> Void in
    print(inputFormat)
    if let channel1Buffer = buffer.floatChannelData?[0] {
        for i in 0...Int(buffer.frameLength-1) {
            print(channel1Buffer[i])
        }
    }
}
try! audioEngine.start()
So I would like to capture audio samples using AVAudioEngine with an 8000 Hz sample rate and 16-bit PCM encoding.
Edit:
I reached a solution to transform the input to 8 kHz:
let inputNode = audioEngine.inputNode
let downMixer = AVAudioMixerNode()
let main = audioEngine.mainMixerNode
let format = inputNode.inputFormat(forBus: 0)
let format16KHzMono = AVAudioFormat(commonFormat: AVAudioCommonFormat.pcmFormatInt16, sampleRate: 8000, channels: 1, interleaved: true)
audioEngine.attach(downMixer)
downMixer.installTap(onBus: 0, bufferSize: 640, format: format16KHzMono) { (buffer, time) -> Void in
    do {
        print(buffer.description)
        if let channel1Buffer = buffer.int16ChannelData?[0] {
            // print(channel1Buffer[0])
            for i in 0 ... Int(buffer.frameLength-1) {
                print((channel1Buffer[i]))
            }
        }
    }
}
audioEngine.connect(inputNode, to: downMixer, format: format)
audioEngine.connect(downMixer, to: main, format: format16KHzMono)
audioEngine.prepare()
try! audioEngine.start()
But when I use .pcmFormatInt16 it doesn't work; when I use .pcmFormatFloat32 it works fine!
Have you checked with the settings parameter?
let format16KHzMono = AVAudioFormat(settings: [
    AVFormatIDKey: kAudioFormatLinearPCM,
    AVLinearPCMBitDepthKey: 16,
    AVLinearPCMIsFloatKey: false,
    AVNumberOfChannelsKey: 1,
    AVSampleRateKey: 8000.0
] as [String : Any])
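The Int16 tap most likely fails because AVAudioMixerNode processes audio as Float32, so a tap in an Int16 format can't be satisfied at that node. A more reliable route is to tap the input in its native format and convert each buffer yourself with AVAudioConverter. A minimal sketch, assuming an audioEngine like the one above (the names are illustrative):
let input = audioEngine.inputNode
let inputFormat = input.outputFormat(forBus: 0)

// Target: 8 kHz, mono, 16-bit integer PCM
let targetFormat = AVAudioFormat(commonFormat: .pcmFormatInt16,
                                 sampleRate: 8000,
                                 channels: 1,
                                 interleaved: true)!
let converter = AVAudioConverter(from: inputFormat, to: targetFormat)!

input.installTap(onBus: 0, bufferSize: 4096, format: inputFormat) { buffer, _ in
    // Size the output buffer for the sample-rate ratio
    let ratio = targetFormat.sampleRate / inputFormat.sampleRate
    let capacity = AVAudioFrameCount(Double(buffer.frameLength) * ratio)
    guard let converted = AVAudioPCMBuffer(pcmFormat: targetFormat,
                                           frameCapacity: capacity) else { return }

    var consumed = false
    var error: NSError?
    let status = converter.convert(to: converted, error: &error) { _, outStatus in
        // Hand the tap buffer to the converter exactly once per callback
        if consumed {
            outStatus.pointee = .noDataNow
            return nil
        }
        consumed = true
        outStatus.pointee = .haveData
        return buffer
    }
    if status != .error, let samples = converted.int16ChannelData?[0] {
        // converted.frameLength Int16 samples at 8 kHz are available here
        _ = samples
    }
}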

Write array of floats to a wav audio file in swift

I have this flow now: I record audio with AudioEngine, send it to an audio processing library, and get an audio buffer back; then I have a strong will to write it to a wav file, but I'm totally confused about how to do that in Swift.
I've tried this snippet from another Stack Overflow answer, but it writes an empty, corrupted file. ( load a pcm into a AVAudioPCMBuffer )
//get data from library
var len : CLong = 0
let res: UnsafePointer<Double> = getData(CLong(), &len)
let bufferPointer: UnsafeBufferPointer = UnsafeBufferPointer(start: res, count: len)
//transform it to Data
let arrayDouble = Array(bufferPointer)
let arrayFloats = arrayDouble.map { Float($0) }
let data = try Data(buffer: bufferPointer)
//attempt to write to file
do {
    let format = AVAudioFormat(commonFormat: .pcmFormatFloat32, sampleRate: 16000, channels: 2, interleaved: false)
    var buffer = AVAudioPCMBuffer(pcmFormat: format, frameCapacity: AVAudioFrameCount(data.count))
    buffer.floatChannelData!.pointee.withMemoryRebound(to: UInt8.self, capacity: data.count) {
        let stream = OutputStream(toBuffer: $0, capacity: data.count)
        stream.open()
        _ = data.withUnsafeBytes {
            stream.write($0, maxLength: data.count)
        }
        stream.close()
    }
    //settings are from AudioEngine.inputNode!.inputFormat(forBus: 0).settings
    var audioFile = try AVAudioFile(forWriting: url, settings: settings)
    try audioFile.write(from: buffer)
} catch let error as NSError {
    print("ERROR HERE", error.localizedDescription)
}
So, I guess I'm doing the transform of floatChannelData wrong, or everything wrong. Any suggestions or pointers on where to read about it would be great!
With a great colleague's help we've managed to get it to work. Apparently, an AVAudioPCMBuffer also needs to be notified about its new size after it has been filled.
Also, I was using totally wrong formats.
Here is the code:
let SAMPLE_RATE = Float64(16000.0)

let outputFormatSettings = [
    AVFormatIDKey: kAudioFormatLinearPCM,
    AVLinearPCMBitDepthKey: 32,
    AVLinearPCMIsFloatKey: true,
    //  AVLinearPCMIsBigEndianKey: false,
    AVSampleRateKey: SAMPLE_RATE,
    AVNumberOfChannelsKey: 1
] as [String : Any]

let audioFile = try? AVAudioFile(forWriting: url, settings: outputFormatSettings, commonFormat: AVAudioCommonFormat.pcmFormatFloat32, interleaved: true)
let bufferFormat = AVAudioFormat(settings: outputFormatSettings)
let outputBuffer = AVAudioPCMBuffer(pcmFormat: bufferFormat, frameCapacity: AVAudioFrameCount(buff.count))

// i had my samples in doubles, so convert then write
for i in 0..<buff.count {
    outputBuffer.floatChannelData!.pointee[i] = Float(buff[i])
}
outputBuffer.frameLength = AVAudioFrameCount(buff.count)

do {
    try audioFile?.write(from: outputBuffer)
} catch let error as NSError {
    print("error:", error.localizedDescription)
}
Update for Swift 5
This is an update for writing an array of floats to a wav audio file in Swift 5. The function can be used as follows: saveWav([channel1, channel2])
func saveWav(_ buf: [[Float]]) {
    if let format = AVAudioFormat(commonFormat: .pcmFormatFloat32, sampleRate: 44100, channels: 2, interleaved: false) {
        let pcmBuf = AVAudioPCMBuffer(pcmFormat: format, frameCapacity: AVAudioFrameCount(buf[0].count))
        memcpy(pcmBuf?.floatChannelData?[0], buf[0], 4 * buf[0].count)
        memcpy(pcmBuf?.floatChannelData?[1], buf[1], 4 * buf[1].count)
        pcmBuf?.frameLength = UInt32(buf[0].count)

        let fileManager = FileManager.default
        do {
            let documentDirectory = try fileManager.url(for: .documentDirectory, in: .userDomainMask, appropriateFor: nil, create: false)
            try FileManager.default.createDirectory(atPath: documentDirectory.path, withIntermediateDirectories: true, attributes: nil)
            let fileURL = documentDirectory.appendingPathComponent("out.wav")
            print(fileURL.path)
            let audioFile = try AVAudioFile(forWriting: fileURL, settings: format.settings)
            try audioFile.write(from: pcmBuf!)
        } catch {
            print(error)
        }
    }
}
To make sure the above function works properly, use the following snippet, which converts an audio file to an array of floats and saves it back to an audio file with saveWav:
do {
    guard let url = Bundle.main.url(forResource: "audio_example", withExtension: "wav") else { return }
    let file = try AVAudioFile(forReading: url)
    if let format = AVAudioFormat(commonFormat: .pcmFormatFloat32, sampleRate: file.fileFormat.sampleRate, channels: file.fileFormat.channelCount, interleaved: false),
       let buf = AVAudioPCMBuffer(pcmFormat: format, frameCapacity: AVAudioFrameCount(file.length)) {
        try file.read(into: buf)
        guard let floatChannelData = buf.floatChannelData else { return }
        let frameLength = Int(buf.frameLength)
        // we convert the audio pcm buffer to arrays of floats with two channels
        let channel1 = Array(UnsafeBufferPointer(start: floatChannelData[0], count: frameLength))
        let channel2 = Array(UnsafeBufferPointer(start: floatChannelData[1], count: frameLength))
        // we save the audio back using the saveWav function
        saveWav([channel1, channel2])
    }
} catch {
    print("Audio Error: \(error)")
}

Tap installed on audio engine only producing short files

I am working on an app that allows the user to record audio, play it back while changing the pitch, then record what they have done as a separate file.
The code seems to be working, but the new file has a duration of only 0.37 seconds (the original is 5 seconds).
I am guessing that when I write from the buffer it keeps saving over itself, leaving me with just the last segment. If that is my issue, how do I append to the file instead of writing over it?
let recordSettings: [String : AnyObject] = [
    AVFormatIDKey: NSNumber(unsignedInt: kAudioFormatAppleLossless),
    AVEncoderAudioQualityKey: AVAudioQuality.Max.rawValue,
    AVEncoderBitRateKey: 320000,
    AVNumberOfChannelsKey: 2,
    AVSampleRateKey: 44100.0
]

var outputFile = AVAudioFile()
let format = NSDateFormatter()
format.dateFormat = "dd-HH-mm-ss"
let currentFileName = "recording-\(format.stringFromDate(NSDate())).m4a"
print(currentFileName)

let documentsDirectory = NSFileManager.defaultManager().URLsForDirectory(.DocumentDirectory, inDomains: .UserDomainMask)[0]
self.url2 = documentsDirectory.URLByAppendingPathComponent(currentFileName)

let inputNode = engine.inputNode
let bus = 0
engine.mainMixerNode.installTapOnBus(bus, bufferSize: 2048, format: self.engine.mainMixerNode.inputFormatForBus(0)) {
    (buffer: AVAudioPCMBuffer!, time: AVAudioTime!) -> Void in
    do {
        let outputFile = try AVAudioFile(forWriting: self.url2, settings: recordSettings, commonFormat: AVAudioCommonFormat.PCMFormatFloat32, interleaved: false)
        try outputFile.writeFromBuffer(buffer)
        outputFile.framePosition = outputFile.length
    } catch let error as NSError {
        NSLog("Error writing %@", error.localizedDescription)
    }
}
Updated code (this creates a file with a duration of 0.0):
func play() {
    let duration = CMTimeGetSeconds(AVAsset(URL: url).duration)
    print("Duration")
    print(duration)

    let file = try! AVAudioFile(forReading: url)
    let buffer = AVAudioPCMBuffer(PCMFormat: file.processingFormat, frameCapacity: AVAudioFrameCount(file.length))
    do {
        try file.readIntoBuffer(buffer)
    } catch _ {
    }

    engine = AVAudioEngine()
    player = AVAudioPlayerNode()
    pitch.pitch = 500
    engine.attachNode(player)
    engine.attachNode(pitch)
    engine.connect(player, to: pitch, format: buffer.format)
    engine.connect(pitch, to: engine.mainMixerNode, format: nil)

    let format = NSDateFormatter()
    format.dateFormat = "dd-HH-mm-ss"
    let currentFileName = "recording-\(format.stringFromDate(NSDate())).m4a"
    print(currentFileName)

    let documentsDirectory = NSFileManager.defaultManager().URLsForDirectory(.DocumentDirectory, inDomains: .UserDomainMask)[0]
    self.url2 = documentsDirectory.URLByAppendingPathComponent(currentFileName)

    let outputFile = try! AVAudioFile(forWriting: url2, settings: [
        AVFormatIDKey: NSNumber(unsignedInt: kAudioFormatAppleLossless),
        AVEncoderAudioQualityKey: AVAudioQuality.Max.rawValue,
        AVEncoderBitRateKey: 320000,
        AVNumberOfChannelsKey: 2,
        AVSampleRateKey: 44100.0
    ])

    done = false
    distortion.installTapOnBus(0, bufferSize: 2048, format: outputFile.processingFormat) {
        (buffer: AVAudioPCMBuffer!, time: AVAudioTime!) in
        let dataptrptr = buffer.floatChannelData
        let dataptr = dataptrptr.memory
        let datum = dataptr[Int(buffer.frameLength) - 1]
        // Stop once playback is done and the tail has decayed to silence
        if self.done && fabs(datum) < 0.000001 {
            print("stopping")
            self.engine.stop()
            return
        }
        do {
            try outputFile.writeFromBuffer(buffer)
        } catch let error as NSError {
            NSLog("Error writing %@", error.localizedDescription)
        }
    }

    player.scheduleBuffer(buffer, atTime: nil, options: AVAudioPlayerNodeBufferOptions.Loops, completionHandler: {
        dispatch_async(dispatch_get_main_queue(), {
            self.done = true
            self.player.stop()
            self.engine.stop()
            print("complete")
        })
    })

    engine.prepare()
    do {
        try engine.start()
        player.play()
    } catch _ {
        print("Play session Error")
    }
}
Keep in mind that the installTapOnBus handler will be called many times: every time the buffer fills up. Think of it as a loop. Thus it makes no sense to create the output file each time through that loop! You want to create the output file once and then write to it repeatedly. Thus, your overall structure needs to look like this:
let outfile = try! AVAudioFile(forWriting: outurl, settings: // ...
node.installTapOnBus(bus, bufferSize: size, format: outfile.processingFormat) {
    (buffer: AVAudioPCMBuffer!, time: AVAudioTime!) in
    do {
        try outfile.writeFromBuffer(buffer)
    } catch {
        print(error)
    }
}
The other thing to remember is that your buffer will fill and be written for exactly as long as the engine keeps running, so don't stop the engine prematurely (I don't know whether you're doing that, but it's important to keep in mind).
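Applied to the updated play() above, one way to respect that, assuming the .Loops option is dropped so the completion handler actually fires, is to tear down from the completion handler and remove the tap before stopping, so nothing is written mid-teardown. A sketch in the same Swift 2 era API as the question:
player.scheduleBuffer(buffer, atTime: nil, options: [], completionHandler: {
    dispatch_async(dispatch_get_main_queue(), {
        // Remove the tap first so no further buffers are written,
        // then stop playback and the engine.
        self.distortion.removeTapOnBus(0)
        self.player.stop()
        self.engine.stop()
    })
})
Note that the completion handler fires when the player has consumed the buffer, which can be slightly before the last audio has drained through the effect chain, so a short delay before stopping may still be needed.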
