Recording gapless audio with AVAssetWriter - iOS

I'm trying to record segments of audio and recombine them without producing a gap in audio.
The eventual goal is to also have video, but I've found that even audio alone produces gaps when the segments are concatenated with ffmpeg -f concat -i list.txt -c copy out.mp4
If I put the audio in an HLS playlist, there are also gaps, so I don't think this is unique to ffmpeg.
The idea is that samples come in continuously, and my controller routes samples to the proper AVAssetWriter. How do I eliminate gaps in audio?
import Foundation
import UIKit
import AVFoundation
class StreamController: UIViewController, AVCaptureAudioDataOutputSampleBufferDelegate, AVCaptureVideoDataOutputSampleBufferDelegate {
var closingAudioInput: AVAssetWriterInput?
var closingAssetWriter: AVAssetWriter?
var currentAudioInput: AVAssetWriterInput?
var currentAssetWriter: AVAssetWriter?
var nextAudioInput: AVAssetWriterInput?
var nextAssetWriter: AVAssetWriter?
var videoHelper: VideoHelper?
var startTime: NSTimeInterval = 0
let closeAssetQueue: dispatch_queue_t = dispatch_queue_create("closeAssetQueue", nil);
override func viewDidLoad() {
super.viewDidLoad()
startTime = NSDate().timeIntervalSince1970
createSegmentWriter()
videoHelper = VideoHelper()
videoHelper!.delegate = self
videoHelper!.startSession()
NSTimer.scheduledTimerWithTimeInterval(1, target: self, selector: "createSegmentWriter", userInfo: nil, repeats: true)
}
func createSegmentWriter() {
print("Creating segment writer at t=\(NSDate().timeIntervalSince1970 - self.startTime)")
let outputPath = OutputFileNameHelper.instance.pathForOutput()
OutputFileNameHelper.instance.incrementSegmentIndex()
try? NSFileManager.defaultManager().removeItemAtPath(outputPath)
nextAssetWriter = try! AVAssetWriter(URL: NSURL(fileURLWithPath: outputPath), fileType: AVFileTypeMPEG4)
nextAssetWriter!.shouldOptimizeForNetworkUse = true
let audioSettings: [String:AnyObject] = EncodingSettings.AUDIO
nextAudioInput = AVAssetWriterInput(mediaType: AVMediaTypeAudio, outputSettings: audioSettings)
nextAudioInput!.expectsMediaDataInRealTime = true
nextAssetWriter?.addInput(nextAudioInput!)
nextAssetWriter!.startWriting()
}
func closeWriterIfNecessary() {
if closing && audioFinished {
closing = false
audioFinished = false
let outputFile = closingAssetWriter?.outputURL.pathComponents?.last
closingAssetWriter?.finishWritingWithCompletionHandler() {
let delta = NSDate().timeIntervalSince1970 - self.startTime
print("segment \(outputFile!) finished at t=\(delta)")
}
self.closingAudioInput = nil
self.closingAssetWriter = nil
}
}
var audioFinished = false
var closing = false
func captureOutput(captureOutput: AVCaptureOutput!, didOutputSampleBuffer sampleBuffer: CMSampleBufferRef, fromConnection connection: AVCaptureConnection!) {
if let nextWriter = nextAssetWriter {
if nextWriter.status.rawValue != 0 {
if (currentAssetWriter != nil) {
closing = true
}
var sampleTiming: CMSampleTimingInfo = kCMTimingInfoInvalid
CMSampleBufferGetSampleTimingInfo(sampleBuffer, 0, &sampleTiming)
print("Switching asset writers at t=\(NSDate().timeIntervalSince1970 - self.startTime)")
closingAssetWriter = currentAssetWriter
closingAudioInput = currentAudioInput
currentAssetWriter = nextAssetWriter
currentAudioInput = nextAudioInput
nextAssetWriter = nil
nextAudioInput = nil
currentAssetWriter?.startSessionAtSourceTime(sampleTiming.presentationTimeStamp)
}
}
if let _ = captureOutput as? AVCaptureVideoDataOutput {
} else if let _ = captureOutput as? AVCaptureAudioDataOutput {
captureAudioSample(sampleBuffer)
}
dispatch_async(closeAssetQueue) {
self.closeWriterIfNecessary()
}
}
func printTimingInfo(sampleBuffer: CMSampleBufferRef, prefix: String) {
var sampleTiming: CMSampleTimingInfo = kCMTimingInfoInvalid
CMSampleBufferGetSampleTimingInfo(sampleBuffer, 0, &sampleTiming)
let presentationTime = Double(sampleTiming.presentationTimeStamp.value) / Double(sampleTiming.presentationTimeStamp.timescale)
print("\(prefix):\(presentationTime)")
}
func captureAudioSample(sampleBuffer: CMSampleBufferRef) {
printTimingInfo(sampleBuffer, prefix: "A")
if (closing && !audioFinished) {
if closingAudioInput?.readyForMoreMediaData == true {
closingAudioInput?.appendSampleBuffer(sampleBuffer)
}
closingAudioInput?.markAsFinished()
audioFinished = true
} else {
if currentAudioInput?.readyForMoreMediaData == true {
currentAudioInput?.appendSampleBuffer(sampleBuffer)
}
}
}
}

With packet formats like AAC you have silent priming frames (a.k.a. encoder delay) at the beginning and remainder frames at the end (when your audio length is not a multiple of the packet size). In your case there are 2112 of them at the beginning of every file, which at 44.1 kHz is roughly 48 ms of silence per segment. Priming and remainder frames make it impossible to concatenate the files without transcoding them, so you can't really blame ffmpeg -c copy for not producing seamless output.
I'm not sure where this leaves you with video - obviously the audio is synced to the video, even in the presence of priming frames.
It all depends on how you intend to concatenate the final audio (and eventually video). If you're doing it yourself using AVFoundation, then you can detect and account for priming/remainder frames using
CMGetAttachment(buffer, kCMSampleBufferAttachmentKey_TrimDurationAtStart, NULL)
CMGetAttachment(audioBuffer, kCMSampleBufferAttachmentKey_TrimDurationAtEnd, NULL)
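In Swift that might look something like the sketch below (the helper name and the CMTime decoding are mine; it relies on the attachment value, when present, being a CFDictionary representation of a CMTime):
func trimDurations(of sampleBuffer: CMSampleBuffer) -> (start: CMTime, end: CMTime) {
    func attachmentTime(_ key: CFString) -> CMTime {
        guard let value = CMGetAttachment(sampleBuffer, key: key, attachmentModeOut: nil) else {
            return .zero // no priming/remainder attached to this buffer
        }
        return CMTimeMakeFromDictionary((value as! NSDictionary) as CFDictionary)
    }
    return (attachmentTime(kCMSampleBufferAttachmentKey_TrimDurationAtStart),
            attachmentTime(kCMSampleBufferAttachmentKey_TrimDurationAtEnd))
}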
As a short term solution, you can switch to a non-"packetised" format to get gapless, concatenatable (with ffmpeg) files.
e.g.
AVFormatIDKey: kAudioFormatAppleIMA4, fileType: AVFileTypeAIFC, suffix ".aifc" or
AVFormatIDKey: kAudioFormatLinearPCM, fileType: AVFileTypeWAVE, suffix ".wav"
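For the WAV route, a hedged sketch of what createSegmentWriter's setup might become (the settings dictionary stands in for the unshown EncodingSettings.AUDIO, and the specific values are assumptions):
let audioSettings: [String: AnyObject] = [
    AVFormatIDKey: Int(kAudioFormatLinearPCM),
    AVSampleRateKey: 44100,
    AVNumberOfChannelsKey: 1,
    AVLinearPCMBitDepthKey: 16,
    AVLinearPCMIsFloatKey: false,
    AVLinearPCMIsBigEndianKey: false,
    AVLinearPCMIsNonInterleaved: false
]
// outputPath now ends in ".wav" rather than ".mp4"
nextAssetWriter = try! AVAssetWriter(URL: NSURL(fileURLWithPath: outputPath), fileType: AVFileTypeWAVE)
nextAudioInput = AVAssetWriterInput(mediaType: AVMediaTypeAudio, outputSettings: audioSettings)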
p.s. you can see priming & remainder frames and packet sizes using the ubiquitous afinfo tool.
afinfo chunk.mp4
Data format: 2 ch, 44100 Hz, 'aac ' (0x00000000) 0 bits/channel, 0 bytes/packet, 1024 frames/packet, 0 bytes/frame
...
audio 39596 valid frames + 2112 priming + 276 remainder = 41984
...

Not sure if this helps you but if you have a bunch of MP4s you can use this code to combine them:
func mergeAudioFiles(audioFileUrls: NSArray, callback: (url: NSURL?, error: NSError?)->()) {
// Create the audio composition
let composition = AVMutableComposition()
// Merge
for (var i = 0; i < audioFileUrls.count; i++) {
let compositionAudioTrack :AVMutableCompositionTrack = composition.addMutableTrackWithMediaType(AVMediaTypeAudio, preferredTrackID: CMPersistentTrackID())
let asset = AVURLAsset(URL: audioFileUrls[i] as! NSURL)
let track = asset.tracksWithMediaType(AVMediaTypeAudio)[0]
let timeRange = CMTimeRange(start: CMTimeMake(0, 600), duration: track.timeRange.duration)
try! compositionAudioTrack.insertTimeRange(timeRange, ofTrack: track, atTime: composition.duration)
}
// Create output url
let format = NSDateFormatter()
format.dateFormat="yyyy-MM-dd-HH-mm-ss"
let currentFileName = "recording-\(format.stringFromDate(NSDate()))-merge.m4a"
print(currentFileName)
let documentsDirectory = NSFileManager.defaultManager().URLsForDirectory(.DocumentDirectory, inDomains: .UserDomainMask)[0]
let outputUrl = documentsDirectory.URLByAppendingPathComponent(currentFileName)
print(outputUrl.absoluteString)
// Export it
let assetExport = AVAssetExportSession(asset: composition, presetName: AVAssetExportPresetAppleM4A)
assetExport?.outputFileType = AVFileTypeAppleM4A
assetExport?.outputURL = outputUrl
assetExport?.exportAsynchronouslyWithCompletionHandler({ () -> Void in
switch assetExport!.status {
case AVAssetExportSessionStatus.Failed:
callback(url: nil, error: assetExport?.error)
default:
callback(url: assetExport?.outputURL, error: nil)
}
})
}
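Hypothetical usage of the function above (the segment URLs are placeholders):
let segmentUrls: NSArray = [
    NSURL(fileURLWithPath: "/path/to/segment0.m4a"),
    NSURL(fileURLWithPath: "/path/to/segment1.m4a")
]
mergeAudioFiles(segmentUrls) { (url, error) in
    if let error = error {
        print("merge failed: \(error)")
    } else if let url = url {
        print("merged file written to \(url)")
    }
}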

Related

How do you create a new AVAsset video that consists of only frames from given `CMTimeRange`s of another video?

Apple's sample code Identifying Trajectories in Video contains the following delegate callback:
func cameraViewController(_ controller: CameraViewController, didReceiveBuffer buffer: CMSampleBuffer, orientation: CGImagePropertyOrientation) {
let visionHandler = VNImageRequestHandler(cmSampleBuffer: buffer, orientation: orientation, options: [:])
if gameManager.stateMachine.currentState is GameManager.TrackThrowsState {
DispatchQueue.main.async {
// Get the frame of rendered view
let normalizedFrame = CGRect(x: 0, y: 0, width: 1, height: 1)
self.jointSegmentView.frame = controller.viewRectForVisionRect(normalizedFrame)
self.trajectoryView.frame = controller.viewRectForVisionRect(normalizedFrame)
}
// Perform the trajectory request in a separate dispatch queue.
trajectoryQueue.async {
do {
try visionHandler.perform([self.detectTrajectoryRequest])
if let results = self.detectTrajectoryRequest.results {
DispatchQueue.main.async {
self.processTrajectoryObservations(controller, results)
}
}
} catch {
AppError.display(error, inViewController: self)
}
}
}
}
However, instead of drawing UI whenever detectTrajectoryRequest.results exist (https://developer.apple.com/documentation/vision/vndetecttrajectoriesrequest/3675672-results), I'm interested in using the CMTimeRange provided by each result to construct a new video. In effect, this would filter down the original video to only frames with trajectories.
What would be a good approach to transferring only frames with trajectories from an AVAssetReader to an AVAssetWriter?
By the time you identify a trajectory in captured video frames, or in frames decoded from a file, you may no longer have the initial frames in memory, so the easiest way to create your file containing only trajectories is to keep the original file on hand and then insert its trajectory snippets into an AVMutableComposition, which you then export using AVAssetExportSession.
This sample captures frames from the camera, encodes them to a file while analysing them for trajectories, and after 20 seconds it closes that file and creates a new file containing only the trajectory snippets.
If you're interested in detecting trajectories in a pre-existing file, it's not too hard to rewire this code; there's a sketch of that after the code below.
import UIKit
import AVFoundation
import Vision
class ViewController: UIViewController, AVCaptureVideoDataOutputSampleBufferDelegate {
let session = AVCaptureSession()
var assetWriter: AVAssetWriter!
var assetWriterInput: AVAssetWriterInput!
var assetWriterStartTime: CMTime = .zero
var assetWriterStarted = false
var referenceFileURL: URL!
var timeRangesOfInterest: [Double : CMTimeRange] = [:]
func startWritingFile(outputURL: URL, initialSampleBuffer: CMSampleBuffer) {
try? FileManager.default.removeItem(at: outputURL)
assetWriter = try! AVAssetWriter(outputURL: outputURL, fileType: .mov)
let dimensions = initialSampleBuffer.formatDescription!.dimensions
assetWriterInput = AVAssetWriterInput(mediaType: .video, outputSettings: [AVVideoCodecKey: AVVideoCodecType.h264, AVVideoWidthKey: dimensions.width, AVVideoHeightKey: dimensions.height])
assetWriter.add(assetWriterInput)
assetWriter.startWriting()
self.assetWriterStartTime = CMSampleBufferGetPresentationTimeStamp(initialSampleBuffer)
assetWriter.startSession(atSourceTime: self.assetWriterStartTime)
}
func stopWritingFile(completion: @escaping (() -> Void)) {
let assetWriterToFinish = self.assetWriter!
self.assetWriterInput = nil
self.assetWriter = nil
assetWriterToFinish.finishWriting {
print("finished writing: \(assetWriterToFinish.status.rawValue)")
completion()
}
}
func exportVideoTimeRanges(inputFileURL: URL, outputFileURL: URL, timeRanges: [CMTimeRange]) {
let inputAsset = AVURLAsset(url: inputFileURL)
let inputVideoTrack = inputAsset.tracks(withMediaType: .video).first!
let composition = AVMutableComposition()
let compositionTrack = composition.addMutableTrack(withMediaType: .video, preferredTrackID: kCMPersistentTrackID_Invalid)!
var insertionPoint: CMTime = .zero
for timeRange in timeRanges {
try! compositionTrack.insertTimeRange(timeRange, of: inputVideoTrack, at: insertionPoint)
insertionPoint = insertionPoint + timeRange.duration
}
let exportSession = AVAssetExportSession(asset: composition, presetName: AVAssetExportPresetHighestQuality)!
try? FileManager.default.removeItem(at: outputFileURL)
exportSession.outputURL = outputFileURL
exportSession.outputFileType = .mov
exportSession.exportAsynchronously {
print("export finished: \(exportSession.status.rawValue) - \(exportSession.error)")
}
}
override func viewDidLoad() {
super.viewDidLoad()
let inputDevice = AVCaptureDevice.default(for: .video)!
let input = try! AVCaptureDeviceInput(device: inputDevice)
let output = AVCaptureVideoDataOutput()
output.setSampleBufferDelegate(self, queue: DispatchQueue.main)
session.addInput(input)
session.addOutput(output)
session.startRunning()
DispatchQueue.main.asyncAfter(deadline: .now() + 20) {
self.stopWritingFile {
print("finished writing")
let trajectoriesFileURL = FileManager.default.urls(for: .documentDirectory, in: .userDomainMask)[0] .appendingPathComponent("trajectories.mov")
self.exportVideoTimeRanges(inputFileURL: self.referenceFileURL, outputFileURL: trajectoriesFileURL, timeRanges: self.timeRangesOfInterest.map { $0.1 })
}
}
}
// Lazily create a single instance of VNDetectTrajectoriesRequest.
private lazy var request: VNDetectTrajectoriesRequest = {
return VNDetectTrajectoriesRequest(frameAnalysisSpacing: .zero,
trajectoryLength: 10,
completionHandler: completionHandler)
}()
// AVCaptureVideoDataOutputSampleBufferDelegate callback.
func captureOutput(_ output: AVCaptureOutput,
didOutput sampleBuffer: CMSampleBuffer,
from connection: AVCaptureConnection) {
if !assetWriterStarted {
self.referenceFileURL = FileManager.default.urls(for: .documentDirectory, in: .userDomainMask)[0] .appendingPathComponent("reference.mov")
startWritingFile(outputURL: self.referenceFileURL, initialSampleBuffer: sampleBuffer)
assetWriterStarted = true
}
if assetWriterInput != nil && assetWriterInput.isReadyForMoreMediaData {
assetWriterInput.append(sampleBuffer)
}
do {
let requestHandler = VNImageRequestHandler(cmSampleBuffer: sampleBuffer)
try requestHandler.perform([request])
} catch {
// Handle the error.
}
}
func completionHandler(request: VNRequest, error: Error?) {
guard let request = request as? VNDetectTrajectoriesRequest else { return }
if let results = request.results,
results.count > 0 {
NSLog("\(results)")
for result in results {
var fileRelativeTimeRange = result.timeRange
fileRelativeTimeRange.start = fileRelativeTimeRange.start - self.assetWriterStartTime
self.timeRangesOfInterest[fileRelativeTimeRange.start.seconds] = fileRelativeTimeRange
}
}
}
}
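For the pre-existing-file case mentioned above, a rough sketch of the rewiring (assumptions: you pass in the file URL plus the same VNDetectTrajectoriesRequest, and its completionHandler keeps collecting time ranges exactly as before):
import AVFoundation
import Vision

func detectTrajectories(inFileAt url: URL, using request: VNDetectTrajectoriesRequest) throws {
    let asset = AVURLAsset(url: url)
    guard let videoTrack = asset.tracks(withMediaType: .video).first else { return }
    let reader = try AVAssetReader(asset: asset)
    // Decode to pixel buffers so Vision can read each frame.
    let output = AVAssetReaderTrackOutput(
        track: videoTrack,
        outputSettings: [kCVPixelBufferPixelFormatTypeKey as String: kCVPixelFormatType_32BGRA])
    reader.add(output)
    reader.startReading()
    while let sampleBuffer = output.copyNextSampleBuffer() {
        // The sample buffer carries the presentation timestamp Vision needs
        // to build each observation's timeRange.
        let handler = VNImageRequestHandler(cmSampleBuffer: sampleBuffer, orientation: .up, options: [:])
        try handler.perform([request])
    }
}
Because the timestamps now come from the file itself, the collected time ranges should already be file-relative, so the assetWriterStartTime adjustment from the capture version shouldn't be needed before handing them to exportVideoTimeRanges.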

A way to crop AudioFile while recording?

I'm writing a first in first out recording app that buffers up to 2.5 mins of audio using AudioQueue. I've got most of it figured out but I'm at a roadblock trying to crop audio data.
I've seen people do it with AVAssetExportSession but it seems like it wouldn't be performant to export a new track every time the AudioQueueInputCallback is called.
I'm not married to using AVAssestExportSession by any means if anyone has a better idea.
Here's where I'm doing my write and was hoping to execute the crop.
var beforeSeconds = TimeInterval() // find the current estimated duration (not reliable)
var propertySize = UInt32(MemoryLayout.size(ofValue: beforeSeconds))
var osStatus = AudioFileGetProperty(audioRecorder.recordFile!, kAudioFilePropertyEstimatedDuration, &propertySize, &beforeSeconds)
if numPackets > 0 {
AudioFileWritePackets(audioRecorder.recordFile!, // write to disk
false,
buffer.mAudioDataByteSize,
packetDescriptions,
audioRecorder.recordPacket,
&numPackets,
buffer.mAudioData)
audioRecorder.recordPacket += Int64(numPackets) // up the packet index
var afterSeconds = TimeInterval() // find the after write estimated duration (not reliable)
var propertySize = UInt32(MemoryLayout.size(ofValue: afterSeconds))
var osStatus = AudioFileGetProperty(audioRecorder.recordFile!, kAudioFilePropertyEstimatedDuration, &propertySize, &afterSeconds)
assert(osStatus == noErr, "couldn't get record time")
if afterSeconds >= 150.0 {
print("hit max buffer!")
audioRecorder.onBufferMax?(afterSeconds - beforeSeconds)
}
}
Here's where the callback is executed
func onBufferMax(_ difference: Double){
let asset = AVAsset(url: tempFilePath)
let duration = CMTimeGetSeconds(asset.duration)
guard duration >= 150.0 else { return }
guard let exporter = AVAssetExportSession(asset: asset, presetName: AVAssetExportPresetAppleM4A) else {
print("exporter init failed")
return }
exporter.outputURL = getDocumentsDirectory().appendingPathComponent("buffered.caf") // helper function that calls the FileManager
exporter.outputFileType = AVFileTypeAppleM4A
let startTime = CMTimeMake(Int64(difference), 1)
let endTime = CMTimeMake(Int64(WYNDRConstants.maxTimeInterval + difference), 1)
exporter.timeRange = CMTimeRangeFromTimeToTime(startTime, endTime)
exporter.exportAsynchronously(completionHandler: {
switch exporter.status {
case .failed:
print("failed to export")
case .cancelled:
print("canceled export")
default:
print("export successful")
}
})
}
A ring buffer is a useful structure for storing, either in memory or on disk, the most recent n seconds of audio. Here is a simple solution that stores the audio in memory, presented in the traditional UIViewController format.
N.B. 2.5 minutes of 44.1 kHz audio stored as floats requires about 26 MB of RAM, which is on the heavy side for a mobile device.
import AVFoundation
class ViewController: UIViewController {
let engine = AVAudioEngine()
var requiredSamples: AVAudioFrameCount = 0
var ringBuffer: [AVAudioPCMBuffer] = []
var ringBufferSizeInSamples: AVAudioFrameCount = 0
func startRecording() {
let input = engine.inputNode!
let bus = 0
let inputFormat = input.inputFormat(forBus: bus)
requiredSamples = AVAudioFrameCount(inputFormat.sampleRate * 2.5 * 60)
input.installTap(onBus: bus, bufferSize: 512, format: inputFormat) { (buffer, time) -> Void in
self.appendAudioBuffer(buffer)
}
try! engine.start()
}
func appendAudioBuffer(_ buffer: AVAudioPCMBuffer) {
ringBuffer.append(buffer)
ringBufferSizeInSamples += buffer.frameLength
// throw away old buffers if ring buffer gets too large
if let firstBuffer = ringBuffer.first {
if ringBufferSizeInSamples - firstBuffer.frameLength >= requiredSamples {
ringBuffer.remove(at: 0)
ringBufferSizeInSamples -= firstBuffer.frameLength
}
}
}
func stopRecording() {
engine.stop()
let url = FileManager.default.urls(for: .documentDirectory, in: .userDomainMask).first!.appendingPathComponent("foo.m4a")
let settings: [String : Any] = [AVFormatIDKey: Int(kAudioFormatMPEG4AAC)]
// write ring buffer to file.
let file = try! AVAudioFile(forWriting: url, settings: settings)
for buffer in ringBuffer {
try! file.write(from: buffer)
}
}
override func viewDidLoad() {
super.viewDidLoad()
// example usage
startRecording()
DispatchQueue.main.asyncAfter(deadline: .now() + 4*60) {
print("stopping")
self.stopRecording()
}
}
}
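One thing the sketch above takes for granted: before the input node delivers microphone audio, the app needs record permission and an active audio session (plus NSMicrophoneUsageDescription in Info.plist). A minimal, hedged setup, with the category choice being an assumption:
import AVFoundation

func prepareAudioSession(completion: @escaping (Bool) -> Void) {
    let session = AVAudioSession.sharedInstance()
    do {
        try session.setCategory(.playAndRecord, mode: .default, options: [])
        try session.setActive(true)
    } catch {
        completion(false)
        return
    }
    // Asks the user for microphone access the first time it's called.
    session.requestRecordPermission { granted in
        completion(granted)
    }
}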

Is there any way to make an iOS device play a sound signal from a buffer continuously in Swift?

I am working with AVFoundation in Swift to emit an ultrasonic sine wave, and my current approach is to play a .wav file. I wonder if there is an approach that plays the sound continuously instead of relying on an extra .wav file.
Here is my code so far, though I don't expect the new approach to look much like it:
let myThread = Thread(target: self,
selector: #selector(ZouViewController.play()),
object: nil)
myThread.start()
[...]
func play(){
//rewrite soom
let fileName = Bundle.main.path(forResource: "19kHz", ofType: "wav")
let url = URL(fileURLWithPath: fileName!)
soundPlayer = try? AVAudioPlayer(contentsOf: url)
while true{
soundPlayer?.play()
}
}
The file 19kHz.wav contains a 19 kHz sine wave, but it has a fixed duration, so there is an audible discontinuity at the start of the signal every time the loop restarts. I therefore want to abandon that approach and play the data continuously from a buffer instead. Is there any way to play a sound signal from a buffer?
I have addressed it with the following code:
import Foundation
import AVFoundation
class PlaySineWave{
var audioEngine = AVAudioEngine()
var audioFormat : AVAudioFormat
let FL: AVAudioFrameCount = 44100
let freq : Float = 19000 //19kHz
var pcmBuffer : AVAudioPCMBuffer
init() {
self.audioFormat = AVAudioFormat(standardFormatWithSampleRate: 44100.0, channels: 1)
self.pcmBuffer = AVAudioPCMBuffer(pcmFormat: audioFormat,
frameCapacity:AVAudioFrameCount(FL))
self.pcmBuffer.frameLength = AVAudioFrameCount(FL)
}
func play(){
let floatData = self.pcmBuffer.floatChannelData!.pointee
let step = 2 * Float.pi/Float(FL)
for i in 0 ..< Int(FL) {
floatData[i] = 0.3 * sinf(freq * Float(i) * step)
}
let playerNode = AVAudioPlayerNode()
self.audioEngine.attach(playerNode)
audioEngine.connect(playerNode, to: audioEngine.mainMixerNode,format: pcmBuffer.format)
do {
try audioEngine.start()
} catch let err as NSError {
print("Oh, no! \(err.code) \(err.domain)")
}
playerNode.play()
playerNode.scheduleBuffer(pcmBuffer, at:nil, options: [.loops]) { }
//audioEngine.stop()
}
}
After defining the class, I call it in the ViewController like this:
override func viewDidLoad() {
[...]
let myThread =
Thread(target:self,selector:#selector(SpectralViewController.play),
object:nil)
myThread.start()
}
[...]
func play(){
var Player = PlaySineWave()
Player.play()
}

iOS Swift convert mp3 to aac

I'm converting an mp3 to m4a in Swift with code based on this.
It works when I generate a PCM file. When I change the export format to m4a it generates a file but it won't play. Why is it corrupt?
Here is the code so far:
import AVFoundation
import UIKit
class ViewController: UIViewController {
var rwAudioSerializationQueue:dispatch_queue_t!
var asset:AVAsset!
var assetReader:AVAssetReader!
var assetReaderAudioOutput:AVAssetReaderTrackOutput!
var assetWriter:AVAssetWriter!
var assetWriterAudioInput:AVAssetWriterInput!
var outputURL:NSURL!
override func viewDidLoad() {
super.viewDidLoad()
let rwAudioSerializationQueueDescription = String(self) + " rw audio serialization queue"
// Create the serialization queue to use for reading and writing the audio data.
self.rwAudioSerializationQueue = dispatch_queue_create(rwAudioSerializationQueueDescription, nil)
let paths = NSSearchPathForDirectoriesInDomains(.DocumentDirectory, .UserDomainMask, true)
let documentsPath = paths[0]
print(NSBundle.mainBundle().pathForResource("input", ofType: "mp3"))
self.asset = AVAsset(URL: NSURL(fileURLWithPath: NSBundle.mainBundle().pathForResource("input", ofType: "mp3")! ))
self.outputURL = NSURL(fileURLWithPath: documentsPath + "/output.m4a")
print(self.outputURL)
// [self.asset loadValuesAsynchronouslyForKeys:#[#"tracks"] completionHandler:^{
self.asset.loadValuesAsynchronouslyForKeys(["tracks"], completionHandler: {
print("loaded")
var success = true
var localError:NSError?
success = (self.asset.statusOfValueForKey("tracks", error: &localError) == AVKeyValueStatus.Loaded)
// Check for success of loading the assets tracks.
//success = ([self.asset statusOfValueForKey:#"tracks" error:&localError] == AVKeyValueStatusLoaded);
if (success)
{
// If the tracks loaded successfully, make sure that no file exists at the output path for the asset writer.
let fm = NSFileManager.defaultManager()
let localOutputPath = self.outputURL.path
if (fm.fileExistsAtPath(localOutputPath!)) {
do {
try fm.removeItemAtPath(localOutputPath!)
success = true
} catch {
}
}
}
if (success) {
success = self.setupAssetReaderAndAssetWriter()
}
if (success) {
success = self.startAssetReaderAndWriter()
}
})
}
func setupAssetReaderAndAssetWriter() -> Bool {
do {
try self.assetReader = AVAssetReader(asset: self.asset)
} catch {
}
do {
try self.assetWriter = AVAssetWriter(URL: self.outputURL, fileType: AVFileTypeCoreAudioFormat)
} catch {
}
var assetAudioTrack:AVAssetTrack? = nil
let audioTracks = self.asset.tracksWithMediaType(AVMediaTypeAudio)
if (audioTracks.count > 0) {
assetAudioTrack = audioTracks[0]
}
if (assetAudioTrack != nil)
{
let decompressionAudioSettings:[String : AnyObject] = [
AVFormatIDKey:Int(kAudioFormatLinearPCM)
]
self.assetReaderAudioOutput = AVAssetReaderTrackOutput(track: assetAudioTrack!, outputSettings: decompressionAudioSettings)
self.assetReader.addOutput(self.assetReaderAudioOutput)
var channelLayout = AudioChannelLayout()
memset(&channelLayout, 0, sizeof(AudioChannelLayout));
channelLayout.mChannelLayoutTag = kAudioChannelLayoutTag_Stereo;
/*let compressionAudioSettings:[String : AnyObject] = [
AVFormatIDKey:Int(kAudioFormatMPEG4AAC) ,
AVEncoderBitRateKey:128000,
AVSampleRateKey:44100 ,
// AVEncoderBitRatePerChannelKey:16,
// AVEncoderAudioQualityKey:AVAudioQuality.High.rawValue,
AVNumberOfChannelsKey:2,
AVChannelLayoutKey: NSData(bytes:&channelLayout, length:sizeof(AudioChannelLayout))
]
var outputSettings:[String : AnyObject] = [
AVFormatIDKey: Int(kAudioFormatLinearPCM),
AVSampleRateKey: 44100,
AVNumberOfChannelsKey: 2,
AVChannelLayoutKey: NSData(bytes:&channelLayout, length:sizeof(AudioChannelLayout)),
AVLinearPCMBitDepthKey: 16,
AVLinearPCMIsNonInterleaved: false,
AVLinearPCMIsFloatKey: false,
AVLinearPCMIsBigEndianKey: false
]*/
let outputSettings:[String : AnyObject] = [
AVFormatIDKey: Int(kAudioFormatMPEG4AAC),
AVSampleRateKey: 44100,
AVNumberOfChannelsKey: 2,
AVChannelLayoutKey: NSData(bytes:&channelLayout, length:sizeof(AudioChannelLayout)) ]
self.assetWriterAudioInput = AVAssetWriterInput(mediaType: AVMediaTypeAudio, outputSettings: outputSettings)
self.assetWriter.addInput(self.assetWriterAudioInput)
}
return true
}
func startAssetReaderAndWriter() -> Bool {
self.assetWriter.startWriting()
self.assetReader.startReading()
self.assetWriter.startSessionAtSourceTime(kCMTimeZero)
self.assetWriterAudioInput.requestMediaDataWhenReadyOnQueue(self.rwAudioSerializationQueue, usingBlock: {
while (self.assetWriterAudioInput.readyForMoreMediaData ) {
var sampleBuffer = self.assetReaderAudioOutput.copyNextSampleBuffer()
if (sampleBuffer != nil) {
self.assetWriterAudioInput.appendSampleBuffer(sampleBuffer!)
sampleBuffer = nil
} else {
self.assetWriterAudioInput.markAsFinished()
self.assetReader.cancelReading()
print("done")
break
}
}
})
return true
}
}
Updated the source code in the question to Swift 4 and wrapped it in a class. Credit goes to Castles and Rythmic Fistman for the original source code and answer. I left the author's comments and added a few assertions and print statements for debugging. Tested on iOS.
The bit rate for the output file is hardcoded at 96 kb/s (the AVEncoderBitRateKey value in outputSettings), but you can easily override this value. Most of the audio files I'm converting are 320 kb/s, so I'm using this class to compress them for offline storage. Compression results are at the bottom of this answer.
Usage:
let inputFilePath = URL(fileURLWithPath: "/path/to/file.mp3")
let outputFileURL = URL(fileURLWithPath: "/path/to/output/compressed.mp4")
if let audioConverter = AVAudioFileConverter(inputFileURL: inputFilePath, outputFileURL: outputFileURL) {
audioConverter.convert()
}
Class
import AVFoundation
final class AVAudioFileConverter {
var rwAudioSerializationQueue: DispatchQueue!
var asset:AVAsset!
var assetReader:AVAssetReader!
var assetReaderAudioOutput:AVAssetReaderTrackOutput!
var assetWriter:AVAssetWriter!
var assetWriterAudioInput:AVAssetWriterInput!
var outputURL:URL
var inputURL:URL
init?(inputFileURL: URL, outputFileURL: URL) {
inputURL = inputFileURL
outputURL = outputFileURL
if !FileManager.default.fileExists(atPath: inputURL.path) {
print("Input file does not exist at file path \(inputURL.path)")
return nil
}
}
func convert() {
let rwAudioSerializationQueueDescription = " rw audio serialization queue"
// Create the serialization queue to use for reading and writing the audio data.
rwAudioSerializationQueue = DispatchQueue(label: rwAudioSerializationQueueDescription)
assert(rwAudioSerializationQueue != nil, "Failed to initialize Dispatch Queue")
asset = AVAsset(url: inputURL)
assert(asset != nil, "Error creating AVAsset from input URL")
print("Output file path -> ", outputURL.absoluteString)
asset.loadValuesAsynchronously(forKeys: ["tracks"], completionHandler: {
var success = true
var localError:NSError?
success = (self.asset.statusOfValue(forKey: "tracks", error: &localError) == AVKeyValueStatus.loaded)
// Check for success of loading the assets tracks.
if (success) {
// If the tracks loaded successfully, make sure that no file exists at the output path for the asset writer.
let fm = FileManager.default
let localOutputPath = self.outputURL.path
if (fm.fileExists(atPath: localOutputPath)) {
do {
try fm.removeItem(atPath: localOutputPath)
success = true
} catch {
print("Error trying to remove output file at path -> \(localOutputPath)")
}
}
}
if (success) {
success = self.setupAssetReaderAndAssetWriter()
} else {
print("Failed setting up Asset Reader and Writer")
}
if (success) {
success = self.startAssetReaderAndWriter()
return
} else {
print("Failed to start Asset Reader and Writer")
}
})
}
func setupAssetReaderAndAssetWriter() -> Bool {
do {
assetReader = try AVAssetReader(asset: asset)
} catch {
print("Error Creating AVAssetReader")
}
do {
assetWriter = try AVAssetWriter(outputURL: outputURL, fileType: AVFileType.m4a)
} catch {
print("Error Creating AVAssetWriter")
}
var assetAudioTrack:AVAssetTrack? = nil
let audioTracks = asset.tracks(withMediaType: AVMediaType.audio)
if (audioTracks.count > 0) {
assetAudioTrack = audioTracks[0]
}
if (assetAudioTrack != nil) {
let decompressionAudioSettings:[String : Any] = [
AVFormatIDKey:Int(kAudioFormatLinearPCM)
]
assetReaderAudioOutput = AVAssetReaderTrackOutput(track: assetAudioTrack!, outputSettings: decompressionAudioSettings)
assert(assetReaderAudioOutput != nil, "Failed to initialize AVAssetReaderTrackOutout")
assetReader.add(assetReaderAudioOutput)
var channelLayout = AudioChannelLayout()
memset(&channelLayout, 0, MemoryLayout<AudioChannelLayout>.size);
channelLayout.mChannelLayoutTag = kAudioChannelLayoutTag_Stereo;
let outputSettings:[String : Any] = [
AVFormatIDKey: Int(kAudioFormatMPEG4AAC),
AVSampleRateKey: 44100,
AVEncoderBitRateKey: 96000,
AVNumberOfChannelsKey: 2,
AVChannelLayoutKey: NSData(bytes:&channelLayout, length:MemoryLayout<AudioChannelLayout>.size)]
assetWriterAudioInput = AVAssetWriterInput(mediaType: AVMediaType.audio, outputSettings: outputSettings)
assert(assetWriterAudioInput != nil, "Failed to initialize AVAssetWriterInput")
assetWriter.add(assetWriterAudioInput)
}
print("Finsihed Setup of AVAssetReader and AVAssetWriter")
return true
}
func startAssetReaderAndWriter() -> Bool {
print("STARTING ASSET WRITER")
assetWriter.startWriting()
assetReader.startReading()
assetWriter.startSession(atSourceTime: kCMTimeZero)
assetWriterAudioInput.requestMediaDataWhenReady(on: rwAudioSerializationQueue, using: {
while(self.assetWriterAudioInput.isReadyForMoreMediaData ) {
var sampleBuffer = self.assetReaderAudioOutput.copyNextSampleBuffer()
if(sampleBuffer != nil) {
self.assetWriterAudioInput.append(sampleBuffer!)
sampleBuffer = nil
} else {
self.assetWriterAudioInput.markAsFinished()
self.assetReader.cancelReading()
self.assetWriter.finishWriting {
print("Asset Writer Finished Writing")
}
break
}
}
})
return true
}
}
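To override the hardcoded 96 kb/s mentioned above, change AVEncoderBitRateKey in the outputSettings built inside setupAssetReaderAndAssetWriter; for example, for 128 kb/s (the value here is arbitrary):
var channelLayout = AudioChannelLayout()
memset(&channelLayout, 0, MemoryLayout<AudioChannelLayout>.size)
channelLayout.mChannelLayoutTag = kAudioChannelLayoutTag_Stereo
let outputSettings: [String : Any] = [
    AVFormatIDKey: Int(kAudioFormatMPEG4AAC),
    AVSampleRateKey: 44100,
    AVEncoderBitRateKey: 128000,   // was 96000
    AVNumberOfChannelsKey: 2,
    AVChannelLayoutKey: NSData(bytes: &channelLayout, length: MemoryLayout<AudioChannelLayout>.size)]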
Input File: 17.3 MB
// generated with afinfo on mac
File: D290A73C37B777F1.mp3
File type ID: MPG3
Num Tracks: 1
----
Data format: 2 ch, 44100 Hz, '.mp3' (0x00000000) 0 bits/channel, 0 bytes/packet, 1152 frames/packet, 0 bytes/frame
no channel layout.
estimated duration: 424.542025 sec
audio bytes: 16981681
audio packets: 16252
bit rate: 320000 bits per second
packet size upper bound: 1052
maximum packet size: 1045
audio data file offset: 322431
optimized
audio 18720450 valid frames + 576 priming + 1278 remainder = 18722304
----
Output File: 5.1 MB
// generated with afinfo on Mac
File: compressed.m4a
File type ID: m4af
Num Tracks: 1
----
Data format: 2 ch, 44100 Hz, 'aac ' (0x00000000) 0 bits/channel, 0 bytes/packet, 1024 frames/packet, 0 bytes/frame
Channel layout: Stereo (L R)
estimated duration: 424.542041 sec
audio bytes: 5019294
audio packets: 18286
bit rate: 94569 bits per second
packet size upper bound: 763
maximum packet size: 763
audio data file offset: 44
not optimized
audio 18722304 valid frames + 2112 priming + 448 remainder = 18724864
format list:
[ 0] format: 2 ch, 44100 Hz, 'aac ' (0x00000000) 0 bits/channel, 0 bytes/packet, 1024 frames/packet, 0 bytes/frame
Channel layout: Stereo (L R)
----
Update
You're creating a caf file instead of an m4a.
Replace AVFileTypeCoreAudioFormat with AVFileTypeAppleM4A in
AVAssetWriter(URL: self.outputURL, fileType: AVFileTypeCoreAudioFormat)
Call self.assetWriter.finishWritingWithCompletionHandler() when you've finished.
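Applied to the question's Swift 2 code, the two changes look roughly like this (a sketch, not a complete diff):
// in setupAssetReaderAndAssetWriter()
self.assetWriter = try AVAssetWriter(URL: self.outputURL, fileType: AVFileTypeAppleM4A)

// in startAssetReaderAndWriter(), once copyNextSampleBuffer() returns nil
self.assetWriterAudioInput.markAsFinished()
self.assetReader.cancelReading()
self.assetWriter.finishWritingWithCompletionHandler {
    print("finished writing m4a")
}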

iOS Determine Number of Frames in Video

If I have an MPMoviePlayerController in Swift:
let mp = MPMoviePlayerController(contentURL: url)
Is there a way I can get the number of frames within the video located at url? If not, is there some other way to determine the frame count?
I don't think MPMoviePlayerController can help you.
Use an AVAssetReader and count the number of CMSampleBuffers it returns to you. You can configure it to not even decode the frames, effectively parsing the file, so it should be fast and memory efficient.
Something like
var asset = AVURLAsset(URL: url, options: nil)
var reader = AVAssetReader(asset: asset, error: nil)
var videoTrack = asset.tracksWithMediaType(AVMediaTypeVideo)[0] as! AVAssetTrack
var readerOutput = AVAssetReaderTrackOutput(track: videoTrack, outputSettings: nil) // NB: nil, should give you raw frames
reader.addOutput(readerOutput)
reader.startReading()
var nFrames = 0
while true {
var sampleBuffer = readerOutput.copyNextSampleBuffer()
if sampleBuffer == nil {
break
}
nFrames++
}
println("Num frames: \(nFrames)")
Sorry if that's not idiomatic, I don't know Swift.
Swift 5
func getNumberOfFrames(url: URL) -> Int {
let asset = AVURLAsset(url: url, options: nil)
do {
let reader = try AVAssetReader(asset: asset)
//AVAssetReader(asset: asset, error: nil)
let videoTrack = asset.tracks(withMediaType: AVMediaType.video)[0]
let readerOutput = AVAssetReaderTrackOutput(track: videoTrack, outputSettings: nil) // NB: nil, should give you raw frames
reader.add(readerOutput)
reader.startReading()
var nFrames = 0
while true {
let sampleBuffer = readerOutput.copyNextSampleBuffer()
if sampleBuffer == nil {
break
}
nFrames = nFrames+1
}
print("Num frames: \(nFrames)")
return nFrames
}catch {
print("Error: \(error)")
}
return 0
}
You could also use frames per second to calculate total frames.
var player: AVPlayer?
var playerController = AVPlayerViewController()
var videoFPS: Int = 0
var totalFrames: Int?
// AVPlayerViewController requires import AVKit.
guard let videoURL = URL(string: "") else { return } // supply your video URL here
player = AVPlayer(url: videoURL)
playerController.player = player
guard let asset = player?.currentItem?.asset else {
return
}
let tracks = asset.tracks(withMediaType: .video)
guard let fps = tracks.first?.nominalFrameRate,
let duration = player?.currentItem?.duration else { return }
videoFPS = lround(Double(fps))
// nominalFrameRate * duration is an estimate; it is exact only for constant-frame-rate video.
totalFrames = lround(Double(videoFPS) * CMTimeGetSeconds(duration))
