Real-time AVAssetWriter: synchronise audio and video when pausing/resuming - iOS

I am trying to record a video with sound using the iPhone's front camera. Since I also need to support pause/resume functionality, I need to use AVAssetWriter. I found an example online, written in Objective-C, which almost achieves the desired functionality (http://www.gdcl.co.uk/2013/02/20/iPhone-Pause.html).
Unfortunately, after converting this example to Swift, I notice that if I pause and resume, there is a small but noticeable period at the end of each "section" during which the video is just a still frame while the audio keeps playing. So it seems that when isPaused is triggered, the recorded audio track ends up longer than the recorded video track.
Sorry if this seems like a noob question, but I am not an expert in AVFoundation and some help would be appreciated!
Below is my implementation of captureOutput(_:didOutput:from:).
func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
var isVideo = true
if videoConntection != connection {
isVideo = false
}
if (!isCapturing || isPaused) {
return
}
if (encoder == nil) {
if isVideo {
return
}
if let fmt = CMSampleBufferGetFormatDescription(sampleBuffer) {
let desc = CMAudioFormatDescriptionGetStreamBasicDescription(fmt as CMAudioFormatDescription)
if let chan = desc?.pointee.mChannelsPerFrame, let rate = desc?.pointee.mSampleRate {
let path = tempPath()!
encoder = VideoEncoder(path: path, height: Int(cameraSize.height), width: Int(cameraSize.width), channels: chan, rate: rate)
}
}
}
if discont {
if isVideo {
return
}
discont = false
var pts = CMSampleBufferGetPresentationTimeStamp(sampleBuffer)
let last = lastAudio
if last.flags.contains(CMTimeFlags.valid) {
if cmOffset.flags.contains(CMTimeFlags.valid) {
pts = CMTimeSubtract(pts, cmOffset)
}
let off = CMTimeSubtract(pts, last)
print("setting offset from \(isVideo ? "video":"audio")")
print("adding \(CMTimeGetSeconds(off)) to \(CMTimeGetSeconds(cmOffset)) (pts \(CMTimeGetSeconds(cmOffset)))")
if cmOffset.value == 0 {
cmOffset = off
}
else {
cmOffset = CMTimeAdd(cmOffset, off)
}
}
lastVideo.flags = []
lastAudio.flags = []
return
}
var out:CMSampleBuffer?
if cmOffset.value > 0 {
var count:CMItemCount = CMSampleBufferGetNumSamples(sampleBuffer)
let pInfo = UnsafeMutablePointer<CMSampleTimingInfo>.allocate(capacity: count)
defer { pInfo.deallocate() } // free the timing array; the retimed copy keeps its own timing info
CMSampleBufferGetSampleTimingInfoArray(sampleBuffer, entryCount: count, arrayToFill: pInfo, entriesNeededOut: &count)
var i = 0
while i<count {
pInfo[i].decodeTimeStamp = CMTimeSubtract(pInfo[i].decodeTimeStamp, cmOffset)
pInfo[i].presentationTimeStamp = CMTimeSubtract(pInfo[i].presentationTimeStamp, cmOffset)
i+=1
}
CMSampleBufferCreateCopyWithNewTiming(allocator: nil, sampleBuffer: sampleBuffer, sampleTimingEntryCount: count, sampleTimingArray: pInfo, sampleBufferOut: &out)
}
else {
out = sampleBuffer
}
var pts = CMSampleBufferGetPresentationTimeStamp(out!)
let dur = CMSampleBufferGetDuration(out!)
if (dur.value > 0)
{
pts = CMTimeAdd(pts, dur);
}
if (isVideo) {
lastVideo = pts;
}
else {
lastAudio = pts;
}
encoder?.encodeFrame(sampleBuffer: out!, isVideo: isVideo)
}
And this is my VideoEncoder class:
final class VideoEncoder {
var writer:AVAssetWriter
var videoInput:AVAssetWriterInput
var audioInput:AVAssetWriterInput
var path:String
init(path:String, height:Int, width:Int, channels:UInt32, rate:Float64) {
self.path = path
if FileManager.default.fileExists(atPath:path) {
try? FileManager.default.removeItem(atPath: path)
}
let url = URL(fileURLWithPath: path)
writer = try! AVAssetWriter(outputURL: url, fileType: .mp4)
videoInput = AVAssetWriterInput(mediaType: .video, outputSettings: [
AVVideoCodecKey: AVVideoCodecType.h264,
AVVideoWidthKey:height,
AVVideoHeightKey:width
])
videoInput.expectsMediaDataInRealTime = true
writer.add(videoInput)
audioInput = AVAssetWriterInput(mediaType: .audio, outputSettings: [
AVFormatIDKey:kAudioFormatMPEG4AAC,
AVNumberOfChannelsKey:channels,
AVSampleRateKey:rate
])
audioInput.expectsMediaDataInRealTime = true
writer.add(audioInput)
}
func finish(with completionHandler: @escaping () -> Void) {
writer.finishWriting(completionHandler: completionHandler)
}
func encodeFrame(sampleBuffer:CMSampleBuffer, isVideo:Bool) -> Bool {
if CMSampleBufferDataIsReady(sampleBuffer) {
if writer.status == .unknown {
writer.startWriting()
writer.startSession(atSourceTime: CMSampleBufferGetPresentationTimeStamp(sampleBuffer))
}
if writer.status == .failed {
QFLogger.shared.addLog(format: "[ERROR initiating AVAssetWriter]", args: [], error: writer.error)
return false
}
if isVideo {
if videoInput.isReadyForMoreMediaData {
videoInput.append(sampleBuffer)
return true
}
}
else {
if audioInput.isReadyForMoreMediaData {
audioInput.append(sampleBuffer)
return true
}
}
}
return false
}
}
The rest of the code should be pretty obvious, but just to make it complete, here is what I have for pausing:
isPaused = true
discont = true
And here is resume:
isPaused = false
If anyone could help me understand how to align the video and audio tracks during such a live recording, that would be great!

Ok, it turns out there was no mistake in the code I provided. The issue I experienced was caused by video smoothing (stabilization), which was turned ON :) I guess it needs extra frames to smooth the video, which is why the video output freezes for a short period at the end of each section.
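In case it helps anyone else, below is a minimal sketch of turning that stabilization off on the capture connection, assuming "smoothing" refers to video stabilization and that videoOutput is the AVCaptureVideoDataOutput attached to the session (the session setup is not shown in the question, so these names are illustrative):
// Hypothetical property: `videoOutput` is the AVCaptureVideoDataOutput added to the capture session.
if let connection = videoOutput.connection(with: .video),
   connection.isVideoStabilizationSupported {
    // Stabilization buffers extra frames, which is what produced the frozen tail after each pause.
    connection.preferredVideoStabilizationMode = .off
}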

Related

AVAssetWriter async video and audio after calling broadcastPaused()

I am trying to record a video with sound from the device screen using ReplayKit and RPBroadcastSampleHandler.
When I record using just "start broadcast" and "stop", the result is great.
But if I try to pause the recording (via the red status bar) I run into problems. The resulting video and audio have different lengths (the audio is shorter but contains everything I need). In the recording, video and audio go out of sync from the moment the status bar is tapped (iOS 14). The audio is fine, but the video freezes when the status bar is tapped and only continues once the modal window is closed. As a result, the end of the video has no audio.
Here is my code:
1. All the class fields I have:
class SampleHandler: RPBroadcastSampleHandler {
private let videoService = VideoService()
private let audioService = AudioService()
private var isRecording = false
private let lock = NSLock()
private var finishCalled = false
private var videoWriter: AVAssetWriter!
private var videoWriterInput: AVAssetWriterInput!
private var microphoneWriterInput: AVAssetWriterInput!
private var sessionBeginAtSourceTime: CMTime!
2. Some configuration when capturing starts:
override func broadcastStarted(withSetupInfo setupInfo: [String : NSObject]?) {
guard !isRecording else { return }
isRecording = true
BroadcastData.clear()
BroadcastData.startVideoDate = Date()
BroadcastData.status = .writing
sessionBeginAtSourceTime = nil
configurateVideoWriter()
}
private func configurateVideoWriter() {
let outputFileLocation = videoService.getVideoFileLocation()
videoWriter = try? AVAssetWriter.init(outputURL: outputFileLocation,
fileType: AVFileType.mov)
configurateVideoWriterInput()
configurateMicrophoneWriterInput()
if videoWriter.canAdd(videoWriterInput) { videoWriter.add(videoWriterInput) }
if videoWriter.canAdd(microphoneWriterInput) { videoWriter.add(microphoneWriterInput) }
videoWriter.startWriting()
}
private func configurateVideoWriterInput() {
let RESOLUTION_COEF: CGFloat = 16
let naturalWidth = UIScreen.main.bounds.width
let naturalHeight = UIScreen.main.bounds.height
let width = naturalWidth - naturalWidth.truncatingRemainder(dividingBy: RESOLUTION_COEF)
let height = naturalHeight - naturalHeight.truncatingRemainder(dividingBy: RESOLUTION_COEF)
let videoSettings: [String: Any] = [
AVVideoCodecKey: AVVideoCodecType.h264,
AVVideoWidthKey: width,
AVVideoHeightKey: height
]
videoWriterInput = AVAssetWriterInput(mediaType: .video,
outputSettings: videoSettings)
videoWriterInput.expectsMediaDataInRealTime = true
}
private func configurateMicrophoneWriterInput() {
let audioOutputSettings: [String : Any] = [
AVFormatIDKey: kAudioFormatMPEG4AAC,
AVNumberOfChannelsKey : 1,
AVSampleRateKey : 44100.0,
AVEncoderBitRateKey: 96000
]
microphoneWriterInput = AVAssetWriterInput(mediaType: .audio,
outputSettings: audioOutputSettings)
microphoneWriterInput.expectsMediaDataInRealTime = true
}
3. The write process:
override func processSampleBuffer(_ sampleBuffer: CMSampleBuffer, with sampleBufferType:
RPSampleBufferType) {
guard isRecording && videoWriter?.status == .writing else { return }
if BroadcastData.status != .writing {
isRecording = false
finishBroadCast()
return
}
if sessionBeginAtSourceTime == nil {
sessionBeginAtSourceTime = CMSampleBufferGetPresentationTimeStamp(sampleBuffer)
videoWriter.startSession(atSourceTime: sessionBeginAtSourceTime!)
}
switch sampleBufferType {
case .video:
if videoWriterInput.isReadyForMoreMediaData {
videoWriterInput.append(sampleBuffer)
}
case .audioMic:
if microphoneWriterInput.isReadyForMoreMediaData {
microphoneWriterInput.append(sampleBuffer)
}
case .audioApp:
break
@unknown default:
print("unknown")
}
}
4. Pause and resume:
override func broadcastPaused() {
super.broadcastPaused()
}
override func broadcastResumed() {
super.broadcastResumed()
}
Pausing and resuming the recording creates a gap in the video presentation timestamps and a discontinuity in the audio, which I believe explains your symptoms.
What you need to do is measure how long the recording was paused for, possibly using the sample buffer timestamps, and then subtract that offset from the presentation timestamps of all the subsequent CMSampleBuffers that you process. CMSampleBufferCreateCopyWithNewTiming() can help you with this.
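For illustration, here is a minimal sketch of that retiming step; the pauseOffset value and the helper's name are assumptions, not part of the code above:
import CoreMedia

// Hypothetical helper: shifts a sample buffer's timestamps back by the total paused duration.
func retimed(_ sampleBuffer: CMSampleBuffer, by pauseOffset: CMTime) -> CMSampleBuffer? {
    var count: CMItemCount = 0
    CMSampleBufferGetSampleTimingInfoArray(sampleBuffer, entryCount: 0, arrayToFill: nil, entriesNeededOut: &count)
    var timing = [CMSampleTimingInfo](repeating: CMSampleTimingInfo(), count: count)
    CMSampleBufferGetSampleTimingInfoArray(sampleBuffer, entryCount: count, arrayToFill: &timing, entriesNeededOut: &count)
    for i in 0..<count {
        // Shift every timestamp back by the accumulated pause duration.
        timing[i].presentationTimeStamp = CMTimeSubtract(timing[i].presentationTimeStamp, pauseOffset)
        if timing[i].decodeTimeStamp.isValid {
            timing[i].decodeTimeStamp = CMTimeSubtract(timing[i].decodeTimeStamp, pauseOffset)
        }
    }
    var adjusted: CMSampleBuffer?
    CMSampleBufferCreateCopyWithNewTiming(allocator: kCFAllocatorDefault,
                                          sampleBuffer: sampleBuffer,
                                          sampleTimingEntryCount: count,
                                          sampleTimingArray: &timing,
                                          sampleBufferOut: &adjusted)
    return adjusted
}
You would call something like this from processSampleBuffer before appending, once pauseOffset has been measured from the gap between the last timestamp seen before broadcastPaused() and the first one seen after broadcastResumed().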

AudioKit output changes to ear speakers

I implemented the AudioKit "MICROPHONE ANALYSIS" example https://audiokit.io/examples/MicrophoneAnalysis/ in my App.
I want to analyze the microphone input frequency and then play the note closest to the determined frequency.
Normally the sound output is the speaker or a Bluetooth device connected to my iPhone, but after implementing the "MICROPHONE ANALYSIS" example the sound output changed to the tiny speaker at the top of the iPhone that is normally used for phone calls.
How can I switch back to the "normal" speaker or to the connected Bluetooth device, like before?
var mic: AKMicrophone!
var tracker: AKFrequencyTracker!
var silence: AKBooster!
func initFrequencyTracker() {
AKSettings.audioInputEnabled = true
mic = AKMicrophone()
tracker = AKFrequencyTracker(mic)
silence = AKBooster(tracker, gain: 0)
}
func deinitFrequencyTracker() {
plotTimer.invalidate()
do {
try AudioKit.stop()
AudioKit.output = nil
} catch {
print(error)
}
}
func initPlotTimer() {
AudioKit.output = silence
do {
try AudioKit.start()
} catch {
AKLog("AudioKit did not start!")
}
setupPlot()
plotTimer = Timer.scheduledTimer(timeInterval: 0.1, target: self, selector: #selector(updatePlotUI), userInfo: nil, repeats: true)
}
func setupPlot() {
let plot = AKNodeOutputPlot(mic, frame: audioInputPlot.bounds)
plot.translatesAutoresizingMaskIntoConstraints = false
plot.alpha = 0.3
plot.plotType = .rolling
plot.shouldFill = true
plot.shouldCenterYAxis = false
plot.shouldMirror = true
plot.color = UIColor(named: uiFarbe)
audioInputPlot.addSubview(plot)
// Pin the AKNodeOutputPlot to the audioInputPlot
var constraints = [plot.leadingAnchor.constraint(equalTo: audioInputPlot.leadingAnchor)]
constraints.append(plot.trailingAnchor.constraint(equalTo: audioInputPlot.trailingAnchor))
constraints.append(plot.topAnchor.constraint(equalTo: audioInputPlot.topAnchor))
constraints.append(plot.bottomAnchor.constraint(equalTo: audioInputPlot.bottomAnchor))
constraints.forEach { $0.isActive = true }
}
@objc func updatePlotUI() {
if tracker.amplitude > 0.1 {
let trackerFrequency = Float(tracker.frequency)
guard trackerFrequency < 7_000 else {
// This is a bit of hack because of modern Macbooks giving super high frequencies
return
}
var frequency = trackerFrequency
while frequency > Float(noteFrequencies[noteFrequencies.count - 1]) {
frequency /= 2.0
}
while frequency < Float(noteFrequencies[0]) {
frequency *= 2.0
}
var minDistance: Float = 10_000.0
var index = 0
for i in 0..<noteFrequencies.count {
let distance = fabsf(Float(noteFrequencies[i]) - frequency)
if distance < minDistance {
index = i
minDistance = distance
}
}
// let octave = Int(log2f(trackerFrequency / frequency))
frequencyLabel.text = String(format: "%0.1f", tracker.frequency)
if frequencyTranspose(note: notesToTanspose[index]) != droneLabel.text {
note = frequencyTranspose(note: notesToTanspose[index])
droneLabel.text = note
DispatchQueue.main.asyncAfter(deadline: .now() + 0.03, execute: {
self.prepareSinglePlayerFirstForStart(note: self.note)
self.startSinglePlayer()
})
}
}
}
func frequencyTranspose(note: String) -> String {
var indexNote = notesToTanspose.firstIndex(of: note)!
let chosenInstrument = UserDefaults.standard.object(forKey: "whichInstrument") as! String
if chosenInstrument == "Bb" {
if indexNote + 2 >= notesToTanspose.count {
indexNote -= 12
}
return notesToTanspose[indexNote + 2]
} else if chosenInstrument == "Eb" {
if indexNote - 3 < 0 {
indexNote += 12
}
return notesToTanspose[indexNote - 3]
} else {
return note
}
}
It's good practice to control the session settings, so start by creating a method in your application that takes care of that during initialisation.
Here is an example where I set a category and the desired options:
func start() {
do {
let session = AVAudioSession.sharedInstance()
try session.setCategory(.playAndRecord, options: .defaultToSpeaker)
try session.setActive(true, options: .notifyOthersOnDeactivation)
try session.overrideOutputAudioPort(AVAudioSession.PortOverride.speaker)
try AudioKit.start()
} catch {
// your error handler
}
}
You can call this start method where you currently call AudioKit.start() in initPlotTimer.
The example above uses AVAudioSession directly, which I believe is what AKSettings wraps (please feel free to edit my answer if that's inaccurate, as I'm not looking at the AudioKit source code at the moment).
Now that AVAudioSession has been covered, let's stick with the method offered by AudioKit, since that's what you're dealing with.
Here's another example using AKSettings:
func start() {
do {
AKSettings.channelCount = 2
AKSettings.ioBufferDuration = 0.002
AKSettings.audioInputEnabled = true
AKSettings.bufferLength = .medium
AKSettings.defaultToSpeaker = true
// check docs for other options and settings
try AKSettings.setSession(category: .playAndRecord, with: [.defaultToSpeaker, .allowBluetooth])
try AudioKit.start()
} catch {
// your handler
}
}
Keep in mind that you don't necessarily have to call it start or run AudioKit's start method there; I'm just spelling out the initialisation phase to make it readable for you and other use cases.
Reference:
https://developer.apple.com/documentation/avfoundation/avaudiosession/categoryoptions
https://audiokit.io/docs/Classes/AKSettings.html

Swift AVFoundation timing info for audio measurements

I am creating an application that will take an audio measurement by playing some stimulus data and recording the microphone input, and then analysing the data.
I am having trouble accounting for the time taken to initialise and start the audio engine, as this varies on each run and also depends on the hardware used, etc.
So, I have an audio engine and have installed a tap on the hardware input, with input 1 being the microphone recording and input 2 being a reference input (also from the hardware). The output is physically Y-split and fed back into input 2.
The app initialises the engine, plays the stimulus audio plus 1 second of silence (to allow propagation time for the microphone to record the whole signal back), and then stops and closes the engine.
I write the two input buffers to a WAV file so that I can import them into an existing DAW and visually examine the signals. I can see that each time I take a measurement, the time difference between the two signals is different (despite the fact that the microphone has not moved and the hardware has stayed the same). I am assuming this is to do with the latency of the hardware, the time taken to initialise the engine, and the way the device distributes tasks.
I have tried capturing the absolute time using mach_absolute_time in the first buffer callback of each installTap block and subtracting the two, and I can see that this varies quite a lot with each call:
class newAVAudioEngine{
var engine = AVAudioEngine()
var audioBuffer = AVAudioPCMBuffer()
var running = true
var in1Buf:[Float]=Array(repeating:0, count:totalRecordSize)
var in2Buf:[Float]=Array(repeating:0, count:totalRecordSize)
var buf1current:Int = 0
var buf2current:Int = 0
var in1firstRun:Bool = false
var in2firstRun:Bool = false
var in1StartTime = 0
var in2startTime = 0
func measure(inputSweep:SweepFilter) -> measurement {
initializeEngine(inputSweep: inputSweep)
while running == true {
}
let measureResult = measurement.init(meas: meas,ref: ref)
return measureResult
}
func initializeEngine(inputSweep:SweepFilter) {
buf1current = 0
buf2current = 0
in1StartTime = 0
in2startTime = 0
in1firstRun = true
in2firstRun = true
in1Buf = Array(repeating:0, count:totalRecordSize)
in2Buf = Array(repeating:0, count:totalRecordSize)
engine.stop()
engine.reset()
engine = AVAudioEngine()
let srcNode = AVAudioSourceNode { _, _, frameCount, AudioBufferList -> OSStatus in
let ablPointer = UnsafeMutableAudioBufferListPointer(AudioBufferList)
if (Int(frameCount) + time) <= inputSweep.stimulus.count {
for frame in 0..<Int(frameCount) {
let value = inputSweep.stimulus[frame + time]
for buffer in ablPointer {
let buf: UnsafeMutableBufferPointer<Float> = UnsafeMutableBufferPointer(buffer)
buf[frame] = value
}
}
time += Int(frameCount)
return noErr
} else {
for frame in 0..<Int(frameCount) {
let value = 0
for buffer in ablPointer {
let buf: UnsafeMutableBufferPointer<Float> = UnsafeMutableBufferPointer(buffer)
buf[frame] = Float(value)
}
}
}
return noErr
}
let format = engine.outputNode.inputFormat(forBus: 0)
let stimulusFormat = AVAudioFormat(commonFormat: format.commonFormat,
sampleRate: Double(sampleRate),
channels: 1,
interleaved: format.isInterleaved)
do {
try AVAudioSession.sharedInstance().setCategory(.playAndRecord)
let ioBufferDuration = 128.0 / 44100.0
try AVAudioSession.sharedInstance().setPreferredIOBufferDuration(ioBufferDuration)
} catch {
assertionFailure("AVAudioSession setup failed")
}
let input = engine.inputNode
let inputFormat = input.inputFormat(forBus: 0)
print("InputNode Format is \(inputFormat)")
engine.attach(srcNode)
engine.connect(srcNode, to: engine.mainMixerNode, format: stimulusFormat)
if internalRefLoop == true {
srcNode.installTap(onBus: 0, bufferSize: 1024, format: stimulusFormat, block: {(buffer: AVAudioPCMBuffer!, time: AVAudioTime!) -> Void in
if self.in2firstRun == true {
var info = mach_timebase_info()
mach_timebase_info(&info)
let currentTime = mach_absolute_time()
let nanos = currentTime * UInt64(info.numer) / UInt64(info.denom)
self.in2startTime = Int(nanos)
self.in2firstRun = false
}
do {
let floatData = buffer.floatChannelData?.pointee
for frame in 0..<buffer.frameLength{
if (self.buf2current + Int(frame)) < totalRecordSize{
self.in2Buf[self.buf2current + Int(frame)] = floatData![Int(frame)]
}
}
self.buf2current += Int(buffer.frameLength)
if (self.numberOfSamples + Int(buffer.frameLength)) <= totalRecordSize{
try self.stimulusFile.write(from: buffer)
self.numberOfSamples += Int(buffer.frameLength) } else {
self.engine.stop()
self.running = false
}
} catch {
print(NSString(string: "write failed"))
}
})
}
let micAudioConverter = AVAudioConverter(from: inputFormat, to: stimulusFormat!)
var micChannelMap:[NSNumber] = [0,-1]
micAudioConverter?.channelMap = micChannelMap
let refAudioConverter = AVAudioConverter(from: inputFormat, to: stimulusFormat!)
var refChannelMap:[NSNumber] = [1,-1]
refAudioConverter?.channelMap = refChannelMap
//Measurement Tap
engine.inputNode.installTap(onBus: 0, bufferSize: 1024, format: inputFormat, block: {(buffer2: AVAudioPCMBuffer!, time: AVAudioTime!) -> Void in
//print(NSString(string:"writing"))
if self.in1firstRun == true {
var info = mach_timebase_info()
mach_timebase_info(&info)
let currentTime = mach_absolute_time()
let nanos = currentTime * UInt64(info.numer) / UInt64(info.denom)
self.in1StartTime = Int(nanos)
self.in1firstRun = false
}
do {
let micConvertedBuffer = AVAudioPCMBuffer(pcmFormat: stimulusFormat!, frameCapacity: buffer2.frameCapacity)
let micInputBlock: AVAudioConverterInputBlock = { inNumPackets, outStatus in
outStatus.pointee = AVAudioConverterInputStatus.haveData
return buffer2
}
var error: NSError? = nil
//let status = audioConverter.convert(to: convertedBuffer!, error: &error, withInputFrom: inputBlock)
let status = micAudioConverter?.convert(to: micConvertedBuffer!, error: &error, withInputFrom: micInputBlock)
//print(status)
let floatData = micConvertedBuffer?.floatChannelData?.pointee
for frame in 0..<micConvertedBuffer!.frameLength{
if (self.buf1current + Int(frame)) < totalRecordSize{
self.in1Buf[self.buf1current + Int(frame)] = floatData![Int(frame)]
}
if (self.buf1current + Int(frame)) >= totalRecordSize {
self.engine.stop()
self.running = false
}
}
self.buf1current += Int(micConvertedBuffer!.frameLength)
try self.measurementFile.write(from: micConvertedBuffer!)
} catch {
print(NSString(string: "write failed"))
}
if internalRefLoop == false {
if self.in2firstRun == true{
var info = mach_timebase_info()
mach_timebase_info(&info)
let currentTime = mach_absolute_time()
let nanos = currentTime * UInt64(info.numer) / UInt64(info.denom)
self.in2startTime = Int(nanos)
self.in2firstRun = false
}
do {
let refConvertedBuffer = AVAudioPCMBuffer(pcmFormat: stimulusFormat!, frameCapacity: buffer2.frameCapacity)
let refInputBlock: AVAudioConverterInputBlock = { inNumPackets, outStatus in
outStatus.pointee = AVAudioConverterInputStatus.haveData
return buffer2
}
var error: NSError? = nil
let status = refAudioConverter?.convert(to: refConvertedBuffer!, error: &error, withInputFrom: refInputBlock)
//print(status)
let floatData = refConvertedBuffer?.floatChannelData?.pointee
for frame in 0..<refConvertedBuffer!.frameLength{
if (self.buf2current + Int(frame)) < totalRecordSize{
self.in2Buf[self.buf2current + Int(frame)] = floatData![Int(frame)]
}
}
if (self.numberOfSamples + Int(buffer2.frameLength)) <= totalRecordSize{
self.buf2current += Int(refConvertedBuffer!.frameLength)
try self.stimulusFile.write(from: refConvertedBuffer!) } else {
self.engine.stop()
self.running = false
}
} catch {
print(NSString(string: "write failed"))
}
}
}
)
assert(engine.inputNode != nil)
running = true
try! engine.start()
}
}
So, the above is my entire class. Currently each installTap callback writes its input directly to a WAV file, and this is where I can see the two results differ each time. I have tried adding the start-time variables and subtracting the two, but the results still vary.
Do I need to take into account that my output will also have latency that may vary with each call? If so, how do I add this time into the equation? What I am looking for is for the two inputs and the output to share a common time reference so that I can compare them. The hardware latency itself will not matter too much, as long as I can identify the end call times.
If you are doing real-time measurements, you might want to use AVAudioSinkNode instead of a tap. The sink node is new and was introduced along with the AVAudioSourceNode you are already using. You won't be able to get precise timing from a tap.
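To give an idea of what that looks like, here is a sketch with illustrative names rather than the ones from your class (AVAudioSinkNode requires iOS 13 / macOS 10.15 or later):
import AVFoundation

let engine = AVAudioEngine()
let input = engine.inputNode
let inputFormat = input.inputFormat(forBus: 0)

// The sink node's receiver block gets the raw render-cycle timestamp, which is tied to the
// hardware clock rather than to when a tap buffer happens to be delivered.
let sink = AVAudioSinkNode { timestamp, frameCount, audioBufferList -> OSStatus in
    let hostTime = timestamp.pointee.mHostTime     // hardware clock time of these frames
    let sampleTime = timestamp.pointee.mSampleTime // running sample position
    // ...copy `frameCount` frames out of `audioBufferList` into your measurement buffers...
    _ = (hostTime, sampleTime)
    return noErr
}

engine.attach(sink)
engine.connect(input, to: sink, format: inputFormat)
try? engine.start()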

AVPlayerItem.loadedTimeRanges notifications aren't working with custom AVAssetResourceLoaderDelegate

I use an AVAssetResourceLoaderDelegate for video caching, but the video only plays after it has been fully downloaded.
I add a KVO observer to the loadedTimeRanges property:
[self.player.currentItem addObserver:self
forKeyPath:#"loadedTimeRanges"
options:NSKeyValueObservingOptionNew
context:nil];
Check ranges and start playing:
- (void)checkRanges {
if (self.player.currentItem.asset) {
CMTimeRange loadedTimeRange =
[[self.player.currentItem.loadedTimeRanges firstObject] CMTimeRangeValue];
Float64 videoDuration = CMTimeGetSeconds(self.player.currentItem.asset.duration);
Float64 buffered = CMTimeGetSeconds(loadedTimeRange.duration);
Float64 percent = buffered / videoDuration;
if (percent > 0.2) {
[self.player play];
}
}
}
But the notifications aren't working; for some reason, I only get a notification when the video is almost fully downloaded.
Video downloading is implemented using an AVAssetResourceLoaderDelegate, which allows the cache to be used while the video plays.
The download is initiated in the AVAssetResourceLoaderDelegate method:
func resourceLoader(_ resourceLoader: AVAssetResourceLoader,
shouldWaitForLoadingOfRequestedResource loadingRequest: AVAssetResourceLoadingRequest) -> Bool {
if let resourceURL = loadingRequest.request.url, resourceURL.isVCSVideoScheme(),
let originalURL = resourceURL.vcsRemoteVideoURL() {
let assetLoader: VideoAssetLoader
if let loader = assetLoaderForRequest(loadingRequest) {
assetLoader = loader
} else {
assetLoader = VideoAssetLoader(url: originalURL)
assetLoader.delegate = self
assetLoaders[keyForAssetLoaderWithURL(resourceURL)] = assetLoader
}
assetLoader.addRequest(loadingRequest)
return true
}
return false
}
VideoAssetLoader receives the HEAD response with the file information and then updates the AVAssetResourceLoadingRequest:
private func fillInContentInformation(_ contentInformationRequest: AVAssetResourceLoadingContentInformationRequest?) {
guard let contentInformationRequest = contentInformationRequest,
let contentInformation = self.contentInformation else {
return
}
contentInformationRequest.isByteRangeAccessSupported = contentInformation.byteRangeAccessSupported
contentInformationRequest.contentType = contentInformation.contentType
contentInformationRequest.contentLength = Int64(contentInformation.contentLength)
}
Then VideoAssetLoader downloads the requested range of bytes and updates the AVAssetResourceLoadingDataRequest:
private func processPendingRequests() {
for request in pendingRequests {
fillInContentInformation(request.contentInformationRequest)
let didRespondCompletely = respondWithDataForRequest(request.dataRequest)
if didRespondCompletely {
request.finishLoading()
}
}
pendingRequests = pendingRequests.filter({ !$0.isFinished })
}
private func respondWithDataForRequest(_ dataRequest: AVAssetResourceLoadingDataRequest?) -> Bool {
guard let dataRequest = dataRequest, let downloadTask = videoDownloadTask else {
return false
}
var startOffset: UInt64 = UInt64(dataRequest.requestedOffset)
if dataRequest.currentOffset != 0 {
startOffset = UInt64(dataRequest.currentOffset)
}
let numberOfBytesToRespondWith = UInt64(dataRequest.requestedLength)
var didRespondFully = false
if let data = downloadTask.readCachedDataWithOffset(startOffset, lenght: numberOfBytesToRespondWith) {
dataRequest.respond(with: data)
didRespondFully = data.count >= dataRequest.requestedLength
}
return didRespondFully
}
Unfortunately, the video doesn't play until it has been completely downloaded (i.e. until AVAssetResourceLoadingRequest.finishLoading() is called), and I'm not getting the loadedTimeRanges notifications either.
Does anyone have experience with this who could point me toward where I may be doing something wrong?
Thanks!

Swift AVFoundation Reading and Analyzing a file in real time

I am having trouble reading a file from disk using AVFoundation and performing rendering and analysis in real time.
I have a pipeline of code that I know runs in real time and does analysis on a video file. This pipeline works in real time when fed from the camera session; however, this is not the case when I read the file as shown below. Can anyone tell me where I might be going wrong?
protocol VideoStreamTestBenchDelegate {
func frameBuffer(buffer:CMSampleBuffer)
}
class VideoStreamTestBench {
let asset:AVAsset
let assetReader:AVAssetReader
let playAtActualSpeed:Bool
let loop:Bool
var videoEncodingIsFinished = false
var previousFrameTime = kCMTimeZero
var previousActualFrameTime = CFAbsoluteTimeGetCurrent()
var numberOfFramesCaptured = 0
var totalFrameTimeDuringCapture:Double = 0.0
var delegate:VideoStreamTestBenchDelegate?
public convenience init(url:URL, playAtActualSpeed:Bool = false, loop:Bool = false) throws {
let inputOptions = [AVURLAssetPreferPreciseDurationAndTimingKey:NSNumber(value:true)]
let inputAsset = AVURLAsset(url:url, options:inputOptions)
try self.init(asset:inputAsset, playAtActualSpeed:playAtActualSpeed, loop:loop)
}
public init(asset:AVAsset, playAtActualSpeed:Bool = false, loop:Bool = false) throws {
self.asset = asset
self.playAtActualSpeed = playAtActualSpeed
self.loop = loop
assetReader = try AVAssetReader(asset:self.asset)
let outputSettings:[String:AnyObject] = [(kCVPixelBufferPixelFormatTypeKey as String):NSNumber(value:Int32(kCVPixelFormatType_32BGRA))]
let readerVideoTrackOutput = AVAssetReaderTrackOutput(track:self.asset.tracks(withMediaType: AVMediaTypeVideo)[0], outputSettings:outputSettings)
readerVideoTrackOutput.alwaysCopiesSampleData = false
assetReader.add(readerVideoTrackOutput)
// TODO: Audio here
}
public func start() {
asset.loadValuesAsynchronously(forKeys:["tracks"], completionHandler:{
DispatchQueue.global(priority:DispatchQueue.GlobalQueuePriority.default).async(execute: {
guard (self.asset.statusOfValue(forKey: "tracks", error:nil) == .loaded) else { return }
guard self.assetReader.startReading() else {
print("Couldn't start reading")
return
}
var readerVideoTrackOutput:AVAssetReaderOutput? = nil;
for output in self.assetReader.outputs {
if(output.mediaType == AVMediaTypeVideo) {
readerVideoTrackOutput = output;
}
}
while (self.assetReader.status == .reading) {
self.readNextVideoFrame(from:readerVideoTrackOutput!)
}
if (self.assetReader.status == .completed) {
self.assetReader.cancelReading()
if (self.loop) {
// TODO: Restart movie processing
} else {
self.endProcessing()
}
}
})
})
}
public func cancel() {
assetReader.cancelReading()
self.endProcessing()
}
func endProcessing() {
}
func readNextVideoFrame(from videoTrackOutput:AVAssetReaderOutput) {
if ((assetReader.status == .reading) && !videoEncodingIsFinished) {
if let sampleBuffer = videoTrackOutput.copyNextSampleBuffer() {
if (playAtActualSpeed) {
// Do this outside of the video processing queue to not slow that down while waiting
let currentSampleTime = CMSampleBufferGetOutputPresentationTimeStamp(sampleBuffer)
let differenceFromLastFrame = CMTimeSubtract(currentSampleTime, previousFrameTime)
let currentActualTime = CFAbsoluteTimeGetCurrent()
let frameTimeDifference = CMTimeGetSeconds(differenceFromLastFrame)
let actualTimeDifference = currentActualTime - previousActualFrameTime
if (frameTimeDifference > actualTimeDifference) {
usleep(UInt32(round(1000000.0 * (frameTimeDifference - actualTimeDifference))))
}
previousFrameTime = currentSampleTime
previousActualFrameTime = CFAbsoluteTimeGetCurrent()
}
DispatchQueue.global().sync {
self.delegate?.frameBuffer(buffer: sampleBuffer)
CMSampleBufferInvalidate(sampleBuffer)
}
} else {
if (!loop) {
videoEncodingIsFinished = true
if (videoEncodingIsFinished) {
self.endProcessing()
}
}
}
}
}
}
// This is the delegate
public func bufferReader(_ reader: BufferReader!, didGetNextVideoSample bufferRef: CMSampleBuffer!) {
// let posePoints:[Any] = self.visageBackend.posePoints(with: bufferRef)
// var regions:[Any]? = nil
//
// if (posePoints.count > 0) {
// regions = (self.luaBackend?.regions(forPosePoints: posePoints))!
// }
//
// // extract
// if(regions != nil) {
// let rois:[Any] = (self.luaBackend?.extractedRegionInfos(for: bufferRef, andRegions: regions))!
// print(rois)
// }
//
// self.dLibRenderEngine.render(with: bufferRef, andPoints: posePoints, andRegions: regions)
self.backgroundRenderQueue.async { [weak self] in
if self?.previewLayer?.isReadyForMoreMediaData == true {
self?.previewLayer?.enqueue(bufferRef!)
}
}
}
