Decompressed error: Error Domain=NSOSStatusErrorDomain Code=-12909 - ios

I am using the H.264 codec. Below is the link I am referring to for decompressing video with the VideoToolbox framework:
https://stackoverflow.com/a/29525001/1679255
Not all videos are affected, but a few get stuck at a specific position.
Below are the error logs I am receiving while decompressing the frame:
NALU Raw: 00, 00, 00, 01, 41, 9a, 00, 18
~~~~~~~ Received NALU Type "1: Coded slice of a non-IDR picture (VCL)" ~~~~~~~~
Decompressed error: Error Domain=NSOSStatusErrorDomain Code=-12909 "(null)"
Xcode version is 11.2.1
Deployment target is 10.0
If you need more info, feel free to ask me.
My VideoDecoder class code is below:
@interface VideoDecoder () {
AVSampleBufferDisplayLayer *mVideoLayer;
long videoTimestamp;
}
@property (nonatomic, assign) CMVideoFormatDescriptionRef formatDesc;
@property (nonatomic, assign) VTDecompressionSessionRef decompressionSession;
@property (nonatomic, assign) int spsSize;
@property (nonatomic, assign) int ppsSize;
@property (nonatomic, retain) NSMutableData* streamVideoData;
@end
@implementation VideoDecoder
NSString * const naluTypesStrings[] = {
#"0: Unspecified (non-VCL)",
#"1: Coded slice of a non-IDR picture (VCL)", // P frame
#"2: Coded slice data partition A (VCL)",
#"3: Coded slice data partition B (VCL)",
#"4: Coded slice data partition C (VCL)",
#"5: Coded slice of an IDR picture (VCL)", // I frame
#"6: Supplemental enhancement information (SEI) (non-VCL)",
#"7: Sequence parameter set (non-VCL)", // SPS parameter
#"8: Picture parameter set (non-VCL)", // PPS parameter
#"9: Access unit delimiter (non-VCL)",
#"10: End of sequence (non-VCL)",
#"11: End of stream (non-VCL)",
#"12: Filler data (non-VCL)",
#"13: Sequence parameter set extension (non-VCL)",
#"14: Prefix NAL unit (non-VCL)",
#"15: Subset sequence parameter set (non-VCL)",
#"16: Reserved (non-VCL)",
#"17: Reserved (non-VCL)",
#"18: Reserved (non-VCL)",
#"19: Coded slice of an auxiliary coded picture without partitioning (non-VCL)",
#"20: Coded slice extension (non-VCL)",
#"21: Coded slice extension for depth view components (non-VCL)",
#"22: Reserved (non-VCL)",
#"23: Reserved (non-VCL)",
#"24: STAP-A Single-time aggregation packet (non-VCL)",
#"25: STAP-B Single-time aggregation packet (non-VCL)",
#"26: MTAP16 Multi-time aggregation packet (non-VCL)",
#"27: MTAP24 Multi-time aggregation packet (non-VCL)",
#"28: FU-A Fragmentation unit (non-VCL)",
#"29: FU-B Fragmentation unit (non-VCL)",
#"30: Unspecified (non-VCL)",
#"31: Unspecified (non-VCL)",
};
- (instancetype)init {
self = [super init];
if (self) {
// _videoLayer = [[AVSampleBufferDisplayLayer alloc] init];
//// _videoLayer.frame = self.view.frame;
//// _videoLayer.bounds = self.view.bounds;
// _videoLayer.frame = CGRectMake(0, 0, 1280, 720);
// _videoLayer.bounds = CGRectMake(0, 0, 1280, 720);
// _videoLayer.videoGravity = AVLayerVideoGravityResizeAspect;
//
// // set Timebase, you may need this if you need to display frames at specific times
// // I didn't need it so I haven't verified that the timebase is working
// CMTimebaseRef controlTimebase;
// CMTimebaseCreateWithMasterClock(CFAllocatorGetDefault(), CMClockGetHostTimeClock(), &controlTimebase);
//
// //videoLayer.controlTimebase = controlTimebase;
// CMTimebaseSetTime(_videoLayer.controlTimebase, kCMTimeZero);
// CMTimebaseSetRate(_videoLayer.controlTimebase, 1.0);
self.streamVideoData = nil;
self.startDumpData = NO;
videoTimestamp = 0;
}
return self;
}
- (void)initDebugStreamingVideo
{
self.streamVideoData = [[NSMutableData alloc] init];
}
- (void)releaseVideoDecorder
{
if (_decompressionSession != NULL)
{
VTDecompressionSessionInvalidate(_decompressionSession);
_decompressionSession = NULL;
}
videoTimestamp = 0;
}
- (long)getVideoTimeStamp
{
return videoTimestamp;
}
- (void)setVideoTimestamp:(long)timestamp
{
videoTimestamp = timestamp;
#ifdef DEBUG
NSLog(#"(sync)video: %2.1f", (float)videoTimestamp/1000000.);
#endif
}
- (NSString*)getLiveStreamingMP4Path
{
NSArray *paths = NSSearchPathForDirectoriesInDomains(NSDocumentDirectory,
NSUserDomainMask, YES);
NSString *rootOfCachepath = [paths objectAtIndex:0];
NSString* nalFilePath = [self getUniqueFilePath:rootOfCachepath FileNamePrefix:@"liveRecord" FileNameSubfix:@"MP4"];
return nalFilePath;
}
- (NSString*)closeAndSaveDebugStreamingVideo
{
NSArray *paths = NSSearchPathForDirectoriesInDomains(NSDocumentDirectory,
NSUserDomainMask, YES);
NSString *rootOfCachepath = [paths objectAtIndex:0];
NSString* nalFilePath = [self getUniqueFilePath:rootOfCachepath FileNamePrefix:@"liveRecord" FileNameSubfix:@"264"];
if (self.streamVideoData.length > 0)
[self.streamVideoData writeToFile:nalFilePath atomically:YES];
self.streamVideoData = [[NSMutableData alloc] init];
return nalFilePath;
}
- (NSString*)getUniqueFilePath:(NSString*)parentFolder FileNamePrefix:(NSString*)fileName FileNameSubfix:(NSString*)subFix
{
NSString* fullFilePath = nil;
NSString* memoFileName = @"";
NSDate *currentDate = [NSDate date];
NSDateFormatter *currentDateFormat = [[NSDateFormatter alloc] init];
[currentDateFormat setDateFormat:@"yyyyMMddHHmmss"];
NSString *currentDateString = [currentDateFormat stringFromDate:currentDate];
memoFileName = [NSString stringWithFormat:@"%@_%@.%@", fileName, currentDateString, subFix];
fullFilePath = [parentFolder stringByAppendingPathComponent:memoFileName];
return fullFilePath;
}
//- (void)setVideoLayer:(AVSampleBufferDisplayLayer *)layer {
// mVideoLayer = layer;
// CMTimebaseRef controlTimebase;
// CMTimebaseCreateWithMasterClock(CFAllocatorGetDefault(), CMClockGetHostTimeClock(), &controlTimebase);
//
// mVideoLayer.controlTimebase = controlTimebase;
// CMTimebaseSetTime(mVideoLayer.controlTimebase, kCMTimeZero);
// CMTimebaseSetRate(mVideoLayer.controlTimebase, 1.0);
//}
- (BOOL)checkIfThisIsIDRFrame:(uint8_t *)frame withSize:(uint32_t)frameSize {
BOOL isIDRFrame = NO;
int startCodeIndex = 0;
int pi = 0;
if (frame[pi] == 0x00 && frame[pi+1] == 0x00 && frame[pi+2] == 0x00 && frame[pi+3] == 0x01 && frame[pi+4] == 0x09 && frame[pi+5] == 0x50)
startCodeIndex = 6;
// NSLog(#"NALU Raw: %02X, %02x, %02x, %02x, %02X, %02x, %02x, %02x", frame[0],frame[1],frame[2],frame[3],frame[4],frame[5],frame[6],frame[7]);
int nalu_type = (frame[startCodeIndex + 4] & 0x1F);
// NSLog(#"~~~~~~~ Received NALU Type \"%#\" ~~~~~~~~", naluTypesStrings[nalu_type]);
// if we havent already set up our format description with our SPS PPS parameters, we
// can't process any frames except type 7 that has our parameters
if (nalu_type != 7 && _formatDesc == NULL) {
NSLog(#"Video error: Frame is not an I Frame and format description is null");
return isIDRFrame;
}
// NALU type 7 is the SPS parameter NALU
if (nalu_type == 7) {
isIDRFrame = YES;
}
return isIDRFrame;
}
- (void)receivedRawVideoFrame:(uint8_t *)frame withSize:(uint32_t)frameSize {
OSStatus status = 0;
uint8_t *data = NULL;
uint8_t *pps = NULL;
uint8_t *sps = NULL;
#if defined(DEBUG) || defined(_RECORD_USE_LIVE_PACKAGE)
if (self.startDumpData && self.streamVideoData && frameSize <= 512*1024)
[self.streamVideoData appendBytes:(const void*)frame length:frameSize];
//if (self.streamVideoData && frameSize <= 512*1024)
// [self.streamVideoData appendBytes:(const void*)frame length:frameSize];
#endif
// I know how my H.264 data source's NALUs looks like so I know start code index is always 0.
// if you don't know where it starts, you can use a for loop similar to how I find the 2nd and 3rd start codes
int startCodeIndex = 0;
int secondStartCodeIndex = 0;
int thirdStartCodeIndex = 0;
//#ifdef DEBUG
int pi = 0;
if (frame[pi] == 0x00 && frame[pi+1] == 0x00 && frame[pi+2] == 0x00 && frame[pi+3] == 0x01 && frame[pi+4] == 0x09 && frame[pi+5] == 0x50)
startCodeIndex = 6;
//#endif
long blockLength = 0;
BOOL withSPSPPS = NO;
CMSampleBufferRef sampleBuffer = NULL;
CMBlockBufferRef blockBuffer = NULL;
NSLog(#"NALU Raw: %02X, %02x, %02x, %02x, %02X, %02x, %02x, %02x", frame[0],frame[1],frame[2],frame[3],frame[4],frame[5],frame[6],frame[7]);
int nalu_type = (frame[startCodeIndex + 4] & 0x1F);
NSLog(#"~~~~~~~ Received NALU Type \"%#\" ~~~~~~~~", naluTypesStrings[nalu_type]);
// if we havent already set up our format description with our SPS PPS parameters, we
// can't process any frames except type 7 that has our parameters
if (nalu_type != 7 && _formatDesc == NULL) {
NSLog(#"Video error: Frame is not an I Frame and format description is null");
return;
}
// NALU type 7 is the SPS parameter NALU
if (nalu_type == 7) {
// find where the second PPS start code begins, (the 0x00 00 00 01 code)
// from which we also get the length of the first SPS code
for (int i = startCodeIndex + 4; i < startCodeIndex + 40; i++) {
if (frame[i] == 0x00 && frame[i+1] == 0x00 && frame[i+2] == 0x00 && frame[i+3] == 0x01) {
secondStartCodeIndex = i;
//_spsSize = secondStartCodeIndex; // includes the header in the size
//#ifdef DEBUG
_spsSize = secondStartCodeIndex - startCodeIndex; // includes the header in the size
//#endif
break;
}
}
// find what the second NALU type is
nalu_type = (frame[secondStartCodeIndex + 4] & 0x1F);
// NSLog(#"~~~n7~~ Received NALU Type \"%#\" ~~~~~~~~", naluTypesStrings[nalu_type]);
}
// type 8 is the PPS parameter NALU
if(nalu_type == 8) {
// find where the NALU after this one starts so we know how long the PPS parameter is
//#ifdef DEBUG
for (int i = _spsSize + 4 + startCodeIndex; i < _spsSize + 30; i++) {
//#endif
//for (int i = _spsSize + 4; i < _spsSize + 30; i++) {
if (frame[i] == 0x00 && frame[i+1] == 0x00 && frame[i+2] == 0x00 && frame[i+3] == 0x01) {
thirdStartCodeIndex = i;
//_ppsSize = thirdStartCodeIndex - _spsSize;
//#ifdef DEBUG
_ppsSize = thirdStartCodeIndex - _spsSize - startCodeIndex;
//#endif
break;
}
}
// allocate enough data to fit the SPS and PPS parameters into our data objects.
// VTD doesn't want you to include the start code header (4 bytes long) so we add the - 4 here
sps = malloc(_spsSize - 4);
pps = malloc(_ppsSize - 4);
// copy in the actual sps and pps values, again ignoring the 4 byte header
//#ifdef DEBUG
memcpy (sps, &frame[4+startCodeIndex], _spsSize-4);
memcpy (pps, &frame[_spsSize+4+startCodeIndex], _ppsSize-4);
NSLog(#"SPS Raw: %02X, %02x, %02x, %02x, %02X, %02x, %02x, %02x, %02x, %02x, %02x, %02x, %02x, %02x, %02x, %02x", sps[0],sps[1],sps[2],sps[3],sps[4],sps[5],sps[6],sps[7],sps[8],sps[9],sps[10],sps[11],sps[12],sps[13],sps[14],sps[15]);
NSLog(#"PPS Raw: %02X, %02x, %02x, %02x", pps[0],pps[1],pps[2],pps[3]);
//#endif
// memcpy (sps, &frame[4], _spsSize-4);
// memcpy (pps, &frame[_spsSize+4], _ppsSize-4);
// now we set our H264 parameters
uint8_t* parameterSetPointers[2] = {sps, pps};
size_t parameterSetSizes[2] = {_spsSize-4, _ppsSize-4};
status = CMVideoFormatDescriptionCreateFromH264ParameterSets(kCFAllocatorDefault, 2,
(const uint8_t *const*)parameterSetPointers,
parameterSetSizes, 4,
&_formatDesc);
// NSLog(#"\t\t Creation of CMVideoFormatDescription: %#", (status == noErr) ? #"successful!" : #"failed...");
if(status != noErr)
NSLog(#"\t\t Format Description ERROR type: %d", (int)status);
// See if decomp session can convert from previous format description
// to the new one, if not we need to remake the decomp session.
// This snippet was not necessary for my applications but it could be for yours
/*BOOL needNewDecompSession = (VTDecompressionSessionCanAcceptFormatDescription(_decompressionSession, _formatDesc) == NO);
if(needNewDecompSession)
{
[self createDecompSession];
}*/
// now lets handle the IDR frame that (should) come after the parameter sets
// I say "should" because that's how I expect my H264 stream to work, YMMV
nalu_type = (frame[thirdStartCodeIndex + 4] & 0x1F);
// NSLog(#"~~~n8~~ Received NALU Type \"%#\" ~~~~~~~~", naluTypesStrings[nalu_type]);
withSPSPPS = YES;
}
// create our VTDecompressionSession. This isnt neccessary if you choose to use AVSampleBufferDisplayLayer
//
if (videoTimestamp == 0 && _decompressionSession != NULL)
{
if (_decompressionSession != NULL)
{
VTDecompressionSessionInvalidate(_decompressionSession);
_decompressionSession = NULL;
}
}
if ((status == noErr) && (_decompressionSession == NULL)) {
[self createDecompSession];
}
// type 5 is an IDR frame NALU. The SPS and PPS NALUs should always be followed by an IDR (or IFrame) NALU, as far as I know
if(nalu_type == 5) {
// find the offset, or where the SPS and PPS NALUs end and the IDR frame NALU begins
//#ifdef DEBUG
int offset = _spsSize + _ppsSize + startCodeIndex;
NSLog(#"Start IDR at %d", offset);
//#endif
// int offset = _spsSize + _ppsSize;
blockLength = frameSize - offset;
// NSLog(#"Block Length : %ld", blockLength);
data = malloc(blockLength);
data = memcpy(data, &frame[offset], blockLength);
// replace the start code header on this NALU with its size.
// AVCC format requires that you do this.
// htonl converts the unsigned int from host to network byte order
uint32_t dataLength32 = htonl (blockLength - 4);
memcpy (data, &dataLength32, sizeof (uint32_t));
// create a block buffer from the IDR NALU
status = CMBlockBufferCreateWithMemoryBlock(NULL, data, // memoryBlock to hold buffered data
blockLength, // block length of the mem block in bytes.
kCFAllocatorNull, NULL,
0, // offsetToData
blockLength, // dataLength of relevant bytes, starting at offsetToData
0, &blockBuffer);
// NSLog(#"\t\t BlockBufferCreation: \t %#", (status == kCMBlockBufferNoErr) ? #"successful!" : #"failed...");
}
// NALU type 1 is non-IDR (or PFrame) picture
if (nalu_type == 1) {
// non-IDR frames do not have an offset due to SPS and PSS, so the approach
// is similar to the IDR frames just without the offset
//#ifdef DEBUG
if (withSPSPPS)
{
blockLength = frameSize-(_spsSize + _ppsSize + startCodeIndex);
data = malloc(blockLength);
data = memcpy(data, &frame[0+startCodeIndex+ _spsSize + _ppsSize], blockLength);
}
else
{
blockLength = frameSize-startCodeIndex;
data = malloc(blockLength);
data = memcpy(data, &frame[0+startCodeIndex], blockLength);
}
//#endif
// blockLength = frameSize;
// data = malloc(blockLength);
// data = memcpy(data, &frame[0], blockLength);
// again, replace the start header with the size of the NALU
uint32_t dataLength32 = htonl (blockLength - 4);
memcpy (data, &dataLength32, sizeof (uint32_t));
status = CMBlockBufferCreateWithMemoryBlock(NULL, data, // memoryBlock to hold data. If NULL, block will be alloc when needed
blockLength, // overall length of the mem block in bytes
kCFAllocatorNull, NULL,
0, // offsetToData
blockLength, // dataLength of relevant data bytes, starting at offsetToData
0, &blockBuffer);
// NSLog(#"\t\t BlockBufferCreation: \t %#", (status == kCMBlockBufferNoErr) ? #"successful!" : #"failed...");
}
// now create our sample buffer from the block buffer,
if(status == noErr) {
// here I'm not bothering with any timing specifics since in my case we displayed all frames immediately
const size_t sampleSize = blockLength;
status = CMSampleBufferCreate(kCFAllocatorDefault,
blockBuffer, true, NULL, NULL,
_formatDesc, 1, 0, NULL, 1,
&sampleSize, &sampleBuffer);
// NSLog(#"\t\t SampleBufferCreate: \t %#", (status == noErr) ? #"successful!" : #"failed...");
}
if(status == noErr) {
// set some values of the sample buffer's attachments
CFArrayRef attachments = CMSampleBufferGetSampleAttachmentsArray(sampleBuffer, YES);
CFMutableDictionaryRef dict = (CFMutableDictionaryRef)CFArrayGetValueAtIndex(attachments, 0);
CFDictionarySetValue(dict, kCMSampleAttachmentKey_DisplayImmediately, kCFBooleanTrue);
// either send the samplebuffer to a VTDecompressionSession or to an AVSampleBufferDisplayLayer
[self render:sampleBuffer];
}
// free memory to avoid a memory leak, do the same for sps, pps and blockbuffer
if (NULL != data) {
free (data);
data = NULL;
}
return;
}
- (void) createDecompSession {
// make sure to destroy the old VTD session
_decompressionSession = NULL;
VTDecompressionOutputCallbackRecord callBackRecord;
callBackRecord.decompressionOutputCallback = decompressionSessionDecodeFrameCallback;
// this is necessary if you need to make calls to Objective C "self" from within in the callback method.
callBackRecord.decompressionOutputRefCon = (__bridge void *)self;
// you can set some desired attributes for the destination pixel buffer. I didn't use this but you may
// if you need to set some attributes, be sure to uncomment the dictionary in VTDecompressionSessionCreate
/*NSDictionary *destinationImageBufferAttributes = [NSDictionary dictionaryWithObjectsAndKeys:
[NSNumber numberWithBool:YES],
(id)kCVPixelBufferOpenGLESCompatibilityKey,
nil];*/
OSStatus status = VTDecompressionSessionCreate(NULL, _formatDesc, NULL,
NULL, // (__bridge CFDictionaryRef)(destinationImageBufferAttributes)
&callBackRecord, &_decompressionSession);
NSLog(#"Video Decompression Session Create: \t %#", (status == noErr) ? #"successful!" : #"failed...");
if(status != noErr)
NSLog(#"\t\t VTD ERROR type: %d", (int)status);
}
void decompressionSessionDecodeFrameCallback(void *decompressionOutputRefCon,
void *sourceFrameRefCon,
OSStatus status,
VTDecodeInfoFlags infoFlags,
CVImageBufferRef imageBuffer,
CMTime presentationTimeStamp,
CMTime presentationDuration) {
if (status != noErr) {
NSError *error = [NSError errorWithDomain:NSOSStatusErrorDomain code:status userInfo:nil];
NSLog(#"Decompressed error: %#", error);
}
else {
//NSLog(#"Decompressed sucessfully: pts: %f", CMTimeGetSeconds(presentationTimeStamp));
// NSLog(#"Decompressed sucessfully");
CIImage *ciImage = [CIImage imageWithCVPixelBuffer:imageBuffer];
CIContext *temporaryContext = [CIContext contextWithOptions:nil];
CGImageRef videoImage = [temporaryContext
createCGImage:ciImage
fromRect:CGRectMake(0, 0,
CVPixelBufferGetWidth(imageBuffer),
CVPixelBufferGetHeight(imageBuffer))];
UIImage *image = [[UIImage alloc] initWithCGImage:videoImage];
CGImageRelease(videoImage);
VideoDecoder *decoder = (__bridge VideoDecoder *)decompressionOutputRefCon;
[decoder.delegate videoDecoderImage:image];
// [decoder renderImage:image];
}
}
- (void)renderImage:(UIImage *)img {
dispatch_async(dispatch_get_main_queue(), ^{
// [self->mVideoImageView setImage:img];
[self->_delegate videoDecoderImage:img];
});
}
- (void)render:(CMSampleBufferRef)sampleBuffer {
VTDecodeFrameFlags flags = kVTDecodeFrame_EnableAsynchronousDecompression;
VTDecodeInfoFlags flagOut;
NSDate* currentTime = [NSDate date];
OSStatus status = VTDecompressionSessionDecodeFrame(_decompressionSession, sampleBuffer, flags,
(void*)CFBridgingRetain(currentTime), &flagOut);
if (noErr != status)
NSLog(#"video decode error: %d", status);
CFRelease(sampleBuffer);
// if you're using AVSampleBufferDisplayLayer, you only need to use this line of code
// if (mVideoLayer) {
// [mVideoLayer enqueueSampleBuffer:sampleBuffer];
// }
}
@end
Thanks in advance

In the cases where you are receiving the -12909 error, please make sure that the block buffer (CMBlockBufferCreateWithMemoryBlock) is actually created successfully for the NALU you parsed using the SPS & PPS values.
In some NALUs containing an IDR frame you will not get the SPS + PPS + IDR sequence; instead you might receive SPS + PPS + SEI + IDR.
So if you are following the reference link you mentioned above, it may not work for an SPS + PPS + SEI + IDR NALU. Because CMBlockBufferCreateWithMemoryBlock is then created incorrectly (or not at all), the later non-IDR frames fail to decompress.
Since SEI is non-VCL, simply iterate over it to find the next start code; you don't need to do anything else with the SEI for decompression to succeed.
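For illustration only (this is a sketch, not code from the linked answer), skipping an SEI could be done by scanning forward for the next 0x00 00 00 01 start code. The names frame, frameSize, and thirdStartCodeIndex are borrowed from the question's receivedRawVideoFrame: method; everything else here is hypothetical:
// Sketch: after parsing SPS/PPS, skip any SEI (type 6) NALUs so the IDR offset
// is computed from the real IDR start code rather than assumed to be _spsSize + _ppsSize.
int naluStart = thirdStartCodeIndex;             // start code of the NALU after the PPS
int naluType  = (frame[naluStart + 4] & 0x1F);
while (naluType == 6) {                          // 6 = SEI, non-VCL: nothing to decode
    int next = -1;
    for (int i = naluStart + 4; i + 3 < (int)frameSize; i++) {
        if (frame[i] == 0x00 && frame[i+1] == 0x00 &&
            frame[i+2] == 0x00 && frame[i+3] == 0x01) {
            next = i;                            // found the next start code
            break;
        }
    }
    if (next < 0) break;                         // malformed frame: no further start code
    naluStart = next;
    naluType  = (frame[naluStart + 4] & 0x1F);
}
// naluStart now points at the IDR (type 5) start code; use it as the offset when
// replacing the start code with the AVCC length and building the block buffer.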

Related

CMVideoFormatDescription extensions for MPEG4 streams

I've managed to decode and play H.264 videos; however, I'm having a difficult time with MPEG-4 videos.
What CMVideoFormatDescription extensions does it need? I'm getting a -8971 error (codecExtensionNotFoundErr) when trying to create a VTDecompressionSession.
This is how I create a video format description:
OSStatus success = CMVideoFormatDescriptionCreate(kCFAllocatorDefault,
self.mediaCodec,
message.frameSize.width,
message.frameSize.height,
NULL,
&mediaDescriptor);
Instead of that NULL, I assume I need to specify a CFDictionaryRef; however, I don't know what it should contain. Any idea?
After much pain and agony, I've finally managed to make it work.
I needed to provide a CFDictionaryRef with at least a value for the kCMFormatDescriptionExtension_SampleDescriptionExtensionAtoms key. The value for this key also has to be a CFDictionaryRef. For H.264 this is created inside CMVideoFormatDescriptionCreateFromH264ParameterSets and looks like this:
avcC = <014d401e ffe10016 674d401e 9a660a0f ff350101 01400000 fa000013 88010100 0468ee3c 80>
However, for the MPEG-4 type you need to create this on your own. The end result should look like this:
esds = <00000000 038081e6 00000003 8081e611 00000000 00000000 058081e5 060102>
The way to create this is still a bit fuzzy to me, but it somehow works. I was inspired by this link. This is the code:
- (CMFormatDescriptionRef)createFormatDescriptorFromMPEG4Message:(MessageContainer *)message {
CMVideoFormatDescriptionRef mediaDescriptor = NULL;
NSData *esdsData = [self newESDSFromData:message.frameData];
CFMutableDictionaryRef esdsDictionary = CFDictionaryCreateMutable(kCFAllocatorDefault, 1,
&kCFTypeDictionaryKeyCallBacks,
&kCFTypeDictionaryValueCallBacks);
CFDictionarySetValue(esdsDictionary, CFSTR("esds"), (__bridge const void *)(esdsData));
NSDictionary *dictionary = @{(__bridge NSString *)kCMFormatDescriptionExtension_SampleDescriptionExtensionAtoms : (__bridge NSDictionary *)esdsDictionary};
OSStatus status = CMVideoFormatDescriptionCreate(kCFAllocatorDefault,
self.mediaCodec,
message.frameSize.width,
message.frameSize.height,
(__bridge CFDictionaryRef)dictionary,
&mediaDescriptor);
if (status) {
NSLog(#"CMVideoFormatDesciprionCreate failed with %zd", status);
}
return mediaDescriptor;
}
- (NSData *)newESDSFromData:(NSData *)data {
NSInteger dataLength = data.length;
int full_size = 3 + 5 + 13 + 5 + dataLength + 3;
// ES_DescrTag data + DecoderConfigDescrTag + data + DecSpecificInfoTag + size + SLConfigDescriptor
int config_size = 13 + 5 + dataLength;
int padding = 12;
int8_t *esdsInfo = calloc(full_size + padding, sizeof(int8_t));
//Version
esdsInfo[0] = 0;
//Flags
esdsInfo[1] = 0;
esdsInfo[2] = 0;
esdsInfo[3] = 0;
//ES_DescrTag
esdsInfo[4] |= 0x03;
[self addMPEG4DescriptionLength:full_size
toPointer:esdsInfo + 5];
//esid
esdsInfo[8] = 0;
esdsInfo[9] = 0;
//Stream priority
esdsInfo[10] = 0;
//DecoderConfigDescrTag
esdsInfo[11] = 0x03;
[self addMPEG4DescriptionLength:config_size
toPointer:esdsInfo + 12];
//Stream Type
esdsInfo[15] = 0x11;
//Buffer Size
esdsInfo[16] = 0;
esdsInfo[17] = 0;
//Max bitrate
esdsInfo[18] = 0;
esdsInfo[19] = 0;
esdsInfo[20] = 0;
//Avg bitrate
esdsInfo[21] = 0;
esdsInfo[22] = 0;
esdsInfo[23] = 0;
//< DecSpecificInfoTag
esdsInfo[24] |= 0x05;
[self addMPEG4DescriptionLength:dataLength
toPointer:esdsInfo + 25];
//SLConfigDescrTag
esdsInfo[28] = 0x06;
//Length
esdsInfo[29] = 0x01;
esdsInfo[30] = 0x02;
NSData *esdsData = [NSData dataWithBytes:esdsInfo length:31 * sizeof(int8_t)];
free(esdsInfo);
return esdsData;
}
- (void)addMPEG4DescriptionLength:(NSInteger)length
toPointer:(int8_t *)ptr {
for (int i = 3; i >= 0; i--) {
uint8_t b = (length >> (i * 7)) & 0x7F;
if (i != 0) {
b |= 0x80;
}
ptr[3 - i] = b;
}
}
The message container is a simple wrapper around the data received from the server:
@interface MessageContainer : NSObject
@property (nonatomic) CGSize frameSize;
@property (nonatomic) NSData *frameData;
@end
Where frameSize is the size of the frame (received separately from the server) and frameData is the data itself.

IOS App to decode a frame and display using AVSampleBufferDisplayLayer fails

I am writing an iOS app that decodes an H.264 frame and renders it using AVSampleBufferDisplayLayer. I have already modified the frame to replace the NAL start code with a 4-byte NAL size; this has been verified.
But all I see is a white frame in the iOS Simulator, with no error. Is there a way to dump the decoded frame and verify it? Any other debugging pointers would really help.
@implementation DecodeClass
- (void) viewDidLoad {
}
/* method to decode and render a frame */
- (void)decodeFrame{
NSLog(#"Decode Start");
/* local variable declaration */
//OSStatus status;
size_t spsSize, ppsSize,dataLen;
//_frameSize = 320*240*1.5;
uint8_t sps[] = {0x67, 0x42, 0xC0, 0x0D, 0x96, 0x64, 0x0A, 0x0F, 0xDF, 0xF8, 0x00, 0x20, 0x00, 0x18, 0x80, 0x00,
0x00, 0x7D, 0x00, 0x00, 0x0B, 0xB5, 0x47, 0x8A, 0x15, 0x50};
uint8_t pps[] = {0x68, 0xCE, 0x32, 0xC8};
const uint8_t* props[] = {sps, pps};
spsSize = (sizeof(sps)/sizeof(uint8_t));
ppsSize = (sizeof(pps)/sizeof(uint8_t));
const size_t sizes[] = {spsSize,ppsSize};
FILE* pFile;
int result;
pFile = fopen("/Documents/input_mod1.264","r");
fseeko(pFile, 0, SEEK_END);
unsigned long fileSize = ftello(pFile);
fseek(pFile, 0, SEEK_SET);
_dataBuf = (uint8_t*)malloc(sizeof(uint8_t) * (fileSize));
memset(_dataBuf,0,sizeof(uint8_t) * (fileSize));
if (pFile ){
result = fread(_dataBuf,sizeof(uint8_t),fileSize,pFile);
fclose(pFile);
}
else
NSLog(#"Can't open file");
[self MUX_Modify_AVC_Start_Code:_dataBuf size:&fileSize Header:false];
dataLen = fileSize;
//construct h.264 parameter set
CMVideoFormatDescriptionRef formatDesc;
OSStatus formatCreateResult = CMVideoFormatDescriptionCreateFromH264ParameterSets(kCFAllocatorDefault, 2, props, sizes, 4, &formatDesc);
if (formatCreateResult)
{
NSLog(#"construct CMVideoFormatDescriptionCreateFromH264ParameterSets Failed :%ld",(long)formatCreateResult);
}
//construct cmBlockbuffer .
CMBlockBufferRef blockBufferOut = nil;
CMBlockBufferCreateEmpty (0,0,kCMBlockBufferAlwaysCopyDataFlag, &blockBufferOut);
CMBlockBufferAppendMemoryBlock(blockBufferOut,
_dataBuf,
dataLen,
NULL,
NULL,
0,
dataLen,
kCMBlockBufferAlwaysCopyDataFlag);
//construct cmsamplebuffer ok
size_t sampleSizeArray[1] = {0};
sampleSizeArray[0] = CMBlockBufferGetDataLength(blockBufferOut);
CMSampleTimingInfo tmInfos[1] = {
{CMTimeMake(5,1), CMTimeMake(5,1), CMTimeMake(5,1)}
};
CMSampleBufferRef sampBuf = nil;
formatCreateResult = CMSampleBufferCreate(kCFAllocatorDefault,
blockBufferOut,
YES,
NULL,
NULL,
formatDesc,
1,
1,
tmInfos,
1,
sampleSizeArray,
&sampBuf);
NSLog(#"Decode End :: Construct CMSampleBufferRef value of formatCreateResult is %d", formatCreateResult);
if(!_dspLayer)
{
_dspLayer = [[AVSampleBufferDisplayLayer alloc]init];
[_dspLayer setFrame:CGRectMake(0,0,320,240)];
_dspLayer.bounds = CGRectMake(0, 0, 300, 300);
_dspLayer.videoGravity = AVLayerVideoGravityResizeAspect;
_dspLayer.position = CGPointMake(500, 500);
_dspLayer.backgroundColor = [UIColor blueColor].CGColor;
CMTimebaseRef tmBase = nil;
CMTimebaseCreateWithMasterClock(NULL,CMClockGetHostTimeClock(),&tmBase);
_dspLayer.controlTimebase = tmBase;
CMTimebaseSetTime(_dspLayer.controlTimebase, kCMTimeZero);
CMTimebaseSetRate(_dspLayer.controlTimebase, 1.0);
[self.layerView.layer addSublayer:_dspLayer];
}
//put to AVSampleBufferdisplayLayer,just one frame.
if([self.dspLayer isReadyForMoreMediaData])
{
[self.dspLayer enqueueSampleBuffer:sampBuf];
}
[self.dspLayer setNeedsDisplay];
}
-(void)MUX_Modify_AVC_Start_Code:(uint8_t*)pData size:(uint32_t *)nSize Header:(bool)bHeader{
....
}
-(uint32_t)MUX_FindNextPattern:(uint8_t*)streamBuf buffSize:(uint32_t)bufSize startCode:(uint32_t)startcode{
....
}
- (void)dealloc{
//free(_dataBuf);
}
@end
int main(int argc, char * argv[]) {
//[decodeClass release];
@autoreleasepool {
DecodeClass *decodeClass = [[DecodeClass alloc]init];
[decodeClass decodeFrame];
decodeClass = nil;
return UIApplicationMain(argc, argv, nil, NSStringFromClass([AppDelegate class]));
}
}

How to play and read .caf PCM audio file

I have an app that selects a song from the iPod library, then copies that song into the app's directory as a '.caf' file. I now need to play that file and, at the same time, read it into Apple's FFT from the Accelerate framework so I can visualize the data as a spectrogram. Here is the code for the FFT:
void FFTAccelerate::doFFTReal(float samples[], float amp[], int numSamples)
{
int i;
vDSP_Length log2n = log2f(numSamples);
//Convert float array of reals samples to COMPLEX_SPLIT array A
vDSP_ctoz((COMPLEX*)samples,2,&A,1,numSamples/2);
//Perform FFT using fftSetup and A
//Results are returned in A
vDSP_fft_zrip(fftSetup, &A, 1, log2n, FFT_FORWARD);
//Convert COMPLEX_SPLIT A result to float array to be returned
amp[0] = A.realp[0]/(numSamples*2);
for(i=1;i<numSamples;i++)
amp[i]=sqrt(A.realp[i]*A.realp[i]+A.imagp[i]*A.imagp[i])/numSamples;
}
//Constructor
FFTAccelerate::FFTAccelerate (int numSamples)
{
vDSP_Length log2n = log2f(numSamples);
fftSetup = vDSP_create_fftsetup(log2n, FFT_RADIX2);
int nOver2 = numSamples/2;
A.realp = (float *) malloc(nOver2*sizeof(float));
A.imagp = (float *) malloc(nOver2*sizeof(float));
}
My question is: how do I loop through the '.caf' audio file to feed the FFT while playing the song at the same time? I only need one channel. I'm guessing I need to grab 1024 samples of the song, process them in the FFT, then move further down the file and grab another 1024 samples. But I don't understand how to read an audio file to do this. The file has a sample rate of 44100.0 Hz, is in linear PCM format, 16-bit, and I believe is also interleaved, if that helps...
Try the ExtendedAudioFile API (requires AudioToolbox.framework).
#include <AudioToolbox/ExtendedAudioFile.h>
NSURL *urlToCAF = ...;
ExtAudioFileRef caf;
OSStatus status;
status = ExtAudioFileOpenURL((__bridge CFURLRef)urlToCAF, &caf);
if(noErr == status) {
// request float format
const UInt32 NumFrames = 1024;
const int ChannelsPerFrame = 1; // Mono, 2 for Stereo
// request float format
AudioStreamBasicDescription clientFormat;
clientFormat.mChannelsPerFrame = ChannelsPerFrame;
clientFormat.mSampleRate = 44100;
clientFormat.mFormatID = kAudioFormatLinearPCM;
clientFormat.mFormatFlags = kAudioFormatFlagIsFloat | kAudioFormatFlagIsNonInterleaved; // float
int cmpSize = sizeof(float);
int frameSize = cmpSize*ChannelsPerFrame;
clientFormat.mBitsPerChannel = cmpSize*8;
clientFormat.mBytesPerPacket = frameSize;
clientFormat.mFramesPerPacket = 1;
clientFormat.mBytesPerFrame = frameSize;
status = ExtAudioFileSetProperty(caf, kExtAudioFileProperty_ClientDataFormat, sizeof(clientFormat), &clientFormat);
if(noErr != status) { /* handle it */ }
while(1) {
float buf[ChannelsPerFrame*NumFrames];
AudioBuffer ab = { ChannelsPerFrame, sizeof(buf), buf };
AudioBufferList abl;
abl.mNumberBuffers = 1;
abl.mBuffers[0] = ab;
UInt32 ioNumFrames = NumFrames;
status = ExtAudioFileRead(caf, &ioNumFrames, &abl);
if(noErr == status) {
// process ioNumFrames here in buf
if(0 == ioNumFrames) {
// EOF!
break;
} else if(ioNumFrames < NumFrames) {
// TODO: pad buf with zeroes out to NumFrames
} else {
float amp[NumFrames]; // scratch space
doFFTReal(buf, amp, NumFrames);
}
}
}
// later
status = ExtAudioFileDispose(caf);
if(noErr != status) { /* hmm */ }
}

H264 Video Streaming over RTMP on iOS

With a bit of digging, I found a library that extracts NAL units from an .mp4 file while it is being written. I'm attempting to packetize this information to FLV over RTMP using libavformat and libavcodec. I set up a video stream using:
-(void)setupVideoStream {
int ret = 0;
videoCodec = avcodec_find_decoder(STREAM_VIDEO_CODEC);
if (videoCodec == nil) {
NSLog(#"Could not find encoder %i", STREAM_VIDEO_CODEC);
return;
}
videoStream = avformat_new_stream(oc, videoCodec);
videoCodecContext = videoStream->codec;
videoCodecContext->codec_type = AVMEDIA_TYPE_VIDEO;
videoCodecContext->codec_id = STREAM_VIDEO_CODEC;
videoCodecContext->pix_fmt = AV_PIX_FMT_YUV420P;
videoCodecContext->profile = FF_PROFILE_H264_BASELINE;
videoCodecContext->bit_rate = 512000;
videoCodecContext->bit_rate_tolerance = 0;
videoCodecContext->width = STREAM_WIDTH;
videoCodecContext->height = STREAM_HEIGHT;
videoCodecContext->time_base.den = STREAM_TIME_BASE;
videoCodecContext->time_base.num = 1;
videoCodecContext->gop_size = STREAM_GOP;
videoCodecContext->has_b_frames = 0;
videoCodecContext->ticks_per_frame = 2;
videoCodecContext->qcompress = 0.6;
videoCodecContext->qmax = 51;
videoCodecContext->qmin = 10;
videoCodecContext->max_qdiff = 4;
videoCodecContext->i_quant_factor = 0.71;
if (oc->oformat->flags & AVFMT_GLOBALHEADER)
videoCodecContext->flags |= CODEC_FLAG_GLOBAL_HEADER;
videoCodecContext->extradata = avcCHeader;
videoCodecContext->extradata_size = avcCHeaderSize;
ret = avcodec_open2(videoStream->codec, videoCodec, NULL);
if (ret < 0)
NSLog(#"Could not open codec!");
}
Then I connect, and each time the library extracts a NALU, it returns an array holding one or two NALUs to my RTMPClient. The method that handles the actual streaming looks like this:
-(void)writeNALUToStream:(NSArray*)data time:(double)pts {
int ret = 0;
uint8_t *buffer = NULL;
int bufferSize = 0;
// Number of NALUs within the data array
int numNALUs = [data count];
// First NALU
NSData *fNALU = [data objectAtIndex:0];
int fLen = [fNALU length];
// If there is more than one NALU...
if (numNALUs > 1) {
// Second NALU
NSData *sNALU = [data objectAtIndex:1];
int sLen = [sNALU length];
// Allocate a buffer the size of first data and second data
buffer = av_malloc(fLen + sLen);
// Copy the first data bytes of fLen into the buffer
memcpy(buffer, [fNALU bytes], fLen);
// Copy the second data bytes of sLen into the buffer + fLen + 1
memcpy(buffer + fLen + 1, [sNALU bytes], sLen);
// Update the size of the buffer
bufferSize = fLen + sLen;
}else {
// Allocate a buffer the size of first data
buffer = av_malloc(fLen);
// Copy the first data bytes of fLen into the buffer
memcpy(buffer, [fNALU bytes], fLen);
// Update the size of the buffer
bufferSize = fLen;
}
// Initialize the packet
av_init_packet(&pkt);
//av_packet_from_data(&pkt, buffer, bufferSize);
// Set the packet data to the buffer
pkt.data = buffer;
pkt.size = bufferSize;
pkt.pts = pts;
// Stream index 0 is the video stream
pkt.stream_index = 0;
// Add a key frame flag every 15 frames
if ((processedFrames % 15) == 0)
pkt.flags |= AV_PKT_FLAG_KEY;
// Write the frame to the stream
ret = av_interleaved_write_frame(oc, &pkt);
if (ret < 0)
NSLog(#"Error writing frame %i to stream", processedFrames);
else {
// Update the number of frames successfully streamed
frameCount++;
// Update the number of bytes successfully sent
bytesSent += pkt.size;
}
// Update the number of frames processed
processedFrames++;
// Update the number of bytes processed
processedBytes += pkt.size;
free((uint8_t*)buffer);
// Free the packet
av_free_packet(&pkt);
}
After about 100 or so frames, I get an error:
malloc: *** error for object 0xe5bfa0: incorrect checksum for freed object - object was probably modified after being freed.
*** set a breakpoint in malloc_error_break to debug
I cannot seem to stop this from happening. I've tried commenting out the av_free_packet() and free() calls, and I've also tried using av_packet_from_data() rather than initializing the packet and setting the data and size values myself.
My question is: how can I stop this error from happening? Also, according to Wireshark these are proper RTMP H.264 packets, yet they play nothing more than a black screen. Is there some glaring error that I am overlooking?
It looks to me like you are overflowing your buffer and corrupting your stream here:
memcpy(buffer + fLen + 1, [sNALU bytes], sLen);
You are allocating fLen + sLen bytes then writing fLen + sLen + 1 bytes. Just get rid of the + 1.
Because your AVPacket is allocated on the stack, av_free_packet() is not needed.
Finally, it is considered good practice to allocate extra padding bytes for libav: av_malloc(size + FF_INPUT_BUFFER_PADDING_SIZE).
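Putting those two fixes together, the two-NALU branch of writeNALUToStream: might look roughly like the sketch below (variable names are the question's; the rest of the method is assumed unchanged):
// Pad the allocation for libav and copy the second NALU directly after the first.
buffer = av_malloc(fLen + sLen + FF_INPUT_BUFFER_PADDING_SIZE);
memcpy(buffer, [fNALU bytes], fLen);          // first NALU at the start of the buffer
memcpy(buffer + fLen, [sNALU bytes], sLen);   // second NALU immediately after (no + 1)
bufferSize = fLen + sLen;                     // padding is not counted in the packet size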

AudioConverter number of packets is wrong

I've set up a class to convert audio from one format to another given an input and output AudioStreamBasicDescription. When I convert Linear PCM from the mic to iLBC, it works and gives me 6 packets when I give it 1024 packets from the AudioUnitRender function. I then send those 226 bytes via UDP to the same app running on a different device. The problem is that when I use the same class to convert back into Linear PCM for giving to an audio unit input, the AudioConverterFillComplexBuffer function doesn't give 1024 packets, it gives 960... This means that the audio unit input is expecting 4096 bytes (2048 x 2 for stereo) but I can only give it 3190 or so, and so it sounds really crackly and distorted...
If I give AudioConverter 1024 packets of LinearPCM, convert to iLBC, convert back to LinearPCM, surely I should get 1024 packets again?
Audio converter function:
-(void) doConvert {
// Start converting
if (converting) return;
converting = YES;
while (true) {
// Get next buffer
id bfr = [buffers getNextBuffer];
if (!bfr) {
converting = NO;
return;
}
// Get info
NSArray* bfrs = ([bfr isKindOfClass:[NSArray class]] ? bfr : @[bfr]);
int bfrSize = 0;
for (NSData* dat in bfrs) bfrSize += dat.length;
int inputPackets = bfrSize / self.inputFormat.mBytesPerPacket;
int outputPackets = (inputPackets * self.inputFormat.mFramesPerPacket) / self.outputFormat.mFramesPerPacket;
// Create output buffer
AudioBufferList* bufferList = (AudioBufferList*) malloc(sizeof(AudioBufferList) * self.outputFormat.mChannelsPerFrame);
bufferList -> mNumberBuffers = self.outputFormat.mChannelsPerFrame;
for (int i = 0 ; i < self.outputFormat.mChannelsPerFrame ; i++) {
bufferList -> mBuffers[i].mNumberChannels = 1;
bufferList -> mBuffers[i].mDataByteSize = 4*1024;
bufferList -> mBuffers[i].mData = malloc(bufferList -> mBuffers[i].mDataByteSize);
}
// Create input buffer
AudioBufferList* inputBufferList = (AudioBufferList*) malloc(sizeof(AudioBufferList) * bfrs.count);
inputBufferList -> mNumberBuffers = bfrs.count;
for (int i = 0 ; i < bfrs.count ; i++) {
inputBufferList -> mBuffers[i].mNumberChannels = 1;
inputBufferList -> mBuffers[i].mDataByteSize = [[bfrs objectAtIndex:i] length];
inputBufferList -> mBuffers[i].mData = (void*) [[bfrs objectAtIndex:i] bytes];
}
// Create sound data payload
struct SoundDataPayload payload;
payload.data = inputBufferList;
payload.numPackets = inputPackets;
payload.packetDescriptions = NULL;
payload.used = NO;
// Convert data
UInt32 numPackets = outputPackets;
OSStatus err = AudioConverterFillComplexBuffer(converter, acvConverterComplexInput, &payload, &numPackets, bufferList, NULL);
if (err)
continue;
// Check how to output
if (bufferList -> mNumberBuffers > 1) {
// Output as array
NSMutableArray* array = [NSMutableArray arrayWithCapacity:bufferList -> mNumberBuffers];
for (int i = 0 ; i < bufferList -> mNumberBuffers ; i++)
[array addObject:[NSData dataWithBytes:bufferList -> mBuffers[i].mData length:bufferList -> mBuffers[i].mDataByteSize]];
// Save
[convertedBuffers addBuffer:array];
} else {
// Output as data
NSData* newData = [NSData dataWithBytes:bufferList -> mBuffers[0].mData length:bufferList -> mBuffers[0].mDataByteSize];
// Save
[convertedBuffers addBuffer:newData];
}
// Free memory
for (int i = 0 ; i < bufferList -> mNumberBuffers ; i++)
free(bufferList -> mBuffers[i].mData);
free(inputBufferList);
free(bufferList);
// Tell delegate
if (self.convertHandler)
//dispatch_async(dispatch_get_main_queue(), self.convertHandler);
self.convertHandler();
}
}
Formats when converting to iLBC:
// Get input format from mic
UInt32 size = sizeof(AudioStreamBasicDescription);
AudioStreamBasicDescription inputDesc;
AudioUnitGetProperty(self.ioUnit, kAudioUnitProperty_StreamFormat, kAudioUnitScope_Output, 1, &inputDesc, &size);
// Set output stream description
size = sizeof(AudioStreamBasicDescription);
AudioStreamBasicDescription outputDescription;
memset(&outputDescription, 0, size);
outputDescription.mFormatID = kAudioFormatiLBC;
OSStatus err = AudioFormatGetProperty(kAudioFormatProperty_FormatInfo, 0, NULL, &size, &outputDescription);
Formats when converting from iLBC:
// Set input stream description
size = sizeof(AudioStreamBasicDescription);
AudioStreamBasicDescription inputDescription;
memset(&inputDescription, 0, size);
inputDescription.mFormatID = kAudioFormatiLBC;
AudioFormatGetProperty(kAudioFormatProperty_FormatInfo, 0, NULL, &size, &inputDescription);
// Set output stream description
UInt32 size = sizeof(AudioStreamBasicDescription);
AudioStreamBasicDescription outputDesc;
AudioUnitGetProperty(unit, kAudioUnitProperty_StreamFormat, kAudioUnitScope_Input, 0, &outputDesc, &size);
You have to use an intermediate buffer to save up enough bytes from enough incoming packets to exactly match the number requested by the audio unit input. Relying on any single compressed UDP packet decoding to exactly the right size won't work.
The AudioConverter may buffer samples and change the packet sizes depending on the compression format.
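For illustration, such an intermediate buffer could be as simple as the hypothetical accumulator sketched below (the class and method names are invented here, not part of the question's code). In a real render callback a lock-free ring buffer would be preferable, since @synchronized can block the audio thread:
@interface PCMAccumulator : NSObject
@property (nonatomic, strong) NSMutableData *fifo;
- (void)appendPCM:(NSData *)pcm;
- (BOOL)readBytes:(void *)dst length:(NSUInteger)length;
@end

@implementation PCMAccumulator
- (instancetype)init {
    if ((self = [super init])) _fifo = [NSMutableData data];
    return self;
}
// Called whenever AudioConverterFillComplexBuffer produces more LinearPCM (e.g. 960 frames).
- (void)appendPCM:(NSData *)pcm {
    @synchronized (self) { [_fifo appendData:pcm]; }
}
// Called from the audio unit's input callback; succeeds only when a full request
// (e.g. 1024 frames * bytesPerFrame) can be satisfied, otherwise output silence.
- (BOOL)readBytes:(void *)dst length:(NSUInteger)length {
    @synchronized (self) {
        if (_fifo.length < length) return NO;   // not enough buffered yet
        memcpy(dst, _fifo.bytes, length);
        [_fifo replaceBytesInRange:NSMakeRange(0, length) withBytes:NULL length:0];
        return YES;
    }
}
@end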
