Unhandled exception at 0x52f9e470 in project1.exe : 0xC000001D : Illegal instruction - opencv

i am trying to detect an object using opencv in c++ but i am getting an error :
Unhandled exception at 0x52f9e470 in project1.exe : 0xC000001D : Illegal instruction.
using windows 7 32 bit,opencv 2.4.3,visual studio (c++) 2010 and my code is :
#include <opencv\cv.h>
#include <opencv\highgui.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <math.h>
#include <float.h>
#include <limits.h>
#include <time.h>
#include <ctype.h>
// Create a string that contains the exact cascade name
// Contains the trained classifer for detecting hand
const char *cascade_name="D:/31dec12/hand.xml";
//The function detects the hand from input frame and draws a rectangle around the detected portion of the frame
void detect_and_draw( IplImage* img )
{
// Create memory for calculations
static CvMemStorage* storage = 0;
// Create a new Haar classifier
static CvHaarClassifierCascade* cascade = 0;
// Sets the scale with which the rectangle is drawn with
int scale = 1;
// Create two points to represent the hand locations
CvPoint pt1, pt2;
// Looping variable
int i;
// Load the HaarClassifierCascade
cascade = (CvHaarClassifierCascade*)cvLoad( cascade_name, 0, 0, 0 );
// Check whether the cascade has loaded successfully. Else report and error and quit
if( !cascade )
{
fprintf( stderr, "ERROR: Could not load classifier cascade\n" );
return;
}
// Allocate the memory storage
storage = cvCreateMemStorage(0);
// Create a new named window with title: result
cvNamedWindow( "result", 1 );
// Clear the memory storage which was used before
cvClearMemStorage( storage );
// Find whether the cascade is loaded, to find the hands. If yes, then:
if( cascade )
{
// There can be more than one hand in an image. So create a growable sequence of hands.
// Detect the objects and store them in the sequence
CvSeq* hands = cvHaarDetectObjects( img, cascade, storage,
1.1, 2, CV_HAAR_DO_CANNY_PRUNING,
cvSize(40, 40) );
// Loop the number of hands found.
for( i = 0; i < (hands ? hands->total : 0); i++ )
{
// Create a new rectangle for drawing the hand
CvRect* r = (CvRect*)cvGetSeqElem( hands, i );
// Find the dimensions of the hand,and scale it if necessary
pt1.x = r->x*scale;
pt2.x = (r->x+r->width)*scale;
pt1.y = r->y*scale;
pt2.y = (r->y+r->height)*scale;
// Draw the rectangle in the input image
cvRectangle( img, pt1, pt2, CV_RGB(230,20,232), 3, 8, 0 );
}
}
// Show the image in the window named "result"
cvShowImage( "result", img );
}
// A Simple Camera Capture Framework
int main()
{
// Gets the input video stream from camera
CvCapture* capture = cvCaptureFromCAM( CV_CAP_ANY );
// Checks if the input stream is obtained
if( !capture )
{
fprintf( stderr, "ERROR: capture is NULL \n" );
getchar();
return -1;
}
// Show the image captured from the camera in the window and repeat
while( 1 )
{
// Get one frame
IplImage* frame = cvQueryFrame( capture );
// Cecks if a frame is obtained
if( !frame )
{
fprintf( stderr, "ERROR: frame is null...\n" );
getchar();
break;
}
// Flips the frame into mirror image
cvFlip(frame,frame,1);
// Call the function to detect and draw the hand positions
detect_and_draw(frame);
//If ESC key pressed, Key=0x10001B under OpenCV 0.9.7(linux version),
//remove higher bits using AND operator
if( (cvWaitKey(10) & 255) == 27 )
break;
}
// Release the capture device housekeeping
cvReleaseCapture( &capture );
return 0;
}

What kind of cpu are you using? Last time I had the error: 0xC000001D : Illegal instruction was related to the SSE instruction used in the code. Some new SSE instruction are not implemented at AMD processors e.g. So you can fix this by rebuilding opencv without SSE support.

I also have this problem when using cv::Mat(...)
The same exception throw at
size_t esz = CV_ELEM_SIZE(_type), esz1 = CV_ELEM_SIZE1(_type);
Not so sure why but after changing Visual C++ Project floating point model from precise to fast, the problem solve.

Related

accumulatedweight throws cv:Exception error

I am new to OpenCV and trying to find contours and draw rectangle on them, here's my code but its throwing cv::Exception when it comes to accumulatedweighted().
#include "opencv2/video/tracking.hpp"
#include "opencv2/imgproc/imgproc.hpp"
#include "opencv2/imgproc/imgproc_c.h"
#include "opencv2/highgui/highgui.hpp"
#include <iostream>
#include <ctype.h>
using namespace cv;
using namespace std;
static void help()
{
cout << "\nThis is a Example to implement CAMSHIFT to detect multiple motion objects.\n";
}
Rect rect;
VideoCapture capture;
Mat currentFrame, currentFrame_grey, differenceImg, oldFrame_grey,background;
vector<vector<Point> > contours;
vector<Vec4i> hierarchy;
bool first = true;
int main(int argc, char* argv[])
{
//Create a new movie capture object.
capture.open(0);
if(!capture.isOpened())
{
//error in opening the video input
cerr << "Unable to open video file: " /*<< videoFilename*/ << endl;
exit(EXIT_FAILURE);
}
//capture current frame from webcam
capture >> currentFrame;
//Size of the image.
CvSize imgSize;
imgSize.width = currentFrame.size().width; //img.size().width
imgSize.height = currentFrame.size().height; ////img.size().height
//Images to use in the program.
currentFrame_grey.create( imgSize, IPL_DEPTH_8U);//image.create().
while(1)
{
capture >> currentFrame;//VideoCapture& VideoCapture::operator>>(Mat& image)
//Convert the image to grayscale.
cvtColor(currentFrame,currentFrame_grey,CV_RGB2GRAY);//cvtColor()
// Converting Original image to make both background n original image same
currentFrame.convertTo(currentFrame,CV_32FC3);
background = Mat::zeros(currentFrame.size(), CV_32FC3);
//Here its throwing exception
accumulateWeighted(currentFrame,background,1.0,NULL);
imshow("Background",background);
if(first) //Capturing Background for the first time
{
differenceImg = currentFrame_grey.clone();//img1 = img.clone()
oldFrame_grey = currentFrame_grey.clone();//img2 = img.clone()
convertScaleAbs(currentFrame_grey, oldFrame_grey, 1.0, 0.0);//convertscaleabs()
first = false;
continue;
}
//Minus the current frame from the moving average.
absdiff(oldFrame_grey,currentFrame_grey,differenceImg);//absDiff()
//bluring the differnece image
blur(differenceImg, differenceImg, imgSize);//blur()
//apply threshold to discard small unwanted movements
threshold(differenceImg, differenceImg, 25, 255, CV_THRESH_BINARY);//threshold()
//find contours
findContours(differenceImg,contours,hierarchy,CV_RETR_TREE, CV_CHAIN_APPROX_SIMPLE, Point(0, 0)); //findcontours()
//draw bounding box around each contour
//for(; contours! = 0; contours = contours->h_next)
for(int i = 0; i < contours.size(); i++)
{
rect = boundingRect(contours[i]); //extract bounding box for current contour
//drawing rectangle
rectangle(currentFrame, cvPoint(rect.x, rect.y), cvPoint(rect.x+rect.width, rect.y+rect.height), cvScalar(0, 0, 255, 0), 2, 8, 0);
}
//New Background
convertScaleAbs(currentFrame_grey, oldFrame_grey, 1.0, 0.0);
//display colour image with bounding box
imshow("Output Image", currentFrame);//imshow()
//display threshold image
imshow("Difference image", differenceImg);//imshow()
//clear memory and contours
//cvClearMemStorage( storage );
//contours = 0;
contours.clear();
//background = currentFrame;
//press Esc to exit
char c = cvWaitKey(33);
if( c == 27 ) break;
}
// Destroy All Windows.
destroyAllWindows();
return 0;
}
Please Help to solve this.
First of all, I don't really get the idea of calling accumulateWeighted with alpha = 1.0. If you look at the definition of accumulateWeighted in the doc, you will see that with alpha = 1.0 it is basically equivalent to copy currentFrame into background at each iteration.
Moreover, it is an accumulation function, to accumulate image changes over time into a new image. What is the interest of it if you reset background at every loop with background = Mat::zeros(currentFrame.size(), CV_32FC3); ?
This being said, there is a little flaw in your code with the 4th argument of the function. You wrote accumulateWeighted(currentFrame,background,1.0,NULL);. If you look into the documentation you will find that the 4th argument is a Mask, and is optional. Passing a NULL pointer here might be the source of your exception. Why don't you call the function like this : accumulateWeighted(currentFrame,background,1.0); ?
Hope this helps,
Ben

How to get better results with OpenCV face recognition Module

I'm trying to use OpenCV's face recognition module to recognize 2 subjects from a video. I cropped 30 face images of the first subject and 20 face images of the second subject from the video and I use these as my training set.
I've tested all three approaches (Eigenfaces, Fisherfaces and LBP histograms), but I'm not getting good results in neither of the approaches. Sometimes the first subject is classified as the second subject and vice-verse, sometimes false detections are classified as one of the two subjects and sometimes other people in the video are classified as one of the two subjects.
How can I improve performance? Would enlarging the training set help in improving the results? Are there any other packages I can consider that performs face recognition in C++? I think it should be an easy task as I'm trying to recognize only two different subjects.
Here is my code (I'm using OpenCV 2.4.7 on windows 8 with VS2012):
#include "opencv2/objdetect/objdetect.hpp"
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/imgproc/imgproc.hpp"
#include "opencv2/contrib/contrib.hpp"
#include <iostream>
#include <stdio.h>
#include <fstream>
#include <sstream>
#define EIGEN 0
#define FISHER 0
#define LBPH 1;
using namespace std;
using namespace cv;
/** Function Headers */
void detectAndDisplay( Mat frame , int i,Ptr<FaceRecognizer> model);
static Mat toGrayscale(InputArray _src) {
Mat src = _src.getMat();
// only allow one channel
if(src.channels() != 1) {
CV_Error(CV_StsBadArg, "Only Matrices with one channel are supported");
}
// create and return normalized image
Mat dst;
cv::normalize(_src, dst, 0, 255, NORM_MINMAX, CV_8UC1);
return dst;
}
static void read_csv(const string& filename, vector<Mat>& images, vector<int>& labels, char separator = ';') {
std::ifstream file(filename.c_str(), ifstream::in);
if (!file) {
string error_message = "No valid input file was given, please check the given filename.";
CV_Error(CV_StsBadArg, error_message);
}
string line, path, classlabel;
while (getline(file, line)) {
stringstream liness(line);
getline(liness, path, separator);
getline(liness, classlabel);
if(!path.empty() && !classlabel.empty()) {
images.push_back(imread(path, 0));
labels.push_back(atoi(classlabel.c_str()));
}
}
}
/** Global variables */
String face_cascade_name = "C:\\OIM\\code\\OIM2 - face detection\\Debug\\haarcascade_frontalface_alt.xml";
//String face_cascade_name = "C:\\OIM\\code\\OIM2 - face detection\\Debug\\NewCascade.xml";
//String face_cascade_name = "C:\\OIM\\code\\OIM2 - face detection\\Debug\\haarcascade_eye_tree_eyeglasses.xml";
String eyes_cascade_name = "C:\\OIM\\code\\OIM2 - face detection\\Debug\\haarcascade_eye_tree_eyeglasses.xml";
CascadeClassifier face_cascade;
CascadeClassifier eyes_cascade;
string window_name = "Capture - Face detection";
RNG rng(12345);
/** #function main */
int main( int argc, const char** argv )
{
string fn_csv = "C:\\OIM\\faces_org.csv";
// These vectors hold the images and corresponding labels.
vector<Mat> images;
vector<int> labels;
// Read in the data. This can fail if no valid
// input filename is given.
try {
read_csv(fn_csv, images, labels);
} catch (cv::Exception& e) {
cerr << "Error opening file \"" << fn_csv << "\". Reason: " << e.msg << endl;
// nothing more we can do
exit(1);
}
// Quit if there are not enough images for this demo.
if(images.size() <= 1) {
string error_message = "This demo needs at least 2 images to work. Please add more images to your data set!";
CV_Error(CV_StsError, error_message);
}
// Get the height from the first image. We'll need this
// later in code to reshape the images to their original
// size:
int height = images[0].rows;
// The following lines create an Eigenfaces model for
// face recognition and train it with the images and
// labels read from the given CSV file.
// This here is a full PCA, if you just want to keep
// 10 principal components (read Eigenfaces), then call
// the factory method like this:
//
// cv::createEigenFaceRecognizer(10);
//
// If you want to create a FaceRecognizer with a
// confidennce threshold, call it with:
//
// cv::createEigenFaceRecognizer(10, 123.0);
//
//Ptr<FaceRecognizer> model = createEigenFaceRecognizer();
#if EIGEN
Ptr<FaceRecognizer> model = createEigenFaceRecognizer(10,2000000000);
#elif FISHER
Ptr<FaceRecognizer> model = createFisherFaceRecognizer(0, 200000000);
#elif LBPH
Ptr<FaceRecognizer> model =createLBPHFaceRecognizer(1,8,8,8,200000000);
#endif
model->train(images, labels);
Mat frame;
//-- 1. Load the cascades
if( !face_cascade.load( face_cascade_name ) ){ printf("--(!)Error loading\n"); return -1; };
if( !eyes_cascade.load( eyes_cascade_name ) ){ printf("--(!)Error loading\n"); return -1; };
// Get the frame rate
bool stop(false);
int count=1;
char filename[512];
for (int i=1;i<=517;i++){
sprintf(filename,"C:\\OIM\\original_frames2\\image%d.jpg",i);
Mat frame=imread(filename);
detectAndDisplay(frame,i,model);
waitKey(0);
}
return 0;
}
/** #function detectAndDisplay */
void detectAndDisplay( Mat frame ,int i, Ptr<FaceRecognizer> model)
{
std::vector<Rect> faces;
Mat frame_gray;
cvtColor( frame, frame_gray, CV_BGR2GRAY );
equalizeHist( frame_gray, frame_gray );
//-- Detect faces
//face_cascade.detectMultiScale( frame_gray, faces, 1.1, 2, 0|CV_HAAR_SCALE_IMAGE, Size(30, 30) );
face_cascade.detectMultiScale( frame_gray, faces, 1.1, 1, 0|CV_HAAR_SCALE_IMAGE, Size(10, 10) );
for( size_t i = 0; i < faces.size(); i++ )
{
Rect roi = Rect(faces[i].x,faces[i].y,faces[i].width,faces[i].height);
Mat face=frame_gray(roi);
resize(face,face,Size(200,200));
int predictedLabel = -1;
double confidence = 0.0;
model->predict(face, predictedLabel, confidence);
//imshow("gil",face);
//waitKey(0);
#if EIGEN
int M=10000;
#elif FISHER
int M=500;
#elif LBPH
int M=300;
#endif
Point center( faces[i].x + faces[i].width*0.5, faces[i].y + faces[i].height*0.5 );
if ((predictedLabel==1)&& (confidence<M))
ellipse( frame, center, Size( faces[i].width*0.5, faces[i].height*0.5), 0, 0, 360, Scalar( 0, 0, 255 ), 4, 8, 0 );
if ((predictedLabel==0)&& (confidence<M))
ellipse( frame, center, Size( faces[i].width*0.5, faces[i].height*0.5), 0, 0, 360, Scalar( 255, 0, 0), 4, 8, 0 );
if (confidence>M)
ellipse( frame, center, Size( faces[i].width*0.5, faces[i].height*0.5), 0, 0, 360, Scalar( 0, 255, 0), 4, 8, 0 );
Mat faceROI = frame_gray( faces[i] );
std::vector<Rect> eyes;
//-- In each face, detect eyes
eyes_cascade.detectMultiScale( faceROI, eyes, 1.1, 2, 0 |CV_HAAR_SCALE_IMAGE, Size(30, 30) );
for( size_t j = 0; j < eyes.size(); j++ )
{
Point center( faces[i].x + eyes[j].x + eyes[j].width*0.5, faces[i].y + eyes[j].y + eyes[j].height*0.5 );
int radius = cvRound( (eyes[j].width + eyes[j].height)*0.25 );
//circle( frame, center, radius, Scalar( 255, 0, 0 ), 4, 8, 0 );
}
}
//-- Show what you got
//imshow( window_name, frame );
char filename[512];
sprintf(filename,"C:\\OIM\\FaceRecognitionResults\\image%d.jpg",i);
imwrite(filename,frame);
}
Thanks in advance,
Gil.
First thing, as commented, increase the number of samples if possible. Also include the variations (like illumination, slight poses etc) you expect to be in the video. However, especially for eigenfaces/ fisherfaces so many images will not help to increase performance. Sadly, the best number of training samples can depend on your data.
The more important point is the hardness of the problem is totally depends on your video. If your video contains variations like illumination, pose; then you can't expect using purely appearance based methods(e.g Eigenfaces) and texture descriptor(LBP) will be succesful. First, you might want to detect faces. Then:
You might want to estimate face position and warp to frontal; check
for Active Appearance Model and Active Shape Model
Use histogram of equalization to attenuate illumination problem
Fitting an ellipse to detected face region will help against background noise.
Of course, there are many other methods available in literature; the steps I wrote is implemented in OpenCV and commonly known.
Hope it helps.

Undefined symbols for architecture x86_64: "_cvHaarDetectObjects" Eclipse

I'm trying to build the following sample code:
#include "cv.h"
#include "highgui.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <math.h>
#include <float.h>
#include <limits.h>
#include <time.h>
#include <ctype.h>
// Create a string that contains the exact cascade name
const char* cascade_name = "/usr/share/src/OpenCV-2.3.1/data/haarcascades/haarcascade_frontalface_alt.xml";
// "C:/Program Files/OpenCV/data/haarcascades/haarcascade_frontalface_alt.xml";
/* "haarcascade_profileface.xml";*/
// Function prototype for detecting and drawing an object from an image
void detect_and_draw( IplImage* image );
// Main function, defines the entry point for the program.
int main( int argc, char** argv )
{
// Create a sample image
IplImage *img = cvLoadImage("1.pgm");
// Call the function to detect and draw the face positions
detect_and_draw(img);
// Wait for user input before quitting the program
cvWaitKey();
// Release the image
cvReleaseImage(&img);
// Destroy the window previously created with filename: "result"
cvDestroyWindow("result");
// return 0 to indicate successfull execution of the program
return 0;
}
// Function to detect and draw any faces that is present in an image
void detect_and_draw( IplImage* img )
{
// Create memory for calculations
static CvMemStorage* storage = 0;
// Create a new Haar classifier
static CvHaarClassifierCascade* cascade = 0;
int scale = 1;
// Create a new image based on the input image
IplImage* temp = cvCreateImage( cvSize(img->width/scale,img->height/scale), 8, 3 );
// Create two points to represent the face locations
CvPoint pt1, pt2;
int i;
// Load the HaarClassifierCascade
cascade = (CvHaarClassifierCascade*)cvLoad( cascade_name, 0, 0, 0 );
// Check whether the cascade has loaded successfully. Else report and error and quit
if( !cascade )
{
fprintf( stderr, "ERROR: Could not load classifier cascade\n" );
return;
}
// Allocate the memory storage
storage = cvCreateMemStorage(0);
// Create a new named window with title: result
cvNamedWindow( "result", 1 );
// Clear the memory storage which was used before
cvClearMemStorage( storage );
// Find whether the cascade is loaded, to find the faces. If yes, then:
if( cascade )
{
// There can be more than one face in an image. So create a growable sequence of faces.
// Detect the objects and store them in the sequence
CvSeq* faces = cvHaarDetectObjects( img, cascade, storage,
1.1, 2, CV_HAAR_DO_CANNY_PRUNING,
cvSize(40, 40) );
// Loop the number of faces found.
for( i = 0; i < (faces ? faces->total : 0); i++ )
{
// Create a new rectangle for drawing the face
CvRect* r = (CvRect*)cvGetSeqElem( faces, i );
// Find the dimensions of the face,and scale it if necessary
pt1.x = r->x*scale;
pt2.x = (r->x+r->width)*scale;
pt1.y = r->y*scale;
pt2.y = (r->y+r->height)*scale;
// Draw the rectangle in the input image
cvRectangle( img, pt1, pt2, CV_RGB(255,0,0), 3, 8, 0 );
}
}
// Show the image in the window named "result"
cvShowImage( "result", img );
// Release the temp image created.
cvReleaseImage( &temp );
}
And the path /usr/share/src....is the right path where the xml file is. I've linked the opencv libraries: opencv_core, opencv_imgproc, opencv_highgui and opencv_video to eclipse (and I think they are correctly linked since I build other opencv projects in this way). But Eclipse keeps throwing the following errors:
Invoking: MacOS X C++ Linker
g++ -L/usr/local/include/opencv -L/usr/local/include/opencv2 -L/usr/local/lib -o "OpenCVFace" ./main.o -lopencv_core -lopencv_highgui -lopencv_video -lopencv_imgproc
Undefined symbols for architecture x86_64:
"_cvHaarDetectObjects", referenced from:
detect_and_draw(_IplImage*) in main.o
ld: symbol(s) not found for architecture x86_64
collect2: ld returned 1 exit status
make: *** [OpenCVFace] Error 1
**** Build Finished ****
I can see the cvHaarDetectObjects function is highlighted in Eclipse (which turned to purple color). Any ideas of how to solve the problem? Thanks!
You need to link to opencv_objdetect; include -lopencv_objdetect in the link flags.

OpenCV cannot find video device

I cannot use webcam as input device for OpenCV 2.3.1 in Ubuntu 11.04, this code works fine on windows:
#include "cv.h"
#include "highgui.h"
#include <stdio.h>
// A Simple Camera Capture Framework
int main() {
CvCapture* capture = cvCaptureFromCAM(-1);
if( !capture ) {
fprintf( stderr, "ERROR: capture is NULL \n" );
getchar();
return -1;
}
// Create a window in which the captured images will be presented
cvNamedWindow( "mywindow", CV_WINDOW_AUTOSIZE );
// Show the image captured from the camera in the window and repeat
while( 1 ) {
// Get one frame
IplImage* frame = cvQueryFrame( capture );
if( !frame ) {
fprintf( stderr, "ERROR: frame is null...\n" );
getchar();
break;
}
cvShowImage( "mywindow", frame );
// Do not release the frame!
//If ESC key pressed, Key=0x10001B under OpenCV 0.9.7(linux version),
//remove higher bits using AND operator
if( (cvWaitKey(10) & 255) == 27 ) break;
}
// Release the capture device housekeeping
cvReleaseCapture( &capture );
cvDestroyWindow( "mywindow" );
return 0;
}
It returns "ERROR: capture is NULL "
I found the solution, I need to install libv4l-0 and libv4l-dev then compile OpenCV with USE_V4L=ON.

Extracting DCT coefficients from encoded images and video

Is there a way to easily extract the DCT coefficients (and quantization parameters) from encoded images and video? Any decoder software must be using them to decode block-DCT encoded images and video. So I'm pretty sure the decoder knows what they are. Is there a way to expose them to whomever is using the decoder?
I'm implementing some video quality assessment algorithms that work directly in the DCT domain. Currently, the majority of my code uses OpenCV, so it would be great if anyone knows of a solution using that framework. I don't mind using other libraries (perhaps libjpeg, but that seems to be for still images only), but my primary concern is to do as little format-specific work as possible (I don't want to reinvent the wheel and write my own decoders). I want to be able to open any video/image (H.264, MPEG, JPEG, etc) that OpenCV can open, and if it's block DCT-encoded, to get the DCT coefficients.
In the worst case, I know that I can write up my own block DCT code, run the decompressed frames/images through it and then I'd be back in the DCT domain. That's hardly an elegant solution, and I hope I can do better.
Presently, I use the fairly common OpenCV boilerplate to open images:
IplImage *image = cvLoadImage(filename);
// Run quality assessment metric
The code I'm using for video is equally trivial:
CvCapture *capture = cvCaptureFromAVI(filename);
while (cvGrabFrame(capture))
{
IplImage *frame = cvRetrieveFrame(capture);
// Run quality assessment metric on frame
}
cvReleaseCapture(&capture);
In both cases, I get a 3-channel IplImage in BGR format. Is there any way I can get the DCT coefficients as well?
Well, I did a bit of reading and my original question seems to be an instance of wishful thinking.
Basically, it's not possible to get the DCT coefficients from H.264 video frames for the simple reason that H.264 doesn't use DCT. It uses a different transform (integer transform). Next, the coefficients for that transform don't necessarily change on a frame-by-frame basis -- H.264 is smarter cause it splits up frames into slices. It should be possible to get those coefficients through a special decoder, but I doubt OpenCV exposes it for the user.
For JPEG, things are a bit more positive. As I suspected, libjpeg exposes the DCT coefficients for you. I wrote a small app to show that it works (source at the end). It makes a new image using the DC term from each block. Because the DC term is equal to the block average (after proper scaling), the DC images are downsampled versions of the input JPEG image.
EDIT: fixed scaling in source
Original image (512 x 512):
DC images (64x64): luma Cr Cb RGB
Source (C++):
#include <stdio.h>
#include <assert.h>
#include <cv.h>
#include <highgui.h>
extern "C"
{
#include "jpeglib.h"
#include <setjmp.h>
}
#define DEBUG 0
#define OUTPUT_IMAGES 1
/*
* Extract the DC terms from the specified component.
*/
IplImage *
extract_dc(j_decompress_ptr cinfo, jvirt_barray_ptr *coeffs, int ci)
{
jpeg_component_info *ci_ptr = &cinfo->comp_info[ci];
CvSize size = cvSize(ci_ptr->width_in_blocks, ci_ptr->height_in_blocks);
IplImage *dc = cvCreateImage(size, IPL_DEPTH_8U, 1);
assert(dc != NULL);
JQUANT_TBL *tbl = ci_ptr->quant_table;
UINT16 dc_quant = tbl->quantval[0];
#if DEBUG
printf("DCT method: %x\n", cinfo->dct_method);
printf
(
"component: %d (%d x %d blocks) sampling: (%d x %d)\n",
ci,
ci_ptr->width_in_blocks,
ci_ptr->height_in_blocks,
ci_ptr->h_samp_factor,
ci_ptr->v_samp_factor
);
printf("quantization table: %d\n", ci);
for (int i = 0; i < DCTSIZE2; ++i)
{
printf("% 4d ", (int)(tbl->quantval[i]));
if ((i + 1) % 8 == 0)
printf("\n");
}
printf("raw DC coefficients:\n");
#endif
JBLOCKARRAY buf =
(cinfo->mem->access_virt_barray)
(
(j_common_ptr)cinfo,
coeffs[ci],
0,
ci_ptr->v_samp_factor,
FALSE
);
for (int sf = 0; (JDIMENSION)sf < ci_ptr->height_in_blocks; ++sf)
{
for (JDIMENSION b = 0; b < ci_ptr->width_in_blocks; ++b)
{
int intensity = 0;
intensity = buf[sf][b][0]*dc_quant/DCTSIZE + 128;
intensity = MAX(0, intensity);
intensity = MIN(255, intensity);
cvSet2D(dc, sf, (int)b, cvScalar(intensity));
#if DEBUG
printf("% 2d ", buf[sf][b][0]);
#endif
}
#if DEBUG
printf("\n");
#endif
}
return dc;
}
IplImage *upscale_chroma(IplImage *quarter, CvSize full_size)
{
IplImage *full = cvCreateImage(full_size, IPL_DEPTH_8U, 1);
cvResize(quarter, full, CV_INTER_NN);
return full;
}
GLOBAL(int)
read_JPEG_file (char * filename, IplImage **dc)
{
/* This struct contains the JPEG decompression parameters and pointers to
* working space (which is allocated as needed by the JPEG library).
*/
struct jpeg_decompress_struct cinfo;
struct jpeg_error_mgr jerr;
/* More stuff */
FILE * infile; /* source file */
/* In this example we want to open the input file before doing anything else,
* so that the setjmp() error recovery below can assume the file is open.
* VERY IMPORTANT: use "b" option to fopen() if you are on a machine that
* requires it in order to read binary files.
*/
if ((infile = fopen(filename, "rb")) == NULL) {
fprintf(stderr, "can't open %s\n", filename);
return 0;
}
/* Step 1: allocate and initialize JPEG decompression object */
cinfo.err = jpeg_std_error(&jerr);
/* Now we can initialize the JPEG decompression object. */
jpeg_create_decompress(&cinfo);
/* Step 2: specify data source (eg, a file) */
jpeg_stdio_src(&cinfo, infile);
/* Step 3: read file parameters with jpeg_read_header() */
(void) jpeg_read_header(&cinfo, TRUE);
/* We can ignore the return value from jpeg_read_header since
* (a) suspension is not possible with the stdio data source, and
* (b) we passed TRUE to reject a tables-only JPEG file as an error.
* See libjpeg.txt for more info.
*/
/* Step 4: set parameters for decompression */
/* In this example, we don't need to change any of the defaults set by
* jpeg_read_header(), so we do nothing here.
*/
jvirt_barray_ptr *coeffs = jpeg_read_coefficients(&cinfo);
IplImage *y = extract_dc(&cinfo, coeffs, 0);
IplImage *cb_q = extract_dc(&cinfo, coeffs, 1);
IplImage *cr_q = extract_dc(&cinfo, coeffs, 2);
IplImage *cb = upscale_chroma(cb_q, cvGetSize(y));
IplImage *cr = upscale_chroma(cr_q, cvGetSize(y));
cvReleaseImage(&cb_q);
cvReleaseImage(&cr_q);
#if OUTPUT_IMAGES
cvSaveImage("y.png", y);
cvSaveImage("cb.png", cb);
cvSaveImage("cr.png", cr);
#endif
*dc = cvCreateImage(cvGetSize(y), IPL_DEPTH_8U, 3);
assert(dc != NULL);
cvMerge(y, cr, cb, NULL, *dc);
cvReleaseImage(&y);
cvReleaseImage(&cb);
cvReleaseImage(&cr);
/* Step 7: Finish decompression */
(void) jpeg_finish_decompress(&cinfo);
/* We can ignore the return value since suspension is not possible
* with the stdio data source.
*/
/* Step 8: Release JPEG decompression object */
/* This is an important step since it will release a good deal of memory. */
jpeg_destroy_decompress(&cinfo);
fclose(infile);
return 1;
}
int
main(int argc, char **argv)
{
int ret = 0;
if (argc != 2)
{
fprintf(stderr, "usage: %s filename.jpg\n", argv[0]);
return 1;
}
IplImage *dc = NULL;
ret = read_JPEG_file(argv[1], &dc);
assert(dc != NULL);
IplImage *rgb = cvCreateImage(cvGetSize(dc), IPL_DEPTH_8U, 3);
cvCvtColor(dc, rgb, CV_YCrCb2RGB);
#if OUTPUT_IMAGES
cvSaveImage("rgb.png", rgb);
#else
cvNamedWindow("DC", CV_WINDOW_AUTOSIZE);
cvShowImage("DC", rgb);
cvWaitKey(0);
#endif
cvReleaseImage(&dc);
cvReleaseImage(&rgb);
return 0;
}
You can use, libjpeg to extract dct data of your jpeg file, but for h.264 video file, I can't find any open source code that give you dct data (actully Integer dct data). But you can use h.264 open source software like JM, JSVM or x264. In these two source file, you have to find their specific function that make use of dct function, and change it to your desire form, to get your output dct data.
For Image:
use the following code, and after read_jpeg_file( infilename, v, quant_tbl ), v and quant_tbl will have dct data and quantization table of your jpeg image respectively.
I used Qvector to store my output data, change it to your preferred c++ array list.
#include <iostream>
#include <stdio.h>
#include <jpeglib.h>
#include <stdlib.h>
#include <setjmp.h>
#include <fstream>
#include <QVector>
int read_jpeg_file( char *filename, QVector<QVector<int> > &dct_coeff, QVector<unsigned short> &quant_tbl)
{
struct jpeg_decompress_struct cinfo;
struct jpeg_error_mgr jerr;
FILE * infile;
if ((infile = fopen(filename, "rb")) == NULL) {
fprintf(stderr, "can't open %s\n", filename);
return 0;
}
cinfo.err = jpeg_std_error(&jerr);
jpeg_create_decompress(&cinfo);
jpeg_stdio_src(&cinfo, infile);
(void) jpeg_read_header(&cinfo, TRUE);
jvirt_barray_ptr *coeffs_array = jpeg_read_coefficients(&cinfo);
for (int ci = 0; ci < 1; ci++)
{
JBLOCKARRAY buffer_one;
JCOEFPTR blockptr_one;
jpeg_component_info* compptr_one;
compptr_one = cinfo.comp_info + ci;
for (int by = 0; by < compptr_one->height_in_blocks; by++)
{
buffer_one = (cinfo.mem->access_virt_barray)((j_common_ptr)&cinfo, coeffs_array[ci], by, (JDIMENSION)1, FALSE);
for (int bx = 0; bx < compptr_one->width_in_blocks; bx++)
{
blockptr_one = buffer_one[0][bx];
QVector<int> tmp;
for (int bi = 0; bi < 64; bi++)
{
tmp.append(blockptr_one[bi]);
}
dct_coeff.push_back(tmp);
}
}
}
// coantization table
j_decompress_ptr dec_cinfo = (j_decompress_ptr) &cinfo;
jpeg_component_info *ci_ptr = &dec_cinfo->comp_info[0];
JQUANT_TBL *tbl = ci_ptr->quant_table;
for(int ci =0 ; ci < 64; ci++){
quant_tbl.append(tbl->quantval[ci]);
}
return 1;
}
int main()
{
QVector<QVector<int> > v;
QVector<unsigned short> quant_tbl;
char *infilename = "your_image.jpg";
std::ofstream out;
out.open("out_dct.txt");
if( read_jpeg_file( infilename, v, quant_tbl ) > 0 ){
for(int j = 0; j < v.size(); j++ ){
for (int i = 0; i < v[0].size(); ++i){
out << v[j][i] << "\t";
}
out << "---------------" << std::endl;
}
out << "\n\n\n" << std::string(10,'-') << std::endl;
out << "\nQauntization Table:" << std::endl;
for(int i = 0; i < quant_tbl.size(); i++ ){
out << quant_tbl[i] << "\t";
}
}
else{
std::cout << "Can not read, Returned With Error";
return -1;
}
out.close();
return 0;
}

Resources