Is there a more efficient way to load a large Mat object into memory than the FileStorage method in OpenCV?
I have a large Mat with 192 columns and 1 million rows I want to store locally in a file and load into memory then my application starts. There is no problem using the FileStorage, but I was wondering if there exists a more efficient method to do this. At the moment it takes about 5 minutes to load the Mat into memory using the Debug mode in Visual Studio and around 3 minutes in the Release mode and the size of the data file is around 1.2GB.
Is the FileStorage method the only method available to do this task?
Are you ok with a 100x speedup?
You should save and load your images in binary format. You can do that with the matwrite and matread function in the code below.
I tested both loading from a FileStorage and the binary file, and for a smaller image with 250K rows, 192 columns, type CV_8UC1 I got these results (time in ms):
// Mat: 250K rows, 192 cols, type CV_8UC1
Using FileStorage: 5523.45
Using Raw: 50.0879
On a image with 1M rows and 192 cols using the binary mode I got (time in ms):
// Mat: 1M rows, 192 cols, type CV_8UC1
Using FileStorage: (can't load, out of memory)
Using Raw: 197.381
NOTE
Never measure performance in debug.
3 minutes to load a matrix seems way too much, even for FileStorages. However, you'll gain a lot switching to binary mode.
Here the code with the functions matwrite and matread, and the test:
#include <opencv2\opencv.hpp>
#include <iostream>
#include <fstream>
using namespace std;
using namespace cv;
void matwrite(const string& filename, const Mat& mat)
{
ofstream fs(filename, fstream::binary);
// Header
int type = mat.type();
int channels = mat.channels();
fs.write((char*)&mat.rows, sizeof(int)); // rows
fs.write((char*)&mat.cols, sizeof(int)); // cols
fs.write((char*)&type, sizeof(int)); // type
fs.write((char*)&channels, sizeof(int)); // channels
// Data
if (mat.isContinuous())
{
fs.write(mat.ptr<char>(0), (mat.dataend - mat.datastart));
}
else
{
int rowsz = CV_ELEM_SIZE(type) * mat.cols;
for (int r = 0; r < mat.rows; ++r)
{
fs.write(mat.ptr<char>(r), rowsz);
}
}
}
Mat matread(const string& filename)
{
ifstream fs(filename, fstream::binary);
// Header
int rows, cols, type, channels;
fs.read((char*)&rows, sizeof(int)); // rows
fs.read((char*)&cols, sizeof(int)); // cols
fs.read((char*)&type, sizeof(int)); // type
fs.read((char*)&channels, sizeof(int)); // channels
// Data
Mat mat(rows, cols, type);
fs.read((char*)mat.data, CV_ELEM_SIZE(type) * rows * cols);
return mat;
}
int main()
{
// Save the random generated data
{
Mat m(1024*256, 192, CV_8UC1);
randu(m, 0, 1000);
FileStorage fs("fs.yml", FileStorage::WRITE);
fs << "m" << m;
matwrite("raw.bin", m);
}
// Load the saved matrix
{
// Method 1: using FileStorage
double tic = double(getTickCount());
FileStorage fs("fs.yml", FileStorage::READ);
Mat m1;
fs["m"] >> m1;
double toc = (double(getTickCount()) - tic) * 1000. / getTickFrequency();
cout << "Using FileStorage: " << toc << endl;
}
{
// Method 2: usign raw binary data
double tic = double(getTickCount());
Mat m2 = matread("raw.bin");
double toc = (double(getTickCount()) - tic) * 1000. / getTickFrequency();
cout << "Using Raw: " << toc << endl;
}
int dummy;
cin >> dummy;
return 0;
}
Related
I compiled this simple color tracking Image Processing program using OpenCV and Visual Studio 2012.
First I compiled it using CPU.
Program:
#include <iostream>
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/imgproc/imgproc.hpp"
#include <time.h>
using namespace cv;
using namespace std;
int main( int argc, char** argv )
{
time_t t= time(0);
VideoCapture cap(0); //capture the video from web cam
if ( !cap.isOpened() ) // if not success, exit program
{
cout << "Cannot open the web cam" << endl;
return -1;
}
double dWidth = cap.get(CV_CAP_PROP_FRAME_WIDTH); //get the width of frames of the video
double dHeight = cap.get(CV_CAP_PROP_FRAME_HEIGHT); //get the height of frames of the video
cout << "Frame size : " << dWidth << " x " << dHeight << endl;
namedWindow("Control", CV_WINDOW_AUTOSIZE); //create a window called "Control"
int iLowH = 0;
int iHighH = 179;
int iLowS = 0;
int iHighS = 255;
int iLowV = 0;
int iHighV = 255;
//Create track bars in "Control" window
cvCreateTrackbar("LowH", "Control", &iLowH, 179); //Hue (0 - 179)
cvCreateTrackbar("HighH", "Control", &iHighH, 179);
cvCreateTrackbar("LowS", "Control", &iLowS, 255); //Saturation (0 - 255)
cvCreateTrackbar("HighS", "Control", &iHighS, 255);
cvCreateTrackbar("LowV", "Control", &iLowV, 255); //Value (0 - 255)
cvCreateTrackbar("HighV", "Control", &iHighV, 255);
int fps=0;
int cur=0;
while (true)
{
fps++;
t=time(0);
struct tm *tmp = gmtime(&t);
int h= (t/360) %24;
int m= (t/60) %60;
int s = t%60;
if(cur !=s)
{
cout<<fps<<endl;
fps=0;
cur=s;
}
Mat imgOriginal;
bool bSuccess = cap.read(imgOriginal); // read a new frame from video
if (!bSuccess) //if not success, break loop
{
cout << "Cannot read a frame from video stream" << endl;
break;
}
Mat imgHSV;
cvtColor(imgOriginal, imgHSV, COLOR_BGR2HSV); //Convert the captured frame from BGR to HSV
Mat imgThresholded;
inRange(imgHSV, Scalar(iLowH, iLowS, iLowV), Scalar(iHighH, iHighS, iHighV), imgThresholded); //Threshold the image
//morphological opening (remove small objects from the foreground)
erode(imgThresholded, imgThresholded, getStructuringElement(MORPH_ELLIPSE, Size(5, 5)) );
dilate( imgThresholded, imgThresholded, getStructuringElement(MORPH_ELLIPSE, Size(5, 5)) );
//morphological closing (fill small holes in the foreground)
dilate( imgThresholded, imgThresholded, getStructuringElement(MORPH_ELLIPSE, Size(5, 5)) );
erode(imgThresholded, imgThresholded, getStructuringElement(MORPH_ELLIPSE, Size(5, 5)) );
imshow("Thresholded Image", imgThresholded); //show the thresholded image
imshow("Original", imgOriginal); //show the original image
if (waitKey(30) == 27) //wait for 'esc' key press for 30ms. If 'esc' key is pressed, break loop
{
cout << "esc key is pressed by user" << endl;
break;
}
}
return 0;
}
My Camera was giving an fps of 16.
Then I compiled this program using OpenCL (GPU Support).
Program:
#include <iostream>
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/imgproc/imgproc.hpp"
#include <opencv2/ocl/ocl.hpp>
#include <time.h>
using namespace cv;
using namespace std;
int main( int argc, char** argv )
{
time_t t= time(0);
VideoCapture cap(0); //capture the video from web cam
if ( !cap.isOpened() ) // if not success, exit program
{
cout << "Cannot open the web cam" << endl;
return -1;
}
double dWidth = cap.get(CV_CAP_PROP_FRAME_WIDTH); //get the width of frames of the video
double dHeight = cap.get(CV_CAP_PROP_FRAME_HEIGHT); //get the height of frames of the video
cout << "Frame size : " << dWidth << " x " << dHeight << endl;
namedWindow("Control", CV_WINDOW_AUTOSIZE); //create a window called "Control"
int iLowH = 0;
int iHighH = 179;
int iLowS = 0;
int iHighS = 255;
int iLowV = 0;
int iHighV = 255;
//Create track bars in "Control" window
cvCreateTrackbar("LowH", "Control", &iLowH, 179); //Hue (0 - 179)
cvCreateTrackbar("HighH", "Control", &iHighH, 179);
cvCreateTrackbar("LowS", "Control", &iLowS, 255); //Saturation (0 - 255)
cvCreateTrackbar("HighS", "Control", &iHighS, 255);
cvCreateTrackbar("LowV", "Control", &iLowV, 255); //Value (0 - 255)
cvCreateTrackbar("HighV", "Control", &iHighV, 255);
int fps=0;
int cur=0;
while (true)
{
fps++;
t=time(0);
struct tm *tmp = gmtime(&t);
int h= (t/360) %24;
int m= (t/60) %60;
int s = t%60;
if(cur !=s)
{
cout<<fps<<endl;
fps=0;
cur=s;
}
Mat imgOriginal;
bool bSuccess = cap.read(imgOriginal); // read a new frame from video
if (!bSuccess) //if not success, break loop
{
cout << "Cannot read a frame from video stream" << endl;
break;
}
Mat imgHSV;
cvtColor(imgOriginal, imgHSV, COLOR_BGR2HSV); //Convert the captured frame from BGR to HSV
Mat imgThresholded;
inRange(imgHSV, Scalar(iLowH, iLowS, iLowV), Scalar(iHighH, iHighS, iHighV), imgThresholded); //Threshold the image
//morphological opening (remove small objects from the foreground)
ocl::oclMat alpha(imgThresholded);
ocl::erode(alpha,alpha, getStructuringElement(MORPH_ELLIPSE, Size(5, 5)) );
ocl::dilate( alpha, alpha, getStructuringElement(MORPH_ELLIPSE, Size(5, 5)) );
//morphological closing (fill small holes in the foreground)
ocl::dilate( alpha, alpha, getStructuringElement(MORPH_ELLIPSE, Size(5, 5)) );
ocl::erode(alpha, alpha, getStructuringElement(MORPH_ELLIPSE, Size(5, 5)) );
imgThresholded = Mat(alpha);
imshow("Thresholded Image", imgThresholded); //show the thresholded image
imshow("Original", imgOriginal); //show the original image
if (waitKey(30) == 27) //wait for 'esc' key press for 30ms. If 'esc' key is pressed, break loop
{
cout << "esc key is pressed by user" << endl;
break;
}
}
return 0;
}
But now i am getting a fps = 10 . Please can someone tell Why is this Happening.
I read somewhere that GPU Support improves the fps performance. Graphic Card I am Using Is AMD RAEDON .
GPU is designed for massive throughput, but it takes lot of time to move the data from CPU memory to GPU memory. You should not think that GPU is always increasing the fps. It all depends on how well the power of GPU is harvested.
In your case it seems like you are doing quite little work for each frame. So my guess is that your system is using most of the time moving frames to the GPU and moving results back.
(as maZZZu commented)
You are doing serial computations. Add pipelining. Then when a frame is being captured, one last frame is being computed by opencl at the same time. You could overlap even more steps like:
get video data
copy to gpu
compute
get to cpu
visualise?
then only the biggest time consuming step will be visible on FPS. If copy to gpu is taking 20ms, then others will be hidden and program will show 50FPS.
- Time 1: get video data 1
- (Time 2: get video data 2) and (copy data 1 to gpu)
- (Time 3: get video data 3) and (copy data 2 to gpu) and (compute data 1)
- (Time 4: get video data 4) and (copy data 3 to gpu) and (compute data 2) and ..
- (Time 5: get video data 5) and (copy data 4 to gpu) and (compute data 3) and ..
- (Time 6: get video data 6) and (copy data 5 to gpu) and (compute data 4) and ..
- (Time 7: get video data 8) and (copy data 6 to gpu) and (compute data 5) and ..
so if copying to gpu takes %45 and getting result back takes %45 of time, FPS should increase by %90 with just hiding one of them behind other.
How do I compute ELA for an image? I would like to get similar ELA image using opencv http://fotoforensics.com/tutorial-ela.php
As per this tutorial, I resaved the image at 95% quality jpeg image and using absDiff method to compute the difference between the source image and the resaved image but all I am getting is zero difference.
Any help on how to compute the difference between two images so as to obtain the error level just like sample images in the tutorial?
The key to achieve a similar result is to use a variable value for the compression rate and a scale factor to make it easier to visualize the data.
Here's an example: we have the input image (left) and the processed image after some parameter adjustments (right):
As expected, the region with the christmas hat presents a different compression rate from the rest of the image. This result is very similar to what FotoForensics presents:
With a few tweaks on this code you can achieve an even closer result. The source code of this project can be found on my Github:
main.cpp:
#include <opencv2/highgui/highgui.hpp>
#include <iostream>
#include <vector>
// Control
int scale = 15,
quality = 75;
// Image containers
cv::Mat input_image,
compressed_image;
void processImage(int, void*)
{
// Setting up parameters and JPEG compression
std::vector<int> parameters;
parameters.push_back(CV_IMWRITE_JPEG_QUALITY);
parameters.push_back(quality);
cv::imwrite("temp.jpg", input_image, parameters);
// Reading temp image from the disk
compressed_image = cv::imread("temp.jpg");
if (compressed_image.empty())
{
std::cout << "> Error loading temp image" << std::endl;
exit(EXIT_FAILURE);
}
cv::Mat output_image = cv::Mat::zeros(input_image.size(), CV_8UC3);
// Compare values through matrices
for (int row = 0; row < input_image.rows; ++row)
{
const uchar* ptr_input = input_image.ptr<uchar>(row);
const uchar* ptr_compressed = compressed_image.ptr<uchar>(row);
uchar* ptr_out = output_image.ptr<uchar>(row);
for (int column = 0; column < input_image.cols; column++)
{
// Calc abs diff for each color channel multiplying by a scale factor
ptr_out[0] = abs(ptr_input[0] - ptr_compressed[0]) * scale;
ptr_out[1] = abs(ptr_input[1] - ptr_compressed[1]) * scale;
ptr_out[2] = abs(ptr_input[2] - ptr_compressed[2]) * scale;
ptr_input += 3;
ptr_compressed += 3;
ptr_out += 3;
}
}
// Shows processed image
cv::imshow("Error Level Analysis", output_image);
}
int main (int argc, char* argv[])
{
// Verifica se o número de parâmetros necessário foi informado
if (argc < 2)
{
std::cout << "> You need to provide an image as parameter" << std::endl;
return EXIT_FAILURE;
}
// Read the image
input_image = cv::imread(argv[1]);
// Check image load
if (input_image.empty())
{
std::cout << "> Error loading input image" << std::endl;
return EXIT_FAILURE;
}
// Set up window and trackbar
cv::namedWindow("Error Level Analysis", CV_WINDOW_AUTOSIZE);
cv::imshow("Error Level Analysis", input_image);
cv::createTrackbar("Scale", "Error Level Analysis", &scale, 100, processImage);
cv::createTrackbar("Quality", "Error Level Analysis", &quality, 100, processImage);
// Press 'q' to quit
while (char(cv::waitKey(0)) != 'q') {};
return EXIT_SUCCESS;
}
Here are some nice references that were used to build this mash-up:
ELA with HTML5
FotoForensics Tutorial
Blackhat USA '07 Paper
I have a buffer which contains an image in YV12 format. Now I want to either convert this buffer to RGB format or create a Mat object from it directly! Can someone help me? I tried this code :
cv::Mat input(widthOfImg, heightOfImg, CV_8UC1, vy12Buffer);
cv::Mat converted;
cv::cvtColor(input, converted, CV_YUV2RGB_YV12);
That's possible.
cv::Mat picYV12 = cv::Mat(nHeight * 3/2, nWidth, CV_8UC1, yv12DataBuffer);
cv::Mat picBGR;
cv::cvtColor(picYV12, picBGR, CV_YUV2BGR_YV12);
cv::imwrite("test.bmp", picBGR); //only for test
Opencv color conversion flags
The height is multiplied by 3/2 because there are 4 Y samples, and 1 U and 1 V sample stored for every 2x2 square of pixels. This results in a byte sample to pixel ratio of 3/2
4*1+1+1 samples per 2*2 pixels = 6/4 = 3/2
YV12 Format
Correction: In the last version of OpenCV (i use oldest 2.4.13 version) is color conversion code changed to
COLOR_YUV2BGR_YV12
cv::cvtColor(picYV12, picBGR, COLOR_YUV2BGR_YV12);
here is the corresponding version in java (Android)...
This method was faster than other techniques like renderscript or opengl(glReadPixels) for getting bitmap from yuv12/i420 data stream (tested with webrtc i420 ).
long startTimei = SystemClock.uptimeMillis();
Mat picyv12 = new Mat(768,512,CV_8UC1); //(im_height*3/2,im_width), should be even no...
picyv12.put(0,0,return_buff); // buffer - byte array with i420 data
Imgproc.cvtColor(picyv12,picyv12,COLOR_YUV2RGB_YV12);// or use COLOR_YUV2BGR_YV12 depending on output result
long endTimei = SystemClock.uptimeMillis();
Log.d("i420_time", Long.toString(endTimei - startTimei));
Log.d("picyv12_size", picyv12.size().toString()); // Check size
Log.d("picyv12_type", String.valueOf(picyv12.type())); // Check type
Utils.matToBitmap(picyv12,tbmp2); // Convert mat to bitmap (height, width) i.e (512,512) - ARGB_888
save(tbmp2,"itest"); // Save bitmap
That's impossible.
Y'UV420p is a planar format, meaning that the Y', U, and V values are
grouped together instead of interspersed. The reason for this is that
by grouping the U and V values together, the image becomes much more
compressible. When given an array of an image in the Y'UV420p format,
all the Y' values come first, followed by all the U values, followed
finally by all the V values.
but cv::Mat is a RGB color model, and arranged like B0 G0 R0 B1 G1 R1... So,we can't create a Mat object from a YV12 buffer directly.
Here is an example:
cv::Mat Yv12ToRgb( uchar *pBuffer,long bufferSize, int width,int height )
{
cv::Mat result(height,width,CV_8UC3);
uchar y,cb,cr;
long ySize=width*height;
long uSize;
uSize=ySize>>2;
assert(bufferSize==ySize+uSize*2);
uchar *output=result.data;
uchar *pY=pBuffer;
uchar *pU=pY+ySize;
uchar *pV=pU+uSize;
uchar r,g,b;
for (int i=0;i<uSize;++i)
{
for(int j=0;j<4;++j)
{
y=pY[i*4+j];
cb=ucharpU[i];
cr=ucharpV[i];
//ITU-R standard
b=saturate_cast<uchar>(y+1.772*(cb-128));
g=saturate_cast<uchar>(y-0.344*(cb-128)-0.714*(cr-128));
r=saturate_cast<uchar>(y+1.402*(cr-128));
*output++=b;
*output++=g;
*output++=r;
}
}
return result;
}
You can try as YUV_I420 array
char filePath[3000];
int width, height;
cout << "file path = ";
cin >> filePath;
cout << "width = ";
cin >> width;
cout << "height = ";
cin >> height;
FILE *pFile = fopen(filePath, "rb");
unsigned char* buff = new unsigned char[width * height *3 / 2];
fread(buff, 1, width * height* 3 / 2, pFile);
fclose(pFile);
cv::Mat imageRGB;
cv::Mat picI420 = cv::Mat(height * 3 / 2, width, CV_8UC1, buff);
cv::cvtColor(picI420, imageRGB, CV_YUV2BGRA_I420);
imshow("imageRGB", imageRGB);
waitKey(0);
i began to implement some simple image processing using cuda but i have an error in my code
the error happens when i copy pixels from device to host
this is my try
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <opencv2\core\core.hpp>
#include <opencv2\highgui\highgui.hpp>
#include <stdio.h>
using namespace cv;
unsigned char *h_pixels;
unsigned char *d_pixels;
int bufferSize;
int width,height;
const int BLOCK_SIZE = 32;
Mat image;
void get_pixels(const char* fileName)
{
image = imread(fileName);
bufferSize = image.size().width * image.size().height * 3 * sizeof(unsigned char);
width = image.size().width;
height = image.size().height;
h_pixels = new unsigned char[bufferSize];
memcpy(h_pixels,image.data,bufferSize);
}
__global__ void invert_image(unsigned char* pixels,int width,int height)
{
int row = blockIdx.y * BLOCK_SIZE + threadIdx.y;
int col = blockIdx.x * BLOCK_SIZE + threadIdx.x;
int cidx = (row * width + col) * 3;
pixels[cidx] = 255 - pixels[cidx];
pixels[cidx + 1] = 255 - pixels[cidx + 1];
pixels[cidx + 2] = 255 - pixels[cidx + 2];
}
int main()
{
get_pixels("D:\\photos\\z.jpg");
cudaError_t err = cudaMalloc((void**)&d_pixels,bufferSize);
err = cudaMemcpy(d_pixels,h_pixels,bufferSize,cudaMemcpyHostToDevice);
dim3 dimBlock(BLOCK_SIZE,BLOCK_SIZE);
dim3 dimGrid(width/dimBlock.x,height/dimBlock.y);
invert_image<<<dimBlock,dimGrid>>>(d_pixels,width,height);
unsigned char *pixels = new unsigned char[bufferSize];
err= cudaMemcpy(pixels,d_pixels,bufferSize,cudaMemcpyDeviceToHost);// unknown error
const char * errStr = cudaGetErrorString(err);
cudaFree(d_pixels);
image.data = pixels;
namedWindow("display image");
imshow("display image",image);
waitKey();
return 0;
}
also how can i find out error that occurs in cuda device
thanks for your help
OpenCV images are not continuous. Each row is 4 byte or 8 byte aligned. You should also pass the step field of the Mat to the CUDA kernel, so that you can calculate the cidx correctly. The generic formula to calculate the output index is:
cidx = row * (step/elementSize) + (NumberOfChannels * col);
in your case, it will be:
cidx = row * step + (3 * col);
Referring to the alignment of images, you buffer size is equal to image.step * image.size().height.
Next thing is the one pointed out by #phoad in the third point. You should create enough number of thread blocks to cover the whole image.
Here is a generic formula for Grid which will create enough number of blocks for any image size.
dim3 block(BLOCK_SIZE,BLOCK_SIZE);
dim3 grid((width + block.x - 1)/block.x,(height + block.y - 1)/block.y);
First of all be sure that the image file is read correctly.
Check if the device memory is allocated with CUDA_SAFE_CALL(cudaMalloc(..))
Check the dimensions of the image. If the dimension of the image is not multiples of BLOCKSIZE than you might be missing some indices and the image is not fully inverted.
Call cudaDeviceSynchronize after the kernel call and check its return value.
Do you get any error when you run the code without calling the kernel anyway?
You are not freeing the h_pixels and might have a memory leak.
Instead of using BLOCKSIZE in the kernel you might use "blockDim.x". So calculating indices like "blockIdx.x * blockDim.x + threadIdx.x"
Try to do not touch the memory area in the kernel code, namely comment out the memory updates at the kernel (the lines where you access the pixels array) and check if the program continues to fail. If it does not continue to fail you might be accessing out of the bounds.
Use this command immediately after the kernel invocation to print the kernel errors:
printf("error code: %s\n",cudaGetErrorString(cudaGetLastError()))
Is there a way to easily extract the DCT coefficients (and quantization parameters) from encoded images and video? Any decoder software must be using them to decode block-DCT encoded images and video. So I'm pretty sure the decoder knows what they are. Is there a way to expose them to whomever is using the decoder?
I'm implementing some video quality assessment algorithms that work directly in the DCT domain. Currently, the majority of my code uses OpenCV, so it would be great if anyone knows of a solution using that framework. I don't mind using other libraries (perhaps libjpeg, but that seems to be for still images only), but my primary concern is to do as little format-specific work as possible (I don't want to reinvent the wheel and write my own decoders). I want to be able to open any video/image (H.264, MPEG, JPEG, etc) that OpenCV can open, and if it's block DCT-encoded, to get the DCT coefficients.
In the worst case, I know that I can write up my own block DCT code, run the decompressed frames/images through it and then I'd be back in the DCT domain. That's hardly an elegant solution, and I hope I can do better.
Presently, I use the fairly common OpenCV boilerplate to open images:
IplImage *image = cvLoadImage(filename);
// Run quality assessment metric
The code I'm using for video is equally trivial:
CvCapture *capture = cvCaptureFromAVI(filename);
while (cvGrabFrame(capture))
{
IplImage *frame = cvRetrieveFrame(capture);
// Run quality assessment metric on frame
}
cvReleaseCapture(&capture);
In both cases, I get a 3-channel IplImage in BGR format. Is there any way I can get the DCT coefficients as well?
Well, I did a bit of reading and my original question seems to be an instance of wishful thinking.
Basically, it's not possible to get the DCT coefficients from H.264 video frames for the simple reason that H.264 doesn't use DCT. It uses a different transform (integer transform). Next, the coefficients for that transform don't necessarily change on a frame-by-frame basis -- H.264 is smarter cause it splits up frames into slices. It should be possible to get those coefficients through a special decoder, but I doubt OpenCV exposes it for the user.
For JPEG, things are a bit more positive. As I suspected, libjpeg exposes the DCT coefficients for you. I wrote a small app to show that it works (source at the end). It makes a new image using the DC term from each block. Because the DC term is equal to the block average (after proper scaling), the DC images are downsampled versions of the input JPEG image.
EDIT: fixed scaling in source
Original image (512 x 512):
DC images (64x64): luma Cr Cb RGB
Source (C++):
#include <stdio.h>
#include <assert.h>
#include <cv.h>
#include <highgui.h>
extern "C"
{
#include "jpeglib.h"
#include <setjmp.h>
}
#define DEBUG 0
#define OUTPUT_IMAGES 1
/*
* Extract the DC terms from the specified component.
*/
IplImage *
extract_dc(j_decompress_ptr cinfo, jvirt_barray_ptr *coeffs, int ci)
{
jpeg_component_info *ci_ptr = &cinfo->comp_info[ci];
CvSize size = cvSize(ci_ptr->width_in_blocks, ci_ptr->height_in_blocks);
IplImage *dc = cvCreateImage(size, IPL_DEPTH_8U, 1);
assert(dc != NULL);
JQUANT_TBL *tbl = ci_ptr->quant_table;
UINT16 dc_quant = tbl->quantval[0];
#if DEBUG
printf("DCT method: %x\n", cinfo->dct_method);
printf
(
"component: %d (%d x %d blocks) sampling: (%d x %d)\n",
ci,
ci_ptr->width_in_blocks,
ci_ptr->height_in_blocks,
ci_ptr->h_samp_factor,
ci_ptr->v_samp_factor
);
printf("quantization table: %d\n", ci);
for (int i = 0; i < DCTSIZE2; ++i)
{
printf("% 4d ", (int)(tbl->quantval[i]));
if ((i + 1) % 8 == 0)
printf("\n");
}
printf("raw DC coefficients:\n");
#endif
JBLOCKARRAY buf =
(cinfo->mem->access_virt_barray)
(
(j_common_ptr)cinfo,
coeffs[ci],
0,
ci_ptr->v_samp_factor,
FALSE
);
for (int sf = 0; (JDIMENSION)sf < ci_ptr->height_in_blocks; ++sf)
{
for (JDIMENSION b = 0; b < ci_ptr->width_in_blocks; ++b)
{
int intensity = 0;
intensity = buf[sf][b][0]*dc_quant/DCTSIZE + 128;
intensity = MAX(0, intensity);
intensity = MIN(255, intensity);
cvSet2D(dc, sf, (int)b, cvScalar(intensity));
#if DEBUG
printf("% 2d ", buf[sf][b][0]);
#endif
}
#if DEBUG
printf("\n");
#endif
}
return dc;
}
IplImage *upscale_chroma(IplImage *quarter, CvSize full_size)
{
IplImage *full = cvCreateImage(full_size, IPL_DEPTH_8U, 1);
cvResize(quarter, full, CV_INTER_NN);
return full;
}
GLOBAL(int)
read_JPEG_file (char * filename, IplImage **dc)
{
/* This struct contains the JPEG decompression parameters and pointers to
* working space (which is allocated as needed by the JPEG library).
*/
struct jpeg_decompress_struct cinfo;
struct jpeg_error_mgr jerr;
/* More stuff */
FILE * infile; /* source file */
/* In this example we want to open the input file before doing anything else,
* so that the setjmp() error recovery below can assume the file is open.
* VERY IMPORTANT: use "b" option to fopen() if you are on a machine that
* requires it in order to read binary files.
*/
if ((infile = fopen(filename, "rb")) == NULL) {
fprintf(stderr, "can't open %s\n", filename);
return 0;
}
/* Step 1: allocate and initialize JPEG decompression object */
cinfo.err = jpeg_std_error(&jerr);
/* Now we can initialize the JPEG decompression object. */
jpeg_create_decompress(&cinfo);
/* Step 2: specify data source (eg, a file) */
jpeg_stdio_src(&cinfo, infile);
/* Step 3: read file parameters with jpeg_read_header() */
(void) jpeg_read_header(&cinfo, TRUE);
/* We can ignore the return value from jpeg_read_header since
* (a) suspension is not possible with the stdio data source, and
* (b) we passed TRUE to reject a tables-only JPEG file as an error.
* See libjpeg.txt for more info.
*/
/* Step 4: set parameters for decompression */
/* In this example, we don't need to change any of the defaults set by
* jpeg_read_header(), so we do nothing here.
*/
jvirt_barray_ptr *coeffs = jpeg_read_coefficients(&cinfo);
IplImage *y = extract_dc(&cinfo, coeffs, 0);
IplImage *cb_q = extract_dc(&cinfo, coeffs, 1);
IplImage *cr_q = extract_dc(&cinfo, coeffs, 2);
IplImage *cb = upscale_chroma(cb_q, cvGetSize(y));
IplImage *cr = upscale_chroma(cr_q, cvGetSize(y));
cvReleaseImage(&cb_q);
cvReleaseImage(&cr_q);
#if OUTPUT_IMAGES
cvSaveImage("y.png", y);
cvSaveImage("cb.png", cb);
cvSaveImage("cr.png", cr);
#endif
*dc = cvCreateImage(cvGetSize(y), IPL_DEPTH_8U, 3);
assert(dc != NULL);
cvMerge(y, cr, cb, NULL, *dc);
cvReleaseImage(&y);
cvReleaseImage(&cb);
cvReleaseImage(&cr);
/* Step 7: Finish decompression */
(void) jpeg_finish_decompress(&cinfo);
/* We can ignore the return value since suspension is not possible
* with the stdio data source.
*/
/* Step 8: Release JPEG decompression object */
/* This is an important step since it will release a good deal of memory. */
jpeg_destroy_decompress(&cinfo);
fclose(infile);
return 1;
}
int
main(int argc, char **argv)
{
int ret = 0;
if (argc != 2)
{
fprintf(stderr, "usage: %s filename.jpg\n", argv[0]);
return 1;
}
IplImage *dc = NULL;
ret = read_JPEG_file(argv[1], &dc);
assert(dc != NULL);
IplImage *rgb = cvCreateImage(cvGetSize(dc), IPL_DEPTH_8U, 3);
cvCvtColor(dc, rgb, CV_YCrCb2RGB);
#if OUTPUT_IMAGES
cvSaveImage("rgb.png", rgb);
#else
cvNamedWindow("DC", CV_WINDOW_AUTOSIZE);
cvShowImage("DC", rgb);
cvWaitKey(0);
#endif
cvReleaseImage(&dc);
cvReleaseImage(&rgb);
return 0;
}
You can use, libjpeg to extract dct data of your jpeg file, but for h.264 video file, I can't find any open source code that give you dct data (actully Integer dct data). But you can use h.264 open source software like JM, JSVM or x264. In these two source file, you have to find their specific function that make use of dct function, and change it to your desire form, to get your output dct data.
For Image:
use the following code, and after read_jpeg_file( infilename, v, quant_tbl ), v and quant_tbl will have dct data and quantization table of your jpeg image respectively.
I used Qvector to store my output data, change it to your preferred c++ array list.
#include <iostream>
#include <stdio.h>
#include <jpeglib.h>
#include <stdlib.h>
#include <setjmp.h>
#include <fstream>
#include <QVector>
int read_jpeg_file( char *filename, QVector<QVector<int> > &dct_coeff, QVector<unsigned short> &quant_tbl)
{
struct jpeg_decompress_struct cinfo;
struct jpeg_error_mgr jerr;
FILE * infile;
if ((infile = fopen(filename, "rb")) == NULL) {
fprintf(stderr, "can't open %s\n", filename);
return 0;
}
cinfo.err = jpeg_std_error(&jerr);
jpeg_create_decompress(&cinfo);
jpeg_stdio_src(&cinfo, infile);
(void) jpeg_read_header(&cinfo, TRUE);
jvirt_barray_ptr *coeffs_array = jpeg_read_coefficients(&cinfo);
for (int ci = 0; ci < 1; ci++)
{
JBLOCKARRAY buffer_one;
JCOEFPTR blockptr_one;
jpeg_component_info* compptr_one;
compptr_one = cinfo.comp_info + ci;
for (int by = 0; by < compptr_one->height_in_blocks; by++)
{
buffer_one = (cinfo.mem->access_virt_barray)((j_common_ptr)&cinfo, coeffs_array[ci], by, (JDIMENSION)1, FALSE);
for (int bx = 0; bx < compptr_one->width_in_blocks; bx++)
{
blockptr_one = buffer_one[0][bx];
QVector<int> tmp;
for (int bi = 0; bi < 64; bi++)
{
tmp.append(blockptr_one[bi]);
}
dct_coeff.push_back(tmp);
}
}
}
// coantization table
j_decompress_ptr dec_cinfo = (j_decompress_ptr) &cinfo;
jpeg_component_info *ci_ptr = &dec_cinfo->comp_info[0];
JQUANT_TBL *tbl = ci_ptr->quant_table;
for(int ci =0 ; ci < 64; ci++){
quant_tbl.append(tbl->quantval[ci]);
}
return 1;
}
int main()
{
QVector<QVector<int> > v;
QVector<unsigned short> quant_tbl;
char *infilename = "your_image.jpg";
std::ofstream out;
out.open("out_dct.txt");
if( read_jpeg_file( infilename, v, quant_tbl ) > 0 ){
for(int j = 0; j < v.size(); j++ ){
for (int i = 0; i < v[0].size(); ++i){
out << v[j][i] << "\t";
}
out << "---------------" << std::endl;
}
out << "\n\n\n" << std::string(10,'-') << std::endl;
out << "\nQauntization Table:" << std::endl;
for(int i = 0; i < quant_tbl.size(); i++ ){
out << quant_tbl[i] << "\t";
}
}
else{
std::cout << "Can not read, Returned With Error";
return -1;
}
out.close();
return 0;
}