Error level analysis in Image - opencv

How do I compute ELA for an image? I would like to get similar ELA image using opencv http://fotoforensics.com/tutorial-ela.php
As per this tutorial, I resaved the image at 95% quality jpeg image and using absDiff method to compute the difference between the source image and the resaved image but all I am getting is zero difference.
Any help on how to compute the difference between two images so as to obtain the error level just like sample images in the tutorial?

The key to achieve a similar result is to use a variable value for the compression rate and a scale factor to make it easier to visualize the data.
Here's an example: we have the input image (left) and the processed image after some parameter adjustments (right):
As expected, the region with the christmas hat presents a different compression rate from the rest of the image. This result is very similar to what FotoForensics presents:
With a few tweaks on this code you can achieve an even closer result. The source code of this project can be found on my Github:
main.cpp:
#include <opencv2/highgui/highgui.hpp>
#include <iostream>
#include <vector>
// Control
int scale = 15,
quality = 75;
// Image containers
cv::Mat input_image,
compressed_image;
void processImage(int, void*)
{
// Setting up parameters and JPEG compression
std::vector<int> parameters;
parameters.push_back(CV_IMWRITE_JPEG_QUALITY);
parameters.push_back(quality);
cv::imwrite("temp.jpg", input_image, parameters);
// Reading temp image from the disk
compressed_image = cv::imread("temp.jpg");
if (compressed_image.empty())
{
std::cout << "> Error loading temp image" << std::endl;
exit(EXIT_FAILURE);
}
cv::Mat output_image = cv::Mat::zeros(input_image.size(), CV_8UC3);
// Compare values through matrices
for (int row = 0; row < input_image.rows; ++row)
{
const uchar* ptr_input = input_image.ptr<uchar>(row);
const uchar* ptr_compressed = compressed_image.ptr<uchar>(row);
uchar* ptr_out = output_image.ptr<uchar>(row);
for (int column = 0; column < input_image.cols; column++)
{
// Calc abs diff for each color channel multiplying by a scale factor
ptr_out[0] = abs(ptr_input[0] - ptr_compressed[0]) * scale;
ptr_out[1] = abs(ptr_input[1] - ptr_compressed[1]) * scale;
ptr_out[2] = abs(ptr_input[2] - ptr_compressed[2]) * scale;
ptr_input += 3;
ptr_compressed += 3;
ptr_out += 3;
}
}
// Shows processed image
cv::imshow("Error Level Analysis", output_image);
}
int main (int argc, char* argv[])
{
// Verifica se o número de parâmetros necessário foi informado
if (argc < 2)
{
std::cout << "> You need to provide an image as parameter" << std::endl;
return EXIT_FAILURE;
}
// Read the image
input_image = cv::imread(argv[1]);
// Check image load
if (input_image.empty())
{
std::cout << "> Error loading input image" << std::endl;
return EXIT_FAILURE;
}
// Set up window and trackbar
cv::namedWindow("Error Level Analysis", CV_WINDOW_AUTOSIZE);
cv::imshow("Error Level Analysis", input_image);
cv::createTrackbar("Scale", "Error Level Analysis", &scale, 100, processImage);
cv::createTrackbar("Quality", "Error Level Analysis", &quality, 100, processImage);
// Press 'q' to quit
while (char(cv::waitKey(0)) != 'q') {};
return EXIT_SUCCESS;
}
Here are some nice references that were used to build this mash-up:
ELA with HTML5
FotoForensics Tutorial
Blackhat USA '07 Paper

Related

How to use cv::decode (access image) correct?

I need help with the following problem:
Task script:
read in the message sensor_msgs/PointCloud2, display Bird Eye View image and save (png or jpg).
Desired new function:
Send out the displayed images directly as an image message.
Problem:
cv::Mat *bgr is the matrix that contains the image and gives it to a map (for visualisation only).
Solutions by others/so far:
opencv read jpeg image from buffer //
How to use cv::imdecode, if the contents of an image file are in a char array?
Using different member functions, but unsuccessful.
Code reduced to necessary snippets
(complete version here: https://drive.google.com/file/d/1HI3E4nM9mQ--oNh1Q7zfwRFGJB5JRiGD/view?usp=sharing)
// Global Publishers/Subscribers
ros::Subscriber subPointCloud;
ros::Publisher pubPointCloud;
image_transport::Publisher pubImage;
pcl::PointCloud<pcl::PointXYZ>::Ptr cloud (new pcl::PointCloud<pcl::PointXYZ>);
pcl::PointCloud<pcl::PointXYZ>::Ptr cloud_grid (new pcl::PointCloud<pcl::PointXYZ>);
sensor_msgs::PointCloud2 output;
// create Matrix to store pointcloud data
cv::Mat *heightmap, *hsv, *bgr;
std::vector<int> compression_params;
std::vector<String> fn; //filename
cv::Mat image;
// main generation function
void DEM(const sensor_msgs::PointCloud2ConstPtr& pointCloudMsg)
{
ROS_DEBUG("Point Cloud Received");
// clear cloud and height map array
lowest = FLT_MAX;
for(int i = 0; i < IMAGE_HEIGHT; ++i){
for(int j = 0; j < IMAGE_WIDTH; ++j){
heightArray[i][j] = (double)(-FLT_MAX);
}
}
// Convert from ROS message to PCL point cloud
pcl::fromROSMsg(*pointCloudMsg, *cloud);
// Populate the DEM grid by looping through every point
int row, column;
for(size_t j = 0; j < cloud->points.size(); ++j){
// If the point is within the image size bounds
if(map_pc2rc(cloud->points[j].x, cloud->points[j].y, &row, &column) == 1 && row >= 0 && row < IMAGE_HEIGHT && column >=0 && column < IMAGE_WIDTH){
if(cloud->points[j].z > heightArray[row][column]){
heightArray[row][column] = cloud->points[j].z;
}
// Keep track of lowest point in cloud for flood fill
else if(cloud->points[j].z < lowest){
lowest = cloud->points[j].z;
}
}
}
// Create "point cloud" and opencv image to be published for visualization
int index = 0;
double x, y;
for(int i = 0; i < IMAGE_HEIGHT; ++i){
for(int j = 0; j < IMAGE_WIDTH; ++j){
// Add point to cloud
(void)map_rc2pc(&x, &y, i, j);
cloud_grid->points[index].x = x;
cloud_grid->points[index].y = y;
cloud_grid->points[index].z = heightArray[i][j];
++index;
// Add point to image
cv::Vec3b &pixel_hsv = hsv->at<cv::Vec3b>(i,j); // access pixels vector HSV
cv::Vec3b &pixel_bgr = heightmap->at<cv::Vec3b>(i,j); // access pixels vector BGR
if(heightArray[i][j] > -FLT_MAX){
//Coloured Pixel Pointcloud
pixel_hsv[0] = map_m2i(heightArray[i][j]); // H - color value (hue)
pixel_hsv[1] = 255; // S -color saturation
pixel_hsv[2] = 255; // V - brightness
// White Pixel PointCloud
pixel_bgr[0] = map_m2i(heightArray[i][j]); // B
pixel_bgr[1] = map_m2i(heightArray[i][j]); // G
pixel_bgr[2] = map_m2i(heightArray[i][j]); // R
}
else{
// Coloured Pixel Pointcloud
pixel_hsv[0] = 0;
pixel_hsv[1] = 0;
pixel_hsv[2] = 0;
// White Pixel Pointcloud
pixel_bgr[0] = 0;
pixel_bgr[1] = 0;
pixel_bgr[2] = 0; //map_m2i(lowest);
}
}
}
// Display image
cv::cvtColor(*hsv, *bgr, cv::COLOR_HSV2BGR); // HSV matrix (src) to BGR matrix (dst)
// Image denoising (filter strength, pixel size template patch, pixel size window)
//cv::fastNlMeansDenoising(*hsv,*bgr,30 , 7, 11);
// Image denoising (filter strength luminance, same colored, pixel size template patch, pixel size window)
//cv::fastNlMeansDenoisingColored(*hsv,*bgr,30 ,1, 7, 11);
// Plot HSV(colored) and BGR (b/w)
cv::imshow(WIN_NAME, *bgr); // show new HSV matrix
cv::imshow(WIN_NAME2, *heightmap); // show old BGR matrix
// Save image to disk
char filename[100];
// FLAG enable/disable saving function
if (save_to_disk == true)
{
// save JPG format
snprintf(filename, 100, "/home/pkatsoulakos/catkin_ws/images/image_%d.jpg", fnameCounter);
std::cout << filename << std::endl;
// JPG image writing
cv::imwrite(filename, *bgr, compression_params);
/* // generate pathnames matching a pattern
glob("/home/pkatsoulakos/catkin_ws/images/*.jpg",fn); // directory, filter pattern
// range based for loop
for (auto f:fn) // range declaration:range_expression
{
image = cv::imread(f, IMREAD_COLOR);
if (image.empty())
{
std::cout << "!!! Failed imread(): image not found" << std::endl;
}
}*/
// Approach 2
//cv::Mat rawdata(1, bgr,CV_8UC1,(void*)bgr);
image = cv::imdecode(cv::Mat(*bgr, CV_8UC3, CV_AUTO_STEP), IMREAD_COLOR);
//image = cv::imdecode(cv::Mat(*bgr, CV_8UC1), IMREAD_UNCHANGED);
if (image.data == NULL)
{
std::cout << "!!! Failed imread(): image not found" << std::endl;
}
/* // save PNG format
snprintf(filename, 100, "/home/pkatsoulakos/catkin_ws/images/image_%d.png", fnameCounter);
std::cout << filename << std::endl;
// PNG image writing
// cv::imwrite(filename, *heightmap, compression_params);*/
}
++fnameCounter;
// Output height map to point cloud for python node to parse to PNG
pcl::toROSMsg(*cloud_grid, output);
output.header.stamp = ros::Time::now();
output.header.frame_id = "yrl_cloud_id"; // fixed frame (oblique alignment) from LiDAR
pubPointCloud.publish(output);
// Publish bird_view img
cv_bridge::CvImage cv_bird_view;
cv_bird_view.header.stamp = ros::Time::now();
cv_bird_view.header.frame_id = "out_bev_image";
cv_bird_view.encoding = "bgr8";
cv_bird_view.image = image;
pubImage.publish(cv_bird_view.toImageMsg());
// Output Image
//sensor_msgs::ImagePtr msg = cv_bridge::CvImage(std_msgs::Header(), "bgr8", image).toImageMsg();
//pubImage.publish(msg);pubPoin
}
int main(int argc, char** argv)
{
ROS_INFO("Starting LIDAR Node");
ros::init(argc, argv, "lidar_node");
ros::NodeHandle nh;
image_transport::ImageTransport it(nh);
// Setup image
cv::Mat map(IMAGE_HEIGHT, IMAGE_WIDTH, CV_8UC3, cv::Scalar(0, 0, 0));
cv::Mat map2(IMAGE_HEIGHT, IMAGE_WIDTH, CV_8UC3, cv::Scalar(0, 0, 0));
cv::Mat map3(IMAGE_HEIGHT, IMAGE_WIDTH, CV_8UC3, cv::Scalar(0, 0, 0));
// H S V
// image container
heightmap = &map; // default source code (mcshiggings)
hsv = &map2; // added for hSV visualization
bgr = &map3; // for displaying colored Pc
cv::namedWindow(WIN_NAME, WINDOW_AUTOSIZE);
cv::namedWindow(WIN_NAME2, WINDOW_AUTOSIZE);
cv::startWindowThread();
cv::imshow(WIN_NAME, *bgr); // BGR visualization of HSV
cv::imshow(WIN_NAME2, *heightmap); // default visualization
// Setup Image Output Parameters
fnameCounter = 0;
lowest = FLT_MAX;
/* PNG compression param
compression_params.push_back(IMWRITE_PNG_COMPRESSION);
A higher value means a smaller size and longer compression time. Default value is 3.
compression_params.push_back(9); */
// JPG compression param
compression_params.push_back(IMWRITE_JPEG_QUALITY);
// from 0 to 100 (the higher is the better). Default value is 95.
compression_params.push_back(95);
// Setup indicies in point clouds
/*
int index = 0;
double x, y;
for(int i = 0; i < IMAGE_HEIGHT; ++i){
for(int j = 0; j < IMAGE_WIDTH; ++j){
index = i * j;
(void)map_rc2pc(&x, &y, i, j);
cloud_grid->points[index].x = x;
cloud_grid->points[index].y = y;
cloud_grid->points[index].z = (-FLT_MAX)master.log
}
*/
// subscriber and publisher
subPointCloud = nh.subscribe<sensor_msgs::PointCloud2>("/pointcloud", 2, DEM);
pubPointCloud = nh.advertise<sensor_msgs::PointCloud2> ("/heightmap/pointcloud", 1);
pubImage = it.advertise("/out_bev_image",1);
ros::spin();
return 0;
}
Thank you for any advice and suggested solutions.
You can't simply pass the char array to opencv functions to create an image because of how the data is formatted. PointCloud2 data fields are strictly containing information about where a point lives in 3d space(think [x,y,z]); this means nothing in terms of an actual image. Instead you have to first convert the pointcloud into something that better represents an image. Luckily, this already exists. Check out the CloudToImage ROS package.

Warning: Invalid resolution 0 dpi. Using 70 instead

I know this theme already exists, but I didn't find any solution for this.
I am trying to detect characters from picture in this code below:
#include <tesseract/baseapi.h>
#include <leptonica/allheaders.h>
#include <opencv2/opencv.hpp>
#include <sstream>
#include <memory>
#include <iostream>
#define path "/home/jovan/Pictures/"
void resize(cv::Mat &img);
PIX *mat8ToPix(const cv::Mat *mat8);
cv::Mat pix8ToMat(PIX *pix8);
int main(int argc, char **argv)
{
// Load image
std::stringstream ss;
ss << path;
ss << argv[1];
cv::Mat im = cv::imread(ss.str() );
if (im.empty())
{
std::cout<<"Cannot open source image!" << std::endl;
return EXIT_FAILURE;
}
resize(im);
cv::Mat gray;
cv::cvtColor(im, gray, CV_BGR2GRAY);
// Pass it to Tesseract API
tesseract::TessBaseAPI tess;
tess.Init(NULL, "eng", tesseract::OEM_DEFAULT);
tess.SetPageSegMode(tesseract::PSM_SINGLE_BLOCK);
tess.SetVariable("tessedit_char_whitelist", "QWERTYUIOPASDFGHJKLZXCVBNM");
PIX *image = mat8ToPix(&im);
//tess.SetImage((uchar*)gray.data, gray.cols, gray.rows, 1, gray.cols);
tess.SetImage(image);
// Get the text
char* out = tess.GetUTF8Text();
if(out != nullptr)
std::cout << "here it is: "<< out << std::endl;
cv::imshow("image", im);
cv::imshow("gray", gray);
cv::waitKey();
return 0;
}
void resize(cv::Mat &img)
{
while(img.size().width >= 500 && img.size().height >= 500 )
cv::resize(img, img, cv::Size(img.size().width/2, img.size().height/2) );
}
PIX *mat8ToPix(const cv::Mat *mat8)
{
PIX *pixd = pixCreate(mat8->size().width, mat8->size().height, 8);
for(int y=0; y<mat8->rows; y++)
for(int x=0; x<mat8->cols; x++)
pixSetPixel(pixd, x, y, (l_uint32) mat8->at<uchar>(y,x));
return pixd;
}
cv::Mat pix8ToMat(PIX *pix8)
{
cv::Mat mat(cv::Size(pix8->w, pix8->h), CV_8UC1);
uint32_t *line = pix8->data;
for (uint32_t y = 0; y < pix8->h; ++y)
{
for (uint32_t x = 0; x < pix8->w; ++x)
mat.at<uchar>(y, x) = GET_DATA_BYTE(line, x);
line += pix8->wpl;
}
return mat;
}
whatever picture I put to process I get this on terminal:
$: Warning: Invalid resolution 0 dpi. Using 70 instead.
Does anyone have some solution?
Thanks in advance.
If you know the input image's resolution, you can call pixSetResolution on Leptonica Pix object.
Or use Tesseract API to pass in the value. See
Tess4j - Pdf to Tiff to tesseract - "Warning: Invalid resolution 0 dpi. Using 70 instead."
Maybe it helps: I used EMGU & C#, but I think it must be the same in C++:
ocr.SetVariable("user_defined_dpi", "70");
... and the message should disappear ;)
I had similar issue. Found out from here that dark background in the image is the problem. Inversion of the image colors worked.

Convert matlab image svd method to opencv

I want to write a program with opencv by c++ in the visual studio.
My code is following matlab code:
close all
clear all
clc
%reading and converting the image
inImage=imread('pic.jpg');
inImageD=double(inImage);
[U,S,V]=svd(inImageD);
% Using different number of singular values (diagonal of S) to compress and
% reconstruct the image
dispEr = [];
numSVals = [];
for N=5:25:300
% store the singular values in a temporary var
C = S;
% discard the diagonal values not required for compression
C(N+1:end,:)=0;
C(:,N+1:end)=0;
% Construct an Image using the selected singular values
D=U*C*V';
% display and compute error
figure;
buffer = sprintf('Image output using %d singular values', N)
imshow(uint8(D));
title(buffer);
error=sum(sum((inImageD-D).^2));
% store vals for display
dispEr = [dispEr; error];
numSVals = [numSVals; N];
end
What's your opinion to do this? I want to save image in a text file and retrieve it from file into the Mat array. I've written this part as follow:
Mat image;
FileStorage read_file("pic_file.txt", FileStorage::READ);
read_file["pic"] >> image;
read_file.release();
Mat P;
image.convertTo(P, CV_32FC3,1.0/255);
SVD svda(P); //or SVD::compute(P,W,U,V);
But I have problem with SVD function and it doesn't work. Is there anything to do for computing SVD compression of an image?
Thank You so much.
Vahids.
Here is my code:
int main(int argc, char* argv[])
{
// Image matrix
Mat img;
Mat result;
//---------------------------------------------
//
//---------------------------------------------
namedWindow("Source Image");
namedWindow("Result");
// Load image in grayscale mode
img=imread("D:\\ImagesForTest\\cat.bmp",0);
img.convertTo(img,CV_32FC1,1.0/255.0);
cout << "Source size:" << img.rows*img.cols <<" elements "<< endl;
// create SVD
cv::SVD s;
// svd result
Mat w,u,vt;
// computations ...
s.compute(img,w,u,vt);
// collect Sigma matrix (diagonal - is eigen values, other - zeros)
// we got it in as vector, transform it to diagonal matrix
Mat W=Mat::zeros(w.rows,w.rows,CV_32FC1);
for(int i=0;i<w.rows;i++)
{
W.at<float>(i,i)=w.at<float>(i);
}
// reduce rank to k
int k=25;
W=W(Range(0,k),Range(0,k));
u=u(Range::all(),Range(0,k));
vt=vt(Range(0,k),Range::all());
// Get compressed image
result=u*W*vt;
cout << "Result size:" << u.rows*u.cols+k+vt.rows*vt.cols <<" elements "<< endl;
//---------------------------------------------
//
//---------------------------------------------
imshow("Source Image", img);
imshow("Result", result);
cvWaitKey(0);
return 0;
}
Source and result images.

HOG detector: relation between detected roi size and training sample size

I'm experimenting people detector with opencv and HOGDescriptor c++ object: HOGDescriptor::getDefaultPeopleDetector(). Using the sample program peopledetect.cpp in the sample/cpp directory of the Opencv 2.4.3 repository and testing it against some of the INRIA dataset images.. it works quite well.
Now I want to try with some images I have to work with and, even if I try to change parameters.. it doesn't find anything.
I suppose it is because of the pedestrian in the image I have are much more smaller then the INRIA ones. So it should be better to train a new detector but before doing it..
Here my question:
Is it right? Is there a strict relationship between the images used for training and the detected ones? That means that HOG detector is not really scale invariant method..
In particular, what is the best size of the default HOGDescriptor::getDefaultPeopleDetector() ? Do I have to train a new detector for detect much smaller people?
Here is the peopledetect.cpp I'm using:
#include "opencv2/imgproc/imgproc.hpp"
#include "opencv2/objdetect/objdetect.hpp"
#include "opencv2/highgui/highgui.hpp"
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <iostream>
using namespace cv;
using namespace std;
// static void help()
// {
// printf(
// "\nDemonstrate the use of the HoG descriptor using\n"
// " HOGDescriptor::hog.setSVMDetector(HOGDescriptor::getDefaultPeopleDetector());\n"
// "Usage:\n"
// "./peopledetect (<image_filename> | <image_list>.txt)\n\n");
// }
int main(int argc, char** argv)
{
std::cout << "OPENCV version: " << CV_MAJOR_VERSION << " " << CV_MINOR_VERSION << std::endl;
Mat img;
FILE* f = 0;
char _filename[1024];
if( argc == 1 )
{
printf("Usage: peopledetect (<image_filename> | <image_list>.txt)\n");
return 0;
}
img = imread(argv[1]);
if( img.data )
{
strcpy(_filename, argv[1]);
}
else
{
f = fopen(argv[1], "rt");
if(!f)
{
fprintf( stderr, "ERROR: the specified file could not be loaded\n");
return -1;
}
}
HOGDescriptor hog;
hog.setSVMDetector(HOGDescriptor::getDefaultPeopleDetector());
namedWindow("people detector", 1);
for(;;)
{
char* filename = _filename;
if(f)
{
if(!fgets(filename, (int)sizeof(_filename)-2, f))
break;
//while(*filename && isspace(*filename))
// ++filename;
if(filename[0] == '#')
continue;
int l = (int)strlen(filename);
while(l > 0 && isspace(filename[l-1]))
--l;
filename[l] = '\0';
img = imread(filename);
}
printf("%s:\n", filename);
if(!img.data)
continue;
fflush(stdout);
vector<Rect> found, found_filtered;
double t = (double)getTickCount();
// run the detector with default parameters. to get a higher hit-rate
// (and more false alarms, respectively), decrease the hitThreshold and
// groupThreshold (set groupThreshold to 0 to turn off the grouping completely).
hog.detectMultiScale(img, found, 0, Size(8,8), Size(32,32), 1.05, 2);
t = (double)getTickCount() - t;
printf("tdetection time = %gms\n", t*1000./cv::getTickFrequency());
std::cout << "found: " << found.size() << std::endl;
size_t i, j;
for( i = 0; i < found.size(); i++ )
{
Rect r = found[i];
for( j = 0; j < found.size(); j++ )
if( j != i && (r & found[j]) == r)
break;
if( j == found.size() )
found_filtered.push_back(r);
}
for( i = 0; i < found_filtered.size(); i++ )
{
Rect r = found_filtered[i];
// the HOG detector returns slightly larger rectangles than the real objects.
// so we slightly shrink the rectangles to get a nicer output.
r.x += cvRound(r.width*0.1);
r.width = cvRound(r.width*0.8);
r.y += cvRound(r.height*0.07);
r.height = cvRound(r.height*0.8);
rectangle(img, r.tl(), r.br(), cv::Scalar(0,255,0), 3);
}
imshow("people detector", img);
int c = waitKey(0) & 255;
if( c == 'q' || c == 'Q' || !f)
break;
}
if(f)
fclose(f);
return 0;
}
HOG works with trained data. In order to use it efficiently, you have 3 possibilities:
Use your images with the same/close type of data of the trained data (i.e., like INRIA dataset shots) (the easy way)
Build your own training data to be used with HOG. (the hard way)
Find a very generic SVM set, which can be applied quite everywhere (hard to be found)
As in blackibiza answer, I had 2 main choices: find an already trained classifier, or do it for my self.
So, in the end, I managed to train a Hog classifier both with svmlight and with svm included in opencv.
The answer is yes: the detection depends on the sample size used for the training. If the classifier got samples of 64x128 pixel and you are trying to detect smaller object, it doesn't work. But the opposite is true: you can detect bigger object (though pyramid down the image and do a multi-scale-detection, also implemented in opencv).
Even if not documented in the CPU part you can find somewhere in the net, or you can youse the last (version 2.4.8) opencv and look at gpu module and you'll see those methods: gpu::HOGDescriptor::getPeopleDetector48x96 and gpu::HOGDescriptor::getPeopleDetector64x128, that are the two already trained hog detector.
As the last remark, I was warred about training time, but with 500 samples (more or less) the training process takes few minutes with a normal laptop.

Extracting DCT coefficients from encoded images and video

Is there a way to easily extract the DCT coefficients (and quantization parameters) from encoded images and video? Any decoder software must be using them to decode block-DCT encoded images and video. So I'm pretty sure the decoder knows what they are. Is there a way to expose them to whomever is using the decoder?
I'm implementing some video quality assessment algorithms that work directly in the DCT domain. Currently, the majority of my code uses OpenCV, so it would be great if anyone knows of a solution using that framework. I don't mind using other libraries (perhaps libjpeg, but that seems to be for still images only), but my primary concern is to do as little format-specific work as possible (I don't want to reinvent the wheel and write my own decoders). I want to be able to open any video/image (H.264, MPEG, JPEG, etc) that OpenCV can open, and if it's block DCT-encoded, to get the DCT coefficients.
In the worst case, I know that I can write up my own block DCT code, run the decompressed frames/images through it and then I'd be back in the DCT domain. That's hardly an elegant solution, and I hope I can do better.
Presently, I use the fairly common OpenCV boilerplate to open images:
IplImage *image = cvLoadImage(filename);
// Run quality assessment metric
The code I'm using for video is equally trivial:
CvCapture *capture = cvCaptureFromAVI(filename);
while (cvGrabFrame(capture))
{
IplImage *frame = cvRetrieveFrame(capture);
// Run quality assessment metric on frame
}
cvReleaseCapture(&capture);
In both cases, I get a 3-channel IplImage in BGR format. Is there any way I can get the DCT coefficients as well?
Well, I did a bit of reading and my original question seems to be an instance of wishful thinking.
Basically, it's not possible to get the DCT coefficients from H.264 video frames for the simple reason that H.264 doesn't use DCT. It uses a different transform (integer transform). Next, the coefficients for that transform don't necessarily change on a frame-by-frame basis -- H.264 is smarter cause it splits up frames into slices. It should be possible to get those coefficients through a special decoder, but I doubt OpenCV exposes it for the user.
For JPEG, things are a bit more positive. As I suspected, libjpeg exposes the DCT coefficients for you. I wrote a small app to show that it works (source at the end). It makes a new image using the DC term from each block. Because the DC term is equal to the block average (after proper scaling), the DC images are downsampled versions of the input JPEG image.
EDIT: fixed scaling in source
Original image (512 x 512):
DC images (64x64): luma Cr Cb RGB
Source (C++):
#include <stdio.h>
#include <assert.h>
#include <cv.h>
#include <highgui.h>
extern "C"
{
#include "jpeglib.h"
#include <setjmp.h>
}
#define DEBUG 0
#define OUTPUT_IMAGES 1
/*
* Extract the DC terms from the specified component.
*/
IplImage *
extract_dc(j_decompress_ptr cinfo, jvirt_barray_ptr *coeffs, int ci)
{
jpeg_component_info *ci_ptr = &cinfo->comp_info[ci];
CvSize size = cvSize(ci_ptr->width_in_blocks, ci_ptr->height_in_blocks);
IplImage *dc = cvCreateImage(size, IPL_DEPTH_8U, 1);
assert(dc != NULL);
JQUANT_TBL *tbl = ci_ptr->quant_table;
UINT16 dc_quant = tbl->quantval[0];
#if DEBUG
printf("DCT method: %x\n", cinfo->dct_method);
printf
(
"component: %d (%d x %d blocks) sampling: (%d x %d)\n",
ci,
ci_ptr->width_in_blocks,
ci_ptr->height_in_blocks,
ci_ptr->h_samp_factor,
ci_ptr->v_samp_factor
);
printf("quantization table: %d\n", ci);
for (int i = 0; i < DCTSIZE2; ++i)
{
printf("% 4d ", (int)(tbl->quantval[i]));
if ((i + 1) % 8 == 0)
printf("\n");
}
printf("raw DC coefficients:\n");
#endif
JBLOCKARRAY buf =
(cinfo->mem->access_virt_barray)
(
(j_common_ptr)cinfo,
coeffs[ci],
0,
ci_ptr->v_samp_factor,
FALSE
);
for (int sf = 0; (JDIMENSION)sf < ci_ptr->height_in_blocks; ++sf)
{
for (JDIMENSION b = 0; b < ci_ptr->width_in_blocks; ++b)
{
int intensity = 0;
intensity = buf[sf][b][0]*dc_quant/DCTSIZE + 128;
intensity = MAX(0, intensity);
intensity = MIN(255, intensity);
cvSet2D(dc, sf, (int)b, cvScalar(intensity));
#if DEBUG
printf("% 2d ", buf[sf][b][0]);
#endif
}
#if DEBUG
printf("\n");
#endif
}
return dc;
}
IplImage *upscale_chroma(IplImage *quarter, CvSize full_size)
{
IplImage *full = cvCreateImage(full_size, IPL_DEPTH_8U, 1);
cvResize(quarter, full, CV_INTER_NN);
return full;
}
GLOBAL(int)
read_JPEG_file (char * filename, IplImage **dc)
{
/* This struct contains the JPEG decompression parameters and pointers to
* working space (which is allocated as needed by the JPEG library).
*/
struct jpeg_decompress_struct cinfo;
struct jpeg_error_mgr jerr;
/* More stuff */
FILE * infile; /* source file */
/* In this example we want to open the input file before doing anything else,
* so that the setjmp() error recovery below can assume the file is open.
* VERY IMPORTANT: use "b" option to fopen() if you are on a machine that
* requires it in order to read binary files.
*/
if ((infile = fopen(filename, "rb")) == NULL) {
fprintf(stderr, "can't open %s\n", filename);
return 0;
}
/* Step 1: allocate and initialize JPEG decompression object */
cinfo.err = jpeg_std_error(&jerr);
/* Now we can initialize the JPEG decompression object. */
jpeg_create_decompress(&cinfo);
/* Step 2: specify data source (eg, a file) */
jpeg_stdio_src(&cinfo, infile);
/* Step 3: read file parameters with jpeg_read_header() */
(void) jpeg_read_header(&cinfo, TRUE);
/* We can ignore the return value from jpeg_read_header since
* (a) suspension is not possible with the stdio data source, and
* (b) we passed TRUE to reject a tables-only JPEG file as an error.
* See libjpeg.txt for more info.
*/
/* Step 4: set parameters for decompression */
/* In this example, we don't need to change any of the defaults set by
* jpeg_read_header(), so we do nothing here.
*/
jvirt_barray_ptr *coeffs = jpeg_read_coefficients(&cinfo);
IplImage *y = extract_dc(&cinfo, coeffs, 0);
IplImage *cb_q = extract_dc(&cinfo, coeffs, 1);
IplImage *cr_q = extract_dc(&cinfo, coeffs, 2);
IplImage *cb = upscale_chroma(cb_q, cvGetSize(y));
IplImage *cr = upscale_chroma(cr_q, cvGetSize(y));
cvReleaseImage(&cb_q);
cvReleaseImage(&cr_q);
#if OUTPUT_IMAGES
cvSaveImage("y.png", y);
cvSaveImage("cb.png", cb);
cvSaveImage("cr.png", cr);
#endif
*dc = cvCreateImage(cvGetSize(y), IPL_DEPTH_8U, 3);
assert(dc != NULL);
cvMerge(y, cr, cb, NULL, *dc);
cvReleaseImage(&y);
cvReleaseImage(&cb);
cvReleaseImage(&cr);
/* Step 7: Finish decompression */
(void) jpeg_finish_decompress(&cinfo);
/* We can ignore the return value since suspension is not possible
* with the stdio data source.
*/
/* Step 8: Release JPEG decompression object */
/* This is an important step since it will release a good deal of memory. */
jpeg_destroy_decompress(&cinfo);
fclose(infile);
return 1;
}
int
main(int argc, char **argv)
{
int ret = 0;
if (argc != 2)
{
fprintf(stderr, "usage: %s filename.jpg\n", argv[0]);
return 1;
}
IplImage *dc = NULL;
ret = read_JPEG_file(argv[1], &dc);
assert(dc != NULL);
IplImage *rgb = cvCreateImage(cvGetSize(dc), IPL_DEPTH_8U, 3);
cvCvtColor(dc, rgb, CV_YCrCb2RGB);
#if OUTPUT_IMAGES
cvSaveImage("rgb.png", rgb);
#else
cvNamedWindow("DC", CV_WINDOW_AUTOSIZE);
cvShowImage("DC", rgb);
cvWaitKey(0);
#endif
cvReleaseImage(&dc);
cvReleaseImage(&rgb);
return 0;
}
You can use, libjpeg to extract dct data of your jpeg file, but for h.264 video file, I can't find any open source code that give you dct data (actully Integer dct data). But you can use h.264 open source software like JM, JSVM or x264. In these two source file, you have to find their specific function that make use of dct function, and change it to your desire form, to get your output dct data.
For Image:
use the following code, and after read_jpeg_file( infilename, v, quant_tbl ), v and quant_tbl will have dct data and quantization table of your jpeg image respectively.
I used Qvector to store my output data, change it to your preferred c++ array list.
#include <iostream>
#include <stdio.h>
#include <jpeglib.h>
#include <stdlib.h>
#include <setjmp.h>
#include <fstream>
#include <QVector>
int read_jpeg_file( char *filename, QVector<QVector<int> > &dct_coeff, QVector<unsigned short> &quant_tbl)
{
struct jpeg_decompress_struct cinfo;
struct jpeg_error_mgr jerr;
FILE * infile;
if ((infile = fopen(filename, "rb")) == NULL) {
fprintf(stderr, "can't open %s\n", filename);
return 0;
}
cinfo.err = jpeg_std_error(&jerr);
jpeg_create_decompress(&cinfo);
jpeg_stdio_src(&cinfo, infile);
(void) jpeg_read_header(&cinfo, TRUE);
jvirt_barray_ptr *coeffs_array = jpeg_read_coefficients(&cinfo);
for (int ci = 0; ci < 1; ci++)
{
JBLOCKARRAY buffer_one;
JCOEFPTR blockptr_one;
jpeg_component_info* compptr_one;
compptr_one = cinfo.comp_info + ci;
for (int by = 0; by < compptr_one->height_in_blocks; by++)
{
buffer_one = (cinfo.mem->access_virt_barray)((j_common_ptr)&cinfo, coeffs_array[ci], by, (JDIMENSION)1, FALSE);
for (int bx = 0; bx < compptr_one->width_in_blocks; bx++)
{
blockptr_one = buffer_one[0][bx];
QVector<int> tmp;
for (int bi = 0; bi < 64; bi++)
{
tmp.append(blockptr_one[bi]);
}
dct_coeff.push_back(tmp);
}
}
}
// coantization table
j_decompress_ptr dec_cinfo = (j_decompress_ptr) &cinfo;
jpeg_component_info *ci_ptr = &dec_cinfo->comp_info[0];
JQUANT_TBL *tbl = ci_ptr->quant_table;
for(int ci =0 ; ci < 64; ci++){
quant_tbl.append(tbl->quantval[ci]);
}
return 1;
}
int main()
{
QVector<QVector<int> > v;
QVector<unsigned short> quant_tbl;
char *infilename = "your_image.jpg";
std::ofstream out;
out.open("out_dct.txt");
if( read_jpeg_file( infilename, v, quant_tbl ) > 0 ){
for(int j = 0; j < v.size(); j++ ){
for (int i = 0; i < v[0].size(); ++i){
out << v[j][i] << "\t";
}
out << "---------------" << std::endl;
}
out << "\n\n\n" << std::string(10,'-') << std::endl;
out << "\nQauntization Table:" << std::endl;
for(int i = 0; i < quant_tbl.size(); i++ ){
out << quant_tbl[i] << "\t";
}
}
else{
std::cout << "Can not read, Returned With Error";
return -1;
}
out.close();
return 0;
}

Resources