How to quickly transform from RGB24 to BGRA on iOS? - ios

which I use is vImageConvert_RGB888toARGB8888 and vImagePermuteChannels_ARGB8888. First change it from RGB24 to ARGB, because OPENGL ES doesn't support ARGB, so need to change ARGB to BGRA with vImagePermuteChannels_ARGB8888. But when the data need to change is big the time cost is high, so someone with better methods? thanks! My codes like this:
// Converts a packed RGB24 buffer to BGRA in two vImage passes:
// vImageConvert_RGB888toARGB8888 expands RGB -> ARGB, then
// vImagePermuteChannels_ARGB8888 with map {3,2,1,0} reorders ARGB -> BGRA.
// NOTE(review): `rgb`, `argb` and `bgra` are presumably vImage_Buffer ivars
// configured elsewhere — their width/height/rowBytes must match `pict`.
// Takes ownership of `pict` and frees it before returning.
- (void)transformRGBToBGRA:(const UInt8 *)pict{
    rgb.data = (void *)pict;
    vImage_Error error = vImageConvert_RGB888toARGB8888(&rgb,NULL,0,&argb,NO,kvImageNoFlags);
    if (error != kvImageNoError) {
        // FIX: the literal was written as #"..." (a markdown artifact);
        // Objective-C string literals require the @ prefix.
        NSLog(@"vImageConvert_RGB888toARGB8888 error");
    }
    const uint8_t permuteMap[4] = {3,2,1,0};
    error = vImagePermuteChannels_ARGB8888(&argb,&bgra,permuteMap,kvImageNoFlags);
    if (error != kvImageNoError) {
        NSLog(@"vImagePermuteChannels_ARGB8888 error");
    }
    free((void *)pict);
}

Now I found the fastest solution is assembly. Just like this:
// NOTE(review): NEON loop converting RGB24 to BGRA, 8 pixels (24 input /
// 32 output bytes) per iteration.  Concerns to confirm before relying on it:
//  - %0 and %2 are advanced by the `!` writeback yet are declared as plain
//    "r" INPUTS; GCC requires modified operands to be "+r" OUTPUTS, and the
//    "+r"(bgraData) constraint is invalid in the input list.  As written the
//    compiler may assume both pointers are unchanged after the asm.
//  - d3 (the alpha lane stored by vst4.8) is never initialized, so the
//    alpha channel of the output is garbage; a `vmov.i8 d3, #0xFF` before
//    the loop would produce opaque alpha.
//  - The clobber list names r6/r7/r8, which are never touched; it should
//    list "d0","d1","d2","d3", "cc" (for subs) and "memory" instead.
//  - %1 counts 8-pixel iterations, so numPixels24 is presumably
//    numPixels/8 — confirm against the caller.
__asm__ volatile(
"0: \n"
"# load 3 64-bit regs with interleave: \n"
"vld3.8 {d0,d1,d2}, [%0]! \n"
"# swap d0 and d2 - R and B\n"
"vswp d0, d2 \n"
"# store 4 64-bit regs: \n"
"vst4.8 {d0,d1,d2,d3}, [%2]! \n"
"subs %1, %1, #1 \n"
"bne 0b \n"
:
: "r"(img), "r"(numPixels24), "+r"(bgraData)
: "r6", "r7","r8"
);

Related

ImageMagick colors depth conversion

I have several thousands of tga files (without palette) which contain RGBA4444 data (I know usualy tga files don't contain RGBA4444 data). I would like to convert them into RGBA8888 data. I use the following command line:
convert -depth 4 woody4.tga -depth 8 woody8.tga
In this case, woody4.tga is the original RGBA4444 file, and woody8.tga the target RGBA8888 file but it doesn't change the colors of my pictures, what am I missing?
Thanks,
Pierre
Edit:
Thanks very much Mark, I have successfully converted more than 10,000 TGAs with your program; the result is very good and faithful to the original TGAs! This would have been impossible without the parallel command! Just one last point: I have around 50 larger TGAs (the backgrounds of the game) which are coded as RGBA5650 rather than RGBA4444 — how can I modify your program to handle RGBA5650? Thanks very much!
Oh, I see Eric beat me to it:-)
Hey ho! I did it a different way anyway and got a different answer so you can see which one you like best. I also wrote some C but I didn't rely on any libraries, I just read the TGA and converted it to a PAM format and let ImageMagick make that into PNG afterwards at command-line.
I chose PAM because it is the simplest file to write which supports transparency - see Wikipedia on PAM format.
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
// Convert a raw RGBA4444 TGA to a PAM (P7) stream on stdout.
// Usage: ./targa input.tga > output.pam
int main(int argc,char* argv[]){
    unsigned char buf[64];
    // FIX: the original passed argv[1] to fopen without checking argc.
    if(argc<2){
        fprintf(stderr,"Usage: %s input.tga > output.pam\n",argv[0]);
        exit(1);
    }
    FILE* fp=fopen(argv[1],"rb");
    if(fp==NULL){
        fprintf(stderr,"ERROR: Unable to open %s\n",argv[1]);
        exit(1);
    }
    // Read the 18-byte TGA header; width and height are little-endian
    // shorts at offsets 12 and 14.  (Read was previously unchecked.)
    if(fread(buf,1,18,fp)!=18){
        fprintf(stderr,"ERROR: %s is too short to be a TGA file\n",argv[1]);
        fclose(fp);
        exit(1);
    }
    unsigned short w=buf[12]|(buf[13]<<8);
    unsigned short h=buf[14]|(buf[15]<<8);
    // Write PAM header (PAM is the simplest format with an alpha channel).
    fprintf(stdout,"P7\n");
    fprintf(stdout,"WIDTH %d\n",w);
    fprintf(stdout,"HEIGHT %d\n",h);
    fprintf(stdout,"DEPTH 4\n");
    fprintf(stdout,"MAXVAL 255\n");
    fprintf(stdout,"TUPLTYPE RGB_ALPHA\n");
    fprintf(stdout,"ENDHDR\n");
    // Read one RGBA4444 pixel (2 bytes) at a time, stopping after w*h
    // pixels so a trailing TGA footer is not misread as pixel data.
    unsigned long remaining=(unsigned long)w*h;
    while(remaining-- && fread(buf,2,1,fp)==1){
        unsigned char out[4];
        unsigned char r=buf[1]&0x0f;
        unsigned char g=(buf[0]&0xf0)>>4;
        unsigned char b=buf[0]&0x0f;
        unsigned char a=(buf[1]&0xf0)>>4;
        // FIX: expand each 4-bit channel by replicating the nibble
        // (v*17 == v<<4|v) so 0xF maps to 0xFF; the original shift-only
        // version peaked at 0xF0 (fully opaque became 94% alpha).
        out[0]=(unsigned char)(r*17);
        out[1]=(unsigned char)(g*17);
        out[2]=(unsigned char)(b*17);
        out[3]=(unsigned char)(a*17);
        // Write the 4 expanded bytes out as RGBA8888
        fwrite(out,4,1,stdout);
    }
    fclose(fp);
    return 0;
}
I then compile that with gcc:
gcc targa.c -o targa
Or you could use clang:
clang targa.c -o targa
and run it with
./targa someImage.tga > someImage.pam
and convert the PAM to PNG with ImageMagick at the command-line:
convert someImage.pam someImage.png
If you want to avoid writing the intermediate PAM file to disk, you can pipe it straight into convert like this:
./targa illu_evolution_01.tga | convert - result.png
You can, equally, make a BMP output file if you wish:
./targa illu_evolution_01.tga | convert - result.bmp
If you have thousands of files to do, and you are on a Mac or Linux, you can use GNU Parallel and get them all done in parallel much faster like this:
parallel --eta './targa {} | convert - {.}.png' ::: *.tga
If you have more than a couple of thousand files, you may get "Argument list too long" errors, in which case, use the slightly harder syntax:
find . -name \*tga -print0 | parallel -0 --eta './targa {} | convert - {.}.png'
On a Mac, you would install GNU Parallel with homebrew using:
brew install parallel
For your RGBA5650 images, I will fall back to PPM as my intermediate format because the alpha channel of PAM is no longer needed. The code will now look like this:
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
// Convert raw RGB565 ("RGBA5650") TGA data to a PPM (P6) stream on stdout.
// Usage: ./targa565 input.tga > output.ppm
int main(int argc,char* argv[]){
    unsigned char buf[64];
    // FIX: the original passed argv[1] to fopen without checking argc.
    if(argc<2){
        fprintf(stderr,"Usage: %s input.tga > output.ppm\n",argv[0]);
        exit(1);
    }
    FILE* fp=fopen(argv[1],"rb");
    if(fp==NULL){
        fprintf(stderr,"ERROR: Unable to open %s\n",argv[1]);
        exit(1);
    }
    // Read the 18-byte TGA header; width and height are little-endian
    // shorts at offsets 12 and 14.  (Read was previously unchecked.)
    if(fread(buf,1,18,fp)!=18){
        fprintf(stderr,"ERROR: %s is too short to be a TGA file\n",argv[1]);
        fclose(fp);
        exit(1);
    }
    unsigned short w=buf[12]|(buf[13]<<8);
    unsigned short h=buf[14]|(buf[15]<<8);
    // Write PPM header
    fprintf(stdout,"P6\n");
    fprintf(stdout,"%d %d\n",w,h);
    fprintf(stdout,"255\n");
    // Read one RGB565 pixel (2 bytes) at a time, stopping after w*h
    // pixels so a trailing TGA footer is not misread as pixel data.
    unsigned long remaining=(unsigned long)w*h;
    while(remaining-- && fread(buf,2,1,fp)==1){
        unsigned char out[3];
        unsigned char r5=(buf[1]>>3)&0x1f;
        unsigned char g6=((buf[1]&7)<<3)|((buf[0]>>5)&7);
        unsigned char b5=buf[0]&0x1f;
        // FIX: expand 5/6-bit channels to 8 bits by replicating the top
        // bits into the low bits so full scale maps to 0xFF; the original
        // shift-only version peaked at 0xF8/0xFC.
        out[0]=(unsigned char)((r5<<3)|(r5>>2));
        out[1]=(unsigned char)((g6<<2)|(g6>>4));
        out[2]=(unsigned char)((b5<<3)|(b5>>2));
        // Write the 3 expanded bytes out as RGB888
        fwrite(out,3,1,stdout);
    }
    fclose(fp);
    return 0;
}
And will compile and run exactly the same way.
Updated answer.
After reading a few documents about TARGA format. I've revised + simplified a C program to convert.
// tga2img.c
#include <stdio.h>
#include <stdlib.h>
#include <wand/MagickWand.h>
// On-disk TGA file header (18 bytes), mirrored field by field.
// NOTE(review): it is fread() one member at a time, so struct padding is
// not a problem, but the multi-byte fields assume a little-endian host
// because TGA stores them little-endian.
typedef struct {
unsigned char idlength;      // bytes in the optional image ID field
unsigned char colourmaptype; // 0 = no colour map, 1 = colour map present
unsigned char datatypecode;  // image type (2 = uncompressed true-colour)
short int colourmaporigin;   // index of first colour map entry
short int colourmaplength;   // number of colour map entries
unsigned char colourmapdepth; // bits per colour map entry
short int x_origin;          // lower-left corner X
short int y_origin;          // lower-left corner Y
short int width;             // image width in pixels
short int height;            // image height in pixels
unsigned char bitsperpixel;  // bits per pixel (16 for RGBA4444 input)
unsigned char imagedescriptor; // alpha-depth / origin flag bits
} HEADER;
// Optional 26-byte TGA v2 footer located at the end of the file.
typedef struct {
int extensionoffset;         // offset of extension area (0 if absent)
int developeroffset;         // offset of developer directory (0 if absent)
char signature[16];          // "TRUEVISION-XFILE" for a v2 file
unsigned char p;             // '.' in a conforming footer
unsigned char n;             // terminating NUL byte
} FOOTER;
int main(int argc, const char * argv[]) {
HEADER tga_header;
FOOTER tga_footer;
FILE
* fd;
size_t
tga_data_size,
tga_pixel_size,
i,
j;
unsigned char
* tga_data,
* buffer;
const char
* input,
* output;
if (argc != 3) {
printf("Usage:\n\t %s <input> <output>\n", argv[0]);
return 1;
}
input = argv[1];
output = argv[2];
fd = fopen(input, "rb");
if (fd == NULL) {
fprintf(stderr, "Unable to read TGA input\n");
return 1;
}
/********\
* TARGA *
\*********/
#pragma mark TARGA
// Read TGA header
fread(&tga_header.idlength, sizeof(unsigned char), 1, fd);
fread(&tga_header.colourmaptype, sizeof(unsigned char), 1, fd);
fread(&tga_header.datatypecode, sizeof(unsigned char), 1, fd);
fread(&tga_header.colourmaporigin, sizeof( short int), 1, fd);
fread(&tga_header.colourmaplength, sizeof( short int), 1, fd);
fread(&tga_header.colourmapdepth, sizeof(unsigned char), 1, fd);
fread(&tga_header.x_origin, sizeof( short int), 1, fd);
fread(&tga_header.y_origin, sizeof( short int), 1, fd);
fread(&tga_header.width, sizeof( short int), 1, fd);
fread(&tga_header.height, sizeof( short int), 1, fd);
fread(&tga_header.bitsperpixel, sizeof(unsigned char), 1, fd);
fread(&tga_header.imagedescriptor, sizeof(unsigned char), 1, fd);
// Calculate sizes
tga_pixel_size = tga_header.bitsperpixel / 8;
tga_data_size = tga_header.width * tga_header.height * tga_pixel_size;
// Read image data
tga_data = malloc(tga_data_size);
fread(tga_data, 1, tga_data_size, fd);
// Read TGA footer.
fseek(fd, -26, SEEK_END);
fread(&tga_footer.extensionoffset, sizeof( int), 1, fd);
fread(&tga_footer.developeroffset, sizeof( int), 1, fd);
fread(&tga_footer.signature, sizeof( char), 16, fd);
fread(&tga_footer.p, sizeof(unsigned char), 1, fd);
fread(&tga_footer.n, sizeof(unsigned char), 1, fd);
fclose(fd);
buffer = malloc(tga_header.width * tga_header.height * 4);
#pragma mark RGBA4444 to RGBA8888
for (i = 0, j=0; i < tga_data_size; i+= tga_pixel_size) {
buffer[j++] = (tga_data[i+1] & 0x0f) << 4; // Red
buffer[j++] = tga_data[i ] & 0xf0; // Green
buffer[j++] = (tga_data[i ] & 0x0f) << 4; // Blue
buffer[j++] = tga_data[i+1] & 0xf0; // Alpha
}
free(tga_data);
/***************\
* IMAGEMAGICK *
\***************/
#pragma mark IMAGEMAGICK
MagickWandGenesis();
PixelWand * background;
background = NewPixelWand();
PixelSetColor(background, "none");
MagickWand * wand;
wand = NewMagickWand();
MagickNewImage(wand,
tga_header.width,
tga_header.height,
background);
background = DestroyPixelWand(background);
MagickImportImagePixels(wand,
0,
0,
tga_header.width,
tga_header.height,
"RGBA",
CharPixel,
buffer);
free(buffer);
MagickWriteImage(wand, argv[2]);
wand = DestroyMagickWand(wand);
return 0;
}
Which can be compiled with clang $(MagickWand-config --cflags --libs) -o tga2im tga2im.c, and can be executed simply by ./tga2im N_birthday_0000.tga N_birthday_0000.tga.png.
Original answer.
The only way I can think of converting the images is to author a quick program/script to do the bitwise color-pixel logic.
This answer offers a quick way to read the image data; so combining with MagickWand, can be converted easily. (Although I know there'll be better solutions found on old game-dev forums...)
#include <stdio.h>
#include <stdbool.h>
#include <wand/MagickWand.h>
// Minimal in-memory TGA representation filled in by LoadTGAFile().
typedef struct
{
unsigned char imageTypeCode; // TGA type (2 = uncompressed RGB, 3 = mono)
short int imageWidth;        // width in pixels
short int imageHeight;       // height in pixels
unsigned char bitCount;      // bits per pixel (16 for the RGBA4444 input)
unsigned char *imageData;    // malloc'd pixel data; caller must free
} TGAFILE;
bool LoadTGAFile(const char *filename, TGAFILE *tgaFile);
// Convert an RGBA4444 TGA (read via LoadTGAFile) to any format
// ImageMagick can write.  Usage: <prog> <input> <output>
int main(int argc, const char * argv[]) {
    const char *input, *output;
    if (argc != 3) {
        printf("Usage:\n\t%s <input> <output>\n", argv[0]);
        // FIX: the original printed usage but fell through and went on
        // to dereference argv[1]/argv[2], which do not exist.
        return 1;
    }
    input = argv[1];
    output = argv[2];
    MagickWandGenesis();
    TGAFILE header;
    if (LoadTGAFile(input, &header) == true) {
        // Build a blank canvas image matching the TGA file.
        MagickWand * wand;
        wand = NewMagickWand();
        PixelWand * background;
        background = NewPixelWand();
        PixelSetColor(background, "NONE");
        MagickNewImage(wand, header.imageWidth, header.imageHeight, background);
        background = DestroyPixelWand(background);
        // Allocate RGBA8888 buffer
        unsigned char * buffer = malloc(header.imageWidth * header.imageHeight * 4);
        // Iterate over TGA image data and convert RGBA4444 to RGBA8888.
        size_t pixel_size = header.bitCount / 8;
        size_t total_bytes = header.imageWidth * header.imageHeight * pixel_size;
        for (size_t i = 0, j = 0; i < total_bytes; i += pixel_size) {
            // Red
            buffer[j++] = (header.imageData[i ] & 0x0f) << 4;
            // Green
            buffer[j++] = (header.imageData[i ] & 0xf0);
            // Blue -- FIX: take the LOW nibble of the second byte.  The
            // original computed ((imageData[i+1] & 0xf0) << 4), which
            // overflows an 8-bit value and always stored 0.  (Channel
            // order may still need adjusting, as noted below.)
            buffer[j++] = (header.imageData[i+1] & 0x0f) << 4;
            // Alpha
            buffer[j++] = (header.imageData[i+1] & 0xf0);
        }
        // Import image data over the blank canvas
        MagickImportImagePixels(wand, 0, 0, header.imageWidth, header.imageHeight, "RGBA", CharPixel, buffer);
        // Write image
        MagickWriteImage(wand, output);
        wand = DestroyMagickWand(wand);
        // FIX: release the conversion buffer and the pixel data that
        // LoadTGAFile malloc'd (both were leaked).
        free(buffer);
        free(header.imageData);
    } else {
        fprintf(stderr, "Could not read TGA file %s\n", input);
    }
    MagickWandTerminus();
    return 0;
}
/*
 * Method adapted (with fixes) from https://stackoverflow.com/a/7050007/438117
 * Show your love by +1 to Wroclai answer.
 *
 * Reads an uncompressed TGA (type 2 or 3) into tgaFile, allocating
 * tgaFile->imageData (caller frees).  Returns false on any I/O or
 * format problem.
 */
bool LoadTGAFile(const char *filename, TGAFILE *tgaFile)
{
    FILE *filePtr;
    unsigned char ucharBad;
    short int sintBad;
    long imageSize;
    int colorMode;
    unsigned char colorSwap;
    // Open the TGA file.
    filePtr = fopen(filename, "rb");
    if (filePtr == NULL)
    {
        return false;
    }
    // Read the two first bytes we don't need.
    fread(&ucharBad, sizeof(unsigned char), 1, filePtr);
    fread(&ucharBad, sizeof(unsigned char), 1, filePtr);
    // Which type of image gets stored in imageTypeCode.
    fread(&tgaFile->imageTypeCode, sizeof(unsigned char), 1, filePtr);
    // For our purposes, the type code should be 2 (uncompressed RGB image)
    // or 3 (uncompressed black-and-white images).
    if (tgaFile->imageTypeCode != 2 && tgaFile->imageTypeCode != 3)
    {
        fclose(filePtr);
        return false;
    }
    // Read 13 bytes of data we don't need.
    fread(&sintBad, sizeof(short int), 1, filePtr);
    fread(&sintBad, sizeof(short int), 1, filePtr);
    fread(&ucharBad, sizeof(unsigned char), 1, filePtr);
    fread(&sintBad, sizeof(short int), 1, filePtr);
    fread(&sintBad, sizeof(short int), 1, filePtr);
    // Read the image's width and height (little-endian shorts; assumes a
    // little-endian host).
    fread(&tgaFile->imageWidth, sizeof(short int), 1, filePtr);
    fread(&tgaFile->imageHeight, sizeof(short int), 1, filePtr);
    // Read the bit depth.
    fread(&tgaFile->bitCount, sizeof(unsigned char), 1, filePtr);
    // Read one byte of data we don't need.
    fread(&ucharBad, sizeof(unsigned char), 1, filePtr);
    // Color mode -> 3 = BGR, 4 = BGRA.
    colorMode = tgaFile->bitCount / 8;
    imageSize = (long)tgaFile->imageWidth * tgaFile->imageHeight * colorMode;
    // Allocate memory for the image data (FIX: result was unchecked).
    tgaFile->imageData = (unsigned char*)malloc(sizeof(unsigned char)*imageSize);
    if (tgaFile->imageData == NULL)
    {
        fclose(filePtr);
        return false;
    }
    // Read the image data (FIX: short reads were previously ignored).
    if (fread(tgaFile->imageData, sizeof(unsigned char), imageSize, filePtr) != (size_t)imageSize)
    {
        free(tgaFile->imageData);
        tgaFile->imageData = NULL;
        fclose(filePtr);
        return false;
    }
    // Change from BGR(A) to RGB(A) so OpenGL can read the image data.
    // FIX: only do this when each channel is a whole byte (24/32-bit
    // pixels).  For 16-bit data (colorMode == 2, e.g. RGBA4444) the
    // original unconditional swap of bytes imageIdx and imageIdx+2
    // crossed pixel boundaries and scrambled the image.
    if (colorMode >= 3)
    {
        for (int imageIdx = 0; imageIdx < imageSize; imageIdx += colorMode)
        {
            colorSwap = tgaFile->imageData[imageIdx];
            tgaFile->imageData[imageIdx] = tgaFile->imageData[imageIdx + 2];
            tgaFile->imageData[imageIdx + 2] = colorSwap;
        }
    }
    fclose(filePtr);
    return true;
}
The order of the color channels may need to be switched around.
I have been thinking about this some more and it ought to be possible to reconstruct the image without any special software - I can't quite see my mistake for the moment by maybe #emcconville can cast your expert eye over it and point out my mistake! Pretty please?
So, my concept is that ImageMagick has read in the image size and pixel data correctly but has just allocated the bits according to the standard RGB5551 interpretation of a TARGA file rather than RGBA4444. So, we rebuild the 16-bits of data it read and split them differently.
The first line below does the rebuild into the original 16-bit data, then each subsequent line splits out one of the RGBA channels and then we recombine them:
convert illu_evolution_01.tga -depth 16 -channel R -fx "(((r*255)<<10) | ((g*255)<<5) | (b*255) | ((a*255)<<15))/255" \
\( -clone 0 -channel R -fx "((((r*255)>>12)&15)<<4)/255" \) \
\( -clone 0 -channel R -fx "((((r*255)>>8 )&15)<<4)/255" \) \
\( -clone 0 -channel R -fx "((((r*255) )&15)<<4)/255" \) \
-delete 0 -set colorspace RGB -combine -colorspace sRGB result.png
# The rest is just debug so you can see the reconstructed channels in [rgba].png
convert result.png -channel R -separate r.png
convert result.png -channel G -separate g.png
convert result.png -channel B -separate b.png
convert result.png -channel A -separate a.png
So, the following diagram represents the 16-bits of 1 pixel:
A R R R R R G G G G G B B B B B <--- what IM saw
R R R R G G G G B B B B A A A A <--- what it really meant
Yes, I have disregarded the alpha channel for the moment.

Converting from single channel 12-bit little endian raw binary data

I have a raw binary image file where every pixel consists of 12 bit data (gray-scale). For example, the first four pixels in the raw file:
0x0 0xC0
0x1 0x05
0x2 0x5C
0x3 0xC0
0x4 0x05
0x5 0x5C
This corresponds to 4 pixel values with the value 0x5C0 (little endian).
Unfortunately, using the following command:
convert -size 384x184 -depth 12 gray:frame_0.raw out.tiff
interprets the pixel values incorrectly (big endian), resulting in the pixel values 0xC00 0x55C 0xC00 0x55C.
I tried the options -endian LSB and -endian MSB, but unfortunately they only change the output byte order, not the input byte order.
How do I get convert to open the raw image as 12-bit little endian data?
I had a quick try at this; I have no test data, but it should be fairly close, and any errors should be easy to detect with your images:
// pad12to16.c
// Mark Setchell
// Pad 12-bit data to 16-bit
//
// Compile with:
// gcc pad12to16.c -o pad12to16
//
// Run with:
// ./pad12to16 < 12-bit.dat > 16-bit.dat
#include <stdio.h>
#include <sys/uio.h>
#include <unistd.h>
#include <sys/types.h>
#define BYTESPERREAD 6
#define PIXPERWRITE 4
// Read little-endian 12-bit packed grayscale from stdin and write each
// sample padded to a host-order 16-bit value on stdout.
// Packing: bytes 0-2 hold pixels 0-1, bytes 3-5 hold pixels 2-3 with the
// identical layout:  p0 = b0 | (b1 & 0x0f) << 8   p1 = b2 << 4 | b1 >> 4
int main(){
    unsigned char buf[BYTESPERREAD];
    unsigned short pixel[PIXPERWRITE];
    // Read 6 bytes at a time and decode to 4 off 16-bit pixels
    while(read(0,buf,BYTESPERREAD)==BYTESPERREAD){
        pixel[0] = buf[0] | ((buf[1] & 0xf) << 8);
        pixel[1] = (buf[2] << 4) | ((buf[1] & 0xf0) >> 4);
        // BUG FIX: the high nibble of pixel 2 lives in buf[4], not buf[2]
        // (bytes 3-5 mirror bytes 0-2).  With the question's sample stream
        // C0 05 5C C0 05 5C the original produced 0x5C0,0x5C0,0xCC0,0x5C0
        // instead of four 0x5C0 values.
        pixel[2] = buf[3] | ((buf[4] & 0xf) << 8);
        pixel[3] = (buf[5] << 4) | ((buf[4] & 0xf0) >> 4);
        write(1,pixel,PIXPERWRITE*2);
    }
    return 0;
}
So you would run this (I think):
./pad12to16 < 12-bit.dat | convert -size 384x184 -depth 16 gray:- result.tif
Mark's answer is correct, as you'll need to involve some external tool to sort out the data stream. Usually there's some sort of padding when working with 12-bit depth. In the example blob provided, we see that each pair of pixels shares a common byte. The task of splitting the shared byte and shifting what-to-where is fairly easy. This answer complements Mark's answer, and argues that ImageMagick's C API might as well be used.
// my12bit_convert.c
#include <stdio.h>
#include <stdlib.h>
#include <magick/MagickCore.h>
#include <wand/MagickWand.h>
// Exception type last reported by IS_OK (filled by MagickGetException).
static ExceptionType serverty;
// High / low nibble of the byte shared between two packed 12-bit pixels.
#define LEADING_HALF(x) (((x) >> 4) & 0xF)
#define FOLLOWING_HALF(x) ((x) & 0xF)
// Scale a 12-bit sample (0..0xFFF) to a double in [0,1].
// FIX: the original definition ended with a stray ';', which would break
// any use of the macro inside a larger expression; arguments are now also
// parenthesized.
#define TO_DOUBLE(x) ((double)(x) / (double)0xFFF)
// Report a MagickWand error to stderr when x == MagickFalse.
// Wrapped in do/while(0) so it behaves as a single statement after
// an un-braced if/else.
#define IS_OK(x,y) do { if((x) == MagickFalse) { fprintf(stderr, "%s\n", MagickGetException((y), &serverty)); } } while(0)
// Convert a raw little-endian 12-bit grayscale file to any format
// ImageMagick can write.  Usage: <prog> -size WxH <input> <output>
int main(int argc, const char * argv[]) {
    // Locals
    int i, tmp_pixels[2];
    double * pixel_buffer;   // one double in [0,1] per pixel
    size_t w = 0, h = 0, total = 0, iterator = 0;
    ssize_t x = 0, y = 0;
    const char *path = NULL, *output = NULL;
    unsigned char read_pixel_chunk[3];
    FILE * fh;
    MagickWand * wand;
    PixelWand * pwand;
    MagickBooleanType ok;
    // Iterate over arguments and collect size, input, & output.
    for ( i = 1; i < argc; i++ ) {
        if (argv[i][0] == '-') {
            if (LocaleCompare("size", &argv[i][1]) == 0) {
                i++;
                if (i == argc) {
                    fprintf(stderr, "Missing `WxH' argument for `-size'.");
                    return EXIT_FAILURE;
                }
                GetGeometry(argv[i], &x, &y, &w, &h);
            }
        } else if (path == NULL){
            path = argv[i];
        } else {
            output = argv[i];
        }
    }
    // Validate to some degree
    if ( path == NULL ) {
        fprintf(stderr, "Missing input path\n");
        return EXIT_FAILURE;
    }
    if ( output == NULL ) {
        fprintf(stderr, "Missing output path\n");
        return EXIT_FAILURE;
    }
    total = w * h;
    if (total == 0) {
        fprintf(stderr, "Unable to determine size of %s. (use `-size WxH')\n", path);
        return EXIT_FAILURE;
    }
    // Allocate memory and start the party! (FIX: malloc was unchecked.)
    pixel_buffer = malloc(sizeof(double) * total);
    if (pixel_buffer == NULL) {
        fprintf(stderr, "Out of memory\n");
        return EXIT_FAILURE;
    }
    MagickWandGenesis();
    // Read input file and unpack the 12-bit pixels.
    fh = fopen(path, "rb");
    if (fh == NULL) {
        fprintf(stderr, "Unable to read `%s'\n", path);
        return 1;
    }
    // Each 3-byte chunk packs two little-endian 12-bit pixels.  FIX: loop
    // on fread() itself rather than feof() (feof only becomes true AFTER a
    // failed read), and stop once w*h pixels are decoded — the original
    // kept writing past pixel_buffer when the file held more data than
    // -size promised.
    while (iterator + 1 < total && fread(read_pixel_chunk, 3, 1, fh) == 1) {
        // 0xC0 0x05
        //   ^------' ==> 0x05C0
        tmp_pixels[0] = FOLLOWING_HALF(read_pixel_chunk[1]) << 8 | read_pixel_chunk[0];
        // 0x05 0x5C
        //   '------^ ==> 0x05C0
        tmp_pixels[1] = read_pixel_chunk[2] << 4 | LEADING_HALF(read_pixel_chunk[1]);
        // 0x5C0 / 0xFFF ==> 0.359463
        pixel_buffer[iterator++] = TO_DOUBLE(tmp_pixels[0]);
        pixel_buffer[iterator++] = TO_DOUBLE(tmp_pixels[1]);
    }
    fclose(fh);
    // Create image
    wand = NewMagickWand();
    pwand = NewPixelWand();
    ok = PixelSetColor(pwand, "white");
    IS_OK(ok, wand);
    // Create new Image
    ok = MagickNewImage(wand, w, h, pwand);
    IS_OK(ok, wand);
    // Import pixels as gray, or intensity, values.
    ok = MagickImportImagePixels(wand, x, y, w, h, "I", DoublePixel, pixel_buffer);
    IS_OK(ok, wand);
    // Save output
    ok = MagickWriteImage(wand, output);
    IS_OK(ok, wand);
    // Clean house
    DestroyPixelWand(pwand);
    DestroyMagickWand(wand);
    MagickWandTerminus();
    if (pixel_buffer) {
        free(pixel_buffer);
    }
    return 0;
}
Which can be compiled with
LLVM_CFLAGS=`MagickWand-config --cflags`
LLVM_LDFLAGS=`MagickWand-config --ldflags`
clang $LLVM_CFLAGS $LLVM_LDFLAGS -o my12bit_convert my12bit_convert.c
And usage
./my12bit_convert -size 384x184 frame_0.raw out.tiff

On iOS how to quickly convert RGB24 to BGR24?

I use vImageConvert_RGB888toPlanar8 and vImageConvert_Planar8toRGB888 from Accelerate.framework to convert RGB24 to BGR24, but when the data need to transform is very big, such as 3M or 4M, the time need to spend on this is about 10ms. So some one know some fast enough idea?.My code like this:
// Converts packed RGB24 to BGR24 by splitting into planar R/G/B buffers
// and re-interleaving the planes in the opposite order.
// NOTE(review): `rgb`, `red`, `green`, `blue` and `bgr` are presumably
// pre-configured vImage_Buffer ivars matching `pict`'s dimensions.
// Takes ownership of `pict` and frees it before returning.
- (void)transformRGBToBGR:(const UInt8 *)pict{
    rgb.data = (void *)pict;
    vImage_Error error = vImageConvert_RGB888toPlanar8(&rgb,&red,&green,&blue,kvImageNoFlags);
    if (error != kvImageNoError) {
        // FIX: string literals need '@' (was '#', a markdown artifact),
        // and the old message named the wrong function
        // (vImageConvert_RGB888toARGB8888).
        NSLog(@"vImageConvert_RGB888toPlanar8 error");
    }
    error = vImageConvert_Planar8toRGB888(&blue,&green,&red,&bgr,kvImageNoFlags);
    if (error != kvImageNoError) {
        NSLog(@"vImageConvert_Planar8toRGB888 error");
    }
    free((void *)pict);
}
With a RGB888ToPlanar8 call you scatter the data and then gather it once again. This is very-very-very bad. If the memory overhead of 33% is affordable, try using the RGBA format and permute the B/R bytes in-place.
If you want to avoid that 33% overhead, then I might suggest the following: iterate over all the pixels, but read only multiples of 4 bytes (since lcm(3,4) is 12, that is 3 dwords).
// Sketch of a word-at-a-time RGB<->BGR shuffle: 12 bytes (3 dwords) hold
// exactly 4 RGB24 pixels, so reading three uint32s per iteration keeps
// memory accesses aligned.  This is illustrative pseudocode (note the
// literal "..." placeholders below), not working code.
uint8_t* src_image;
uint8_t* dst_image;
// NOTE(review): casting byte pointers to uint32_t* assumes the image
// buffers are 4-byte aligned.
uint32_t* src = (uint32_t*)src_image;
uint32_t* dst = (uint32_t*)dst_image;
uint32_t v1, v2, v3;
uint32_t nv1, nv2, nv3;
// NOTE(review): each iteration consumes 12 BYTES = 4 pixels, so the bound
// presumably should be num_pixels / 4 (or num_bytes / 12) — confirm.
for(int i = 0 ; i < num_pixels / 12 ; i++)
{
// read 12 bytes
v1 = *src++;
v2 = *src++;
v3 = *src++;
// shuffle bits in the pixels
// [R1 G1 B1 R2 | G2 B2 R3 G3 | B3 R4 G4 B4]
nv1 = // [B1 G1 R1 B2]
// NOTE(review): the masks/shifts below do not match the comment above —
// the first and third terms both land in byte 0, and ((v2 >> 24) & 0xFF)
// cannot reach byte 3.  Treat as a placeholder to be reworked.
((v1 >> 8) & 0xFF) | (v1 & 0x00FF0000) | ((v1 >> 16) & 0xFF) | ((v2 >> 24) & 0xFF);
nv2 = // [G2 R2 B3 G3]
...
nv3 = // [R3 B4 G4 R4]
...
// write 12 bytes
*dst++ = nv1;
*dst++ = nv2;
*dst++ = nv3;
}
Even better can be done with NEON intrinsics.
See this link from ARM's website to see how the 24-bit swapping is done.
The BGR-to-RGB can be done in-place like this:
// In-place BGR24 <-> RGB24 swap using NEON: vld3.8 de-interleaves 8 pixels
// into d0/d1/d2, vswp exchanges the B and R planes, and vst3.8
// re-interleaves and advances the pointer (writeback on the store only, so
// the store hits the same 24 bytes the load read).
// NOTE(review): numPixels24 is the number of 8-pixel (24-byte) chunks,
// i.e. numPixels / 8 — the "divided by 24" comment presumably refers to
// bytes; confirm against the caller.
// NOTE(review): %0 and %1 are modified inside the asm yet declared as "r"
// inputs; GCC requires "+r" outputs for modified operands, and the clobber
// list should name d0-d2, "cc" and "memory" rather than the unused r4/r5.
void neon_asm_convert_BGR_TO_RGB(uint8_t* img, int numPixels24)
{
// numPixels is divided by 24
__asm__ volatile(
"0: \n"
"# load 3 64-bit regs with interleave: \n"
"vld3.8 {d0,d1,d2}, [%0] \n"
"# swap d0 and d2 - R and B\n"
"vswp d0, d2 \n"
"# store 3 64-bit regs: \n"
"vst3.8 {d0,d1,d2}, [%0]! \n"
"subs %1, %1, #1 \n"
"bne 0b \n"
:
: "r"(img), "r"(numPixels24)
: "r4", "r5"
);
}
Just swap the channels - BGRA to RGBA
// Swap the B and R channels of a BGRA frame to produce RGBA.  The permute
// map {2,1,0,3} reorders the bytes of each 32-bit pixel, so applying it to
// BGRA data yields RGBA (vImagePermuteChannels_ARGB8888 is agnostic to the
// actual channel order; it just shuffles bytes within each pixel).
- (void)convertBGRAFrame:(const CLPBasicVideoFrame &)bgraFrame toRGBA:(CLPBasicVideoFrame &)rgbaFrame
{
    // Wrap the raw pixel pointers in vImage_Buffer descriptors.
    vImage_Buffer bgraImageBuffer = {
        .width = bgraFrame.width,
        .height = bgraFrame.height,
        .rowBytes = bgraFrame.bytesPerRow,
        .data = bgraFrame.rawPixelData
    };
    vImage_Buffer rgbaImageBuffer = {
        .width = rgbaFrame.width,
        .height = rgbaFrame.height,
        .rowBytes = rgbaFrame.bytesPerRow,
        .data = rgbaFrame.rawPixelData
    };
    const uint8_t byteSwapMap[4] = { 2, 1, 0, 3 };
    vImage_Error error;
    error = vImagePermuteChannels_ARGB8888(&bgraImageBuffer, &rgbaImageBuffer, byteSwapMap, kvImageNoFlags);
    if (error != kvImageNoError) {
        // FIX: the format string was written as #"..." (a markdown
        // artifact); Objective-C string literals require '@'.
        NSLog(@"%s, vImage error %zd", __PRETTY_FUNCTION__, error);
    }
}

OpenCL: Strange buffer or image behaviour with NVidia but not AMD

I have a big problem (on Linux):
I create a buffer with defined data, then an OpenCL kernel takes this data and puts it into an image2d_t. When working on an AMD C50 (Fusion CPU/GPU) the program works as desired, but on my GeForce 9500 GT the given kernel computes the correct result very rarely. Sometimes the result is correct, but very often it is incorrect. Sometimes it depends on very strange changes like removing unused variable declarations or adding a newline. I realized that disabling the optimization will increase the probability to fail. I have the most actual display driver in both systems.
Here is my reduced code:
#include <CL/cl.h>
#include <string>
#include <iostream>
#include <sstream>
#include <cmath>
// Translate an OpenCL status code into its symbolic name and throw a
// std::string describing the failure.  No-op when err == CL_SUCCESS.
// @param err   status returned by an OpenCL API call (0 or negative)
// @param name  caller-supplied label identifying which call failed
// @throws std::string "<name><SYMBOL> (<err>)"
void checkOpenCLErr(cl_int err, std::string name){
    // Indexed by -err; entries 13..29 are reserved/unassigned codes.
    static const char* const errorString[] = {
        "CL_SUCCESS",
        "CL_DEVICE_NOT_FOUND",
        "CL_DEVICE_NOT_AVAILABLE",
        "CL_COMPILER_NOT_AVAILABLE",
        "CL_MEM_OBJECT_ALLOCATION_FAILURE",
        "CL_OUT_OF_RESOURCES",
        "CL_OUT_OF_HOST_MEMORY",
        "CL_PROFILING_INFO_NOT_AVAILABLE",
        "CL_MEM_COPY_OVERLAP",
        "CL_IMAGE_FORMAT_MISMATCH",
        "CL_IMAGE_FORMAT_NOT_SUPPORTED",
        "CL_BUILD_PROGRAM_FAILURE",
        "CL_MAP_FAILURE",
        "",
        "",
        "",
        "",
        "",
        "",
        "",
        "",
        "",
        "",
        "",
        "",
        "",
        "",
        "",
        "",
        "",
        "CL_INVALID_VALUE",
        "CL_INVALID_DEVICE_TYPE",
        "CL_INVALID_PLATFORM",
        "CL_INVALID_DEVICE",
        "CL_INVALID_CONTEXT",
        "CL_INVALID_QUEUE_PROPERTIES",
        "CL_INVALID_COMMAND_QUEUE",
        "CL_INVALID_HOST_PTR",
        "CL_INVALID_MEM_OBJECT",
        "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR",
        "CL_INVALID_IMAGE_SIZE",
        "CL_INVALID_SAMPLER",
        "CL_INVALID_BINARY",
        "CL_INVALID_BUILD_OPTIONS",
        "CL_INVALID_PROGRAM",
        "CL_INVALID_PROGRAM_EXECUTABLE",
        "CL_INVALID_KERNEL_NAME",
        "CL_INVALID_KERNEL_DEFINITION",
        "CL_INVALID_KERNEL",
        "CL_INVALID_ARG_INDEX",
        "CL_INVALID_ARG_VALUE",
        "CL_INVALID_ARG_SIZE",
        "CL_INVALID_KERNEL_ARGS",
        "CL_INVALID_WORK_DIMENSION",
        "CL_INVALID_WORK_GROUP_SIZE",
        "CL_INVALID_WORK_ITEM_SIZE",
        "CL_INVALID_GLOBAL_OFFSET",
        "CL_INVALID_EVENT_WAIT_LIST",
        "CL_INVALID_EVENT",
        "CL_INVALID_OPERATION",
        "CL_INVALID_GL_OBJECT",
        "CL_INVALID_BUFFER_SIZE",
        "CL_INVALID_MIP_LEVEL",
        "CL_INVALID_GLOBAL_WORK_SIZE",
    };
    if (err != CL_SUCCESS) {
        std::stringstream str;
        // FIX: guard the table lookup — the original indexed with -err
        // unchecked, so a positive or out-of-range code read out of
        // bounds.
        const int count = (int)(sizeof(errorString)/sizeof(errorString[0]));
        const int idx = -err;
        if (idx >= 0 && idx < count && errorString[idx][0] != '\0') {
            str << errorString[idx] << " (" << err << ")";
        } else {
            str << "UNKNOWN_ERROR" << " (" << err << ")";
        }
        throw std::string(name)+(str.str());
    }
}
int main(){
try{
cl_context m_context;
cl_platform_id* m_platforms;
unsigned int m_numPlatforms;
cl_command_queue m_queue;
cl_device_id m_device;
cl_int error = 0; // Used to handle error codes
clGetPlatformIDs(0,NULL,&m_numPlatforms);
m_platforms = new cl_platform_id[m_numPlatforms];
error = clGetPlatformIDs(m_numPlatforms,m_platforms,&m_numPlatforms);
checkOpenCLErr(error, "getPlatformIDs");
// Device
error = clGetDeviceIDs(m_platforms[0], CL_DEVICE_TYPE_GPU, 1, &m_device, NULL);
checkOpenCLErr(error, "getDeviceIDs");
// Context
cl_context_properties properties[] =
{ CL_CONTEXT_PLATFORM, (cl_context_properties)(m_platforms[0]), 0};
m_context = clCreateContextFromType(properties, CL_DEVICE_TYPE_GPU, NULL, NULL, NULL);
// m_private->m_context = clCreateContext(properties, 1, &m_private->m_device, NULL, NULL, &error);
checkOpenCLErr(error, "Create context");
// Command-queue
m_queue = clCreateCommandQueue(m_context, m_device, 0, &error);
checkOpenCLErr(error, "Create command queue");
//Build program and kernel
const char* source = "#pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable\n"
"\n"
"__kernel void bufToImage(__global unsigned char* in, __write_only image2d_t out, const unsigned int offset_x, const unsigned int image_width , const unsigned int maxval ){\n"
"\tint i = get_global_id(0);\n"
"\tint j = get_global_id(1);\n"
"\tint width = get_global_size(0);\n"
"\tint height = get_global_size(1);\n"
"\n"
"\tint pos = j*image_width*3+(offset_x+i)*3;\n"
"\tif( maxval < 256 ){\n"
"\t\tfloat4 c = (float4)(in[pos],in[pos+1],in[pos+2],1.0f);\n"
"\t\tc.x /= maxval;\n"
"\t\tc.y /= maxval;\n"
"\t\tc.z /= maxval;\n"
"\t\twrite_imagef(out, (int2)(i,j), c);\n"
"\t}else{\n"
"\t\tfloat4 c = (float4)(255.0f*in[2*pos]+in[2*pos+1],255.0f*in[2*pos+2]+in[2*pos+3],255.0f*in[2*pos+4]+in[2*pos+5],1.0f);\n"
"\t\tc.x /= maxval;\n"
"\t\tc.y /= maxval;\n"
"\t\tc.z /= maxval;\n"
"\t\twrite_imagef(out, (int2)(i,j), c);\n"
"\t}\n"
"}\n"
"\n"
"__constant sampler_t imageSampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;\n"
"\n"
"__kernel void imageToBuf(__read_only image2d_t in, __global unsigned char* out, const unsigned int offset_x, const unsigned int image_width ){\n"
"\tint i = get_global_id(0);\n"
"\tint j = get_global_id(1);\n"
"\tint pos = j*image_width*3+(offset_x+i)*3;\n"
"\tfloat4 c = read_imagef(in, imageSampler, (int2)(i,j));\n"
"\tif( c.x <= 1.0f && c.y <= 1.0f && c.z <= 1.0f ){\n"
"\t\tout[pos] = c.x*255.0f;\n"
"\t\tout[pos+1] = c.y*255.0f;\n"
"\t\tout[pos+2] = c.z*255.0f;\n"
"\t}else{\n"
"\t\tout[pos] = 200.0f;\n"
"\t\tout[pos+1] = 0.0f;\n"
"\t\tout[pos+2] = 255.0f;\n"
"\t}\n"
"}\n";
cl_int err;
cl_program prog = clCreateProgramWithSource(m_context,1,&source,NULL,&err);
if( -err != CL_SUCCESS ) throw std::string("clCreateProgramWithSources");
err = clBuildProgram(prog,0,NULL,"-cl-opt-disable",NULL,NULL);
if( -err != CL_SUCCESS ) throw std::string("clBuildProgram(fromSources)");
cl_kernel kernel = clCreateKernel(prog,"bufToImage",&err);
checkOpenCLErr(err,"CreateKernel");
cl_uint imageWidth = 80;
cl_uint imageHeight = 90;
//Initialize datas
cl_uint maxVal = 255;
cl_uint offsetX = 0;
int size = imageWidth*imageHeight*3;
int resSize = imageWidth*imageHeight*4;
cl_uchar* data = new cl_uchar[size];
cl_float* expectedData = new cl_float[resSize];
for( int i = 0,j=0; i < size; i++,j++ ){
data[i] = (cl_uchar)i;
expectedData[j] = (cl_float)((unsigned char)i)/255.0f;
if ( i%3 == 2 ){
j++;
expectedData[j] = 1.0f;
}
}
cl_mem inBuffer = clCreateBuffer(m_context,CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR,size*sizeof(cl_uchar),data,&err);
checkOpenCLErr(err, "clCreateBuffer()");
clFinish(m_queue);
cl_image_format imgFormat;
imgFormat.image_channel_order = CL_RGBA;
imgFormat.image_channel_data_type = CL_FLOAT;
cl_mem outImg = clCreateImage2D( m_context, CL_MEM_READ_WRITE, &imgFormat, imageWidth, imageHeight, 0, NULL, &err );
checkOpenCLErr(err,"get2DImage()");
clFinish(m_queue);
size_t kernelRegion[]={imageWidth,imageHeight};
size_t kernelWorkgroup[]={1,1};
//Fill kernel with data
clSetKernelArg(kernel,0,sizeof(cl_mem),&inBuffer);
clSetKernelArg(kernel,1,sizeof(cl_mem),&outImg);
clSetKernelArg(kernel,2,sizeof(cl_uint),&offsetX);
clSetKernelArg(kernel,3,sizeof(cl_uint),&imageWidth);
clSetKernelArg(kernel,4,sizeof(cl_uint),&maxVal);
//Run kernel
err = clEnqueueNDRangeKernel(m_queue,kernel,2,NULL,kernelRegion,kernelWorkgroup,0,NULL,NULL);
checkOpenCLErr(err,"RunKernel");
clFinish(m_queue);
//Check resulting data for validty
cl_float* computedData = new cl_float[resSize];;
size_t region[]={imageWidth,imageHeight,1};
const size_t offset[] = {0,0,0};
err = clEnqueueReadImage(m_queue,outImg,CL_TRUE,offset,region,0,0,computedData,0,NULL,NULL);
checkOpenCLErr(err, "readDataFromImage()");
clFinish(m_queue);
for( int i = 0; i < resSize; i++ ){
if( fabs(expectedData[i]-computedData[i])>0.1 ){
std::cout << "Expected: \n";
for( int j = 0; j < resSize; j++ ){
std::cout << expectedData[j] << " ";
}
std::cout << "\nComputed: \n";
std::cout << "\n";
for( int j = 0; j < resSize; j++ ){
std::cout << computedData[j] << " ";
}
std::cout << "\n";
throw std::string("Error, computed and expected data are not the same!\n");
}
}
}catch(std::string& e){
std::cout << "\nCaught an exception: " << e << "\n";
return 1;
}
std::cout << "Works fine\n";
return 0;
}
I also uploaded the source code for you to make it easier to test it:
http://www.file-upload.net/download-3524302/strangeOpenCLError.cpp.html
Can you please tell me if I've done anything wrong?
Is there any mistake in the code or is this a bug in my driver?
Best regards,
Alex
Edit: changed the program (both: here and the linked one) a little bit to make it more likely to get a mismatch.
I found the bug and this is an annoying one:
When working under linux and just linking the OpenCL program with the most actual "OpenCV" library (yes, the computation lib), the binary parts of the kernels, which get compiled and cached in ~/.nv are damaged.
Can you please install the actual OpenCV library and execute following commands:
Generating bad kernel maybe leading sometimes to bad behaviour:
rm -R ~/.nv && g++ strangeOpenCLError.cpp -lOpenCL -lopencv_gpu -o strangeOpenCLError && ./strangeOpenCLError && ls -la ~/.nv/ComputeCache/*/*
Generating good kernel which performs as desired:
rm -R ~/.nv && g++ strangeOpenCLError.cpp -lOpenCL -o strangeOpenCLError && ./strangeOpenCLError && ls -la ~/.nv/ComputeCache/*/*
On my system, when using -lopencv_gpu or -lopencv_core I get a kernel object in ~/.nv with a slightly different size due to slightly different binary parts. These smaller kernels computed bad results on my systems.
The problem is that the bug does not always appear: sometimes it shows up only when working on buffers that are big enough. So the more reliable measurement is the differing kernel-cache size. I edited the program in my question; now it is more likely to produce the bad result.
Best regards,
Alex
PS: I also created a bug report at NVidia and it is in progress. They could reproduce the bug on their system.
To turn off the Nvidia compiler cache, set the environment variable CUDA_CACHE_DISABLE=1. That may help to avoid the problem in the future.
In line
m_context = clCreateContextFromType(properties, CL_DEVICE_TYPE_GPU, NULL, NULL, NULL);
you should use &error as last parameter to get a meaningful error. Without it I got some silly error messages. (I needed to change the platform to get my GPU board.)
I can not reproduce the error with my nVidia GeForce 8600 GTS. I get a 'Works fine'. I tried it >20 times without any issue.
I also cannot see any error besides that your code is a little confusing. You should remove all commented-out code and introduce some blank lines to group the code a little.
Do you have the latest drivers? The behavior you describe sounds very familiar like an uninitialized buffer or variable, but I do not see anything like that.

Extracting DCT coefficients from encoded images and video

Is there a way to easily extract the DCT coefficients (and quantization parameters) from encoded images and video? Any decoder software must be using them to decode block-DCT encoded images and video. So I'm pretty sure the decoder knows what they are. Is there a way to expose them to whomever is using the decoder?
I'm implementing some video quality assessment algorithms that work directly in the DCT domain. Currently, the majority of my code uses OpenCV, so it would be great if anyone knows of a solution using that framework. I don't mind using other libraries (perhaps libjpeg, but that seems to be for still images only), but my primary concern is to do as little format-specific work as possible (I don't want to reinvent the wheel and write my own decoders). I want to be able to open any video/image (H.264, MPEG, JPEG, etc) that OpenCV can open, and if it's block DCT-encoded, to get the DCT coefficients.
In the worst case, I know that I can write up my own block DCT code, run the decompressed frames/images through it and then I'd be back in the DCT domain. That's hardly an elegant solution, and I hope I can do better.
Presently, I use the fairly common OpenCV boilerplate to open images:
// cvLoadImage decodes to a 3-channel 8-bit BGR IplImage.
// NOTE(review): the caller must cvReleaseImage(&image) when done (not
// shown in this snippet).
IplImage *image = cvLoadImage(filename);
// Run quality assessment metric
The code I'm using for video is equally trivial:
CvCapture *capture = cvCaptureFromAVI(filename);
while (cvGrabFrame(capture))
{
IplImage *frame = cvRetrieveFrame(capture);
// Run quality assessment metric on frame
}
cvReleaseCapture(&capture);
In both cases, I get a 3-channel IplImage in BGR format. Is there any way I can get the DCT coefficients as well?
Well, I did a bit of reading and my original question seems to be an instance of wishful thinking.
Basically, it's not possible to get the DCT coefficients from H.264 video frames, for the simple reason that H.264 doesn't use a DCT. It uses a different transform (an integer transform). Furthermore, the coefficients for that transform don't necessarily change on a frame-by-frame basis -- H.264 is smarter because it splits frames up into slices. It should be possible to get those coefficients through a special decoder, but I doubt OpenCV exposes them to the user.
For JPEG, things are a bit more positive. As I suspected, libjpeg exposes the DCT coefficients for you. I wrote a small app to show that it works (source at the end). It makes a new image using the DC term from each block. Because the DC term is equal to the block average (after proper scaling), the DC images are downsampled versions of the input JPEG image.
EDIT: fixed scaling in source
Original image (512 x 512):
DC images (64x64): luma Cr Cb RGB
Source (C++):
#include <stdio.h>
#include <assert.h>
#include <cv.h>
#include <highgui.h>
extern "C"
{
#include "jpeglib.h"
#include <setjmp.h>
}
#define DEBUG 0
#define OUTPUT_IMAGES 1
/*
 * Extract the DC terms from the specified component.
 *
 * Builds a one-channel 8-bit image whose pixels are the de-quantized DC
 * coefficients of each 8x8 block, rescaled to [0, 255].  Because the DC
 * term is proportional to the block average, the result is a downsampled
 * view of that component.
 *
 * cinfo  - decompressor on which jpeg_read_coefficients() has been called
 * coeffs - virtual coefficient arrays returned by jpeg_read_coefficients()
 * ci     - component index (0 = Y, 1 = Cb, 2 = Cr for a YCbCr JPEG)
 *
 * Returns a newly created image; the caller must cvReleaseImage() it.
 */
IplImage *
extract_dc(j_decompress_ptr cinfo, jvirt_barray_ptr *coeffs, int ci)
{
jpeg_component_info *ci_ptr = &cinfo->comp_info[ci];
CvSize size = cvSize(ci_ptr->width_in_blocks, ci_ptr->height_in_blocks);
IplImage *dc = cvCreateImage(size, IPL_DEPTH_8U, 1);
assert(dc != NULL);
/* The stored DC value is quantized; quantval[0] is the DC entry of the
 * component's quantization table, needed to de-quantize it. */
JQUANT_TBL *tbl = ci_ptr->quant_table;
UINT16 dc_quant = tbl->quantval[0];
#if DEBUG
printf("DCT method: %x\n", cinfo->dct_method);
printf
(
"component: %d (%d x %d blocks) sampling: (%d x %d)\n",
ci,
ci_ptr->width_in_blocks,
ci_ptr->height_in_blocks,
ci_ptr->h_samp_factor,
ci_ptr->v_samp_factor
);
printf("quantization table: %d\n", ci);
for (int i = 0; i < DCTSIZE2; ++i)
{
printf("% 4d ", (int)(tbl->quantval[i]));
if ((i + 1) % 8 == 0)
printf("\n");
}
printf("raw DC coefficients:\n");
#endif
for (JDIMENSION row = 0; row < ci_ptr->height_in_blocks; ++row)
{
/* libjpeg's virtual-array protocol only guarantees the rows actually
 * requested from access_virt_barray; the original code requested
 * v_samp_factor rows once and then indexed past them.  Fetch one block
 * row per iteration instead. */
JBLOCKARRAY buf =
(cinfo->mem->access_virt_barray)
(
(j_common_ptr)cinfo,
coeffs[ci],
row,
(JDIMENSION)1,
FALSE
);
for (JDIMENSION b = 0; b < ci_ptr->width_in_blocks; ++b)
{
/* coeff * quant / DCTSIZE recovers the block mean relative to the
 * DCT level shift; +128 undoes the shift, then clamp to 8 bits. */
int intensity = buf[0][b][0]*dc_quant/DCTSIZE + 128;
intensity = MAX(0, intensity);
intensity = MIN(255, intensity);
cvSet2D(dc, (int)row, (int)b, cvScalar(intensity));
#if DEBUG
printf("% 2d ", buf[0][b][0]);
#endif
}
#if DEBUG
printf("\n");
#endif
}
return dc;
}
/*
 * Nearest-neighbour upscale of a (subsampled) chroma plane to full_size.
 * The caller owns the returned image and must cvReleaseImage() it.
 */
IplImage *upscale_chroma(IplImage *quarter, CvSize full_size)
{
    IplImage *scaled = cvCreateImage(full_size, IPL_DEPTH_8U, 1);
    cvResize(quarter, scaled, CV_INTER_NN);
    return scaled;
}
/*
 * Read a JPEG file and build a 3-channel "DC image".
 *
 * Decodes only the DCT coefficients (no full decompression), extracts the
 * DC term of every 8x8 block per component, upsamples the chroma planes to
 * the luma resolution and merges the planes into *dc in Y, Cr, Cb channel
 * order (so the caller can convert with CV_YCrCb2RGB).
 *
 * Returns 1 on success, 0 on failure.  On success the caller owns *dc and
 * must cvReleaseImage() it; on failure *dc is left untouched.
 */
GLOBAL(int)
read_JPEG_file (char * filename, IplImage **dc)
{
/* This struct contains the JPEG decompression parameters and pointers to
 * working space (which is allocated as needed by the JPEG library).
 */
struct jpeg_decompress_struct cinfo;
struct jpeg_error_mgr jerr;
FILE * infile; /* source file */
/* Open the input file before doing anything else so the error paths below
 * stay simple.  "b" matters on platforms that distinguish binary streams.
 */
if ((infile = fopen(filename, "rb")) == NULL) {
fprintf(stderr, "can't open %s\n", filename);
return 0;
}
/* Step 1: allocate and initialize JPEG decompression object */
cinfo.err = jpeg_std_error(&jerr);
jpeg_create_decompress(&cinfo);
/* Step 2: specify data source (eg, a file) */
jpeg_stdio_src(&cinfo, infile);
/* Step 3: read file parameters; TRUE rejects a tables-only JPEG file */
(void) jpeg_read_header(&cinfo, TRUE);
/* This function assumes a 3-component (YCbCr) image; bail out cleanly on
 * grayscale/CMYK inputs instead of indexing comp_info out of range. */
if (cinfo.num_components < 3) {
fprintf(stderr, "%s: expected 3 components, got %d\n",
filename, cinfo.num_components);
jpeg_destroy_decompress(&cinfo);
fclose(infile);
return 0;
}
/* Step 4: pull the quantized DCT coefficients without decompressing */
jvirt_barray_ptr *coeffs = jpeg_read_coefficients(&cinfo);
IplImage *y = extract_dc(&cinfo, coeffs, 0);
IplImage *cb_q = extract_dc(&cinfo, coeffs, 1);
IplImage *cr_q = extract_dc(&cinfo, coeffs, 2);
/* Chroma is typically subsampled; bring it up to the luma plane size. */
IplImage *cb = upscale_chroma(cb_q, cvGetSize(y));
IplImage *cr = upscale_chroma(cr_q, cvGetSize(y));
cvReleaseImage(&cb_q);
cvReleaseImage(&cr_q);
#if OUTPUT_IMAGES
cvSaveImage("y.png", y);
cvSaveImage("cb.png", cb);
cvSaveImage("cr.png", cr);
#endif
*dc = cvCreateImage(cvGetSize(y), IPL_DEPTH_8U, 3);
/* Fix: the original asserted the out-pointer (trivially non-NULL)
 * instead of the freshly created image. */
assert(*dc != NULL);
cvMerge(y, cr, cb, NULL, *dc);
cvReleaseImage(&y);
cvReleaseImage(&cb);
cvReleaseImage(&cr);
/* Step 7: Finish decompression */
(void) jpeg_finish_decompress(&cinfo);
/* Step 8: release the JPEG object's memory and close the input file. */
jpeg_destroy_decompress(&cinfo);
fclose(infile);
return 1;
}
/*
 * Load the JPEG named on the command line, build the DC image and either
 * save it as rgb.png (OUTPUT_IMAGES) or display it in a window.
 * Returns 0 on success, 1 on usage or decode errors.
 */
int
main(int argc, char **argv)
{
if (argc != 2)
{
fprintf(stderr, "usage: %s filename.jpg\n", argv[0]);
return 1;
}
IplImage *dc = NULL;
/* Fix: the original stored read_JPEG_file's result in `ret` but never
 * checked it, so an unreadable file crashed on the NULL image below. */
if (!read_JPEG_file(argv[1], &dc) || dc == NULL)
{
fprintf(stderr, "failed to read %s\n", argv[1]);
return 1;
}
/* Channels were merged as Y, Cr, Cb, hence CV_YCrCb2RGB. */
IplImage *rgb = cvCreateImage(cvGetSize(dc), IPL_DEPTH_8U, 3);
cvCvtColor(dc, rgb, CV_YCrCb2RGB);
#if OUTPUT_IMAGES
cvSaveImage("rgb.png", rgb);
#else
cvNamedWindow("DC", CV_WINDOW_AUTOSIZE);
cvShowImage("DC", rgb);
cvWaitKey(0);
#endif
cvReleaseImage(&dc);
cvReleaseImage(&rgb);
return 0;
}
You can use libjpeg to extract the DCT data of your JPEG file, but for an H.264 video file I can't find any open-source code that gives you the DCT data (actually integer-DCT data). You can, however, use H.264 open-source software like JM, JSVM or x264. In those source trees you have to find the specific function that applies the DCT and change it to your desired form to get your output DCT data.
For Image:
Use the following code; after read_jpeg_file( infilename, v, quant_tbl ) returns, v and quant_tbl will hold the DCT data and the quantization table of your JPEG image, respectively.
I used Qvector to store my output data, change it to your preferred c++ array list.
#include <iostream>
#include <stdio.h>
#include <jpeglib.h>
#include <stdlib.h>
#include <setjmp.h>
#include <fstream>
#include <QVector>
int read_jpeg_file( char *filename, QVector<QVector<int> > &dct_coeff, QVector<unsigned short> &quant_tbl)
{
struct jpeg_decompress_struct cinfo;
struct jpeg_error_mgr jerr;
FILE * infile;
if ((infile = fopen(filename, "rb")) == NULL) {
fprintf(stderr, "can't open %s\n", filename);
return 0;
}
cinfo.err = jpeg_std_error(&jerr);
jpeg_create_decompress(&cinfo);
jpeg_stdio_src(&cinfo, infile);
(void) jpeg_read_header(&cinfo, TRUE);
jvirt_barray_ptr *coeffs_array = jpeg_read_coefficients(&cinfo);
for (int ci = 0; ci < 1; ci++)
{
JBLOCKARRAY buffer_one;
JCOEFPTR blockptr_one;
jpeg_component_info* compptr_one;
compptr_one = cinfo.comp_info + ci;
for (int by = 0; by < compptr_one->height_in_blocks; by++)
{
buffer_one = (cinfo.mem->access_virt_barray)((j_common_ptr)&cinfo, coeffs_array[ci], by, (JDIMENSION)1, FALSE);
for (int bx = 0; bx < compptr_one->width_in_blocks; bx++)
{
blockptr_one = buffer_one[0][bx];
QVector<int> tmp;
for (int bi = 0; bi < 64; bi++)
{
tmp.append(blockptr_one[bi]);
}
dct_coeff.push_back(tmp);
}
}
}
// coantization table
j_decompress_ptr dec_cinfo = (j_decompress_ptr) &cinfo;
jpeg_component_info *ci_ptr = &dec_cinfo->comp_info[0];
JQUANT_TBL *tbl = ci_ptr->quant_table;
for(int ci =0 ; ci < 64; ci++){
quant_tbl.append(tbl->quantval[ci]);
}
return 1;
}
/*
 * Demo driver: dump the luma DCT coefficients and the quantization table of
 * a hard-coded JPEG file to out_dct.txt — one 64-coefficient block per row,
 * blocks separated by a dashed line.  Returns 0 on success, -1 on failure.
 */
int main()
{
QVector<QVector<int> > v;
QVector<unsigned short> quant_tbl;
/* Fix: a mutable array avoids the (ill-formed since C++11) string-literal
 * to char* conversion while keeping read_jpeg_file's signature intact. */
char infilename[] = "your_image.jpg";
std::ofstream out;
out.open("out_dct.txt");
if( read_jpeg_file( infilename, v, quant_tbl ) > 0 ){
for(int j = 0; j < v.size(); j++ ){
for (int i = 0; i < v[0].size(); ++i){
out << v[j][i] << "\t";
}
out << "---------------" << std::endl;
}
out << "\n\n\n" << std::string(10,'-') << std::endl;
/* Fix: "Qauntization" typo in the output label. */
out << "\nQuantization Table:" << std::endl;
for(int i = 0; i < quant_tbl.size(); i++ ){
out << quant_tbl[i] << "\t";
}
}
else{
std::cout << "Can not read, Returned With Error";
return -1;
}
out.close();
return 0;
}

Resources