Converting from single channel 12-bit little endian raw binary data - imagemagick

I have a raw binary image file where every pixel consists of 12 bit data (gray-scale). For example, the first four pixels in the raw file:
0x0 0xC0
0x1 0x05
0x2 0x5C
0x3 0xC0
0x4 0x05
0x5 0x5C
This corresponds to 4 pixel values with the value 0x5C0 (little endian).
Unfortunately, using the following command:
convert -size 384x184 -depth 12 gray:frame_0.raw out.tiff
interprets the pixel values incorrectly (big endian), resulting in the pixel values 0xC00 0x55C 0xC00 0x55C.
I tried the options -endian LSB and -endian MSB, but unfortunately they only change the output byte order, not the input byte order.
How do I get convert to open the raw image as 12-bit little endian data?

I had a quick try at this. I have no test data, but it should be fairly close, and it should be easy to detect any errors with your images:
// pad12to16.c
// Mark Setchell
// Pad 12-bit data to 16-bit
//
// Compile with:
// gcc pad12to16.c -o pad12to16
//
// Run with:
// ./pad12to16 < 12-bit.dat > 16-bit.dat
#include <stdio.h>
#include <sys/uio.h>
#include <unistd.h>
#include <sys/types.h>
#define BYTESPERREAD 6
#define PIXPERWRITE 4
// Widen packed little-endian 12-bit gray samples to 16-bit samples on stdout.
// Every 3 input bytes hold 2 pixels, so one 6-byte read yields 4 pixels.
int main(){
    unsigned char buf[BYTESPERREAD];
    unsigned short pixel[PIXPERWRITE];
    // Read 6 bytes at a time and decode to 4 off 16-bit pixels.
    // Per 3-byte group (little endian): p0 = b0 | (lo-nibble(b1) << 8),
    // p1 = (b2 << 4) | hi-nibble(b1).
    while(read(0,buf,BYTESPERREAD)==BYTESPERREAD){
        pixel[0] = buf[0] | ((buf[1] & 0xf) << 8);
        pixel[1] = (buf[2] << 4) | ((buf[1] & 0xf0) >> 4);
        // BUG FIX: the third pixel's high nibble lives in buf[4], not buf[2]
        // (buf[2] belongs to the first pixel pair); the old code produced
        // 0xCC0 instead of 0x5C0 for the sample bytes in the question.
        pixel[2] = buf[3] | ((buf[4] & 0xf) << 8);
        pixel[3] = (buf[5] << 4) | ((buf[4] & 0xf0) >> 4);
        // NOTE(review): writes pixel[] in host byte order — assumes a
        // little-endian host to match ImageMagick's gray: reader default.
        write(1,pixel,PIXPERWRITE*2);
    }
    return 0;
}
So you would run this (I think):
./pad12to16 < 12-bit.dat | convert -size 384x184 -depth 16 gray:- result.tif

Mark's answer is correct, as you'll need to involve some external tool to sort out the data stream. Usually there's some sort of padding when working with 12-bit depth. In the example blob provided, we see that each pair of pixels shares a common byte. The task of splitting the shared byte, and shifting what-to-where, is fairly easy. This answer complements Mark's answer, and argues that ImageMagick's C API might as well be used.
// my12bit_convert.c
#include <stdio.h>
#include <stdlib.h>
#include <magick/MagickCore.h>
#include <wand/MagickWand.h>
// Severity slot filled by MagickGetException when a wand call fails.
static ExceptionType serverty;
// High / low nibble of the byte shared by two packed 12-bit pixels.
#define LEADING_HALF(x) (((x) >> 4) & 0xF)
#define FOLLOWING_HALF(x) ((x) & 0xF)
// Scale a 12-bit sample to [0.0, 1.0].
// BUG FIX: dropped the trailing semicolon (it would break a use such as
// `if (c) y = TO_DOUBLE(v); else ...`) and parenthesized the argument.
#define TO_DOUBLE(x) ((double)(x) / (double)0xFFF)
// Report a failed MagickWand call. do/while(0) makes the macro behave as a
// single statement, so it is safe inside unbraced if/else.
#define IS_OK(x,y) do { if((x) == MagickFalse) { fprintf(stderr, "%s\n", MagickGetException((y), &serverty)); } } while(0)
// Convert a packed 12-bit little-endian gray raw file to any format
// ImageMagick can write.  Usage: my12bit_convert -size WxH input.raw output
int main(int argc, const char * argv[]) {
    int i, tmp_pixels[2];
    double *pixel_buffer;
    size_t w = 0,
           h = 0,
           total = 0,     // expected pixel count: w * h
           iterator = 0;  // pixels decoded into pixel_buffer so far
    ssize_t x = 0, y = 0;
    const char *path = NULL, *output = NULL;
    unsigned char read_pixel_chunk[3];
    FILE *fh;
    MagickWand *wand;
    PixelWand *pwand;
    MagickBooleanType ok;
    // Iterate over arguments and collect size, input, & output.
    for (i = 1; i < argc; i++) {
        if (argv[i][0] == '-') {
            if (LocaleCompare("size", &argv[i][1]) == 0) {
                i++;
                if (i == argc) {
                    fprintf(stderr, "Missing `WxH' argument for `-size'.");
                    return EXIT_FAILURE;
                }
                GetGeometry(argv[i], &x, &y, &w, &h);
            }
        } else if (path == NULL) {
            path = argv[i];
        } else {
            output = argv[i];
        }
    }
    // Validate to some degree
    if (path == NULL) {
        fprintf(stderr, "Missing input path\n");
        return EXIT_FAILURE;
    }
    if (output == NULL) {
        fprintf(stderr, "Missing output path\n");
        return EXIT_FAILURE;
    }
    total = w * h;
    if (total == 0) {
        fprintf(stderr, "Unable to determine size of %s. (use `-size WxH')\n", path);
        return EXIT_FAILURE;
    }
    // Allocate memory and start the party!
    pixel_buffer = (double *)malloc(sizeof(double) * total);
    if (pixel_buffer == NULL) {  // BUG FIX: malloc result was never checked
        fprintf(stderr, "Out of memory\n");
        return EXIT_FAILURE;
    }
    MagickWandGenesis();
    // Read input file, and sort 12-bit pixels.
    fh = fopen(path, "rb");
    if (fh == NULL) {
        fprintf(stderr, "Unable to read `%s'\n", path);
        free(pixel_buffer);  // BUG FIX: buffer leaked on this error path
        return EXIT_FAILURE;
    }
    // Each 3-byte chunk decodes to 2 pixels.
    // BUG FIX: loop on fread's return value rather than feof() (feof only
    // turns true *after* a read fails, so the old loop processed a stale
    // chunk at EOF), and stop once pixel_buffer is full so an oversized
    // input file cannot overflow the w*h-element allocation.  This also
    // stops reusing `total` as the fread result.
    while (iterator + 2 <= total && fread(read_pixel_chunk, 3, 1, fh) == 1) {
        // 0xC0 0x05
        // ^------' ==> 0x05C0
        tmp_pixels[0] = FOLLOWING_HALF(read_pixel_chunk[1]) << 8 | read_pixel_chunk[0];
        // 0x05 0x5C
        // '------^ ==> 0x05C0
        tmp_pixels[1] = read_pixel_chunk[2] << 4 | LEADING_HALF(read_pixel_chunk[1]);
        // 0x5C0 / 0xFFF ==> 0.359463
        pixel_buffer[iterator++] = TO_DOUBLE(tmp_pixels[0]);
        pixel_buffer[iterator++] = TO_DOUBLE(tmp_pixels[1]);
    }
    fclose(fh);
    // Create image
    wand = NewMagickWand();
    pwand = NewPixelWand();
    ok = PixelSetColor(pwand, "white");
    IS_OK(ok, wand);
    // Create new Image
    ok = MagickNewImage(wand, w, h, pwand);
    IS_OK(ok, wand);
    // Import pixels as gray, or intensity, values.
    ok = MagickImportImagePixels(wand, x, y, w, h, "I", DoublePixel, pixel_buffer);
    IS_OK(ok, wand);
    // Save output
    ok = MagickWriteImage(wand, output);
    IS_OK(ok, wand);
    // Clean house
    DestroyPixelWand(pwand);
    DestroyMagickWand(wand);
    MagickWandTerminus();
    free(pixel_buffer);
    return 0;
}
Which can be compiled with
LLVM_CFLAGS=`MagickWand-config --cflags`
LLVM_LDFLAGS=`MagickWand-config --ldflags`
clang $LLVM_CFLAGS $LLVM_LDFLAGS -o my12bit_convert my12bit_convert.c
And usage
./my12bit_convert -size 384x184 frame_0.raw out.tiff

Related

ImageMagick colors depth conversion

I have several thousands of tga files (without palette) which contain RGBA4444 data (I know usualy tga files don't contain RGBA4444 data). I would like to convert them into RGBA8888 data. I use the following command line:
convert -depth 4 woody4.tga -depth 8 woody8.tga
In this case, woody4.tga is the original RGBA4444 file, and woody8.tga the target RGBA8888 file but it doesn't change the colors of my pictures, what am I missing?
Thanks,
Pierre
Edit:
Thanks very much Mark, I have successfully converted more than 10,000 TGAs with your program; the result is very good and faithful to the original TGA! This would have been impossible without the parallel command! Just one last point: I have around 50 larger TGAs (the backgrounds of the game) which are coded as RGBA5650 rather than RGBA4444. How can I modify your program to handle RGBA5650? Thanks very much!
Oh, I see Eric beat me to it:-)
Hey ho! I did it a different way anyway and got a different answer so you can see which one you like best. I also wrote some C but I didn't rely on any libraries, I just read the TGA and converted it to a PAM format and let ImageMagick make that into PNG afterwards at command-line.
I chose PAM because it is the simplest file to write which supports transparency - see Wikipedia on PAM format.
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
// Convert an RGBA4444 TGA (argv[1]) to a PAM image on stdout.
int main(int argc,char* argv[]){
    unsigned char buf[64];
    FILE* fp;
    // BUG FIX: argv[1] was dereferenced without checking it was supplied.
    if(argc<2){
        fprintf(stderr,"Usage: %s input.tga > output.pam\n",argv[0]);
        exit(1);
    }
    fp=fopen(argv[1],"rb");
    if(fp==NULL){
        fprintf(stderr,"ERROR: Unable to open %s\n",argv[1]);
        exit(1);
    }
    // Read TGA header of 18 bytes, extract width and height
    // (12 bytes junk, 2 bytes width, 2 bytes height, 2 bytes junk).
    // BUG FIX: verify the whole header was actually read.
    if(fread(buf,1,18,fp)!=18){
        fprintf(stderr,"ERROR: %s is too short to be a TGA\n",argv[1]);
        fclose(fp);
        exit(1);
    }
    unsigned short w=buf[12]|(buf[13]<<8);
    unsigned short h=buf[14]|(buf[15]<<8);
    // Write PAM header
    fprintf(stdout,"P7\n");
    fprintf(stdout,"WIDTH %d\n",w);
    fprintf(stdout,"HEIGHT %d\n",h);
    fprintf(stdout,"DEPTH 4\n");
    fprintf(stdout,"MAXVAL 255\n");
    fprintf(stdout,"TUPLTYPE RGB_ALPHA\n");
    fprintf(stdout,"ENDHDR\n");
    // Read 2 bytes at a time RGBA4444 and widen each nibble into the high
    // half of an 8-bit channel.
    while(fread(buf,2,1,fp)==1){
        unsigned char out[4];
        out[0]=(buf[1]&0x0f)<<4;  // Red
        out[1]=buf[0]&0xf0;       // Green
        out[2]=(buf[0]&0x0f)<<4;  // Blue
        out[3]=buf[1]&0xf0;       // Alpha
        // Write the 4 modified bytes out RGBA8888
        fwrite(out,4,1,stdout);
    }
    fclose(fp);
    return 0;
}
I then compile that with gcc:
gcc targa.c -o targa
Or you could use clang:
clang targa.c -o targa
and run it with
./targa someImage.tga > someImage.pam
and convert the PAM to PNG with ImageMagick at the command-line:
convert someImage.pam someImage.png
If you want to avoid writing the intermediate PAM file to disk, you can pipe it straight into convert like this:
./targa illu_evolution_01.tga | convert - result.png
You can, equally, make a BMP output file if you wish:
./targa illu_evolution_01.tga | convert - result.bmp
If you have thousands of files to do, and you are on a Mac or Linux, you can use GNU Parallel and get them all done in parallel much faster like this:
parallel --eta './targa {} | convert - {.}.png' ::: *.tga
If you have more than a couple of thousand files, you may get "Argument list too long" errors, in which case, use the slightly harder syntax:
find . -name \*tga -print0 | parallel -0 --eta './targa {} | convert - {.}.png'
On a Mac, you would install GNU Parallel with homebrew using:
brew install parallel
For your RGBA5650 images, I will fall back to PPM as my intermediate format because the alpha channel of PAM is no longer needed. The code will now look like this:
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
// Convert an RGBA5650 TGA (argv[1]) to a PPM image on stdout.
int main(int argc,char* argv[]){
    unsigned char buf[64];
    FILE* fp;
    // BUG FIX: argv[1] was dereferenced without checking it was supplied.
    if(argc<2){
        fprintf(stderr,"Usage: %s input.tga > output.ppm\n",argv[0]);
        exit(1);
    }
    fp=fopen(argv[1],"rb");
    if(fp==NULL){
        fprintf(stderr,"ERROR: Unable to open %s\n",argv[1]);
        exit(1);
    }
    // Read TGA header of 18 bytes, extract width and height
    // (12 bytes junk, 2 bytes width, 2 bytes height, 2 bytes junk).
    // BUG FIX: verify the whole header was actually read.
    if(fread(buf,1,18,fp)!=18){
        fprintf(stderr,"ERROR: %s is too short to be a TGA\n",argv[1]);
        fclose(fp);
        exit(1);
    }
    unsigned short w=buf[12]|(buf[13]<<8);
    unsigned short h=buf[14]|(buf[15]<<8);
    // Write PPM header
    fprintf(stdout,"P6\n");
    fprintf(stdout,"%d %d\n",w,h);
    fprintf(stdout,"255\n");
    // Read 2 bytes at a time RGBA5650 and widen each field to 8 bits.
    while(fread(buf,2,1,fp)==1){
        unsigned char out[3];
        out[0]=buf[1]&0xf8;                           // Red: top 5 bits
        out[1]=((buf[1]&7)<<5) | ((buf[0]>>3)&0x1c);  // Green: 6 bits split across both bytes
        out[2]=(buf[0]&0x1f)<<3;                      // Blue: low 5 bits
        // Write the 3 modified bytes out RGB888
        fwrite(out,3,1,stdout);
    }
    fclose(fp);
    return 0;
}
And will compile and run exactly the same way.
Updated answer.
After reading a few documents about TARGA format. I've revised + simplified a C program to convert.
// tga2img.c
#include <stdio.h>
#include <stdlib.h>
#include <wand/MagickWand.h>
// TGA file header.  Fields are fread one-by-one in main (never as a whole
// struct), so compiler padding between the char/short members is harmless.
typedef struct {
unsigned char idlength;        // bytes of image-ID data following the header
unsigned char colourmaptype;   // non-zero when a palette is present
unsigned char datatypecode;    // TGA image type code
short int colourmaporigin;     // first palette entry index
short int colourmaplength;     // number of palette entries
unsigned char colourmapdepth;  // bits per palette entry
short int x_origin;            // image origin X
short int y_origin;            // image origin Y
short int width;               // width in pixels
short int height;              // height in pixels
unsigned char bitsperpixel;    // bits per pixel (16 expected for RGBA4444 input)
unsigned char imagedescriptor; // alpha bits / origin flags
} HEADER;
// TGA v2 footer: the last 26 bytes of the file.  Read for completeness but
// not otherwise used by the converter.
typedef struct {
int extensionoffset;  // offset of the extension area (0 if absent)
int developeroffset;  // offset of the developer directory (0 if absent)
char signature[16];   // presumably "TRUEVISION-XFILE" for v2 files — not validated here
unsigned char p;      // terminator byte
unsigned char n;      // trailing byte
} FOOTER;
// Convert an RGBA4444 TGA (argv[1]) to any ImageMagick-writable format (argv[2]).
int main(int argc, const char * argv[]) {
    HEADER tga_header;
    FOOTER tga_footer;
    FILE *fd;
    size_t tga_data_size, tga_pixel_size, i, j;
    unsigned char *tga_data, *buffer;
    const char *input, *output;
    if (argc != 3) {
        printf("Usage:\n\t %s <input> <output>\n", argv[0]);
        return 1;
    }
    input = argv[1];
    output = argv[2];
    fd = fopen(input, "rb");
    if (fd == NULL) {
        fprintf(stderr, "Unable to read TGA input\n");
        return 1;
    }
    /* TARGA */
    #pragma mark TARGA
    // Read the TGA header field-by-field (avoids struct-padding surprises).
    // NOTE(review): multi-byte fields land in host byte order; TGA files are
    // little-endian, so this assumes a little-endian host — confirm if the
    // tool must run on big-endian targets.
    fread(&tga_header.idlength, sizeof(unsigned char), 1, fd);
    fread(&tga_header.colourmaptype, sizeof(unsigned char), 1, fd);
    fread(&tga_header.datatypecode, sizeof(unsigned char), 1, fd);
    fread(&tga_header.colourmaporigin, sizeof( short int), 1, fd);
    fread(&tga_header.colourmaplength, sizeof( short int), 1, fd);
    fread(&tga_header.colourmapdepth, sizeof(unsigned char), 1, fd);
    fread(&tga_header.x_origin, sizeof( short int), 1, fd);
    fread(&tga_header.y_origin, sizeof( short int), 1, fd);
    fread(&tga_header.width, sizeof( short int), 1, fd);
    fread(&tga_header.height, sizeof( short int), 1, fd);
    fread(&tga_header.bitsperpixel, sizeof(unsigned char), 1, fd);
    fread(&tga_header.imagedescriptor, sizeof(unsigned char), 1, fd);
    // Calculate sizes
    tga_pixel_size = tga_header.bitsperpixel / 8;
    tga_data_size = tga_header.width * tga_header.height * tga_pixel_size;
    // Read image data
    tga_data = (unsigned char *)malloc(tga_data_size);
    if (tga_data == NULL) {  // BUG FIX: malloc was unchecked
        fprintf(stderr, "Out of memory\n");
        fclose(fd);
        return 1;
    }
    fread(tga_data, 1, tga_data_size, fd);
    // Read TGA footer (last 26 bytes of the file).
    fseek(fd, -26, SEEK_END);
    fread(&tga_footer.extensionoffset, sizeof( int), 1, fd);
    fread(&tga_footer.developeroffset, sizeof( int), 1, fd);
    fread(&tga_footer.signature, sizeof( char), 16, fd);
    fread(&tga_footer.p, sizeof(unsigned char), 1, fd);
    fread(&tga_footer.n, sizeof(unsigned char), 1, fd);
    fclose(fd);
    buffer = (unsigned char *)malloc(tga_header.width * tga_header.height * 4);
    if (buffer == NULL) {  // BUG FIX: malloc was unchecked
        fprintf(stderr, "Out of memory\n");
        free(tga_data);
        return 1;
    }
    #pragma mark RGBA4444 to RGBA8888
    // Widen each 4-bit channel into the high nibble of an 8-bit channel.
    for (i = 0, j = 0; i < tga_data_size; i += tga_pixel_size) {
        buffer[j++] = (tga_data[i+1] & 0x0f) << 4; // Red
        buffer[j++] = tga_data[i ] & 0xf0; // Green
        buffer[j++] = (tga_data[i ] & 0x0f) << 4; // Blue
        buffer[j++] = tga_data[i+1] & 0xf0; // Alpha
    }
    free(tga_data);
    /* IMAGEMAGICK */
    #pragma mark IMAGEMAGICK
    MagickWandGenesis();
    PixelWand * background;
    background = NewPixelWand();
    PixelSetColor(background, "none");
    MagickWand * wand;
    wand = NewMagickWand();
    MagickNewImage(wand,
                   tga_header.width,
                   tga_header.height,
                   background);
    background = DestroyPixelWand(background);
    MagickImportImagePixels(wand,
                            0,
                            0,
                            tga_header.width,
                            tga_header.height,
                            "RGBA",
                            CharPixel,
                            buffer);
    free(buffer);
    // BUG FIX: write to `output` — it was assigned but never used (the
    // original re-read argv[2] here).
    MagickWriteImage(wand, output);
    wand = DestroyMagickWand(wand);
    MagickWandTerminus();  // BUG FIX: balance MagickWandGenesis()
    return 0;
}
Which can be compiled with clang $(MagickWand-config --cflags --libs) -o tga2im tga2im.c, and can be executed simply by ./tga2im N_birthday_0000.tga N_birthday_0000.tga.png.
Original answer.
The only way I can think of converting the images is to author a quick program/script to do the bitwise color-pixel logic.
This answer offers a quick way to read the image data; so combining with MagickWand, can be converted easily. (Although I know there'll be better solutions found on old game-dev forums...)
#include <stdio.h>
#include <stdbool.h>
#include <wand/MagickWand.h>
// Minimal in-memory TGA image as produced by LoadTGAFile().
typedef struct
{
unsigned char imageTypeCode;  // TGA type code (LoadTGAFile accepts 2 = RGB, 3 = grayscale)
short int imageWidth;         // width in pixels
short int imageHeight;        // height in pixels
unsigned char bitCount;       // bits per pixel
unsigned char *imageData;     // malloc'd pixel data; caller is responsible for free()
} TGAFILE;
bool LoadTGAFile(const char *filename, TGAFILE *tgaFile);
// Convert an RGBA4444 TGA (argv[1]) to argv[2] via LoadTGAFile + MagickWand.
int main(int argc, const char * argv[]) {
    const char *input, *output;
    if (argc != 3) {
        printf("Usage:\n\t%s <input> <output>\n", argv[0]);
        // BUG FIX: the original printed usage but fell through and went on
        // to dereference the missing argv[1]/argv[2].
        return 1;
    }
    input = argv[1];
    output = argv[2];
    MagickWandGenesis();
    TGAFILE header;
    if (LoadTGAFile(input, &header) == true) {
        // Build a blank canvas image matching TGA file.
        MagickWand * wand;
        wand = NewMagickWand();
        PixelWand * background;
        background = NewPixelWand();
        PixelSetColor(background, "NONE");
        MagickNewImage(wand, header.imageWidth, header.imageHeight, background);
        background = DestroyPixelWand(background);
        // Allocate RGBA8888 buffer
        unsigned char * buffer = (unsigned char *)malloc(header.imageWidth * header.imageHeight * 4);
        // Iterate over TGA image data, and convert RGBA4444 to RGBA8888.
        size_t pixel_size = header.bitCount / 8;
        size_t total_bytes = header.imageWidth * header.imageHeight * pixel_size;
        for (size_t i = 0, j = 0; i < total_bytes; i += pixel_size) {
            // Red
            buffer[j++] = (header.imageData[i ] & 0x0f) << 4;
            // Green
            buffer[j++] = (header.imageData[i ] & 0xf0);
            // Blue
            // BUG FIX: the original `(x & 0xf0) << 4` shifted the nibble out
            // of the byte entirely (blue was always 0); the blue nibble is
            // the low half of the second byte, widened into the high half.
            buffer[j++] = (header.imageData[i+1] & 0x0f) << 4;
            // Alpha
            buffer[j++] = (header.imageData[i+1] & 0xf0);
        }
        // Import image data over blank canvas
        MagickImportImagePixels(wand, 0, 0, header.imageWidth, header.imageHeight, "RGBA", CharPixel, buffer);
        // Write image
        MagickWriteImage(wand, output);
        wand = DestroyMagickWand(wand);
        free(buffer);             // BUG FIX: conversion buffer was leaked
        free(header.imageData);   // BUG FIX: pixel data from LoadTGAFile was leaked
    } else {
        fprintf(stderr, "Could not read TGA file %s\n", input);
    }
    MagickWandTerminus();
    return 0;
}
/*
* Method copied verbatim from https://stackoverflow.com/a/7050007/438117
* Show your love by +1 to Wroclai answer.
*/
bool LoadTGAFile(const char *filename, TGAFILE *tgaFile)
{
    // Reads a type 2 (uncompressed RGB) or type 3 (uncompressed grayscale)
    // TGA into *tgaFile.  Returns false on open/format failure.  On success
    // the caller owns tgaFile->imageData and must free() it.
    FILE *filePtr;
    unsigned char ucharBad;
    short int sintBad;
    long imageSize;
    int colorMode;
    unsigned char colorSwap;
    // Open the TGA file.
    filePtr = fopen(filename, "rb");
    if (filePtr == NULL)
    {
        return false;
    }
    // Read the two first bytes we don't need (ID length, colour-map type).
    fread(&ucharBad, sizeof(unsigned char), 1, filePtr);
    fread(&ucharBad, sizeof(unsigned char), 1, filePtr);
    // Which type of image gets stored in imageTypeCode.
    fread(&tgaFile->imageTypeCode, sizeof(unsigned char), 1, filePtr);
    // For our purposes, the type code should be 2 (uncompressed RGB image)
    // or 3 (uncompressed black-and-white images).
    if (tgaFile->imageTypeCode != 2 && tgaFile->imageTypeCode != 3)
    {
        fclose(filePtr);
        return false;
    }
    // Read 13 bytes of data we don't need (colour-map spec and origin).
    fread(&sintBad, sizeof(short int), 1, filePtr);
    fread(&sintBad, sizeof(short int), 1, filePtr);
    fread(&ucharBad, sizeof(unsigned char), 1, filePtr);
    fread(&sintBad, sizeof(short int), 1, filePtr);
    fread(&sintBad, sizeof(short int), 1, filePtr);
    // Read the image's width and height.
    fread(&tgaFile->imageWidth, sizeof(short int), 1, filePtr);
    fread(&tgaFile->imageHeight, sizeof(short int), 1, filePtr);
    // Read the bit depth.
    fread(&tgaFile->bitCount, sizeof(unsigned char), 1, filePtr);
    // Read one byte of data we don't need (image descriptor).
    fread(&ucharBad, sizeof(unsigned char), 1, filePtr);
    // Color mode -> 3 = BGR, 4 = BGRA.
    colorMode = tgaFile->bitCount / 8;
    imageSize = tgaFile->imageWidth * tgaFile->imageHeight * colorMode;
    // Allocate memory for the image data.
    tgaFile->imageData = (unsigned char*)malloc(sizeof(unsigned char)*imageSize);
    // Read the image data.
    fread(tgaFile->imageData, sizeof(unsigned char), imageSize, filePtr);
    // Change from BGR to RGB so OpenGL can read the image data.
    // BUG FIX: this swap is only meaningful for 3- or 4-byte pixels.  For
    // 16-bit data (colorMode == 2) the unguarded loop swapped bytes across
    // pixel boundaries and, on the last pixel, wrote one byte past the end
    // of the buffer (imageData[imageSize]).
    if (colorMode >= 3)
    {
        for (int imageIdx = 0; imageIdx < imageSize; imageIdx += colorMode)
        {
            colorSwap = tgaFile->imageData[imageIdx];
            tgaFile->imageData[imageIdx] = tgaFile->imageData[imageIdx + 2];
            tgaFile->imageData[imageIdx + 2] = colorSwap;
        }
    }
    fclose(filePtr);
    return true;
}
The order of the color channels may need to be switch around.
I have been thinking about this some more and it ought to be possible to reconstruct the image without any special software - I can't quite see my mistake for the moment, but maybe @emcconville can cast an expert eye over it and point out my error! Pretty please?
So, my concept is that ImageMagick has read in the image size and pixel data correctly but has just allocated the bits according to the standard RGB5551 interpretation of a TARGA file rather than RGBA4444. So, we rebuild the 16-bits of data it read and split them differently.
The first line below does the rebuild into the original 16-bit data, then each subsequent line splits out one of the RGBA channels and then we recombine them:
convert illu_evolution_01.tga -depth 16 -channel R -fx "(((r*255)<<10) | ((g*255)<<5) | (b*255) | ((a*255)<<15))/255" \
\( -clone 0 -channel R -fx "((((r*255)>>12)&15)<<4)/255" \) \
\( -clone 0 -channel R -fx "((((r*255)>>8 )&15)<<4)/255" \) \
\( -clone 0 -channel R -fx "((((r*255) )&15)<<4)/255" \) \
-delete 0 -set colorspace RGB -combine -colorspace sRGB result.png
# The rest is just debug so you can see the reconstructed channels in [rgba].png
convert result.png -channel R -separate r.png
convert result.png -channel G -separate g.png
convert result.png -channel B -separate b.png
convert result.png -channel A -separate a.png
So, the following diagram represents the 16-bits of 1 pixel:
A R R R R R G G G G G B B B B B <--- what IM saw
R R R R G G G G B B B B A A A A <--- what it really meant
Yes, I have disregarded the alpha channel for the moment.

ImageMagick load image into RAM

I have a JPG picture on which I'd like to perform some operations in order to use pattern recognition. The picture is being rotated and also some filters like color inversion, greyscale,.. are applied
The program goes like this
for(i=0;i<360;i++){
rotate(pic,i);
foreach(filter as f){
f(pic);
recognize(pic);
}
}
In order to increase speed I'd like to have the source image loaded in RAM and then read from there. Is it possible?
You can write the image to mpr:, or clone the image instance to a new structure. Regardless if where the original source is in memory, you will still need to copy the data in the first for loop. Here's an example, in C, that holds a wand instance and clones each iteration.
#include <stdio.h>
#include <MagickWand/MagickWand.h>
/* Rotate the wand's image by `degree` degrees, filling the exposed
 * background with white. */
void rotate(MagickWand * wand, double degree) {
    PixelWand * fill = NewPixelWand();
    PixelSetColor(fill, "white");
    MagickRotateImage(wand, fill, degree);
    DestroyPixelWand(fill);
}
/* Resample the wand's image in place with the given filter.  A 0x0 target
 * resolution is passed through unchanged from the original example. */
void _f(MagickWand * wand, FilterTypes filter) {
    const double x_res = 0.0;
    const double y_res = 0.0;
    MagickResampleImage(wand, x_res, y_res, filter);
}
// Placeholder for the caller's pattern-recognition step; intentionally empty.
void recognize(MagickWand * wand) {
// ??? — application-specific; left for the reader to fill in.
}
// Hold one source image in memory and clone it for every rotation/filter
// pass so each iteration starts from pristine pixel data.
int main(int argc, const char * argv[]) {
    MagickWandGenesis();
    MagickWand * wand, * copy_wand;
    wand = NewMagickWand();
    MagickReadImage(wand, "rose:");
    for ( int i = 0; i < 360 ; i++ ) {
        copy_wand = CloneMagickWand(wand);
        for ( FilterTypes f = UndefinedFilter; f < SentinelFilter; f++ ) {
            _f(copy_wand, f);
            recognize(copy_wand);
        }
        // BUG FIX: each clone must be destroyed — the original leaked 360
        // full image copies over the run.
        copy_wand = DestroyMagickWand(copy_wand);
    }
    // BUG FIX: release the source wand before shutting the environment down.
    wand = DestroyMagickWand(wand);
    MagickWandTerminus();
    return 0;
}
The MPR writes to a specific page in memory, and can be identified by a user defined label.
// Cache the source frame in ImageMagick's in-memory pixel registry ("mpr:")
// under a user-chosen label, then re-read it at the top of every iteration
// instead of cloning a wand.
MagickReadImage(wand, "rose:");
MagickWriteImage(wand, "mpr:original"); // Save image to "original" label
for ( int i = 0; i < 360 ; i++ ) {
copy_wand = NewMagickWand();
MagickReadImage(copy_wand, "mpr:original"); // Read image from "original" label
for ( FilterTypes f = UndefinedFilter; f < SentinelFilter; f++ ) {
_f(copy_wand, f);
recognize(copy_wand);
}
// The per-iteration wand is released each pass, so this variant does not leak.
copy_wand = DestroyMagickWand(copy_wand);
}
The last option I can think of is to copy the image pixel-data into memory, and re-reference it with each iteration. This allows some performance improvements, and I'm thinking OpenMP, but you'll lose a lot of helper methods.
MagickReadImage(wand, "rose:");
size_t w = MagickGetImageWidth(wand);
size_t h = MagickGetImageHeight(wand);
size_t data_length = w * h * 4;
char * data = malloc(data_length);
MagickExportImagePixels(wand, 0, 0, w, h, "RGBA", CharPixel, (void *)data);
for ( int i = 0; i < 360; i++ ) {
long * copy_data = malloc(data_length);
memcpy(copy_data, data, data_length);
As you haven't specified a language or an operating system, I'll show you how to do that with Magick++ in C++ in a Linux/OSX environment:
#include <Magick++.h>
#include <iostream>
using namespace std;
using namespace Magick;
int main(int argc,char **argv)
{
InitializeMagick(*argv);
// Create an image object
Image image;
// Read a file into image object
image.read( "input.gif" );
// Crop the image to specified size (width, height, xOffset, yOffset)
image.crop( Geometry(100,100, 0, 0) );
// Repage the image to forget it was part of something bigger
image.repage();
// Write the image to a file
image.write( "result.gif" );
return 0;
}
Compile with:
g++ -o program program.cpp `Magick++-config --cppflags --cxxflags --ldflags --libs`
You will need an image called input.gif for it to read and that should be bigger than 100x100, so create one with:
convert -size 256x256 xc:gray +noise random input.gif

Openni opencv kinect Bad Memory allocation

Basically I've got a loop which goes through all the kinects depth pixels. If they are greater than 3000mm it sets the pixel value to black.
For some reason this works only at a close range while pointed to a wall. If I pull the kinect back (giving it a larger area to scan) I get a Bad Memory allocation error. My code can be found below. I get the bad memory allocation error inside that try catch statement. Most of the code is from the opencv kinect sample here and here.
I figured out the problem: it's because the depth values are stored in a flat array instead of a matrix. I need a better way of mapping each pixel's x,y coordinates (which start from 1,1) to a location in the array, instead of the (i = x+y*640) calculation.
#include <opencv.hpp>
#include <iostream>
#include <string>
#include <stdio.h>
#include <OpenNI.h>
using namespace std;
using namespace cv;
int main()
{
openni::Device device;
openni::VideoStream depth;
const char* device_uri = openni::ANY_DEVICE;
openni::Status ret = openni::OpenNI::initialize();
// Open
ret =device.open( device_uri );
ret = depth.create( device, openni::SENSOR_DEPTH );
if ( ret == openni::STATUS_OK )
{
// Start Depth
depth.start();
}
// Get Depth Stream Min-Max Value
int minDepthValue = depth.getMinPixelValue();
int maxDepthValue = depth.getMaxPixelValue();
//cout << "Depth min-Max Value : " << minDepthValue << "-" << maxDepthValue << endl;
// Frame Information Reference
openni::VideoFrameRef depthFrame;
// Get Sensor Resolution Information
int dImgWidth = depth.getVideoMode().getResolutionX();
int dImgHeight = depth.getVideoMode().getResolutionY();
// Depth Image Matrix
cv::Mat dImg = cv::Mat( dImgHeight, dImgWidth, CV_8UC3 );
Mat grey= cvCreateImage(cvSize(640, 480), 8, 1); ;
for(;;)
{
depth.readFrame( &depthFrame );
openni::DepthPixel* depthImgRaw = (openni::DepthPixel*)depthFrame.getData();
for ( int i = 0 ; i < ( depthFrame.getDataSize() / sizeof( openni::DepthPixel ) ) ; i++ )
{
int idx = i * 3; // Grayscale
unsigned char* data = &dImg.data[idx];
int gray_scale = ( ( depthImgRaw[i] * 255 ) / ( maxDepthValue - minDepthValue ) );
data[0] = (unsigned char)~gray_scale;
data[1] = (unsigned char)~gray_scale;
data[2] = (unsigned char)~gray_scale;
}
openni::DepthPixel* depthpixels = (openni::DepthPixel*)depthFrame.getData();
cvtColor(dImg, grey, CV_RGB2GRAY);
int i ;
try{
for( int y =0; y < 480 ; y++){
//getting in to each pixel in a row
for(int x = 0; x < 640; x++){
//getting out the corresponding pixel value from the array
i = x+y*640;
if (depthpixels[i] >3000)
{
grey.at<unsigned char>(x,y) = 0;
}
}
}
}catch(exception e)
{cout << e.what() <<endl ;
cout <<depthpixels[i] <<endl ;
cout << i <<endl ;
}
// cv:imshow( "depth", dImg );
imshow("dpeth2", grey);
int k = cvWaitKey( 30 ); // About 30fps
if ( k == 0x1b )
break;
}
// Destroy Streams
depth.destroy();
// Close Device
device.close();
// Shutdown OpenNI
openni::OpenNI::shutdown();
return 0;
}
solved the problem simply by swapping my x and y around
// Corrected masking loop: cv::Mat::at takes (row, column) == (y, x).
// NOTE(review): this snippet assumes x and y are declared and i was reset
// to 0 before the loop; i advances in row-major lock-step with (x, y).
for( y =0; y < 480 ; y++)
{
//getting in to each pixel in a row
for( x = 0; x < 640; x++)
{
// Further than 1500 mm: black the pixel out.
if (depthpixels[i]>1500)
{
grey.at<unsigned char >(y,x) = 0;
}
// Closer than 500 mm: black the pixel out as well.
if (depthpixels[i] <500)
{
grey.at<unsigned char >(y,x) = 0;
}
i++;
}
}

On iOS how to quickly convert RGB24 to BGR24?

I use vImageConvert_RGB888toPlanar8 and vImageConvert_Planar8toRGB888 from Accelerate.framework to convert RGB24 to BGR24, but when the data to transform is very big, such as 3 MB or 4 MB, the time spent on this is about 10 ms. Does anyone know a fast enough approach? My code looks like this:
// Swap the R and B channels of a packed RGB24 buffer with vImage:
// deinterleave to three planes, then reinterleave with blue and red
// exchanged.  Frees `pict` when done.
- (void)transformRGBToBGR:(const UInt8 *)pict{
    rgb.data = (void *)pict;
    vImage_Error error = vImageConvert_RGB888toPlanar8(&rgb,&red,&green,&blue,kvImageNoFlags);
    if (error != kvImageNoError) {
        // BUG FIX: '#"' is not valid Objective-C — NSLog takes an @"" literal.
        NSLog(@"vImageConvert_RGB888toARGB8888 error");
    }
    error = vImageConvert_Planar8toRGB888(&blue,&green,&red,&bgr,kvImageNoFlags);
    if (error != kvImageNoError) {
        NSLog(@"vImagePermuteChannels_ARGB8888 error");
    }
    // NOTE(review): frees a const-qualified input — caller must hand over
    // heap memory it no longer owns; confirm this contract at call sites.
    free((void *)pict);
}
With a RGB888ToPlanar8 call you scatter the data and then gather it once again. This is very-very-very bad. If the memory overhead of 33% is affordable, try using the RGBA format and permute the B/R bytes in-place.
If you want to save that 33 percent, then I might suggest the following. Iterate over all the pixels, but read only a multiple of 4 bytes (since lcm(3,4) is 12, that is 3 dwords).
// Sketch (not compilable as-is — nv2/nv3 are elided): process 12 bytes
// (= 4 RGB24 pixels) per iteration as three 32-bit words so the channel
// swap uses dword loads/stores instead of byte accesses.
uint8_t* src_image;
uint8_t* dst_image;
uint32_t* src = (uint32_t*)src_image;
uint32_t* dst = (uint32_t*)dst_image;
uint32_t v1, v2, v3;
uint32_t nv1, nv2, nv3;
for(int i = 0 ; i < num_pixels / 12 ; i++)
{
// read 12 bytes
v1 = *src++;
v2 = *src++;
v3 = *src++;
// shuffle bits in the pixels
// [R1 G1 B1 R2 | G2 B2 R3 G3 | B3 R4 G4 B4]
nv1 = // [B1 G1 R1 B2]
// NOTE(review): as written the terms collide — (v1 >> 8) & 0xFF,
// (v1 >> 16) & 0xFF and (v2 >> 24) & 0xFF all land in byte lane 0, and no
// term shifts data *up* into lanes 2-3, so this cannot produce the
// [B1 G1 R1 B2] layout claimed above.  The real shuffle needs left shifts
// into the destination lanes; rework before using.
((v1 >> 8) & 0xFF) | (v1 & 0x00FF0000) | ((v1 >> 16) & 0xFF) | ((v2 >> 24) & 0xFF);
nv2 = // [G2 R2 B3 G3]
...
nv3 = // [R3 B4 G4 R4]
...
// write 12 bytes
*dst++ = nv1;
*dst++ = nv2;
*dst++ = nv3;
}
Even better can be done with NEON intrinsics.
See this link from ARM's website to see how the 24-bit swapping is done.
The BGR-to-RGB can be done in-place like this:
// In-place BGR <-> RGB swap using NEON: vld3/vst3 deinterleave 8 pixels
// (24 bytes) into three D registers, so swapping d0 and d2 exchanges the
// first and third channel of every pixel.
void neon_asm_convert_BGR_TO_RGB(uint8_t* img, int numPixels24)
{
    // numPixels24 = pixel count divided by 8-pixel (24-byte) chunks
    __asm__ volatile(
        "0:                                       \n"
        "# load 3 64-bit regs with interleave:    \n"
        "vld3.8 {d0,d1,d2}, [%0]                  \n"
        "# swap d0 and d2 - R and B               \n"
        "vswp d0, d2                              \n"
        "# store 3 64-bit regs (post-increment):  \n"
        "vst3.8 {d0,d1,d2}, [%0]!                 \n"
        "subs %1, %1, #1                          \n"
        "bne 0b                                   \n"
        // BUG FIX: %0 and %1 are modified by the asm (writeback store,
        // subs), so they must be declared as read-write outputs ("+r"),
        // not inputs — the old form let the compiler assume they were
        // unchanged, which is undefined behavior.
        : "+r"(img), "+r"(numPixels24)
        :
        // BUG FIX: clobber list now names what the asm actually touches:
        // the NEON registers, the flags (subs) and memory (in-place store).
        // The previous "r4","r5" entries were never used by the asm.
        : "d0", "d1", "d2", "cc", "memory"
    );
}
Just swap the channels - BGRA to RGBA
// Convert a BGRA frame to RGBA with a single vImage channel permutation.
// The map {2,1,0,3} exchanges the B and R positions and leaves G and A.
- (void)convertBGRAFrame:(const CLPBasicVideoFrame &)bgraFrame toRGBA:(CLPBasicVideoFrame &)rgbaFrame
{
    vImage_Buffer bgraImageBuffer = {
        .width = bgraFrame.width,
        .height = bgraFrame.height,
        .rowBytes = bgraFrame.bytesPerRow,
        .data = bgraFrame.rawPixelData
    };
    vImage_Buffer rgbaImageBuffer = {
        .width = rgbaFrame.width,
        .height = rgbaFrame.height,
        .rowBytes = rgbaFrame.bytesPerRow,
        .data = rgbaFrame.rawPixelData
    };
    const uint8_t byteSwapMap[4] = { 2, 1, 0, 3 };
    vImage_Error error;
    error = vImagePermuteChannels_ARGB8888(&bgraImageBuffer, &rgbaImageBuffer, byteSwapMap, kvImageNoFlags);
    if (error != kvImageNoError) {
        // BUG FIX: '#"' is not valid Objective-C — NSLog takes an @"" literal.
        NSLog(@"%s, vImage error %zd", __PRETTY_FUNCTION__, error);
    }
}

Extracting DCT coefficients from encoded images and video

Is there a way to easily extract the DCT coefficients (and quantization parameters) from encoded images and video? Any decoder software must be using them to decode block-DCT encoded images and video. So I'm pretty sure the decoder knows what they are. Is there a way to expose them to whomever is using the decoder?
I'm implementing some video quality assessment algorithms that work directly in the DCT domain. Currently, the majority of my code uses OpenCV, so it would be great if anyone knows of a solution using that framework. I don't mind using other libraries (perhaps libjpeg, but that seems to be for still images only), but my primary concern is to do as little format-specific work as possible (I don't want to reinvent the wheel and write my own decoders). I want to be able to open any video/image (H.264, MPEG, JPEG, etc) that OpenCV can open, and if it's block DCT-encoded, to get the DCT coefficients.
In the worst case, I know that I can write up my own block DCT code, run the decompressed frames/images through it and then I'd be back in the DCT domain. That's hardly an elegant solution, and I hope I can do better.
Presently, I use the fairly common OpenCV boilerplate to open images:
IplImage *image = cvLoadImage(filename);
// Run quality assessment metric
The code I'm using for video is equally trivial:
CvCapture *capture = cvCaptureFromAVI(filename);
while (cvGrabFrame(capture))
{
IplImage *frame = cvRetrieveFrame(capture);
// Run quality assessment metric on frame
}
cvReleaseCapture(&capture);
In both cases, I get a 3-channel IplImage in BGR format. Is there any way I can get the DCT coefficients as well?
Well, I did a bit of reading and my original question seems to be an instance of wishful thinking.
Basically, it's not possible to get the DCT coefficients from H.264 video frames for the simple reason that H.264 doesn't use DCT. It uses a different transform (integer transform). Next, the coefficients for that transform don't necessarily change on a frame-by-frame basis -- H.264 is smarter cause it splits up frames into slices. It should be possible to get those coefficients through a special decoder, but I doubt OpenCV exposes it for the user.
For JPEG, things are a bit more positive. As I suspected, libjpeg exposes the DCT coefficients for you. I wrote a small app to show that it works (source at the end). It makes a new image using the DC term from each block. Because the DC term is equal to the block average (after proper scaling), the DC images are downsampled versions of the input JPEG image.
EDIT: fixed scaling in source
Original image (512 x 512):
DC images (64x64): luma Cr Cb RGB
Source (C++):
#include <stdio.h>
#include <assert.h>
#include <cv.h>
#include <highgui.h>
extern "C"
{
#include "jpeglib.h"
#include <setjmp.h>
}
#define DEBUG 0
#define OUTPUT_IMAGES 1
/*
* Extract the DC terms from the specified component.
*/
IplImage *
extract_dc(j_decompress_ptr cinfo, jvirt_barray_ptr *coeffs, int ci)
{
jpeg_component_info *ci_ptr = &cinfo->comp_info[ci];
// One output pixel per 8x8 DCT block of this component.
CvSize size = cvSize(ci_ptr->width_in_blocks, ci_ptr->height_in_blocks);
IplImage *dc = cvCreateImage(size, IPL_DEPTH_8U, 1);
assert(dc != NULL);
// quantval[0] is the quantizer for the DC term; multiplying by it
// de-quantizes the stored coefficient.
JQUANT_TBL *tbl = ci_ptr->quant_table;
UINT16 dc_quant = tbl->quantval[0];
#if DEBUG
printf("DCT method: %x\n", cinfo->dct_method);
printf
(
"component: %d (%d x %d blocks) sampling: (%d x %d)\n",
ci,
ci_ptr->width_in_blocks,
ci_ptr->height_in_blocks,
ci_ptr->h_samp_factor,
ci_ptr->v_samp_factor
);
printf("quantization table: %d\n", ci);
for (int i = 0; i < DCTSIZE2; ++i)
{
printf("% 4d ", (int)(tbl->quantval[i]));
if ((i + 1) % 8 == 0)
printf("\n");
}
printf("raw DC coefficients:\n");
#endif
// Map the component's virtual coefficient array into memory.
// NOTE(review): only v_samp_factor rows are requested here, yet the loop
// below indexes buf through height_in_blocks rows — confirm that
// access_virt_barray exposes the full array in this decompression state.
JBLOCKARRAY buf =
(cinfo->mem->access_virt_barray)
(
(j_common_ptr)cinfo,
coeffs[ci],
0,
ci_ptr->v_samp_factor,
FALSE
);
for (int sf = 0; (JDIMENSION)sf < ci_ptr->height_in_blocks; ++sf)
{
for (JDIMENSION b = 0; b < ci_ptr->width_in_blocks; ++b)
{
// De-quantize the DC term, scale by 1/DCTSIZE toward the block average,
// re-center around mid-gray, then clamp to the 8-bit range.
int intensity = 0;
intensity = buf[sf][b][0]*dc_quant/DCTSIZE + 128;
intensity = MAX(0, intensity);
intensity = MIN(255, intensity);
cvSet2D(dc, sf, (int)b, cvScalar(intensity));
#if DEBUG
printf("% 2d ", buf[sf][b][0]);
#endif
}
#if DEBUG
printf("\n");
#endif
}
return dc;
}
/* Nearest-neighbour upscale of a sub-sampled chroma plane to the full
 * (luma) size, so it can be merged with the luma channel. */
IplImage *upscale_chroma(IplImage *quarter, CvSize full_size)
{
    IplImage *scaled = cvCreateImage(full_size, IPL_DEPTH_8U, 1);
    cvResize(quarter, scaled, CV_INTER_NN);
    return scaled;
}
/*
 * Read the given JPEG and build a 3-channel (YCrCb) DC image in *dc:
 * one pixel per DCT block, chroma planes upscaled to the luma block grid.
 *
 * Returns 1 on success, 0 on failure (file unreadable or not 3-component).
 * On success *dc is a newly created image the caller must release.
 */
GLOBAL(int)
read_JPEG_file (char * filename, IplImage **dc)
{
  /* This struct contains the JPEG decompression parameters and pointers to
   * working space (which is allocated as needed by the JPEG library).
   */
  struct jpeg_decompress_struct cinfo;
  struct jpeg_error_mgr jerr;
  /* More stuff */
  FILE * infile;                /* source file */
  /* VERY IMPORTANT: use "b" option to fopen() if you are on a machine that
   * requires it in order to read binary files.
   */
  if ((infile = fopen(filename, "rb")) == NULL) {
    fprintf(stderr, "can't open %s\n", filename);
    return 0;
  }
  /* Step 1: allocate and initialize JPEG decompression object */
  cinfo.err = jpeg_std_error(&jerr);
  jpeg_create_decompress(&cinfo);
  /* Step 2: specify data source (eg, a file) */
  jpeg_stdio_src(&cinfo, infile);
  /* Step 3: read file parameters with jpeg_read_header() */
  (void) jpeg_read_header(&cinfo, TRUE);
  /* We can ignore the return value from jpeg_read_header since
   * (a) suspension is not possible with the stdio data source, and
   * (b) we passed TRUE to reject a tables-only JPEG file as an error.
   * See libjpeg.txt for more info.
   */
  /* ROBUSTNESS FIX: the code below unconditionally reads components 1 and 2;
   * bail out cleanly on grayscale (single-component) JPEGs instead of
   * reading past comp_info. */
  if (cinfo.num_components < 3) {
    fprintf(stderr, "%s: expected a 3-component (YCbCr) JPEG\n", filename);
    jpeg_destroy_decompress(&cinfo);
    fclose(infile);
    return 0;
  }
  /* Step 4: read the (quantized) coefficient arrays instead of decoding
   * to pixels. */
  jvirt_barray_ptr *coeffs = jpeg_read_coefficients(&cinfo);
  IplImage *y = extract_dc(&cinfo, coeffs, 0);
  IplImage *cb_q = extract_dc(&cinfo, coeffs, 1);
  IplImage *cr_q = extract_dc(&cinfo, coeffs, 2);
  /* Chroma is typically sub-sampled; bring it up to the luma block grid. */
  IplImage *cb = upscale_chroma(cb_q, cvGetSize(y));
  IplImage *cr = upscale_chroma(cr_q, cvGetSize(y));
  cvReleaseImage(&cb_q);
  cvReleaseImage(&cr_q);
#if OUTPUT_IMAGES
  cvSaveImage("y.png", y);
  cvSaveImage("cb.png", cb);
  cvSaveImage("cr.png", cr);
#endif
  *dc = cvCreateImage(cvGetSize(y), IPL_DEPTH_8U, 3);
  /* BUG FIX: the original asserted dc != NULL, i.e. the address of the
   * caller's pointer (always true here); the allocation result is *dc. */
  assert(*dc != NULL);
  cvMerge(y, cr, cb, NULL, *dc);
  cvReleaseImage(&y);
  cvReleaseImage(&cb);
  cvReleaseImage(&cr);
  /* Step 7: Finish decompression */
  (void) jpeg_finish_decompress(&cinfo);
  /* We can ignore the return value since suspension is not possible
   * with the stdio data source.
   */
  /* Step 8: Release JPEG decompression object */
  /* This is an important step since it will release a good deal of memory. */
  jpeg_destroy_decompress(&cinfo);
  fclose(infile);
  return 1;
}
/*
 * Load the JPEG named on the command line, build the per-block DC image,
 * convert it to RGB and either save it (OUTPUT_IMAGES) or display it.
 */
int
main(int argc, char **argv)
{
    if (argc != 2)
    {
        fprintf(stderr, "usage: %s filename.jpg\n", argv[0]);
        return 1;
    }
    IplImage *dc = NULL;
    /* BUG FIX: the original ignored read_JPEG_file()'s return value and
     * relied on assert(dc != NULL), which vanishes under NDEBUG and would
     * leave a NULL dereference on failure. */
    if (!read_JPEG_file(argv[1], &dc) || dc == NULL)
    {
        fprintf(stderr, "failed to read %s\n", argv[1]);
        return 1;
    }
    IplImage *rgb = cvCreateImage(cvGetSize(dc), IPL_DEPTH_8U, 3);
    /* dc holds channels in Y/Cr/Cb order (see cvMerge in read_JPEG_file) */
    cvCvtColor(dc, rgb, CV_YCrCb2RGB);
#if OUTPUT_IMAGES
    cvSaveImage("rgb.png", rgb);
#else
    cvNamedWindow("DC", CV_WINDOW_AUTOSIZE);
    cvShowImage("DC", rgb);
    cvWaitKey(0);
#endif
    cvReleaseImage(&dc);
    cvReleaseImage(&rgb);
    return 0;
}
You can use libjpeg to extract the DCT data of your JPEG file, but for an H.264 video file I can't find any open-source code that gives you the DCT data (actually integer-transform data). You can, however, use open-source H.264 software such as JM, JSVM or x264. In those code bases you have to find the specific function that applies the transform, and change it to the form you need in order to get the output DCT data.
For Image:
use the following code, and after read_jpeg_file( infilename, v, quant_tbl ), v and quant_tbl will have dct data and quantization table of your jpeg image respectively.
I used QVector to store my output data; change it to your preferred C++ container if you like.
#include <iostream>
#include <stdio.h>
#include <jpeglib.h>
#include <stdlib.h>
#include <setjmp.h>
#include <fstream>
#include <QVector>
int read_jpeg_file( char *filename, QVector<QVector<int> > &dct_coeff, QVector<unsigned short> &quant_tbl)
{
struct jpeg_decompress_struct cinfo;
struct jpeg_error_mgr jerr;
FILE * infile;
if ((infile = fopen(filename, "rb")) == NULL) {
fprintf(stderr, "can't open %s\n", filename);
return 0;
}
cinfo.err = jpeg_std_error(&jerr);
jpeg_create_decompress(&cinfo);
jpeg_stdio_src(&cinfo, infile);
(void) jpeg_read_header(&cinfo, TRUE);
jvirt_barray_ptr *coeffs_array = jpeg_read_coefficients(&cinfo);
for (int ci = 0; ci < 1; ci++)
{
JBLOCKARRAY buffer_one;
JCOEFPTR blockptr_one;
jpeg_component_info* compptr_one;
compptr_one = cinfo.comp_info + ci;
for (int by = 0; by < compptr_one->height_in_blocks; by++)
{
buffer_one = (cinfo.mem->access_virt_barray)((j_common_ptr)&cinfo, coeffs_array[ci], by, (JDIMENSION)1, FALSE);
for (int bx = 0; bx < compptr_one->width_in_blocks; bx++)
{
blockptr_one = buffer_one[0][bx];
QVector<int> tmp;
for (int bi = 0; bi < 64; bi++)
{
tmp.append(blockptr_one[bi]);
}
dct_coeff.push_back(tmp);
}
}
}
// coantization table
j_decompress_ptr dec_cinfo = (j_decompress_ptr) &cinfo;
jpeg_component_info *ci_ptr = &dec_cinfo->comp_info[0];
JQUANT_TBL *tbl = ci_ptr->quant_table;
for(int ci =0 ; ci < 64; ci++){
quant_tbl.append(tbl->quantval[ci]);
}
return 1;
}
// Demo driver: dump the luma DCT coefficients and the quantization table of
// "your_image.jpg" to out_dct.txt (one block per line, then the table).
int main()
{
    QVector<QVector<int> > v;
    QVector<unsigned short> quant_tbl;
    // BUG FIX: a string literal bound to a non-const char* is ill-formed in
    // C++11 and later; use a writable array since read_jpeg_file() takes a
    // non-const char*.
    char infilename[] = "your_image.jpg";
    if( read_jpeg_file( infilename, v, quant_tbl ) > 0 ){
        // Open the output file only after a successful read, so a failed run
        // does not create/truncate out_dct.txt.
        std::ofstream out("out_dct.txt");
        for(int j = 0; j < v.size(); j++ ){
            for (int i = 0; i < v[j].size(); ++i){
                out << v[j][i] << "\t";
            }
            out << "---------------" << std::endl;
        }
        out << "\n\n\n" << std::string(10,'-') << std::endl;
        // BUG FIX: label typo "Qauntization" corrected.
        out << "\nQuantization Table:" << std::endl;
        for(int i = 0; i < quant_tbl.size(); i++ ){
            out << quant_tbl[i] << "\t";
        }
        out.close();
    }
    else{
        std::cout << "Can not read, Returned With Error";
        return -1;
    }
    return 0;
}

Resources