SimpleCV blob tight cropping - orientation

I have an image of a large number of elliptical objects against a dark background. The objects are oriented in many different directions. I need to extract them so that they are all oriented in the same direction (i.e., horizontally) so that they can be tightly cropped.
I have successfully used findBlobs() and crop() to extract the individual objects, but the cropped images preserve their orientation from the original image. I have also successfully rotated the individual objects so that they are horizontal, but this usually chops off the ends of the objects.
Because I know the coordinates and the angle each major axis makes with the x-axis of the original image, I have tried to step through each object's angle and then use findBlobs() to crop only those blobs that have an angle of 0.
I might be making this more difficult than it has to be, so I need some advice.
Here is the code:
from SimpleCV import *
from operator import itemgetter, attrgetter, methodcaller
import math

def rotatedRectWithMaxArea(w, h, angle):
    """
    Given a rectangle of size wxh that has been rotated by 'angle' (in
    radians), computes the width and height of the largest possible
    axis-aligned rectangle (maximal area) within the rotated rectangle.
    http://stackoverflow.com/questions/16702966/rotate-image-and-crop-out-black-borders
    """
    if w <= 0 or h <= 0:
        return 0, 0
    width_is_longer = w >= h
    side_long, side_short = (w, h) if width_is_longer else (h, w)
    # since the solutions for angle, -angle and 180-angle are all the same,
    # it suffices to look at the first quadrant and the absolute values of sin, cos:
    sin_a, cos_a = abs(math.sin(angle)), abs(math.cos(angle))
    if side_short <= 2.*sin_a*cos_a*side_long:
        # half constrained case: two crop corners touch the longer side,
        # the other two corners are on the mid-line parallel to the longer line
        x = 0.5*side_short
        wr, hr = (x/sin_a, x/cos_a) if width_is_longer else (x/cos_a, x/sin_a)
    else:
        # fully constrained case: crop touches all 4 sides
        cos_2a = cos_a*cos_a - sin_a*sin_a
        wr, hr = (w*cos_a - h*sin_a)/cos_2a, (h*cos_a - w*sin_a)/cos_2a
    return wr, hr

Ellipses = Image("Elliptical.jpg")

# now find the location and angle of the blobs
blobs = Ellipses.findBlobs()
for b in blobs:
    r = round(b.angle(), 0)
    [x, y] = b.coordinates()

# now that we know the angles and coordinates of each blob, rotate the
# original image and apply findBlobs iteratively
Ak = 0
for angle in range(0, len(r)):
    [L, W] = Ellipses.size()
    print ("Ellipse Image Length =", L, "Width=", W)
    Ellipses1 = Image("Elliptical.jpg")
    Ellipses1r = Ellipses1.rotate(angle)
    [wr, lr] = rotatedRectWithMaxArea(W, L, angle)
    print ("largest dimensions w, l = ", round(wr, 0), round(lr, 0))
    Ellipses1r.crop(L/2, W/2, lr, wr, centered=True)
    Ellipses1r.save("cropped_rotated" + str(Ak) + ".png")
    blobs1 = Ellipses1.findBlobs()
    Ak += 1

I eventually figured out how to crop a number of randomly oriented ellipses so that they were all horizontally oriented and uniformly cropped. It's probably not the most elegant approach, but it works. First I found the length, angle, and coordinates of each blob and recorded the values for the largest one:
blobs = Ellipses.findBlobs()
k = 0
x = [b for b in blobs]
y = [b for b in blobs]
r = [b for b in blobs]
bl = [b for b in blobs]
bw = [b for b in blobs]
ar = [b for b in blobs]
# largest blob (findBlobs() returns the blobs sorted by area, smallest first)
biggest = blobs[-1]
print ("the largest blob has length =", biggest)
for b in blobs:
    r[k] = round(b.angle(), 0)
    [x[k], y[k]] = b.coordinates()
    #bl[k]=b.length()+.2*b.length()
    # set crop to the largest blob
    bl[k] = b.length()
    ar[k] = b.aspectRatio()
    bw[k] = bl[k]*ar[k]
    if [x[k], y[k]] == [blobs[-1].x, blobs[-1].y]:
        #print ("the largest blob has length =", bl[k])
        bigX = x[k]
        bigY = y[k]
        bigR = r[k]
        bigL = bl[k]
    #print ("angle=",round(r[k],0),"and coordinates=", x[k],y[k],"length=",bl[k],"width=",bw[k])
    k += 1
Then I sized a square crop based on the largest blob, cropped each blob from the original image, and rotated each crop so that its major axis is horizontal:
Ak = 0
for b in blobs:
    #print "number of blobs=",len(blobs)
    angleset = r[Ak]
    Ellipses1 = Image(FN)
    #rotate whole image to make target blob horizontal
    #print "blob length=",bl[Ak],"blob width=",bw[Ak]
    #print "blob aspect Ratio=",ar[Ak], "width=",bl[Ak]*ar[Ak]
    #print "blobs coordinates=", x[Ak],y[Ak],"b1 angle=",angleset
    #crops the individual blobs and saves to disk
    Ellipses1.crop(x[Ak], y[Ak], bigL, bigL, centered=True).save("cropped"+str(angleset)+".png")
    #reads individual cropped blobs from disk
    Ellipses1c = Image("cropped"+str(angleset)+".png")
    [L, W] = Ellipses1c.size()
    #print ("Ellipse1c Image Length =", L, "Width=",W)
    #rotate the individual images so they are horizontal (co-linear with x axis), then saves to disk
    Ellipses1c.rotate(angleset, point=(L/2, L/2)).save("rotated_cropped"+str(angleset)+".png")
    Ak += 1
Next, I reloaded each rotated crop, found the now-horizontal blob within it, and cropped it tightly, saving the final images:
for i in range(0, len(r)):
    angleset = r[i]
    Ellipses2c = Image("rotated_cropped"+str(angleset)+".png")
    [L, W] = Ellipses2c.size()
    print ("Ellipse2c Image Length =", L, "Width=", W)
    blobs2 = Ellipses2c.findBlobs()
    for b in blobs2:
        Ellipses2c.crop(b).save("final_"+FN_prefix+str(angleset)+".png")
This provided a set of images that were suitable for classification. I hope this helps someone.

Related

Perspective Transform using paper

I've got an image from a phone camera with a sheet of paper in it. Some coordinates are also marked in the image so I can measure the distance between them. Since the aspect ratio of the paper is known in advance (0.7072135785007072), I want to correct the distortion so that the whole image looks as if it were taken from a top view. I collect the four corners of the paper and apply OpenCV's getPerspectiveTransform as follows:
import math
import numpy as np
import cv2

pts1 = np.float32([[ 717.,  664.],
                   [1112.,  660.],
                   [1117., 1239.],
                   [ 730., 1238.]])
ratio = 0.7072135785007072
cardH = math.sqrt((pts1[2][0]-pts1[1][0])*(pts1[2][0]-pts1[1][0])+(pts1[2][1]-pts1[1][1])*(pts1[2][1]-pts1[1][1]))
cardW = ratio*cardH
pts2 = np.float32([[pts1[0][0], pts1[0][1]],
                   [pts1[0][0]+cardW, pts1[0][1]],
                   [pts1[0][0]+cardW, pts1[0][1]+cardH],
                   [pts1[0][0], pts1[0][1]+cardH]])
M = cv2.getPerspectiveTransform(pts1, pts2)
With this matrix M, I transform the whole image as follows:
transformed = np.zeros((image.shape[1], image.shape[0]), dtype=np.uint8);
dst = cv2.warpPerspective(image, M, transformed.shape)
_ = cv2.rectangle(dst, (pts2[0][0], pts2[0][1]), (int(pts2[2][0]), int(pts2[2][1])), (0, 255, 0), 2)
The problem with this is that it corrects the perspective of the paper but distorts the overall image, and I don't know why. The input image is this and the corresponding output image is this. In the input image, points M and O are aligned horizontally, but to my surprise, after transforming the overall image, points M and O are no longer aligned horizontally. Why is that happening?

OpenCv warpPerspective meaning of elements of homography

I have a question regarding the meaning of the elements of a projective transformation matrix, e.g. the homography used by OpenCV's warpPerspective.
I know the basics of affine transformations, but here I'm more interested in the projective part, i.e. the elements A31 and A32 in the matrix shown below:
A11 A12 A13
A21 A22 A23
A31 A32 1
I played around with the values a bit, keeping fixed numbers for all the other elements. Meaning:
1 0 0
0 1 0
A31 A32 1
to have just the projective elements.
But what exactly do the elements A31 and A32 do? For comparison, A13 and A23 are responsible for the horizontal and vertical translation.
Is there a simple explanation for these two elements, e.g. a positive value means ...., a negative value means ...., something like that?
I hope someone can help me.
Newton's descriptions are correct, but it might be helpful to actually see the transformations to understand what's going on, and how they might work together with other values in the transformation matrix to make a bit more sense. I'll give some python/OpenCV examples with animations to show what these values do.
import numpy as np
import cv2
img = cv2.imread('img1.png')
h, w = img.shape[:2]
# initializations
max_m20 = 2e-3
nsteps = 50
M = np.eye(3)
So here I'm setting the transformation matrix to be the identity (no transformation). We want to see the effect of changing the element at (2, 0) in the transformation matrix M, so we'll animate by looping through nsteps values linearly spaced between 0 and max_m20.
for m20 in np.linspace(0, max_m20, nsteps):
    M[2, 0] = m20
    warped = cv2.warpPerspective(img, M, (w, h))
    cv2.imshow('warped', warped)
    k = cv2.waitKey(1)
    if k == ord('q') & 0xFF:
        break
I applied this to an image taken from Oxford's Visual Geometry Group.
So indeed, we can see that this is similar to either rotating your camera around a point that is aligned with the left edge of the image, or rotating the image itself around an axis. However, it is a little different from that. Note that the top edge stays along the top the whole time, which is a little strange. If we were rotating around an axis as described above, we would expect the top edge to start coming down on the right side too. Like this:
Well, if you're thinking in terms of transformations, one easy way to get this view is to take the transformation above and add some skew distortion so that the top right side is pushed down as the bottom right corner is pushed up. And that's actually exactly how this view was created:
M = np.eye(3)
max_m20 = 2e-3
max_m10 = 0.6
for m20, m10 in zip(np.linspace(0, max_m20, nsteps), np.linspace(0, max_m10, nsteps)):
    M[2, 0] = m20
    M[1, 0] = m10
    warped = cv2.warpPerspective(img, M, (w, h))
    cv2.imshow('warped', warped)
    k = cv2.waitKey(1)
    if k == ord('q') & 0xFF:
        break
So the right way to think about the perspective in these matrices is, IMO, with the skew entries and the last row together. Those are the two places in the homography matrix where angles actually get modified*; otherwise, it's just rotation, scaling, and translation---all of which are angle preserving.
*Note: Actually, angles can be changed in one more way that I didn't mention. Affine transformations allow for non-uniform scaling, which means you can stretch a shape in width and not in height or vice-versa, which would also change the angles. Imagine if you had a triangle and stretched it only in width; the angles would change. So it turns out that non-uniform scaling (i.e. when the first and middle element of the transformation matrix are different values) can also modify angles in addition to the perspective change and shearing distortions.
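As a quick numeric check of that footnote (my addition, plain NumPy, not part of the original answer): two vectors 45 degrees apart end up roughly 26.6 degrees apart after a non-uniform scale of 2 in x only.

import numpy as np

# non-uniform scaling: x stretched by 2, y unchanged
S = np.array([[2.0, 0.0],
              [0.0, 1.0]])

def angle_between(a, b):
    # angle between two 2D vectors, in degrees
    return np.degrees(np.arccos(a @ b / (np.linalg.norm(a) * np.linalg.norm(b))))

v1, v2 = np.array([1.0, 0.0]), np.array([1.0, 1.0])
print(angle_between(v1, v2))          # 45.0
print(angle_between(S @ v1, S @ v2))  # ~26.57 -- the angle has changed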
Note that in these examples, the same applies to the second entry in the last row together with the other skew location; the only difference is that it happens at the top instead of the left side. Negative values in both cases are akin to rotating the plane about that axis towards the camera instead of away from it.
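For completeness, here is a minimal variant of the loops above (my addition, under the same assumptions about img1.png) that animates the second last-row entry M[2, 1] together with the other skew entry M[0, 1], producing the analogous effect along the top edge:

import numpy as np
import cv2

img = cv2.imread('img1.png')
h, w = img.shape[:2]
nsteps = 50
max_m21 = 2e-3   # perspective entry in the last row (second column)
max_m01 = 0.6    # the matching skew entry
M = np.eye(3)
for m21, m01 in zip(np.linspace(0, max_m21, nsteps), np.linspace(0, max_m01, nsteps)):
    M[2, 1] = m21
    M[0, 1] = m01
    warped = cv2.warpPerspective(img, M, (w, h))
    cv2.imshow('warped', warped)
    k = cv2.waitKey(1)
    if k == ord('q') & 0xFF:
        break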
The (3,1) and (3,2) elements of the homography matrix change the plane of the image; that's the difference between affine and homography matrices. For instance, consider this: A31 changes the plane of your image along the left edge. It's like sticking your image on a stick, like a flag, and rotating it; positive is clockwise and negative is the reverse. The other element does the same from the top edge. Together, they set a plane for your image. That's the simplest way I could put it.

Does image compression reduce actual size of image?

I am performing SVD image compression: SVD allows you to approximate the actual image matrix by a lower-rank matrix of rank v, thus achieving compression (link).
Here's the pseudo-code:
load image_mat % load image as a matrix
[U, S, V] = SVD(image_mat) % U and V are square, S is diagonal (mxn)
set S(v+1:end,:) = 0; set S(:,v+1:end) = 0; % set all values after rank v as zero
new_image = U*S*V';
The problem I am facing is this: Once I perform the lower rank approximation, the old and the new matrix are of the same size (m x n). Both images contain the same number of pixels (since U and V do not change.) Thus, the file size does not (read: CANNOT!) change. However, I see the image quality changing drastically for different values of v.
What am I missing?
EDIT: Further explanation:
Below is the result of the SVD decompression, by rank reduction:
My question is, if the number of pixels in both pictures remains the same, how do I get a file size reduction (compression)? Apart from the fact that the matrix of singular values (S) changes in size, everything else pretty much remains the same (despite the obvious drop in image quality), i.e. the new matrix constructed after decompression has the same size, 512 x 512, as the original image.
You are not missing anything. The original image has m*n values, while the compressed representation has k + k*m + k*n values, where k is the rank: you store only the first k singular values and the corresponding k columns of U and k rows of V', and only reconstruct the full-size m x n image when you need to display it.
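A minimal NumPy sketch of that bookkeeping (my addition, with a random matrix standing in for the image; the sizes and rank are arbitrary): the saving comes from storing the truncated factors rather than the reconstructed image.

import numpy as np

m, n, k = 512, 512, 20
A = np.random.rand(m, n)          # stand-in for a 512 x 512 grayscale image

U, s, Vt = np.linalg.svd(A, full_matrices=False)

# keep only the first k singular triplets -- this is what you would store
Uk, sk, Vtk = U[:, :k], s[:k], Vt[:k, :]

stored_values = Uk.size + sk.size + Vtk.size   # k*m + k + k*n = 20500
original_values = A.size                       # m*n = 262144
print(stored_values, original_values)

# reconstruction is back to the full m x n size (same pixel count, lower rank)
A_k = (Uk * sk) @ Vtk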

How to efficiently find and remove 1 pixel bands of image intensity changes?

We're having some visual artifacts on a normal map for a shader because of some bands of single pixels which contrast strongly with their surroundings. Just to be clear, edges are not an issue, only these single-pixel bands.
Using something like typical Sobel edge detection would not work in this case because, on top of such a band, it would detect 0. I can think of other modifications to the kernel which might work, such as
-1 -2 -1
2 4 2
-1 -2 -1
but I assumed that there was likely a "correct" mathematical way to do such an operation.
In the end, I want to smooth these lines out using the surrounding pixels (a selective blur). These lines could appear in any orientation, so if I were to use the above kernel, I would need to apply it in both directions and add the results to get the line intensity, similar to applying the Sobel kernel.
I assume that you have lines of 1 pixel width in your image that are brighter or darker than their surroundings, and that you want to find them, remove them from the image, and replace the removed pixels with an average of the local neighborhood.
I developed an algorithm for this and it works on my example data (since you did not give any data). It has two parts:
Identification of lines
I could not think of a simple, yet effective filter to detect lines (which are connected, so one would probably need to look at correlations). So I used a simple single pixel detection filter:
-1 -1 -1
-1 8 -1
-1 -1 -1
and then some suitable thresholding.
Extrapolation of data from outside of a mask to the mask
A very elegant solution (using only convolutions) is to take the data outside the mask and convolve it with a Gaussian, then take the complement of the mask and convolve it with the very same Gaussian, and finally divide the two pixelwise. The result within the mask is the desired blurring.
Mathematically, this is a weighted averaging of the data.
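The same trick in a few lines of Python (my addition, not part of the original answer, which uses the Matlab code below; the arrays here are stand-ins and gaussian_filter plays the role of the Gaussian convolution):

import numpy as np
from scipy.ndimage import gaussian_filter

data = np.random.rand(100, 100)           # stand-in image
mask = np.zeros_like(data, dtype=bool)    # True where the bad 1-pixel lines are
mask[50, 20:80] = True                    # a fake line for illustration

valid = ~mask
num = gaussian_filter(data * valid, sigma=1.0)          # smoothed data outside the mask
den = gaussian_filter(valid.astype(float), sigma=1.0)   # smoothed valid-pixel weights
smoothed = num / np.maximum(den, 1e-9)                  # weighted average of the neighbours

reconstruction = np.where(mask, smoothed, data)         # replace only the masked pixels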
Here is my phantom data:
And this is the identification of the lines
And the final result shows that the distortion has been suppressed tenfold:
And finally my code (in Matlab):
%% create phantom data with lines (1pixel wide bands)
[x, y] = ndgrid(1:100, 1:100);
original = 3 * x - 2 * y + 100 * sin(x / 2) + 120 * cos(y / 3); % funny shapes
bw = original > mean(original(:)); % black and white
distortion = bwmorph(bw,'remove'); % some lines
data = original + max(original(:)) * distortion; % phantom
% show
figure();
subplot(1,3,1); imagesc(original); axis image; colormap(hot); title('original');
subplot(1,3,2); imagesc(distortion); axis image; title('distortion');
subplot(1,3,3); imagesc(data); axis image; title('image');
%% line detection
% filter by single pixel filter
pixel_filtered = filter2([-1,-1,-1;-1,8,-1;-1,-1,-1], data);
% create mask by simple thresholding
mask = pixel_filtered > 0.2 * max(pixel_filtered(:));
% show
figure();
subplot(1,2,1); imagesc(pixel_filtered); axis image; colormap(hot); title('filtered');
subplot(1,2,2); imagesc(mask); axis image; title('mask');
%% line removal and interpolation
% smoothing kernel: gaussian
smooth_kernel = fspecial('gaussian', [3, 3], 1);
smooth_kernel = smooth_kernel ./ sum(smooth_kernel(:)); % normalize to one
% smooth image outside mask and divide by smoothed negative mask
smoothed = filter2(smooth_kernel, data .* ~mask) ./ filter2(smooth_kernel, ~mask);
% within the mask, set data to the smoothed values
reconstruction = data .* ~mask + smoothed .* mask;
% show
figure();
subplot(1,3,1); imagesc(reconstruction); axis image; colormap(hot); title('reconstruction');
subplot(1,3,2); imagesc(original); axis image; title('original');
subplot(1,3,3); imagesc(reconstruction - original); axis image; title('difference');

How to identify different objects in an image?

I'm intending to write a program to detect and differentiate certain objects from a nearly solid background. The foreground and the background have a high contrast difference, which I would further increase to aid the object identification process. I'm planning to use the Hough transform technique and OpenCV.
Sample image
As seen in the above image, I would want to separately identify the circular objects and the square objects (or any other shape out of a finite set of shapes). Since I'm quite new to image processing, I don't know whether such a situation requires a neural network, with each shape learned beforehand. Would a technique such as template matching let me do this without a neural network?
These posts will get you started:
How to detect circles
How to detect squares
How to detect a sheet of paper (advanced square detection)
You will probably have to adjust some parameters in these codes to match your circles/squares, but the core of the technique is shown in these examples.
If you intend to detect shapes other than just circles (and from the image I assume you do), I would recommend Chamfer matching for a quick start, especially as you have good contrast.
The basic premise, explained in simple terms, is the following:
You do an edge detection (for example, cvCanny in opencv)
You create a distance image, where the value of each pixel means the distance from the nearest edge.
You take the shapes you would like to detect, define sample points along the edges of the shape, and try to match these points on the distance image. Basically you just add the values on the distance image which are "under" the coordinates of your sample points, given a specific position of your objects.
Find a good minimization algorithm, the effectiveness of this depends on your application.
This basic approach is a general solution that usually works well, but without further refinements it is very slow.
Usually it's a good idea to first separate the objects of interest, so you don't always have to do the full search on the whole image. Find a good threshold so you can separate the objects. You still don't know which object it is, but you only have to do the matching itself in close proximity to that object.
Another good idea is, instead of doing the full search on the high-resolution image, to first do it at a very low resolution. The result will not be very accurate, but it tells you the general areas where it's worth searching at a higher resolution, so you don't waste time on areas where there is nothing of interest.
There are a number of more advanced techniques, but it's still worth taking a look at basic Chamfer matching, as it is the basis of a large number of techniques. A rough sketch of the basic steps is shown below.
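Here is that rough sketch in Python/OpenCV (my addition, not from the original answer; scene.png, shape_template.png, the Canny thresholds, the sampling stride, and the search step are placeholders to adapt), using a naive brute-force search as the minimization step:

import cv2
import numpy as np

scene = cv2.imread('scene.png', cv2.IMREAD_GRAYSCALE)
template = cv2.imread('shape_template.png', cv2.IMREAD_GRAYSCALE)

# 1. edge detection on the scene (thresholds are scene dependent)
edges = cv2.Canny(scene, 100, 200)

# 2. distance image: each pixel holds the distance to the nearest edge
#    (distanceTransform measures distance to the nearest zero pixel, so invert the edges)
dist = cv2.distanceTransform(255 - edges, cv2.DIST_L2, 3)

# 3. sample points along the edges of the shape to detect
template_edges = cv2.Canny(template, 100, 200)
ys, xs = np.nonzero(template_edges)
sample_points = np.column_stack((xs, ys))[::5]   # every 5th edge pixel

# 4. brute-force "minimization": slide the sample points over the distance
#    image and sum the values underneath; the smallest sum is the best match
h, w = template.shape
best_score, best_pos = np.inf, None
for y in range(0, dist.shape[0] - h, 4):          # coarse steps for speed
    for x in range(0, dist.shape[1] - w, 4):
        score = dist[y + sample_points[:, 1], x + sample_points[:, 0]].sum()
        if score < best_score:
            best_score, best_pos = score, (x, y)

print('best match at', best_pos, 'with chamfer score', best_score)

In practice you would replace the double loop with the thresholding and coarse-to-fine search described above.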
With the assumption that the objects are simple shapes, here's an approach using thresholding + contour approximation. Contour approximation is based on the assumption that a curve can be approximated by a series of short line segments which can be used to determine the shape of a contour. For instance, a triangle has three vertices, a square/rectangle has four vertices, a pentagon has five vertices, and so on.
Obtain binary image. We load the image, convert to grayscale, Gaussian blur, then adaptive threshold to obtain a binary image.
Detect shapes. Find contours and identify the shape of each contour using contour approximation filtering. This can be done using arcLength to compute the perimeter of the contour and approxPolyDP to obtain the actual contour approximation.
Input image
Detected objects highlighted in green
Labeled contours
Code
import cv2

def detect_shape(c):
    # Compute perimeter of contour and perform contour approximation
    shape = ""
    peri = cv2.arcLength(c, True)
    approx = cv2.approxPolyDP(c, 0.04 * peri, True)

    # Triangle
    if len(approx) == 3:
        shape = "triangle"

    # Square or rectangle
    elif len(approx) == 4:
        (x, y, w, h) = cv2.boundingRect(approx)
        ar = w / float(h)

        # A square will have an aspect ratio that is approximately
        # equal to one, otherwise, the shape is a rectangle
        shape = "square" if ar >= 0.95 and ar <= 1.05 else "rectangle"

    # Star
    elif len(approx) == 10:
        shape = "star"

    # Otherwise assume as circle or oval
    else:
        shape = "circle"

    return shape

# Load image, grayscale, Gaussian blur, and adaptive threshold
image = cv2.imread('1.jpg')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (7,7), 0)
thresh = cv2.adaptiveThreshold(blur, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 31, 3)

# Find contours and detect shape
cnts = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]

for c in cnts:
    # Identify shape
    shape = detect_shape(c)

    # Find centroid and label shape name
    M = cv2.moments(c)
    cX = int(M["m10"] / M["m00"])
    cY = int(M["m01"] / M["m00"])
    cv2.putText(image, shape, (cX - 20, cY), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (36,255,12), 2)

cv2.imshow('thresh', thresh)
cv2.imshow('image', image)
cv2.waitKey()
