[OpenCV] How to fix contours to a rectangle?

Note
I'm new to OpenCV (and computer vision in general), so even just telling me the right search query would be very helpful!
What I want to ask
I want to write a program that extracts business cards from pictures.
I was able to extract a rough outline, but reflected light becomes noise and I can't get an accurate outline. Please share your ideas.
Input image (raw data)
Output image (rough outline)
code
import math
import itertools
from glob import glob
import cv2
import numpy as np
from matplotlib import pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
%matplotlib inline
def read_images():
    "read image data from data directory"
    names = glob('data/*.jpg')
    names.sort()
    return map(lambda name: cv2.imread(name), names)

def blur(img):
    "apply blur"
    return cv2.GaussianBlur(img, (25, 25), 0)

def show_images(images, column, color_type=cv2.COLOR_BGR2RGB):
    "plot images with matplotlib"
    plt.figure(figsize=(10, 10), dpi=150)
    for n, img in zip(range(len(images)), images):
        p = plt.subplot(math.ceil(len(images) / column), column, n + 1)
        p.axis('off')
        if color_type is None:
            p.imshow(img)
        else:
            p.imshow(cv2.cvtColor(img, color_type))
    plt.show()

def detect_background_color(img):
    "detect background color"
    # Assume that the perimeter is all background
    height, width, *_ = img.shape
    background_colors = np.concatenate([
        img[5:height-5, 5], img[5, 5:width-5],
        img[5:height-5, width-5], img[height-5, 5:width-5]
    ])
    background_colors = background_colors.astype(np.float32)
    # Assume that the background color is only one.
    K = 2
    iter_flg = cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER
    _, labels, centers = cv2.kmeans(
        background_colors, K, None, (iter_flg, 10, 1.0), 10,
        cv2.KMEANS_RANDOM_CENTERS)
    cnt1 = len(labels[labels == 0])
    cnt2 = len(labels[labels == 1])
    return centers[0] if cnt1 > cnt2 else centers[1]

def scale(img):
    "distance of each pixel from the background color, scaled to 0-255"
    bg = detect_background_color(img)
    return np.fix(np.sqrt(np.sum(np.square(img - bg), axis=2)) / 1.732).astype(np.uint8)

def binarize(img):
    "binary threshold"
    th, bit = cv2.threshold(img, 40, 255, cv2.THRESH_BINARY)
    return bit

binarized = [binarize(scale(blur(img))) for img in read_images()]
show_images(binarized, 4, None)

Looks like you need to apply morphology: try a cv2.erode followed by a cv2.dilate operation.
The first removes regions smaller than the erode kernel; the second restores the large blobs to their initial size. You need to use the same kernel size for both operations.
See also the OpenCV morphology tutorial and this medium article.
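A minimal sketch of that idea, reusing the binarized list and show_images from the question (the 15x15 kernel size is just a guess; tune it for your images):
import cv2
import numpy as np
# Erode to remove small bright specks caused by reflections, then dilate with
# the same kernel to restore the card blob to its original size (an "opening").
kernel = np.ones((15, 15), np.uint8)
cleaned = [cv2.dilate(cv2.erode(b, kernel), kernel) for b in binarized]
# equivalent one-liner per image: cv2.morphologyEx(b, cv2.MORPH_OPEN, kernel)
show_images(cleaned, 4, None)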

Related

Why isn't RandomCrop inserting the padding in pytorch?

RandomCrop doesn't seem to be applying the padding when I create my images. Why is that?
Reproducible script 1
todo with cifar...
Reproducible script 2:
code:
def check_size_of_mini_imagenet_original_img():
    import random
    import numpy as np
    import torch
    import os
    seed = 0
    os.environ["PYTHONHASHSEED"] = str(seed)
    torch.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    np.random.seed(seed)
    random.seed(seed)
    import learn2learn
    batch_size = 5
    kwargs: dict = dict(name='mini-imagenet', train_ways=2, train_samples=2, test_ways=2, test_samples=2)
    kwargs['data_augmentation'] = 'lee2019'
    benchmark: learn2learn.BenchmarkTasksets = learn2learn.vision.benchmarks.get_tasksets(**kwargs)
    splits = ['train', 'validation', 'test']  # assumed; `splits` was not defined in the original snippet
    tasksets = [(split, getattr(benchmark, split)) for split in splits]
    for i, (split, taskset) in enumerate(tasksets):
        print(f'{taskset=}')
        print(f'{taskset.dataset.dataset.transform=}')
        for task_num in range(batch_size):
            X, y = taskset.sample()
            print(f'{X.size()=}')
            assert X.size(2) == 84
            print(f'{y.size()=}')
            print(f'{y=}')
            for img_idx in range(X.size(0)):
                visualize_pytorch_tensor_img(X[img_idx], show_img_now=True)
                if img_idx >= 5:  # print 5 images only
                    break
            # visualize_pytorch_batch_of_imgs(X, show_img_now=True)
            print()
            if task_num >= 4:  # so to get a MI image finally (note omniglot does not have padding at train...oops!)
                break
            break
        break
and
def visualize_pytorch_tensor_img(tensor_image: torch.Tensor, show_img_now: bool = False):
    """
    Due to channel orders not agreeing in PyTorch and matplotlib.
    Given a Tensor representing the image, use .permute() to put the channels as the last dimension:
    ref: https://stackoverflow.com/questions/53623472/how-do-i-display-a-single-image-in-pytorch
    """
    from matplotlib import pyplot as plt
    assert len(tensor_image.size()) == 3, f'Err your tensor is the wrong shape {tensor_image.size()=} ' \
                                          f'likely it should have been a single tensor with 3 channels ' \
                                          f'i.e. CHW.'
    if tensor_image.size(0) == 3:  # three channels
        plt.imshow(tensor_image.permute(1, 2, 0))
    else:
        plt.imshow(tensor_image)
    if show_img_now:
        plt.tight_layout()
        plt.show()
images here: https://github.com/learnables/learn2learn/issues/376#issuecomment-1319368831
first one:
I am getting odd images, even though printing the transform shows the padding should be applied:
-- splits[i]='train'
taskset=<learn2learn.data.task_dataset.TaskDataset object at 0x7fbc38345880>
taskset.dataset.dataset.datasets[0].dataset.transform=Compose(
ToPILImage()
RandomCrop(size=(84, 84), padding=8)
ColorJitter(brightness=[0.6, 1.4], contrast=[0.6, 1.4], saturation=[0.6, 1.4], hue=None)
RandomHorizontalFlip(p=0.5)
ToTensor()
Normalize(mean=[0.47214064400000005, 0.45330829125490196, 0.4099612805098039], std=[0.2771838538039216, 0.26775040952941176, 0.28449041290196075])
)
but the padding is missing:
but when I use this instead:
train_data_transform = Compose([
    RandomResizedCrop((size - padding*2, size - padding*2), scale=scale, ratio=ratio),
    Pad(padding=padding),
    ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4),
    RandomHorizontalFlip(),
    ToTensor(),
    Normalize(mean=mean, std=std),
])
it seems to work:
Why don't both have the 8-pixel padding on each side that I expect?
I tried viewing the mini-imagenet images from torchmeta as well, and the padding didn't seem to be there either:
task_num=0
Compose(
RandomCrop(size=(84, 84), padding=8)
RandomHorizontalFlip(p=0.5)
ColorJitter(brightness=[0.6, 1.4], contrast=[0.6, 1.4], saturation=[0.6, 1.4], hue=[-0.2, 0.2])
ToTensor()
Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
)
X.size()=torch.Size([25, 3, 84, 84])
The code is much harder to make compact and reproducible, but you can see torchmeta_plot_images_is_the_padding_there in my ultimate-utils library.
For now, since two data sets show that the padding is not being inserted even though the transform says it should be, I am concluding there is a bug in PyTorch (or in my PyTorch version), or I just don't understand RandomCrop. But the description seems clear to me:
padding (int or sequence, optional) –
Optional padding on each border of the image. Default is None. If a single int is provided this is used to pad all borders.
and the normal padding Pad(...) says something very similar:
padding (int or sequence) –
Padding on each border. If a single int is provided this is used to pad all borders.
So what else could go wrong? The bottom image I provided with padding was produced with the Pad() transform above, not with RandomCrop.
cross:
gitissues: https://github.com/learnables/learn2learn/issues/376
pytorch forum: https://discuss.pytorch.org/t/why-isnt-randomcrop-inserting-the-padding-in-pytorch/166244
They are padded from 84 to 84 + 2*8 = 100 and then cropped back to 84: you can see the black padding on each image (e.g., on the left of the 2nd image).
I discovered and confirmed that by doing it on CIFAR. But note this is NOT what the docs say for RandomCrop:
Optional padding on each border of the image. Default is None. If a single int is provided this is used to pad all borders.
it says something very similar to pad:
Padding on each border. If a single int is provided this is used to pad all borders.
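A minimal sketch of that pad-then-crop behavior (my own illustration, not from the docs), using torchvision's functional pad to make the two steps explicit; it assumes a reasonably recent torchvision that accepts tensors:
import torch
from torchvision.transforms import RandomCrop
import torchvision.transforms.functional as F

# RandomCrop(84, padding=8) effectively performs these two steps:
img = torch.rand(3, 84, 84)
padded = F.pad(img, [8, 8, 8, 8])   # pad every border by 8 -> [3, 100, 100]
cropped = RandomCrop(84)(padded)    # random 84x84 window    -> [3, 84, 84]
print(padded.shape, cropped.shape)
# The black border only survives where the random window overlaps the padded edge,
# which is why it is easy to miss in the visualized samples.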
See: https://github.com/learnables/learn2learn/issues/376#issuecomment-1319405466
I am going to report this to PyTorch as a bug: https://github.com/pytorch/pytorch/issues/89253. Reproducible code on CIFAR:
def check_padding_random_crop_cifar_pure_torch():
    # -
    import sys
    print(f'python version: {sys.version=}')
    import torch
    print(f'{torch.__version__=}')
    # -
    from uutils.plot.image_visualization import visualize_pytorch_tensor_img
    from torchvision.transforms import RandomCrop
    # - for determinism
    import random
    random.seed(0)
    import torch
    torch.manual_seed(0)
    import numpy as np
    np.random.seed(0)
    # -
    from pathlib import Path
    root = Path('~/data/').expanduser()
    import torch
    import torchvision
    # - test tensor imgs
    from torchvision.transforms import Resize
    from torchvision.transforms import Pad
    from torchvision.transforms import ToTensor
    from torchvision.transforms import Compose
    # -- see if pad doubles length
    print(f'--- test padding doubles length with Pad(...)')
    transform = Compose([Resize((32, 32)), Pad(padding=4), ToTensor()])
    train = torchvision.datasets.CIFAR100(root=root, train=True, download=True,
                                          transform=transform,
                                          target_transform=lambda data: torch.tensor(data, dtype=torch.long))
    transform = Compose([Resize((32, 32)), Pad(padding=8), ToTensor()])
    test = torchvision.datasets.CIFAR100(root=root, train=True, download=True,
                                         transform=transform,
                                         target_transform=lambda data: torch.tensor(data, dtype=torch.long))
    # - test padding doubles length
    from torch.utils.data import DataLoader
    loader = DataLoader(train)
    x, y = next(iter(loader))
    print(f'{x[0].size()=}')
    assert x[0].size(2) == 32 + 4 * 2
    assert x[0].size(2) == 32 + 8
    visualize_pytorch_tensor_img(x[0], show_img_now=True)
    #
    loader = DataLoader(test)
    x, y = next(iter(loader))
    print(f'{x[0].size()=}')
    assert x.size(2) == 32 + 8 * 2
    assert x.size(2) == 32 + 16
    visualize_pytorch_tensor_img(x[0], show_img_now=True)
    # -- see if RandomCrop also puts the pad
    print(f'--- test RandomCrop indeed puts padding')
    transform = Compose([Resize((32, 32)), RandomCrop(28, padding=8), ToTensor()])
    train = torchvision.datasets.CIFAR100(root=root, train=True, download=True,
                                          transform=transform,
                                          target_transform=lambda data: torch.tensor(data, dtype=torch.long))
    transform = Compose([Resize((32, 32)), RandomCrop(28), ToTensor()])
    test = torchvision.datasets.CIFAR100(root=root, train=True, download=True,
                                         transform=transform,
                                         target_transform=lambda data: torch.tensor(data, dtype=torch.long))
    # - test that the padding is there visually
    from torch.utils.data import DataLoader
    loader = DataLoader(train)
    x, y = next(iter(loader))
    print(f'{x[0].size()=}')
    assert x[0].size(2) == 28
    visualize_pytorch_tensor_img(x[0], show_img_now=True)
    #
    loader = DataLoader(test)
    x, y = next(iter(loader))
    print(f'{x[0].size()=}')
    assert x.size(2) == 28
    visualize_pytorch_tensor_img(x[0], show_img_now=True)

Richardson-Lucy for bitmap image

I am new to Python and am trying to modify an existing Richardson-Lucy program for an image that I have.
Specifically, I have a bitmap image 'flower2.bmp' that I am using to test the following program:
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image, ImageFilter
from scipy.signal import convolve2d as conv2
from skimage import color, data, restoration
Image.open('flower2.bmp').convert('L').save('flower2_gray.bmp')
astro = Image.open('flower2_gray.bmp')
psf = np.ones((5, 5)) / 25
astro = conv2(astro, psf, 'same')
# Add Noise to Image
astro_noisy = astro.copy()
astro_noisy += (np.random.poisson(lam=25, size=astro.shape) - 10) / 255.
# Restore Image using Richardson-Lucy algorithm
deconvolved_RL = restoration.richardson_lucy(astro_noisy, psf, iterations=100)
fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(8, 5))
plt.gray()
for a in (ax[0], ax[1], ax[2]):
    a.axis('off')
ax[0].imshow(astro)
ax[0].set_title('Original Data')
ax[1].imshow(astro_noisy)
ax[1].set_title('Noisy data')
ax[2].imshow(deconvolved_RL, vmin=astro_noisy.min(), vmax=astro_noisy.max())
ax[2].set_title('Restoration using\nRichardson-Lucy')
fig.subplots_adjust(wspace=0.02, hspace=0.2,
top=0.9, bottom=0.05, left=0, right=1)
plt.show()
Below is the image output. I would appreciate help understanding why the Restoration image is black.
Thank you.
The restoration.richardson_lucy documentation notes that the function has an optional "clip" argument, which is true by default:
clip : boolean, optional
True by default. If true, pixel value of the result above 1 or under -1 are thresholded for skimage pipeline compatibility.
However, the astro image read from Image.open has nominal range [0, 255]. I'm guessing that all pixels in astro are >= 1, so that the result after clipping to [-1, 1] is simply a constant image of ones. This could explain the plot.
The fix is to divide astro_noisy by 255 before passing it to restoration.richardson_lucy.
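A minimal sketch of that fix, reusing the names from the question (the key change is scaling the grayscale image to [0, 1] before blurring and deconvolving):
import numpy as np
from PIL import Image
from scipy.signal import convolve2d as conv2
from skimage import restoration

psf = np.ones((5, 5)) / 25
# Scale to [0, 1] so richardson_lucy's default clip=True no longer flattens the result.
astro = np.asarray(Image.open('flower2_gray.bmp'), dtype=np.float64) / 255.
astro = conv2(astro, psf, 'same')
astro_noisy = astro + (np.random.poisson(lam=25, size=astro.shape) - 10) / 255.
deconvolved_RL = restoration.richardson_lucy(astro_noisy, psf, iterations=100)
# (newer scikit-image releases renamed the keyword to num_iter)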

How to split image of table at vertical lines into three images?

I want to split an image of a table at the vertical lines into three images, as shown below. Is it possible? The width of each column is variable. And unfortunately, the left vertical line is drawn down from the header, as you can see.
Input image (input.png)
Output image (output1.png)
Output image (output2.png)
Output image (output3.png)
Update 1
And unfortunately, the left vertical line is drawn down from the header, as you can see.
What I mean is that I guess the following image B would be easier to split, but my case is image A.
Update 2
I am trying the approach @HansHirse gave me. My expectation is that sub_image_1.png, sub_image_2.png and sub_image_3.png are stored in the out folder. But no luck so far. I'm looking into it.
https://github.com/zono/ocr/blob/16fd0ec9a2c7d2e26279ec53947fe7fbab9f526d/src/opencv.py
$ git clone https://github.com/zono/ocr.git
$ cd ocr
$ git checkout 16fd0ec9a2c7d2e26279ec53947fe7fbab9f526d
$ docker-compose up -d
$ docker exec -it ocr /bin/bash
$ python3 opencv.py
Since your table is perfectly aligned, you can inverse binary threshold your image, and count (white) pixels along the y-axis to detect the vertical lines:
You'll need to clean the peaks, since you might get plateaus for the thicker lines.
That'd be my idea in Python OpenCV:
import cv2
import numpy as np
from skimage import io # Only needed for web reading images
# Web read image via scikit-image; convert to OpenCV's BGR color ordering
img = cv2.cvtColor(io.imread('https://i.stack.imgur.com/BTqBs.png'), cv2.COLOR_RGB2BGR)
# Inverse binary threshold grayscale version of image
img_thr = cv2.threshold(cv2.cvtColor(img, cv2.COLOR_BGR2GRAY), 128, 255, cv2.THRESH_BINARY_INV)[1]
# Count pixels along the y-axis, find peaks
thr_y = 200
y_sum = np.count_nonzero(img_thr, axis=0)
peaks = np.where(y_sum > thr_y)[0]
# Clean peaks
thr_x = 50
temp = np.diff(peaks).squeeze()
idx = np.where(temp > thr_x)[0]
peaks = np.concatenate(([0], peaks[idx+1]), axis=0) + 1
# Save sub-images
for i in np.arange(peaks.shape[0] - 1):
    cv2.imwrite('sub_image_' + str(i) + '.png', img[:, peaks[i]:peaks[i+1]])
I get the following three images:
As you can see, you might want to modify the selection by +/- 1 pixel, if an actual line is only 1 pixel wide.
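One possible reading of that adjustment, as a small illustration on the peaks array from the code above (the exact offsets are hypothetical and depend on your lines):
# Widen each slice by one pixel on each side so a 1-pixel-wide line is not lost at a cut.
for i in np.arange(peaks.shape[0] - 1):
    left = max(int(peaks[i]) - 1, 0)
    right = int(peaks[i + 1]) + 1
    cv2.imwrite('sub_image_' + str(i) + '.png', img[:, left:right])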
Hope that helps!
----------------------------------------
System information
----------------------------------------
Platform: Windows-10-10.0.16299-SP0
Python: 3.8.1
NumPy: 1.18.1
OpenCV: 4.2.0
----------------------------------------
OpenCV has a line detection function:
You can filter the lines that are returned by passing min_theta and max_theta. For vertical lines you could specify, say, 88 and 92 degrees (converted to radians) to allow some margin.
This is an edited sample taken from the OpenCV documentation:
import sys
import math
import cv2 as cv
import numpy as np
def main(argv):
    default_file = 'img.png'
    filename = argv[0] if len(argv) > 0 else default_file
    # Loads an image
    src = cv.imread(cv.samples.findFile(filename), cv.IMREAD_GRAYSCALE)
    # some preparation of the photo
    dst = cv.Canny(src, 50, 200, None, 3)
    # Copy edges to the images that will display the results in BGR
    cdst = cv.cvtColor(dst, cv.COLOR_GRAY2BGR)
    cdstP = np.copy(cdst)
    lines = cv.HoughLines(dst, 1, np.pi / 180, 150, None, 0, 0,
                          math.radians(88), math.radians(92))  # min_theta and max_theta, in radians
You can get the x, y coordinates of each line and draw them using the following code.
if lines is not None:
    for i in range(0, len(lines)):
        rho = lines[i][0][0]
        theta = lines[i][0][1]
        a = math.cos(theta)
        b = math.sin(theta)
        x0 = a * rho
        y0 = b * rho
        pt1 = (int(x0 + 1000*(-b)), int(y0 + 1000*(a)))
        pt2 = (int(x0 - 1000*(-b)), int(y0 - 1000*(a)))
        cv.line(cdst, pt1, pt2, (0,0,255), 3, cv.LINE_AA)
Alternatively you can also use HoughLinesP as this allows you to specify a minimum length, which will help your filtering. Also the lines are returned as x,y pairs for each end making it easier to work with.
linesP = cv.HoughLinesP(dst, 1, np.pi / 180, 50, None, 50, 10)
if linesP is not None:
    for i in range(0, len(linesP)):
        l = linesP[i][0]
        cv.line(cdstP, (l[0], l[1]), (l[2], l[3]), (0,0,255), 3, cv.LINE_AA)
cv.imshow("Source", src)
cv.imshow("Detected Lines (in red) - Standard Hough Line Transform", cdst)
cv.imshow("Detected Lines (in red) - Probabilistic Line Transform", cdstP)
cv.waitKey()
return 0
Documentation
To crop your image you can take the x coordinates of the lines you detected and use numpy slicing.
for i in range(0, len(linesP) - 1):
    l = linesP[i][0]
    xcoords = l[0], linesP[i+1][0][0]
    slice = img[:, xcoords[0]:xcoords[1]]
    cv.imshow('slice', slice)
    cv.waitKey(0)

Why am I getting only one channeled-output through the tf.nn.conv2d?

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from scipy.misc import imread
img = imread('dog2.jpg')
#img is a shape of (360, 480, 3)
w = img.shape[0]
h = img.shape[1]
c = img.shape[2]
k = 3 # for my convenience
plt.subplot(1,2,1)
plt.imshow(img)
img = tf.cast(img, tf.float32)
img4d = tf.reshape(img,[1,w,h,c])
diag = np.array([[1,1,1],[0,0,0],[1,1,1]]*k, np.float32)
# diag = np.diag(diag)
diag4d = tf.reshape(diag,[k,k,c,1])
convolved = tf.nn.conv2d(img4d, diag4d, strides=[1,1,1,1], padding='SAME')
with tf.Session() as sess:
    result = sess.run(convolved)
print result.shape
plt.subplot(1,2,2)
plt.imshow(np.squeeze(result))
plt.show()
I am just trying to use convolution to apply a blur effect initially. Yes, I know my kernel values aren't right for that. But my question is: I am giving an input image that has 3 channels, so how can I get an output image with 3 channels? I tried, but all I get is a single-channel result.
You are passing a kernel of shape [3, 3, 3, 1] to tf.nn.conv2d(). If you want to get a 3-channel image output from your convolution, the fourth dimension of your kernel (called out_channels in the official documentation) should be 3 instead of 1; [3, 3, 3, 3] for example.
You could also take a look at the conv2d documentation, this question and this question to better understand Tensorflow's conv2d method.
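A minimal sketch of that (TF1-style, to match the question; the averaging filter is just a stand-in for whatever kernel you want): build a [3, 3, 3, 3] kernel whose off-diagonal channel entries are zero, so each output channel is the filtered version of the corresponding input channel.
import numpy as np
import tensorflow as tf

filt = np.ones((3, 3), np.float32) / 9.0        # simple 3x3 averaging filter
kernel = np.zeros((3, 3, 3, 3), np.float32)     # [height, width, in_channels, out_channels]
for c in range(3):
    kernel[:, :, c, c] = filt                   # channel c only feeds output channel c

img4d = tf.placeholder(tf.float32, [1, None, None, 3])
convolved = tf.nn.conv2d(img4d, kernel, strides=[1, 1, 1, 1], padding='SAME')
# convolved now has shape [1, H, W, 3], i.e. a 3-channel image again.
Alternatively, tf.nn.depthwise_conv2d takes a [3, 3, 3, 1] kernel and filters each channel separately without the zero off-diagonal trick.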

Image not segmenting properly using DBSCAN

I am trying to use DBSCAN from scikit-learn to segment an image based on color. The results I'm getting are [image]. As you can see, there are 3 clusters. My goal is to separate the buoys in the picture into different clusters, but obviously they are showing up as the same cluster. I've tried a wide range of eps and min_samples values, but those two things always cluster together. My code is:
img= cv2.imread("buoy1.jpg)
labimg = cv2.cvtColor(img, cv2.COLOR_BGR2LAB)
n = 0
while(n<4):
labimg = cv2.pyrDown(labimg)
n = n+1
feature_image=np.reshape(labimg, [-1, 3])
rows, cols, chs = labimg.shape
db = DBSCAN(eps=5, min_samples=50, metric = 'euclidean',algorithm ='auto')
db.fit(feature_image)
labels = db.labels_
plt.figure(2)
plt.subplot(2, 1, 1)
plt.imshow(img)
plt.axis('off')
plt.subplot(2, 1, 2)
plt.imshow(np.reshape(labels, [rows, cols]))
plt.axis('off')
plt.show()
I assume this is using the Euclidean distance, and since it's in Lab space the Euclidean distance would differ between different colors. If anyone can give me guidance on this I'd really appreciate it.
Update:
The answer below works. Since DBSCAN requires an array with no more than 2 dimensions, I concatenated the pixel index columns to the original image and reshaped to produce an n x 5 matrix, where n is the x dimension times the y dimension. This seems to work for me.
indices = np.dstack(np.indices(img.shape[:2]))
xycolors = np.concatenate((img, indices), axis=-1)
np.reshape(xycolors, [-1,5])
You need to use both color and position.
Right now, you are using colors only.
Could you please add the entire code to the answer? I'm not able to understand where to add those 3 lines which worked for you – user8306074 Sep 4 at 8:58
Let me answer for you, and here is the full version of the code:
import numpy as np
import cv2
import matplotlib.pyplot as plt
from sklearn.cluster import DBSCAN
img= cv2.imread('your image')
labimg = cv2.cvtColor(img, cv2.COLOR_BGR2LAB)
n = 0
while(n<4):
    labimg = cv2.pyrDown(labimg)
    n = n+1
feature_image=np.reshape(labimg, [-1, 3])
rows, cols, chs = labimg.shape
db = DBSCAN(eps=5, min_samples=50, metric = 'euclidean',algorithm ='auto')
db.fit(feature_image)
labels = db.labels_
indices = np.dstack(np.indices(labimg.shape[:2]))
xycolors = np.concatenate((labimg, indices), axis=-1)
feature_image2 = np.reshape(xycolors, [-1,5])
db.fit(feature_image2)
labels2 = db.labels_
plt.figure(2)
plt.subplot(2, 1, 1)
plt.imshow(img)
plt.axis('off')
# plt.subplot(2, 1, 2)
# plt.imshow(np.reshape(labels, [rows, cols]))
# plt.axis('off')
plt.subplot(2, 1, 2)
plt.imshow(np.reshape(labels2, [rows, cols]))
plt.axis('off')
plt.show()
