Is there any tool to extract all comic strips from comic page? - image-processing

I have comic page images like
Link to image
And I want to extract all bordered comic strips from it as individual images.
I don't intend to do it manually. I need some automatic tool for it.

I don't know any tool but with this script you should be able to do it:
Extracted image example
import cv2
import numpy as np
import imutils
from imutils import perspective  # submodule must be imported explicitly

# Extract every bordered comic strip from a page image and save each one
# as its own PNG file (image_<index>.png in the current directory).

img = "comic.jpg"
image = cv2.imread(img)
if image is None:
    # cv2.imread reports failure by returning None instead of raising
    raise FileNotFoundError(f"could not read image: {img}")

gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# blur
blurred = cv2.GaussianBlur(gray, (3, 3), 0)
# threshold it
(T, threshInv) = cv2.threshold(blurred, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)
# find contours; grab_contours hides the differing return shapes of
# cv2.findContours across OpenCV versions
cnts = imutils.grab_contours(
    cv2.findContours(threshInv.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
)
clone = image.copy()
cnts = sorted(cnts, key=cv2.contourArea, reverse=True)  # order contours by area

for i, c in enumerate(cnts):
    (x, y, w, h) = cv2.boundingRect(c)
    area = cv2.contourArea(c)
    extent = area / float(w * h)
    crWidth = w / float(image.shape[1])   # width ratio of contour to image width
    crHeight = h / float(image.shape[0])  # height ratio of contour to image height
    # check if it's noise or a comic strip, change if necessary
    if crWidth > 0.15 or crHeight > 0.15 or extent > 0.8:
        # rotated bounding box
        box = cv2.minAreaRect(c)
        corners = cv2.cv.BoxPoints(box) if imutils.is_cv2() else cv2.boxPoints(box)
        # np.int0 was removed in NumPy 2.0; np.intp is the type it aliased
        box = np.asarray(corners).astype(np.intp)  # gives us a contour
        warped = perspective.four_point_transform(clone, box.reshape(4, 2))
        cv2.imwrite(f'./image_{i}.png', warped)
    else:
        # contours are sorted by area, so the first rejected one ends the scan
        break

Related

OpenCV: How to remove the unwanted parts in an image

I am trying to get the outline of the blue area in an image and then calculate the length and area, as shown in the picture (I have many similar images with the same resolution but different size of the blue areas).
Here is the code I am using:
import cv2
import numpy as np

# Threshold the image, clean it with a morphological opening, erase small
# contours, then report length/area of a contour and show the edge map.

# read image as grayscale
img = cv2.imread('VF2.jpg', cv2.IMREAD_GRAYSCALE)
if img is None:
    # cv2.imread reports failure by returning None instead of raising
    raise FileNotFoundError('VF2.jpg')

# threshold to binary
thresh = cv2.threshold(img, 210, 255, cv2.THRESH_BINARY)[1]  # the 2nd parameter should be changed.

# apply morphology
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
morph = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)

# find contours - write black over all small contours
letter = morph.copy()
cntrs = cv2.findContours(morph, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
# the result is a 2-tuple or a 3-tuple; the contour list sits at index 0 or 1
cntrs = cntrs[0] if len(cntrs) == 2 else cntrs[1]
for c in cntrs:
    area = cv2.contourArea(c)
    print(area)
    if area < 100:
        cv2.drawContours(letter, [c], 0, (0, 0, 0), -1)

# do canny edge detection
edges = cv2.Canny(letter, 200, 200)  # the result for edges is good.

if len(cntrs) > 0:
    # NOTE: cntrs[0] is simply the first contour returned, not necessarily
    # the largest one, and it may be one of the small contours erased above.
    first = cntrs[0]
    length = cv2.arcLength(first, False)  # not closed curves
    print('length = ', length)  # both length and area need calibration
    area = cv2.contourArea(first)
    print('area = ', area)
    # Outputs
    print(np.squeeze(first), '\n')  # Contour
    print('Contour points:', first.shape[0], '\n')
    print('arcLength:', cv2.arcLength(first, True))  # closed curves
else:
    print('no contours found')

# show results
cv2.imshow("K_letter", letter)
cv2.imshow("K_edges", edges)
cv2.waitKey(0)
cv2.destroyAllWindows()
I used the above code and obtained the outline, but with some additional parts, as highlighted in the following image. Can anyone help me remove the additional parts and make the outline closed? Thanks a lot.
Change the size of your kernel to (4, 4) and perform erosion instead of open, here:
import cv2
import numpy as np

# Same pipeline as in the question, but the image is downscaled, thresholded
# with the inverse mode, and eroded with a 4x4 elliptical kernel instead of
# being opened.

img = cv2.imread("images/flower.jpg", cv2.IMREAD_GRAYSCALE)
if img is None:
    # cv2.imread reports failure by returning None instead of raising
    raise FileNotFoundError("images/flower.jpg")

scale_percent = 60  # percent of original size
width = int(img.shape[1] * scale_percent / 100)
height = int(img.shape[0] * scale_percent / 100)
dim = (width, height)
# resize image
resized = cv2.resize(img, dim, interpolation=cv2.INTER_AREA)

# threshold to binary
thresh = cv2.threshold(resized, 210, 255, cv2.THRESH_BINARY_INV)[1]  # the 2nd parameter should be changed.

# apply morphology; iterations must be passed by keyword because the fourth
# positional parameter of morphologyEx is `dst`, not the iteration count
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (4, 4))
morph = cv2.morphologyEx(thresh, cv2.MORPH_ERODE, kernel, iterations=1)

letter = morph.copy()
cntrs = cv2.findContours(morph, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
# the result is a 2-tuple or a 3-tuple; the contour list sits at index 0 or 1
cntrs = cntrs[0] if len(cntrs) == 2 else cntrs[1]
for c in cntrs:
    area = cv2.contourArea(c)
    print(area)
    if area < 100:
        cv2.drawContours(letter, [c], 0, (0, 0, 0), -1)

# do canny edge detection
edges = cv2.Canny(letter, 200, 200)  # the result for edges is good.

if len(cntrs) > 0:
    # NOTE: cntrs[0] is simply the first contour returned, not necessarily
    # the largest one, and it may be one of the small contours erased above.
    first = cntrs[0]
    length = cv2.arcLength(first, False)  # not closed curves
    print('length = ', length)  # both length and area need calibration
    area = cv2.contourArea(first)
    print('area = ', area)
    # Outputs
    print(np.squeeze(first), '\n')  # Contour
    print('Contour points:', first.shape[0], '\n')
    print('arcLength:', cv2.arcLength(first, True))  # closed curves
else:
    print('no contours found')

# show results
cv2.imshow("K_letter", letter)
cv2.imshow("K_edges", edges)
cv2.waitKey(0)
cv2.destroyAllWindows()

I want to detect all the underlined words in a paragraph

Original Image
Click here for the image
For this, I am trying to detect the underlines first. But as the underlines might be tilted, this code:
import time
from collections import OrderedDict

import cv2
import numpy as np
from google.colab.patches import cv2_imshow

# Find candidate underlines: detect Canny edges, take the external contours,
# and box every contour whose polygon approximation collapses to two points
# (i.e. a line segment).

image = cv2.imread("line_detected.png")
if image is None:
    # cv2.imread reports failure by returning None instead of raising
    raise FileNotFoundError("line_detected.png")

# Grayscale
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# Find Canny edges
font = cv2.FONT_HERSHEY_COMPLEX
edged = cv2.Canny(gray, 30, 200)

# Finding Contours
# A copy is passed because older OpenCV versions alter the input image, and
# `edged` is displayed afterwards. [-2:] keeps (contours, hierarchy) whether
# findContours returns two or three values.
contours, hierarchy = cv2.findContours(
    edged.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
)[-2:]
cv2_imshow(edged)
print("Number of Contours found = " + str(len(contours)))

# Bookkeeping containers; none of them is filled in this snippet
mask = np.ones(image.shape[:2], dtype="uint8") * 255
d = OrderedDict()
coords = []
nuclei = []
l = []
heading = []
images = []
lvalue = 0
line = []
h = []

contours = contours[::-1]
for cnt in contours:
    peri = cv2.arcLength(cnt, True)
    approx = cv2.approxPolyDP(cnt, 0.04 * peri, True)
    # len(approx == 2) would be the length of a boolean array and therefore
    # always truthy; the point count itself has to be compared with 2
    if len(approx) == 2:
        # box_h keeps the height from clobbering the `h` list above
        x, y, w, box_h = cv2.boundingRect(cnt)
        cv2.rectangle(image, (x, y), (x + w, y + box_h), (0, 0, 255), 2)
cv2_imshow(image)
is not able to detect the slanted underlines properly. Also, I want to extend this code so that it detects only the gray underlines. "minor differences" has a single underline, but because it is slanted/tilted, the code reads it as two straight lines. It also reads the images on the left, which it should not read (Tesseract gives weird outputs for them).
For the gray shade only I found this mask thing online:
# Build a binary mask of the pixels whose HSV value lies inside the range.
# NOTE(review): `hsv` is not defined in this snippet; presumably it is the
# image converted with cv2.cvtColor(image, cv2.COLOR_BGR2HSV) - confirm.
# NOTE(review): on OpenCV's 8-bit hue scale (0-179) a hue band of 110-130
# with saturation >= 50 looks like blue rather than gray - verify the range
# before using it for gray underlines.
lower_range = np.array([110,50,50])
upper_range = np.array([130,255,255])
mask = cv2.inRange(hsv, lower_range, upper_range)
But I don't know how to incorporate it into the code... I'm a beginner, so any help is much appreciated!

How to detect computer/laptop's screen in an image?

I am trying to detect the screen of the monitor either computer or laptop.
Original image
Points of the image
First picture is the original image, second image marked with points of the rectangular area I want to detect in an image.
I have tried to get the screen by using cv2.findContours with OpenCV, following this tutorial, but it didn't help. The tutorial used a frontal picture of the screen, but I mostly have images (video frames) taken at an angle, so it fails to grab good contours and define the screen.
Code I used to find the screen:
import numpy as np
import imutils
import cv2
from PIL import Image

# Locate a monitor/laptop screen: find edges, take the largest contours, and
# keep the first one whose polygon approximation has exactly four corners.

args = {
    'query': '/Users/PC/Desktop/screendetect/mm.jpeg'
}


class dotdict(dict):
    """Dictionary whose keys can also be read as attributes."""

    def __getattr__(self, name):
        try:
            return self[name]
        except KeyError:
            # attribute protocols (hasattr, copy, pickle) expect AttributeError
            raise AttributeError(name) from None


args = dotdict(args)

# load the query image, compute the ratio of the old height
# to the new height, clone it, and resize it
image = cv2.imread(args["query"])
if image is None:
    # cv2.imread reports failure by returning None instead of raising
    raise FileNotFoundError(args["query"])
ratio = image.shape[0] / 300.0
orig = image.copy()
image = imutils.resize(image, height=300)

# convert the image to grayscale, blur it, and find edges
# in the image
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
gray = cv2.bilateralFilter(gray, 11, 17, 17)
edged = cv2.Canny(gray, 30, 200)

# find contours and keep the 100 largest
cnts = cv2.findContours(edged.copy(), cv2.RETR_TREE,
                        cv2.CHAIN_APPROX_SIMPLE)
cnts = imutils.grab_contours(cnts)
cnts = sorted(cnts, key=cv2.contourArea, reverse=True)[:100]
screenCnt = None

# loop over our contours
for c in cnts:
    # approximate the contour
    peri = cv2.arcLength(c, True)
    approx = cv2.approxPolyDP(c, 0.03 * peri, True)
    # if our approximated contour has four points, then
    # we can assume that we have found our screen
    if len(approx) == 4:
        screenCnt = approx
        break

if screenCnt is None:
    # drawContours would fail on [None]; stop with a readable message instead
    raise SystemExit("no four-point contour found")

# draw the detected outline on the resized image and save it
d = cv2.drawContours(image, [screenCnt], -1, (0, 255, 0), 1)
cv2.imwrite("/Users/PC/Desktop/screendetect/test_good.jpg", d)
In other test images there will be only one monitor so I need to find only one screen. What is the best approach to tackle this kind of problem?

Segmenting products on the shelf

I am trying to detect edges from the products on a shelf using histogram projections. But I am stuck at 2 levels.
The challenges that I'm facing are:
How to get the longest non-shelf segment from the image, i.e. detect the width of the widest product on the shelf among the available ones.
How to achieve morphological reconstruction using custom markers. To eliminate
all small horizontal segments, I am generating 2 markers, which can be seen in 'markers.png' (attached). With them, I am calculating the minimum of the reconstruction outputs from both markers.
Need assistance on this.
Thanks a lot
Below is my python code for the same.
Below is my python code
********************************************************************************
# Shelf-segmentation experiment: per-channel erosion gradient, Hough lines,
# horizontal opening/dilation, a row projection of the edge map, and two
# marker images for a later morphological reconstruction.
# NOTE(review): the original indentation of this script is lost, so the loop
# nesting below cannot be read from the text; it is left exactly as found.
import numpy as np
import cv2 as cv
from matplotlib import pyplot as plt
import math
# Read the input image
# NOTE(review): the result is not checked for a failed read.
img = cv.imread('C:\\Users\\672059\\Desktop\\p2.png')
# Converting from BGR to RGB. Default is BGR.
# img_rgb = cv.cvtColor(img, cv.COLOR_BGR2RGB)
# Resize the image to 150,150
img_resize = cv.resize(img, (150, 150))
# Get the dimensions of the image
img_h, img_w, img_c = img_resize.shape
# Split the image on channels
# NOTE(review): the channels are sliced from `img`, not from `img_resize`,
# so everything derived from them keeps the original image size. With the BGR
# order mentioned above, index 0 would be blue and index 2 red - confirm the
# intended naming.
red = img[:, :, 0]
green = img[:, :, 1]
blue = img[:, :, 2]
# Defining a vse for erosion
# The structuring element is an all-ones array of img_h x img_w, i.e. the
# 150 x 150 size of the resized image.
vse = np.ones((img_h, img_w), dtype=np.uint8)
# Morphological Erosion for red channel
red_erode = cv.erode(red, vse, iterations=1)
# Gradient = channel minus its erosion
grad_red = cv.subtract(red, red_erode)
# Morphological Erosion for green channel
green_erode = cv.erode(green, vse, iterations=1)
grad_green = cv.subtract(green, green_erode)
# Morphological Erosion for blue channel
blue_erode = cv.erode(blue, vse, iterations=1)
grad_blue = cv.subtract(blue, blue_erode)
# Stacking the individual channels into one processed image
grad = [grad_red, grad_green, grad_blue]
retrieved_img = np.stack(grad, axis=-1)
retrieved_img = retrieved_img.astype(np.uint8)
retrieved_img_gray = cv.cvtColor(retrieved_img, cv.COLOR_RGB2GRAY)
plt.title('Figure 1')
plt.imshow(cv.bitwise_not(retrieved_img_gray), cmap=plt.get_cmap('gray'))
plt.show()
# Hough Transform of the image to get the longest non shelf boundary from the image!
edges = cv.Canny(retrieved_img_gray, 127, 255)
# NOTE(review): minLineLength and maxLineGap are assigned here, but the
# HoughLinesP call below passes the literals 1 and 1 instead of them.
minLineLength = img_w
maxLineGap = 10
lines = cv.HoughLinesP(edges, 1, np.pi/180, 127, minLineLength=1, maxLineGap=1)
# NOTE(review): len(lines) below fails if `lines` is None - presumably that
# is what HoughLinesP returns when nothing is detected; confirm.
temp = img.copy()
# `l` collects the Euclidean length of every detected segment
l = []
for x in range(0, len(lines)):
for x1, y1, x2, y2 in lines[x]:
cv.line(temp, (x1, y1), (x2, y2), (0, 255, 0), 2)
d = math.sqrt((x2-x1)**2 + (y2-y1)**2)
l.append(d)
# NOTE(review): `temp` (with the drawn lines) and the lengths in `l` are not
# displayed anywhere in this script.
# Defining a hse for erosion
# 1 x 7 all-ones element, used for the opening below
hse = np.ones((1, 7), dtype=np.uint8)
opening = cv.morphologyEx(retrieved_img_gray, cv.MORPH_OPEN, hse)
plt.title('Figure 2')
plt.subplot(1, 2, 1), plt.imshow(img)
plt.subplot(1, 2, 2), plt.imshow(cv.bitwise_not(opening), 'gray')
plt.show()
# Dilation with disk shaped structuring element
# The element requested here is MORPH_ELLIPSE with size (7, 1)
horizontal_size = 7
horizontalstructure = cv.getStructuringElement(cv.MORPH_ELLIPSE, (horizontal_size, 1))
dilation = cv.dilate(opening, horizontalstructure)
plt.title('Figure 3')
plt.imshow(cv.bitwise_not(dilation), 'gray')
plt.show()
# Doing canny edge on dilated image
edge = cv.Canny(dilation, 127, 255)
plt.title('Figure 4')
# NOTE(review): this shows `edges` (Canny of retrieved_img_gray), not the
# `edge` image computed on the line above - likely a typo.
plt.imshow(edges, cmap='gray')
plt.show()
# Sum of the edge map along axis 1 (one value per image row)
h_projection = edge.sum(axis=1)
print(h_projection)
plt.title('Projection')
plt.plot(h_projection)
plt.show()
listing = []
# NOTE(review): nesting of the next statements is unknown (indentation lost).
# Rows of `dilation` are appended to `listing`, yet the array `a` is built
# from `l` (the Hough segment lengths) - `listing` was probably intended.
# `h = len(l)` is overwritten by the boundingRect unpack two lines later.
# The three-value unpack of findContours only matches OpenCV 3.x.
for i in range(1, len(h_projection)-1):
if h_projection[i-1] == 0 and h_projection[i] == 0:
listing.append(dilation[i])
listing.append(dilation[i-1])
a = np.array([np.array(b) for b in l])
h = len(l)
_, contours, _ = cv.findContours(a, cv.RETR_EXTERNAL, cv.CHAIN_APPROX_SIMPLE)
x, y, w, h = cv.boundingRect(contours[0])
y = y + i - h
cv.rectangle(img, (x, y), (x + w, y + h), (255, 0, 0), 2)
l.clear()
plt.imshow(img)
plt.show()
# Generating a mask
# NOTE(review): despite its name, black_bg is filled with ones, not zeros.
black_bg = np.ones([img_h, img_w], dtype=np.uint8)
# Clone the black bgd image
left = black_bg.copy()
right = black_bg.copy()
# Taking 10% of the image width
ten = int(0.1 * img_w)
# Zero a strip at the left edge of `left` and at the right edge of `right`
left[:, 0:ten+1] = 0
right[:, img_w-ten:img_w+1] = 0
# NOTE(review): the title 'Figure 4' is used a second time here.
plt.title('Figure 4')
plt.subplot(121), plt.imshow(left, 'gray')
plt.subplot(122), plt.imshow(right, 'gray')
plt.show()
# Marker = left and right. Mask = dilation
# NOTE(review): the markers are img_h x img_w (150 x 150) while `dilation`
# derives from the unresized `img`, so the shapes differ unless the input is
# already 150 x 150. No reconstruction step follows in this snippet.
mask = dilation
marker_left = left
marker_right = right
********************************************************************************
markers.png link: https://i.stack.imgur.com/45WJ6.png
********************************************************************************
Based on your input image, I would:
take a picture of an empty fridge
then compare the current image with the empty one.
play with morphological operations
get connected components > size N
If you can't take a empty fridge image:
segment the shelves (threshold white parts)
undo the rotation of the image by using image moments of the shelves
for each shelf:
Threshold on saturation
Do a vertical projection
Count maxima.
Thresholded:
Erode-dilate:
Connected components (width > 10 * height + > minsize):
And you have shelves.
Now take the average Y from each shelf and cut the original image into pieces:
Dither to 8 colors:
and threshold:
Connected components (h>1.5*w, minsize... this is hard here, I played with it :)

How to detect test strips with OpenCV?

I'm a newbie to computer vision, and I'm trying to detect all the test strips in this image:
The result I'm trying to get:
I assume it should be very easy, because all the target objects are in rectangular shape and have a fixed aspect ratio. But I have no idea which algorithm or function should I use.
I've tried edge detection and the 2D feature detection example in OpenCV, but the result is not ideal. How should I detect these similar objects but with small differences?
Update:
The test strips can vary in colors, and of course, the shade of the result lines. But they all have the same references lines, as showing in the picture:
I don't know how should I describe these simple features for object detection, as most examples I found online are for complex objects like a building or a face.
The solution is not exact, but it provides a good starting point. You will have to play with the parameters, though. It would help greatly if you partitioned the strips using some threshold method and then applied Hough lines to each one individually, as @api55 mentioned.
Here are the results I got.
Code.
import cv2
import numpy as np

# Detect long straight edges: blur, take the Laplacian, threshold and
# median-filter it, then overlay every Hough line on the (blurred) image.

# read image
img = cv2.imread('KbxN6.jpg')
if img is None:
    # cv2.imread reports failure by returning None instead of raising
    raise FileNotFoundError('KbxN6.jpg')

# filter it
img = cv2.GaussianBlur(img, (11, 11), 0)
gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# get edges using laplacian
laplacian_val = cv2.Laplacian(gray_img, cv2.CV_32F)

# apply threshold to edges
ret, laplacian_th = cv2.threshold(laplacian_val, thresh=2, maxval=255, type=cv2.THRESH_BINARY)

# filter out salt and pepper noise
laplacian_med = cv2.medianBlur(laplacian_th, 5)
laplacian_fin = np.array(laplacian_med, dtype=np.uint8)

# get lines in the filtered laplacian using Hough lines
lines = cv2.HoughLines(laplacian_fin, 1, np.pi / 180, 480)

# `lines` is None when nothing passes the accumulator threshold. Otherwise
# its layout is (1, N, 2) or (N, 1, 2) depending on the OpenCV version;
# reshaping to (N, 2) visits every line in both cases, whereas iterating
# lines[0] would draw only the first line on the (N, 1, 2) layout.
if lines is not None:
    for rho, theta in lines.reshape(-1, 2):
        a = np.cos(theta)
        b = np.sin(theta)
        x0 = a * rho
        y0 = b * rho
        # two points far apart on the line, 1000 px either side of (x0, y0)
        x1 = int(x0 + 1000 * (-b))
        y1 = int(y0 + 1000 * (a))
        x2 = int(x0 - 1000 * (-b))
        y2 = int(y0 - 1000 * (a))
        # overlay line on original image
        cv2.line(img, (x1, y1), (x2, y2), (0, 255, 0), 2)

# cv2.imwrite('processed.jpg', img)
# cv2.imshow('Window', img)
# cv2.waitKey(0)
This is an alternative solution that uses the function findContours in combination with Canny edge detection. The code is loosely based on this tutorial
import cv2
import numpy as np
import imutils

# Outline the test strips: run Canny on a downscaled copy, find the external
# contours, scale them back up and draw them on the full-size image.

image = cv2.imread('test.jpg')
if image is None:
    # cv2.imread reports failure by returning None instead of raising
    raise FileNotFoundError('test.jpg')

resized = imutils.resize(image, width=300)
# factor that maps coordinates of the resized image back to the original
ratio = image.shape[0] / float(resized.shape[0])

# detect edges on the resized image
edges = cv2.Canny(resized, 100, 200)
cv2.imshow('dsd2', edges)
cv2.waitKey(0)

cnts = cv2.findContours(edges.copy(), cv2.RETR_EXTERNAL,
                        cv2.CHAIN_APPROX_NONE)
# grab_contours picks the contour list whatever the OpenCV version returns;
# indexing [1] would select the hierarchy on OpenCV 4
cnts = imutils.grab_contours(cnts)

# loop over the contours
for c in cnts:
    # multiply the contour (x, y)-coordinates by the resize ratio,
    # then draw the contour on the full-size image
    c = c.astype("float")
    c *= ratio
    c = c.astype("int")
    cv2.drawContours(image, [c], -1, (0, 255, 0), 2)

# save the output image
cv2.imwrite("erg.jpg", image)
Result:
I guess it can be improved by tuning the following parameters:
image resizing width
CHAIN_APPROX_NONE (findContours docs)
It may also be useful to filter out small contours or to merge contours that are close to each other.

Resources