Why is width the first dimension in this code? - machine-learning

# Show one randomly chosen training image and write each pixel's value on top of it.
fig, ax = plt.subplots(figsize = (14, 14))
# NOTE(review): if `randint` is random.randint the upper bound is inclusive, so
# i == len(X_train) is possible and would be out of range — confirm which randint is imported.
i = randint(0, len(X_train))
img = X_train[i]
ax.imshow(img, cmap='gray')
ax.set_title('Digit: {}'.format(y_train[i]), fontsize=16)
# NOTE(review): assuming `img` is a 2-D numpy array, `shape` is (rows, columns), so
# `width` receives the row count here; the loops below still visit every pixel.
width, height = img.shape
# Cut-off used below to pick the text colour.
thresh = img.max()/2.5
# `x` walks the first axis of `img`, `y` the second.
for x in range(width):
for y in range(height):
ax.annotate('{:2}'.format(img[x][y]),
# the annotation position is passed as (y, x), i.e. second-axis index first
xy=(y,x),
horizontalalignment='center',
verticalalignment='center',
# white text where the pixel is below the cut-off, black text otherwise
color='white' if img[x][y]<thresh else 'black')
This code is from https://github.com/stefan-jansen/machine-learning-for-trading/blob/main/18_convolutional_neural_nets/02_digit_classification_with_lenet5.ipynb
I thought height should be the first dimension and I changed the code into
# Same plot as the snippet being asked about, with the two shape names swapped.
fig, ax = plt.subplots(figsize = (14, 14))
# NOTE(review): if `randint` is random.randint the upper bound is inclusive, so
# i == len(X_train) is possible and would be out of range — confirm which randint is imported.
i = randint(0, len(X_train))
img = X_train[i]
ax.imshow(img, cmap='gray')
ax.set_title('Digit: {}'.format(y_train[i]), fontsize=16)
# Assuming `img` is a 2-D numpy array, `shape` is (rows, columns), which is the
# order these two names are unpacked in.
height, width = img.shape
# Cut-off used below to pick the text colour.
thresh = img.max()/2.5
# `x` walks the first axis of `img`, `y` the second.
for x in range(height):
for y in range(width):
ax.annotate('{:2}'.format(img[x][y]),
# the annotation position is passed as (y, x), i.e. second-axis index first
xy=(y,x),
horizontalalignment='center',
verticalalignment='center',
# white text where the pixel is below the cut-off, black text otherwise
color='white' if img[x][y]<thresh else 'black')
Both versions produce identical output, but I still don't understand why the author made width the first dimension.

Related

How to implement an ONNX file for real-time semantic segmentation using a deep neural network

I have a problem with my code, which is shown below:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
# Real-time loop: grab webcam frames, run them through the ONNX network and
# draw the boxes that survive non-maximum suppression.
labels = ['Background', 'Korosi', 'Tanah', 'Tanaman']
np.random.seed(42)
# One fixed colour per label (seeded above so the colours are stable between runs).
COLORS = np.random.randint(0, 255, size=(len(labels), 3), dtype="uint8")
net = cv2.dnn.readNetFromONNX('anomali_model1.onnx')
layer_names = net.getLayerNames()
# getUnconnectedOutLayers() yields 1-based layer indices; depending on the
# OpenCV build they come back nested as (N, 1) or flat, so flatten first.
output_layers = [
    layer_names[i - 1]
    for i in np.array(net.getUnconnectedOutLayers()).flatten()
]
font = cv2.FONT_HERSHEY_PLAIN
capture = cv2.VideoCapture(0)
try:
    while True:
        ok, img = capture.read()
        if not ok:
            # No frame delivered (camera missing or stream ended): stop cleanly.
            break
        # Frame size is needed to scale the relative box coordinates below.
        height, width = img.shape[:2]

        # NOTE(review): the blob is built without an explicit size or scale
        # factor. The alternative that was commented out in the original was
        #   cv2.dnn.blobFromImage(img, 0.00392, (256, 256), swapRB=True, crop=False)
        # Confirm which input size/layout the exported model expects.
        blob = cv2.dnn.blobFromImage(img, swapRB=True, crop=False)
        net.setInput(blob)
        outs = net.forward(output_layers)

        class_ids = []
        confidences = []
        boxes = []
        # NOTE(review): this parsing assumes each output row is laid out as
        # [cx, cy, w, h, <one value>, class scores...] — confirm that the
        # model really produces detections in that format.
        for out in outs:
            for detection in out:
                scores = detection[5:]
                class_id = np.argmax(scores)
                confidence = scores[class_id]
                if confidence > 0.5:
                    # Object detected: scale the relative box to pixels.
                    center_x = int(detection[0] * width)
                    center_y = int(detection[1] * height)
                    w = int(detection[2] * width)
                    h = int(detection[3] * height)
                    # Convert centre/size to the top-left corner.
                    x = int(center_x - w / 2)
                    y = int(center_y - h / 2)
                    boxes.append([x, y, w, h])
                    confidences.append(float(confidence))
                    class_ids.append(class_id)

        indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)
        # NMSBoxes may return an empty tuple, a flat array or an (N, 1) array;
        # flattening lets one loop handle all three.
        for i in np.array(indexes, dtype=int).flatten():
            x, y, w, h = boxes[i]
            label = str(labels[class_ids[i]])
            # Plain Python ints so the colour is accepted by the drawing calls.
            color = [int(c) for c in COLORS[class_ids[i]]]
            cv2.rectangle(img, (x, y), (x + w, y + h), color, 2)
            cv2.putText(img, label, (x, y + 30), font, 2, color, 3)

        cv2.imshow("Image", cv2.resize(img, (800, 600)))
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the window, even after an error.
    capture.release()
    cv2.destroyAllWindows()
And I get an error like this:
error Traceback (most recent call last)
~\AppData\Local\Temp\ipykernel_10644\3517982312.py in <module>
9 blob = cv2.dnn.blobFromImage(img, swapRB=True, crop=False)
10 net.setInput(blob)
---> 11 outs = net.forward(output_layers)
12
13 class_ids = []
error: OpenCV(3.4.17) D:\a\opencv-python\opencv-python\opencv\modules\dnn\src\layers\convolution_layer.cpp:331: error: (-2:Unspecified error) Number of input channels should be multiple of 3 but got 640 in function 'cv::dnn::ConvolutionLayerImpl::getMemoryShapes'
Can you help me solve this problem? I have looked into various sources but did not find a solution.
library version for this code
python version 3.7
tensorflow version 2.0
opencv version 3.4.17
I hope you can help me solve this problem and share the solution with me.

How to extract area of interest in the image while the boundary is not obvious

Are there ways to just extract the area of interest (the square light part in the red circle in the original image)? That means I need to get the coordinates of the edge and then masking the image outside the boundaries. I don't know how to do that. Could anyone help? Thanks!
# Define the horizontal (Gx) and vertical (Gy) 3x3 Sobel kernels.
Gx = np.array([[-1, 0, 1],[-2, 0, 2],[-1, 0, 1]])
Gy = np.array([[-1, -2, -1],[0, 0, 0],[1, 2, 1]])
def convolve(X, F):
    """Slide filter ``F`` over the 2-D image ``X`` and return the response map.

    The filter is applied as written (no flipping), centred on each pixel.
    Pixels closer to the border than the filter's half-size are never
    visited and keep the value 0.

    Args:
        X: 2-D array indexed as ``X[row, column]``.
        F: 2-D filter. Only its centred ``(2H+1) x (2W+1)`` part is used,
            where ``H`` and ``W`` are the half-sizes computed below; for
            odd-sized filters that is the whole filter.

    Returns:
        A float array with the same height and width as ``X``.
    """
    # Height and width of the image (axis 0 is rows, axis 1 is columns).
    X_height = X.shape[0]
    X_width = X.shape[1]
    # Half-sizes of the filter around its centre element.
    H = (F.shape[0] - 1) // 2
    W = (F.shape[1] - 1) // 2
    kernel = F[:2 * H + 1, :2 * W + 1]

    out = np.zeros((X_height, X_width))
    # Visit every pixel whose full neighbourhood lies inside the image.
    for i in range(H, X_height - H):
        for j in range(W, X_width - W):
            window = X[i - H:i + H + 1, j - W:j + W + 1]
            # Element-wise product of filter and neighbourhood, summed.
            out[i, j] = np.sum(kernel * window)
    return out
# Apply both kernels to `image` (defined outside this snippet); dividing by 8,
# the sum of the absolute kernel weights, normalises the responses.
sob_x = convolve(image, Gx) / 8.0
sob_y = convolve(image, Gy) / 8.0
# Gradient magnitude: element-wise sqrt(sob_x**2 + sob_y**2).
sob_out = np.sqrt(np.power(sob_x, 2) + np.power(sob_y, 2))
# Rescale so that the largest magnitude maps to 255.
# NOTE(review): if every magnitude is 0 the maximum is 0 and this divides by zero — confirm inputs.
sob_out = (sob_out / np.max(sob_out)) * 255
plt.imshow(sob_out, cmap = 'gray', interpolation = 'bicubic')
plt.show()

Detecting multiple lines in OpenCV HoughLines

I'm using OpenCV 4.4 and running the following code to detect the lines of a grid. When I display the image, it always detects only one line, as shown in the screenshot. How can I detect all the vertical lines in the grid?
# Detect straight lines in the grid image with the standard Hough transform and draw them.
grid = cv2.imread('images/grid.jpeg')
grayscale = cv2.cvtColor(grid, cv2.COLOR_BGR2GRAY)
edges = cv2.Canny(grayscale, 50, 150, apertureSize=3)
# Accumulator resolution: 1 pixel for rho, 1 degree for theta; vote threshold 100.
lines = cv2.HoughLines(edges, 1, np.pi/180, 100)
# NOTE(review): in the OpenCV Python examples the result is indexed per line
# (`for line in lines: rho, theta = line[0]`); `lines[0]` is then only the first
# detected line, so this loop would draw a single line — confirm the result shape.
for rho, theta in lines[0]:
# (x0, y0) is the point on the line closest to the origin.
a = np.cos(theta)
b = np.sin(theta)
x0 = a * rho
y0 = b * rho
# Step 1000 pixels along the line in both directions to get two end points.
x1 = int(x0 + 1000 * (-b))
y1 = int(y0 + 1000 * (a))
x2 = int(x0 - 1000 * (-b))
y2 = int(y0 - 1000 * (a))
cv2.line(grid, (x1, y1), (x2, y2), (255, 0, 0), 2)
cv2.imshow("Lines", grid)
cv2.waitKey(0)
cv2.destroyAllWindows()
Original Image:
You can use the lineDetector algorithm.
Find the edges of your image, as @Micka suggested:
# Load the image, convert it to grayscale and compute its Canny edge map.
img = cv2.imread("lines.png")
img_gry = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
img_cny = cv2.Canny(img_gry, 50, 200)
Result:
To detect the vertical edges, the difference between the x-coordinates should be close to 0, since only the y-coordinates change.
# Draw the segment only when its two end points differ by less than 3 in x.
if abs(x1 - x2) < 3:
cv2.line(img, pt1=(x1, y1), pt2=(x2, y2), color=(0, 0, 255), thickness=3)
Result:
Code:
import cv2
# Detect line segments on the Canny edge map and draw the near-vertical ones.
MAX_DX = 3  # a segment counts as vertical when |x1 - x2| is below this

img = cv2.imread("lines.png")
img_gry = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
img_cny = cv2.Canny(img_gry, 50, 200)
lns = cv2.ximgproc.createFastLineDetector().detect(img_cny)
# NOTE(review): detect() presumably returns None when nothing is found —
# treat that as "no segments" instead of failing on iteration.
for ln in (lns if lns is not None else []):
    # Each entry wraps one (x1, y1, x2, y2) record; truncate to pixel ints.
    x1, y1, x2, y2 = (int(v) for v in ln[0])
    if abs(x1 - x2) < MAX_DX:
        cv2.line(img, pt1=(x1, y1), pt2=(x2, y2), color=(0, 0, 255), thickness=3)
cv2.imshow("lns", img)
cv2.waitKey(0)

playing cards detection with custom Yolo with OpenCv. How to know the inputs and outputs from the custom Yolo .cfg file

I want to detect playing cards and found a .cfg and a .weights file for it. The classes file has the names of the 52 cards. The following code gives an index-out-of-range error. I don't understand the outputs of YOLO or how to get the detected labels. I am new to this and have been trying to understand it. Can someone please help?
import cv2
import numpy as np
# Load the YOLO network and the class names (one name per line of cards.names).
net = cv2.dnn.readNet("yolocards_608.weights", "yolocards.cfg")
classes = []
with open("cards.names", "r") as f:
classes = [line.strip() for line in f.readlines()]
layer_names = net.getLayerNames()
# NOTE(review): `i[0]` assumes getUnconnectedOutLayers() returns nested (N, 1)
# indices — confirm for the installed OpenCV version.
output_layers = [layer_names[i[0] - 1] for i in net.getUnconnectedOutLayers()]
# One random colour row per class name.
colors = np.random.uniform(0, 255, size=(len(classes), 3))
# Load the image and shrink it to 40 % of its size.
img = cv2.imread("playing_cards_image.jpg")
img = cv2.resize(img, None, fx=0.4, fy=0.4)
height, width, channels = img.shape
# Run the network on a 416x416 blob scaled by 0.00392.
blob = cv2.dnn.blobFromImage(img, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
net.setInput(blob)
outs = net.forward(output_layers)
# Collect the boxes, confidences and class ids of confident detections.
class_ids = []
confidences = []
boxes = []
for out in outs:
print(out.shape)
for detection in out:
# NOTE(review): this slice takes the whole detection row, so the entries read
# below as box values (indices 0-3) also take part in the argmax — confirm intent.
scores = detection[:]
class_id = np.argmax(scores)
confidence = scores[class_id]
if confidence > 0.5:
# Object detected: scale the relative box to pixels.
center_x = int(detection[0] * width)
center_y = int(detection[1] * height)
w = int(detection[2] * width)
h = int(detection[3] * height)
# Convert centre/size to the top-left corner.
x = int(center_x - w / 2)
y = int(center_y - h / 2)
boxes.append([x, y, w, h])
confidences.append(float(confidence))
class_ids.append(class_id)
indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)
font = cv2.FONT_HERSHEY_PLAIN
# NOTE(review): the loop variable is `j`, but the body tests and indexes with `i` —
# confirm which name is intended.
for j in range(len(boxes)):
if i in indexes:
x, y, w, h = boxes[i]
print(class_ids[i])
label = str(classes[class_ids[i]])
print(label)
# NOTE(review): `colors` has len(classes) rows, yet it is indexed with `i` rather
# than class_ids[i]; this lines up with the IndexError quoted after this snippet.
color = colors[i]
cv2.rectangle(img, (x, y), (x + w, y + h), color, 2)
cv2.putText(img, label, (x, y + 30), font, 3, color, 3)
error:
0
Ah
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-46-adaf82305ab8> in <module>
6 label = str(classes[class_ids[i]])
7 print(label)
----> 8 color = colors[i]
9 cv2.rectangle(img, (x, y), (x + w, y + h), color, 2)
10 cv2.putText(img, label, (x, y + 30), font, 3, color, 3)
IndexError: index 52 is out of bounds for axis 0 with size 52

How to get the minimum contour of a color with HSV?

I'm working on an image-processing task. I need to get the maximum and minimum contour area inside the loop for pic, contour in enumerate(contours):, after filtering with if (area > 2000):
I can already get the maximum and minimum contour outside the for loop; the problem is that, in this code, I need the smallest contour whose area is greater than 2000.
my full code:
import cv2
import numpy as np
from imutils.video import FPS
import time
# Track blue regions in the webcam stream: threshold in HSV, find contours,
# box every contour above the area limit and box the largest contour in black.
cap = cv2.VideoCapture(0)
# Capture properties 3 and 4 are read as the frame width and height.
width = cap.get(3) # float
height = cap.get(4) # float
# NOTE(review): `print a, b` is Python 2 statement syntax; Python 3 needs print(...).
print width, height
time.sleep(2.0)
fps = FPS().start()
while (1):
_, img = cap.read()
# Only convert when a frame was actually delivered; otherwise try again.
if _ is True:
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
else:
continue
# Lower/upper HSV bounds of the colour to keep.
blue_lower = np.array([86,0,90], np.uint8)
blue_upper = np.array([163, 64, 145], np.uint8)
blue = cv2.inRange(hsv, blue_lower, blue_upper)
# Dilate the mask with a 9x9 kernel.
kernal = np.ones((9, 9), "uint8")
blue = cv2.dilate(blue, kernal)
res_blue = cv2.bitwise_and(img, img, mask=blue)
# NOTE(review): unpacking three return values presumably matches the OpenCV 3.x
# findContours signature; other versions return two — confirm the installed version.
(_, contours, hierarchy) = cv2.findContours(blue, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
for pic, contour in enumerate(contours):
area = cv2.contourArea(contour)
# Ignore small contours.
if (area > 2000):
print area
x, y, w, h = cv2.boundingRect(contour)
img = cv2.rectangle(img, (x, y), (x + w, y + h), (255, 0, 0), 2)
cv2.putText(img, "Blue Colour", (x, y), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 0, 0))
# The largest contour is chosen from ALL contours, without the area > 2000 filter.
if len(contours) > 0:
c = max(contours, key=cv2.contourArea)
x, y, w, h = cv2.boundingRect(c)
img = cv2.rectangle(img, (x, y), (x + w, y + h), (0, 0, 0), 5)
cv2.putText(img, "Blue Colour", (x, y), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0))
cv2.imshow("Color Tracking", img)
# Quit on 'q': release the camera and close the window before leaving the loop.
if cv2.waitKey(10) & 0xFF == ord('q'):
cap.release()
cv2.destroyAllWindows()
break
fps.update()
Any ideas or suggestions will be appreciated

Resources