I have done image segmentation of an image using PyTorch. I am trying to get the pixel count of the Boat class to measure its area. As an example, in the image I want to get the pixel count of the boat. How do I do that? And from the pixel count, is it possible to measure the area of the boat?
I am confused and trying to find a way. I would appreciate it if anybody could guide me on this.
The code is as below:
from torchvision import models
fcn = models.segmentation.fcn_resnet101(pretrained=True).eval()
from PIL import Image
import matplotlib.pyplot as plt
import torch
img = Image.open('boat.jpg')
plt.imshow(img)
plt.show()
# Apply the transformations needed
#Resize the image to (256 x 256)
#CenterCrop it to (224 x 224)
import torchvision.transforms as T
trf = T.Compose([T.Resize(256),
                 T.CenterCrop(224),
                 T.ToTensor(),
                 T.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])])
inp = trf(img).unsqueeze(0)
out = fcn(inp)['out']
print (out.shape)
# Now convert this 21-channelled output into a 2D (single-channel) image, where each pixel corresponds to a class.
import numpy as np
om = torch.argmax(out.squeeze(), dim=0).detach().cpu().numpy()
print (om.shape)
print (np.unique(om))
# Define the helper function
def decode_segmap(image, nc=21):
    label_colors = np.array([(0, 0, 0),  # 0=background
                   # 1=aeroplane, 2=bicycle, 3=bird, 4=boat, 5=bottle
                   (128, 0, 0), (0, 128, 0), (128, 128, 0), (0, 0, 128), (128, 0, 128),
                   # 6=bus, 7=car, 8=cat, 9=chair, 10=cow
                   (0, 128, 128), (128, 128, 128), (64, 0, 0), (192, 0, 0), (64, 128, 0),
                   # 11=dining table, 12=dog, 13=horse, 14=motorbike, 15=person
                   (192, 128, 0), (64, 0, 128), (192, 0, 128), (64, 128, 128), (192, 128, 128),
                   # 16=potted plant, 17=sheep, 18=sofa, 19=train, 20=tv/monitor
                   (0, 64, 0), (128, 64, 0), (0, 192, 0), (128, 192, 0), (0, 64, 128)])
    r = np.zeros_like(image).astype(np.uint8)
    g = np.zeros_like(image).astype(np.uint8)
    b = np.zeros_like(image).astype(np.uint8)
    for l in range(0, nc):
        idx = image == l
        r[idx] = label_colors[l, 0]
        g[idx] = label_colors[l, 1]
        b[idx] = label_colors[l, 2]
    rgb = np.stack([r, g, b], axis=2)
    return rgb
rgb = decode_segmap(om)
plt.imshow(rgb); plt.show()
I am looking for some guidance here.
You are looking for skimage.measure.regionprops. Once you have the predicted label map (om in your code) you can apply regionprops to it and get the area of each region.
According to your code snippet, the output om is a NumPy array of category indices (0 - background, 1 - aeroplane, 2 - bicycle, ...).
In order to get the area of a specific category, you just need to compare the output map with the corresponding index and then sum up the result.
For example, for the boat category, which has index 4:
BOAT_INDEX = 4
area = np.sum(om == BOAT_INDEX)
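If you also want the area of each separate region (e.g. two boats in one image) rather than one total per class, here is a minimal sketch using skimage.measure.regionprops on the same label map, assuming om is the array produced above:

from skimage.measure import label, regionprops

boat_mask = (om == BOAT_INDEX)                # binary mask of all boat pixels
for region in regionprops(label(boat_mask)):  # label() splits the mask into connected components
    print(region.area)                        # pixel count of each connected boat region

Note that a pixel count is an area in pixel units only. To convert it to a physical area you need the ground resolution (e.g. square metres per pixel), and keep in mind that om was computed on the resized 224x224 centre crop, so the count refers to that crop rather than to the original image.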
So, I am trying to measure the line (check the attached code and result). The problem is that when creating the box points to measure the line, it also creates box points for the whole canvas of the window, no matter what the image or thresholding value is. I also tried to limit it with the contourArea(c) check, but I couldn't (I don't know why). Kindly improve the code or explain where I can counter this problem, in layman's language, as I am a newbie.
Thank you!
import cv2
import numpy as np
import imutils
import imutils.perspective as persp
import scipy.spatial.distance as dist
print("PRESSED MODE 1 CONTOUR")
# cv2.destroyAllWindows()
img = cv2.imread('opencv_frame_3.png')
# img2 = cv2.imread('shapes.jpg')
# rgb_img2 = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
rgb_img = cv2.cvtColor(gray, cv2.COLOR_BGR2RGB)
# thresh, thresh_img = cv2.threshold(gray,90,255,cv2.THRESH_BINARY)
# thresh_img = cv2.erode(thresh_img, None, iterations=5)
# thresh_img = cv2.dilate(thresh_img, None, iterations=1)
# rgb_img_thresh = cv2.cvtColor(thresh_img, cv2.COLOR_BGR2RGB)
thresh_img = cv2.adaptiveThreshold(gray,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C,cv2.THRESH_BINARY,17,5)
# thresh_img = cv2.adaptiveThreshold(gray,255,cv2.ADAPTIVE_THRESH_MEAN_C,cv2.THRESH_BINARY,175,15)
# thr = cv2.cvtColor(thr, cv2.COLOR_BGR2RGB)
conts = cv2.findContours(thresh_img, cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
conts = imutils.grab_contours(conts)
# print(len(conts))
cont_img = np.zeros(img.shape)
cont_img = cv2.drawContours(cont_img,conts, -1 , (0,255,0),1)
cont_img2 = np.copy(img)
cont_img2 = cv2.drawContours(cont_img2,conts, -1 , (0,255,0),1)
# cv2.imshow('black',cont_img)
def midPoint(ptA, ptB):
    return ((ptA[0] + ptB[0]) / 2, (ptA[1] + ptB[1]) / 2)

for c in conts:
    box = cv2.minAreaRect(c)
    # print(box)
    box = cv2.boxPoints(box)
    box = np.array(box, dtype='int')
    area = cv2.contourArea(c)
    # print(area)
    if cv2.contourArea(c) < 50000:
        continue
    area = cv2.contourArea(c)
    print(area/96)
    print("________")
    cv2.drawContours(cont_img, [c], -1, (0, 0, 255), 2)
    cv2.drawContours(cont_img, [box], -1, (255, 0, 0), 2)
    cv2.drawContours(cont_img2, [c], -1, (0, 0, 255), 2)
    cv2.drawContours(cont_img2, [box], -1, (255, 0, 0), 2)
    for (x, y) in box:
        cv2.circle(cont_img, (x, y), 2, (255, 255, 255), 20)
        cv2.circle(cont_img2, (x, y), 2, (255, 255, 255), 20)
    (tl, tr, br, bl) = box
    (trX, tlX) = midPoint(tr, tl)
    (brX, blX) = midPoint(br, bl)
    cv2.circle(cont_img, (int(trX), int(tlX)), 1, (0, 255, 255), 20)
    cv2.circle(cont_img, (int(brX), int(blX)), 1, (0, 255, 255), 20)
    cv2.line(cont_img, (int(trX), int(tlX)), (int(brX), int(blX)), (255, 0, 255), 2)
    cv2.circle(cont_img2, (int(trX), int(tlX)), 1, (0, 255, 255), 20)
    cv2.circle(cont_img2, (int(brX), int(blX)), 1, (0, 255, 255), 20)
    cv2.line(cont_img2, (int(trX), int(tlX)), (int(brX), int(blX)), (255, 0, 255), 2)
    dA = dist.euclidean((int(trX), int(tlX)), (int(brX), int(blX)))
    cv2.putText(cont_img, "{:.2f} px".format(dA/300), (int(trX+20), int(blX-20)),
                cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 255), 2)
    cv2.putText(cont_img2, "{:.2f} px".format(dA/300), (int(trX+20), int(blX-20)),
                cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 255), 2)
    (tlX, blX) = midPoint(tl, bl)
    (trX, brX) = midPoint(tr, br)
    cv2.circle(cont_img, (int(tlX), int(blX)), 1, (0, 255, 0), 20)
    cv2.circle(cont_img, (int(trX), int(brX)), 1, (0, 255, 0), 20)
    cv2.line(cont_img, (int(tlX), int(blX)), (int(trX), int(brX)), (255, 0, 255), 2)
    cv2.circle(cont_img2, (int(tlX), int(blX)), 1, (0, 255, 0), 20)
    cv2.circle(cont_img2, (int(trX), int(brX)), 1, (0, 255, 0), 20)
    cv2.line(cont_img2, (int(tlX), int(blX)), (int(trX), int(brX)), (255, 0, 255), 2)
    dB = dist.euclidean((int(tlX), int(blX)), (int(trX), int(brX)))
    cv2.putText(cont_img, "{:.2f} in".format(dB/300), (int(trX-120), int(blX-320)),
                cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 255), 2)
    cv2.putText(cont_img2, "{:.2f} in".format(dB/300), (int(trX-120), int(blX-320)),
                cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 255), 2)
    print("Line A", "{:.2f} in".format(dA/96))
    print("Line B", "{:.2f} in".format(dB/96))
    print("**************")
# cont_img = cv2.resize(cont_img,(0,0),fx=0.5,fy=0.5)
cv2.imshow('area',cont_img)
# cont_img2 = cv2.resize(cont_img2,(0,0),fx=0.5,fy=0.5)
# cv2.imshow('ORG',cont_img2)
# thresh_img = cv2.resize(thresh_img,(0,0),fx=0.5,fy=0.5)
cv2.imshow("thresh", thresh_img)
# imgStack = stackImages(0.4,([img,rgb_img],
# [rgb_img_thresh,cont_img]))
#
# cv2.imshow('imgthresh',imgStack)
cv2.waitKey(0)
cv2.destroyAllWindows()
Check the output image: the whole window size is getting measured.
I tried to limit the measurement by contourArea(c), or by filtering c itself, but it doesn't work for me.
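As a rough, untested sketch of one way to exclude the full-canvas contour (an adaptive threshold typically produces a contour that follows the image border): skip any contour whose bounding box covers almost the whole frame before measuring, for example:

img_h, img_w = img.shape[:2]
for c in conts:
    x, y, w, h = cv2.boundingRect(c)
    # skip contours that span (nearly) the entire canvas, such as the window/frame border
    if w > 0.95 * img_w and h > 0.95 * img_h:
        continue
    # ... the existing minAreaRect / measurement code goes here ...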
How do I get the application windows to display on Windows?
Code for Reference:
from tkinter import N
import numpy as np
from keras.preprocessing.image import img_to_array
import cv2
import imutils
from keras.models import load_model
import numpy as np
# parameters for loading data and images
detection_model_path = 'ER_Project//haar-cascade-files-master/haarcascade_frontalface_default.xml'
emotion_model_path = 'ER_Project/_mini_XCEPTION.102-0.66.hdf5'
# hyper-parameters for bounding boxes shape
# loading models
face_detection = cv2.CascadeClassifier(detection_model_path)
emotion_classifier = load_model(emotion_model_path, compile=False)
EMOTIONS = ["angry", "disgust", "scared", "happy", "sad", "surprised",
"neutral"]
#feelings_faces = []
# for index, emotion in enumerate(EMOTIONS):
# feelings_faces.append(cv2.imread('emojis/' + emotion + '.png', -1))
# starting video streaming
cv2.namedWindow('your_face')
camera = cv2.VideoCapture(0)
while True:
    print("Hello")
    frame = camera.read()[1]
    # reading the frame
    frame = imutils.resize(frame, width=300)
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    faces = face_detection.detectMultiScale(
        gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30), flags=cv2.CASCADE_SCALE_IMAGE)
    canvas = np.zeros((250, 300, 3), dtype="uint8")
    frameClone = frame.copy()
    if len(faces) > 0:
        faces = sorted(faces, reverse=True,
                       key=lambda x: (x[2] - x[0]) * (x[3] - x[1]))[0]
        (fX, fY, fW, fH) = faces
        # Extract the ROI of the face from the grayscale image, resize it to a fixed
        # 64x64 pixels, and then prepare the ROI for classification via the CNN
        roi = gray[fY:fY + fH, fX:fX + fW]
        roi = cv2.resize(roi, (64, 64))
        roi = roi.astype("float") / 255.0
        roi = img_to_array(roi)
        roi = np.expand_dims(roi, axis=0)
        preds = emotion_classifier.predict(roi)[0]
        emotion_probability = np.max(preds)
        label = EMOTIONS[preds.argmax()]
    else:
        continue
    for (i, (emotion, prob)) in enumerate(zip(EMOTIONS, preds)):
        # construct the label text
        text = "{}: {:.2f}%".format(emotion, prob * 100)
        # draw the label + probability bar on the canvas
        # emoji_face = feelings_faces[np.argmax(preds)]
        w = int(prob * 300)
        cv2.rectangle(canvas, (7, (i * 35) + 5),
                      (w, (i * 35) + 35), (0, 0, 255), -1)
        cv2.putText(canvas, text, (10, (i * 35) + 23),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.45,
                    (255, 255, 255), 2)
        cv2.putText(frameClone, label, (fX, fY - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.45, (0, 0, 255), 2)
        cv2.rectangle(frameClone, (fX, fY), (fX + fW, fY + fH),
                      (0, 0, 255), 2)
        # for c in range(0, 3):
        #     frame[200:320, 10:130, c] = emoji_face[:, :, c] * \
        #         (emoji_face[:, :, 3] / 255.0) + frame[200:320,
        #         10:130, c] * (1.0 - emoji_face[:, :, 3] / 255.0)
    cv2.imshow('your_face', frameClone)
    cv2.imshow("Probabilities", canvas)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
camera.release()
cv2.destroyAllWindows()
NUMA SUPPORT:
2022-04-20 04:36:21.181568: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:922] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2022-04-20 04:36:21.181664: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 3951 MB memory: -> device: 0, name: NVIDIA GeForce GTX 1660 Ti, pci bus id: 0000:01:00.0, compute capability: 7.5
I need to run this OpenCV GUI app on Windows.
I have sample images containing stones, and I need to identify only the visible stones. The approach I tried is threshold-based filtering followed by cv2 contour detection. I am also looking into the ENet architecture for a semantic-segmentation-based deep learning approach. The sample images are below.
Example image1:
Example image2:
The code I tried for contour-based detection is below:
image = cv2.imread(os.path.join(img_path, img_name2))
# threshold based customization
lower_bound = np.array([0, 0, 0])
upper_bound = np.array([250,55,100])
hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
#masking the image using inRange() function
imagemask = cv2.inRange(hsv, lower_bound, upper_bound)
plt.figure(figsize=(20,10))
plt.imshow(imagemask, cmap="gray")
# erode and dilate to smoothen the edges
final_mask = cv2.erode(imagemask, np.ones((3, 3), dtype=np.uint8))
final_mask = cv2.dilate(final_mask, np.ones((5, 5), dtype=np.uint8))
# find contours based on the mask
contours = cv2.findContours(final_mask.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
# draw contours
img_conts = cv2.drawContours(image.copy(), contours[0], -1, (0,255,0), 3)
plt.figure(figsize=(20,10))
plt.imshow(img_conts, cmap="gray")
The sample contour output is shown above. I know that the thresholds can be tuned for better results here.
But what I am looking for is a better approach or solution that can work in this heavy environment for detecting small particles like stones. Any ideas on how to solve this in a better way?
Here is how you can use the Canny edge detector to detect the rocks in your images:
import cv2
import numpy as np
def process(img):
    img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    _, thresh = cv2.threshold(img_gray, 103, 255, cv2.THRESH_BINARY)
    img_blur = cv2.GaussianBlur(thresh, (23, 23), 0)
    img_canny = cv2.Canny(img_blur, 65, 0)
    img_dilate = cv2.dilate(img_canny, None, iterations=2)
    return cv2.erode(img_dilate, None, iterations=2)

imgs = [cv2.imread("image1.jpg"), cv2.imread("image2.jpg")]

for i, img in enumerate(imgs):
    contours = cv2.findContours(process(img), cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)[0]
    cv2.drawContours(img, contours, -1, (0, 255, 0), 1)
    cv2.imshow(str(i), img)

cv2.waitKey(0)
cv2.destroyAllWindows()
Output for sample images 1 and 2:
You can also tweak the parameters with OpenCV trackbars using the code below:
import cv2
import numpy as np
from random import randint, sample
def process(img, c_t1, c_t2):
    img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    _, thresh = cv2.threshold(img_gray, 103, 255, cv2.THRESH_BINARY)
    img_blur = cv2.GaussianBlur(thresh, (23, 23), 0)
    img_canny = cv2.Canny(img_blur, c_t1, c_t2)
    img_dilate = cv2.dilate(img_canny, None, iterations=2)
    return cv2.erode(img_dilate, None, iterations=2)

def show(imgs, win="Image", scale=1):
    imgs = [cv2.cvtColor(img, cv2.COLOR_GRAY2BGR) if len(img.shape) == 2 else img for img in imgs]
    img_concat = np.concatenate(imgs, 1)
    h, w = img_concat.shape[:2]
    cv2.imshow(win, cv2.resize(img_concat, (int(w * scale), int(h * scale))))

d = {"Canny Threshold 1": (65, 500),
     "Canny Threshold 2": (0, 500)}

imgs = [cv2.imread("image1.jpg"), cv2.imread("image2.jpg")]

cv2.namedWindow("Track Bars")
for i in d:
    cv2.createTrackbar(i, "Track Bars", *d[i], id)  # the built-in id is used as a do-nothing callback

while True:
    c_t1, c_t2 = (cv2.getTrackbarPos(i, "Track Bars") for i in d)
    for i, img in enumerate(imgs):
        img_copy = img.copy()
        processed = process(img, c_t1, c_t2)
        contours = cv2.findContours(processed, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)[0]
        cv2.drawContours(img_copy, contours, -1, (0, 255, 0), 1)
        show([img_copy, processed], str(i))
    if cv2.waitKey(1) & 0xFF == ord("q"):
        break

cv2.destroyAllWindows()
Output:
I am trying to understand image segmentation using the SegNet implementation in Keras. I have read the original paper using the Conv and Deconv architecture and also the one using dilated conv layers. However, I have trouble understanding how the labelling of the pixels works.
I am considering the following implementation:
https://github.com/nicolov/segmentation_keras
Here the Pascal VOC dataset attributes are used:
21 Classes:
# 0=background
# 1=aeroplane, 2=bicycle, 3=bird, 4=boat, 5=bottle
# 6=bus, 7=car, 8=cat, 9=chair, 10=cow
# 11=diningtable, 12=dog, 13=horse, 14=motorbike, 15=person
# 16=potted plant, 17=sheep, 18=sofa, 19=train, 20=tv/monitor
The classes are represented by:
pascal_nclasses = 21
pascal_palette = np.array([(0, 0, 0)
, (128, 0, 0), (0, 128, 0), (128, 128, 0), (0, 0, 128), (128, 0, 128)
, (0, 128, 128), (128, 128, 128), (64, 0, 0), (192, 0, 0), (64, 128, 0)
, (192, 128, 0), (64, 0, 128), (192, 0, 128), (64, 128, 128), (192, 128, 128)
, (0, 64, 0), (128, 64, 0), (0, 192, 0), (128, 192, 0), (0, 64, 128)], dtype=np.uint8)
I was trying to open the labelled images for cat and boat, since cat should be only in the R channel and boat only in the blue channel. I used the following to show the labelled images:
For boat:
label = cv2.imread("2008_000120.png")
label = np.multiply(label, 100)
cv2.imshow("kk", label[:,:,2])
cv2.waitKey(0)
For cat:
label = cv2.imread("2008_000056.png")
label = np.multiply(label, 100)
cv2.imshow("kk", label[:,:,0])
cv2.waitKey(0)
However, it doesn't matter which channel I choose; both images always give the same result, i.e. the following code also gives the same results.
For boat:
label = cv2.imread("2008_000120.png")
label = np.multiply(label, 100)
cv2.imshow("kk", label[:,:,1]) # changed to Green space
cv2.waitKey(0)
For cat:
label = cv2.imread("2008_000056.png")
label = np.multiply(label, 100)
cv2.imshow("kk", label[:,:,1]) # changed to Green space
cv2.waitKey(0)
My assumption was that I would see the cat only in the red channel and the boat only in the blue channel. However, the output is the same in all cases.
I am now confused about how these pixels are labelled, and how they are read and uniquely paired with the categories when the logits are created.
It would be great if someone could explain this or point to some relevant links. I tried to search, but most tutorials only discuss the CNN architecture, not the labelling process or how the labels are used within the CNN.
I have attached the labelled images of cat and boat for reference.
The labels are single-channel index masks, not RGB colour images. The pixel value at each location of your label image is the class present at that pixel: 0 where there is no object, and a value 1-20 corresponding to the class otherwise.
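As an illustration, here is a minimal sketch, assuming the files are the standard Pascal VOC SegmentationClass PNGs (the filename is taken from your question). These PNGs are palettized ('P' mode) images, so opening them with PIL without converting to RGB gives the raw class indices directly, whereas cv2.imread applies the colour palette and returns a 3-channel BGR image.

from PIL import Image
import numpy as np

# VOC SegmentationClass labels are palettized PNGs: each pixel stores a class
# index (0-20, plus 255 for the 'void' border); the palette only affects display.
label = np.array(Image.open("2008_000120.png"))  # keep 'P' mode, do not convert to RGB
print(label.shape)       # (H, W) - a single channel of class indices
print(np.unique(label))  # e.g. something like [0 4 255] for a boat image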
Semantic segmentation is a classification task, so you are trying to classify each pixel with a class (in this case class labels 0-20).
Your model produces an output image, and you perform softmax cross-entropy between each output image pixel and each label image pixel.
In the multiclass case where you have K classes (here K=21), each output pixel has K channels and you perform softmax cross-entropy across the channels at each pixel. Why a channel for each class? Think of ordinary classification: we produce a vector of length K for K classes and compare it to a one-hot vector of length K.
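For intuition, here is a hedged sketch of that per-pixel loss written in PyTorch (chosen only because it expresses the idea compactly; the Keras repository linked above uses its own equivalent):

import torch
import torch.nn as nn

K = 21                                        # number of classes
logits = torch.randn(1, K, 224, 224)          # model output: one score channel per class
target = torch.randint(0, K, (1, 224, 224))   # label map: one class index per pixel

# softmax cross-entropy applied across the K channels at every pixel
loss = nn.CrossEntropyLoss()(logits, target)
print(loss.item())

Each pixel is therefore an independent K-way classification, exactly like comparing a length-K score vector with a one-hot target in ordinary image classification.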
How do I get the new coordinates of the points a and b in this example after a transformation M (a 40-degree counter-clockwise rotation)?
import cv2
cap = cv2.VideoCapture("http://i.imgur.com/7G91d2im.jpg")
a, b = (100, 100), (200, 200)
if cap.isOpened():
    ret, im = cap.read()
    rows, cols = im.shape[:2]

    im_keypoints = im.copy()
    for point in [a, b]:
        cv2.circle(im_keypoints, point, 6, (0, 0, 255), -1)
    cv2.imwrite("im_keypoints.jpg", im_keypoints)

    M = cv2.getRotationMatrix2D((cols / 2, rows / 2), 40, 1)
    im_rotated = cv2.warpAffine(im, M, (cols, rows))
    cv2.imwrite("im_rotated.jpg", im_rotated)
M is a 2 by 3 rotation matrix, so all you need to do is apply M to your points.
import numpy as np

im_rotated_keypoints = im_rotated.copy()
for point in [a, b]:
    # Convert to homogeneous coordinates (as a NumPy array) first so that you can pre-multiply by M
    rotated_point = M.dot(np.array(point + (1,)))
    cv2.circle(im_rotated_keypoints, (int(rotated_point[0]), int(rotated_point[1])), 6, (0, 0, 255), -1)
And you should be able to see the two points drawn at their rotated positions in im_rotated_keypoints.
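If you need to transform many points at once, an alternative sketch (hedged, but using only the standard cv2.transform call, which applies the same 2x3 affine matrix to an array of points):

import numpy as np

pts = np.array([a, b], dtype=np.float32).reshape(-1, 1, 2)  # shape (N, 1, 2)
rotated_pts = cv2.transform(pts, M).reshape(-1, 2)          # apply the 2x3 affine to every point
print(rotated_pts)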