I'm trying to do a segmentation of Satellite images for identifying built and non-built areas using U-NET Architecture. The Model Architecture is as follows:
def get_unet(input_img, n_filters = 16, dropout = 0.1, batchnorm = True):
Function to define the UNET Model
# Contracting Path
c1 = conv2d_block(input_img, n_filters * 1, kernel_size = 3, batchnorm = batchnorm)
p1 = MaxPooling2D((2, 2))(c1)
p1 = Dropout(dropout)(p1)
c2 = conv2d_block(p1, n_filters * 2, kernel_size = 3, batchnorm = batchnorm)
p2 = MaxPooling2D((2, 2))(c2)
p2 = Dropout(dropout)(p2)
c3 = conv2d_block(p2, n_filters * 4, kernel_size = 3, batchnorm = batchnorm)
p3 = MaxPooling2D((2, 2))(c3)
p3 = Dropout(dropout)(p3)
c4 = conv2d_block(p3, n_filters * 8, kernel_size = 3, batchnorm = batchnorm)
p4 = MaxPooling2D((2, 2))(c4)
p4 = Dropout(dropout)(p4)
c5 = conv2d_block(p4, n_filters = n_filters * 16, kernel_size = 3, batchnorm = batchnorm)
# Expansive Path
u6 = Conv2DTranspose(n_filters * 8, (3, 3), strides = (2, 2), padding = 'same')(c5)
u6 = concatenate([u6, c4])
u6 = Dropout(dropout)(u6)
c6 = conv2d_block(u6, n_filters * 8, kernel_size = 3, batchnorm = batchnorm)
u7 = Conv2DTranspose(n_filters * 4, (3, 3), strides = (2, 2), padding = 'same')(c6)
u7 = concatenate([u7, c3])
u7 = Dropout(dropout)(u7)
c7 = conv2d_block(u7, n_filters * 4, kernel_size = 3, batchnorm = batchnorm)
u8 = Conv2DTranspose(n_filters * 2, (3, 3), strides = (2, 2), padding = 'same')(c7)
u8 = concatenate([u8, c2])
u8 = Dropout(dropout)(u8)
c8 = conv2d_block(u8, n_filters * 2, kernel_size = 3, batchnorm = batchnorm)
u9 = Conv2DTranspose(n_filters * 1, (3, 3), strides = (2, 2), padding = 'same')(c8)
u9 = concatenate([u9, c1])
u9 = Dropout(dropout)(u9)
c9 = conv2d_block(u9, n_filters * 1, kernel_size = 3, batchnorm = batchnorm)
outputs = Conv2D(1, (1, 1), activation='sigmoid')(c9)
model = Model(inputs=[input_img], outputs=[outputs])
return model
However, my segmentation masks are noisy and pixelated, like this:
U-NET mask for an airport
Also, When I tested my model with a sample image like this with solid colours it returned results like this.
Tested with a solid colour using Photoshop
the U-Net masks in a jagged way thats not smooth. Is there any fix for this?
Thanks in Advance
Related
I have done image segmentation of the image using PyTorch. I am trying to get the pixel count of Boat class to measure the area. As an example in the image I want to get the pixel count to measure the boat. How do I do that? from the pixel count is it possible to measure the are of the boat?
I am confused and trying to find a way. I would appreciate if anybody can guide me for that.
**The coding is as below:
**
from torchvision import models
fcn = models.segmentation.fcn_resnet101(pretrained=True).eval()
from PIL import Image
import matplotlib.pyplot as plt
import torch
img = Image.open('boat.jpg')
plt.imshow(img)
plt.show()
# Apply the transformations needed
#Resize the image to (256 x 256)
#CenterCrop it to (224 x 224)
import torchvision.transforms as T
trf = T.Compose([T.Resize(256),
T.CenterCrop(224),
T.ToTensor(),
T.Normalize(mean = [0.485, 0.456, 0.406],
std = [0.229, 0.224, 0.225])])
inp = trf(img).unsqueeze(0)
out = fcn(inp)['out']
print (out.shape)
#now this 21 channeled output into a 2D image or a 1 channeled image, where each pixel of that image corresponds to a class.
import numpy as np
om = torch.argmax(out.squeeze(), dim=0).detach().cpu().numpy()
print (om.shape)
print (np.unique(om))
# Define the helper function
def decode_segmap(image, nc=21):
label_colors = np.array([(0, 0, 0), # 0=background
# 1=aeroplane, 2=bicycle, 3=bird, 4=boat, 5=bottle
(128, 0, 0), (0, 128, 0), (128, 128, 0), (0, 0, 128), (128, 0, 128),
# 6=bus, 7=car, 8=cat, 9=chair, 10=cow
(0, 128, 128), (128, 128, 128), (64, 0, 0), (192, 0, 0), (64, 128, 0),
# 11=dining table, 12=dog, 13=horse, 14=motorbike, 15=person
(192, 128, 0), (64, 0, 128), (192, 0, 128), (64, 128, 128), (192, 128, 128),
# 16=potted plant, 17=sheep, 18=sofa, 19=train, 20=tv/monitor
(0, 64, 0), (128, 64, 0), (0, 192, 0), (128, 192, 0), (0, 64, 128)])
r = np.zeros_like(image).astype(np.uint8)
g = np.zeros_like(image).astype(np.uint8)
b = np.zeros_like(image).astype(np.uint8)
for l in range(0, nc):
idx = image == l
r[idx] = label_colors[l, 0]
g[idx] = label_colors[l, 1]
b[idx] = label_colors[l, 2]
rgb = np.stack([r, g, b], axis=2)
return rgb
rgb = decode_segmap(om)
plt.imshow(rgb); plt.show()
I want to find some guidance
You are looking for skimage.measure.regionprops. Once you have the predicted label map (om in your code) you can apply regionprops to it and get the area of each region.
According to your code snippet, the output om is a tensor of category indices (0 - background, 1 - aeroplane, 2 - bicycle,....).
In order to get the area of a specific category, you just need to compare the output map with the corresponding index, then sum up the results.
For example, with the category boat with the index 4:
BOAT_INDEX = 4
area = torch.sum(om == BOAT_INDEX).item()
i have a problem in my code as shown in this code
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
labels = ['Background', 'Korosi', 'Tanah', 'Tanaman']
np.random.seed(42)
COLORS = np.random.randint(0, 255, size=(len(labels), 3), dtype="uint8")
net = cv2.dnn.readNetFromONNX('anomali_model1.onnx')
layer_names = net.getLayerNames()
output_layers = [layer_names[i - 1] for i in net.getUnconnectedOutLayers)]
capture = cv2.VideoCapture(0)
while True: re, img = capture.read()
#img = cv2.cvtColor(img, cv2.COLOR_RGBA2RGB)
#height, width, channels = img.shape
#blob = cv2.dnn.blobFromImage(img, 0.00392, (256, 256),
#swapRB=True, crop=False)
blob = cv2.dnn.blobFromImage(img, swapRB=True, crop=False)
net.setInput(blob)
outs = net.forward(output_layers)
class_ids = []
confidences = []
boxes = []
for out in outs:
for detection in out:
scores = detection[5:]
class_id = np.argmax(scores)
confidence = scores[class_id]
if confidence > 0.5:
# Object detected
center_x = int(detection[0] * width)
center_y = int(detection[1] * height)
w = int(detection[2] * width)
h = int(detection[3] * height)
# Rectangle coordinates
x = int(center_x - w / 2)
y = int(center_y - h / 2)
boxes.append([x, y, w, h])
confidences.append(float(confidence))
class_ids.append(class_id)
indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)
font = cv2.FONT_HERSHEY_PLAIN
colors = np.random.uniform(0, 255, size=(len(classes), 3))
for i in range(len(boxes)):
if i in indexes:
x, y, w, h = boxes[i]
label = str(classes[class_ids[i]])
color = colors[class_ids[i]]
cv2.rectangle(img, (x, y), (x + w, y + h), color, 2)
cv2.putText(img, label, (x, y + 30), font, 2, color, 3)
cv2.imshow("Image",cv2.resize(img, (800,600)))
if cv2.waitKey(1) & 0xFF == ord('q'):
break
video_capture.release()
cv2.destroyAllWindows()
And i get error like this:
error Traceback (most recent call last)
~\AppData\Local\Temp\ipykernel_10644\3517982312.py in <module>
9 blob = cv2.dnn.blobFromImage(img, swapRB=True, crop=False)
10 net.setInput(blob)
---> 11 outs = net.forward(output_layers)
12
13 class_ids = []
error: OpenCV(3.4.17) D:\a\opencv-python\opencv-python\opencv\modules\dnn\src\layers\convolution_layer.cpp:331: error: (-2:Unspecified error) Number of input channels should be multiple of 3 but got 640 in function 'cv::dnn::ConvolutionLayerImpl::getMemoryShapes'
can you help me to solve this problem? because I've looked into various sources but did not find a solution.
library version for this code
python version 3.7
tensorflow version 2.0
opencv version 3.4.17
I hope you all can solve this problem and share with me
How to display the application in windows.
Code for Reference:
from tkinter import N
import numpy as np
from keras.preprocessing.image import img_to_array
import cv2
import imutils
from keras.models import load_model
import numpy as np
# parameters for loading data and images
detection_model_path = 'ER_Project//haar-cascade-files-master/haarcascade_frontalface_default.xml'
emotion_model_path = 'ER_Project/_mini_XCEPTION.102-0.66.hdf5'
# hyper-parameters for bounding boxes shape
# loading models
face_detection = cv2.CascadeClassifier(detection_model_path)
emotion_classifier = load_model(emotion_model_path, compile=False)
EMOTIONS = ["angry", "disgust", "scared", "happy", "sad", "surprised",
"neutral"]
#feelings_faces = []
# for index, emotion in enumerate(EMOTIONS):
# feelings_faces.append(cv2.imread('emojis/' + emotion + '.png', -1))
# starting video streaming
cv2.namedWindow('your_face')
camera = cv2.VideoCapture(0)
while True:
print("Hello")
frame = camera.read()[1]
# reading the frame
frame = imutils.resize(frame, width=300)
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
faces = face_detection.detectMultiScale(
gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30), flags=cv2.CASCADE_SCALE_IMAGE)
canvas = np.zeros((250, 300, 3), dtype="uint8")
frameClone = frame.copy()
if len(faces) > 0:
faces = sorted(faces, reverse=True,
key=lambda x: (x[2] - x[0]) * (x[3] - x[1]))[0]
(fX, fY, fW, fH) = faces
# Extract the ROI of the face from the grayscale image, resize it to a fixed 28x28 pixels, and then prepare
# the ROI for classification via the CNN
roi = gray[fY:fY + fH, fX:fX + fW]
roi = cv2.resize(roi, (64, 64))
roi = roi.astype("float") / 255.0
roi = img_to_array(roi)
roi = np.expand_dims(roi, axis=0)
preds = emotion_classifier.predict(roi)[0]
emotion_probability = np.max(preds)
label = EMOTIONS[preds.argmax()]
else:
continue
for (i, (emotion, prob)) in enumerate(zip(EMOTIONS, preds)):
# construct the label text
text = "{}: {:.2f}%".format(emotion, prob * 100)
# draw the label + probability bar on the canvas
# emoji_face = feelings_faces[np.argmax(preds)]
w = int(prob * 300)
cv2.rectangle(canvas, (7, (i * 35) + 5),
(w, (i * 35) + 35), (0, 0, 255), -1)
cv2.putText(canvas, text, (10, (i * 35) + 23),
cv2.FONT_HERSHEY_SIMPLEX, 0.45,
(255, 255, 255), 2)
cv2.putText(frameClone, label, (fX, fY - 10),
cv2.FONT_HERSHEY_SIMPLEX, 0.45, (0, 0, 255), 2)
cv2.rectangle(frameClone, (fX, fY), (fX + fW, fY + fH),
(0, 0, 255), 2)
# for c in range(0, 3):
# frame[200:320, 10:130, c] = emoji_face[:, :, c] * \
# (emoji_face[:, :, 3] / 255.0) + frame[200:320,
# 10:130, c] * (1.0 - emoji_face[:, :, 3] / 255.0)
cv2.imshow('your_face', frameClone)
cv2.imshow("Probabilities", canvas)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
camera.release()
cv2.destroyAllWindows()
NUMA SUPPORT:
2022-04-20 04:36:21.181568: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:922] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2022-04-20 04:36:21.181664: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 3951 MB memory: -> device: 0, name: NVIDIA GeForce GTX 1660 Ti, pci bus id: 0000:01:00.0, compute capability: 7.5
I need to run this openCV gui app on windows.
In my old project, I used a yolov3 model and trained it, and the used cv2.readNetFromDarknet(...), cv2.dnn.blobFromImage(...), and net.forward(...) in order to run my inference and it worked fine. I trained my yolov4 model and assumed that I only had to change the configuration and weights file in my project in order for the inference to work but instead I get no detected bounding boxes. Is there supposed to be a change other than the weights and configs? Thank you.
edit:
net = cv2.dnn.readNetFromDarknet(PATHS["model_config"], PATHS["model_weights"])
layer_names = net.getLayerNames()
output_layer_names = [ layer_names[index[0] - 1] for index in net.getUnconnectedOutLayers() ]
#during inference
def method(frame):
blob = cv2.dnn.blobFromImage(frame, 1 / 255.0, (MAGIC_NUMBER_1,
MAGIC_NUMBER_1), swapRB=True, crop=False)
net.setInput(blob)
layerOutputs = net.forward(output_layer_names)
boxes = []
confidences = []
classIDs = []
for output in layerOutputs:
for detection in output:
scores = detection[5:]
classID = np.argmax(scores)
confidence = scores[classID]
if confidence > .5: # NEVER PASSES, CONFIDENCE IS ALWAYS 0 WHEN PRINTED OUT
box = detection[0:4] * np.array([W, H, W, H])
(centerX, centerY, width, height) = box.astype("int")
x = int(centerX - (width / 2))
y = int(centerY - (height / 2))
boxes.append([x, y, int(width), int(height)])
confidences.append(float(confidence))
classIDs.append(classID)
return boxes, confidences, classIDs
I want to detect playing cards and found .cfg and .weights for it. Classes has 52cards names. Following code is giving index out of range error. I couldn't understand the outputs of Yolo and how to get the detected labels. I am new to this, have been trying to understand. Can someone please help!
import cv2
import numpy as np
# Load Yolo
net = cv2.dnn.readNet("yolocards_608.weights", "yolocards.cfg")
classes = []
with open("cards.names", "r") as f:
classes = [line.strip() for line in f.readlines()]
layer_names = net.getLayerNames()
output_layers = [layer_names[i[0] - 1] for i in net.getUnconnectedOutLayers()]
colors = np.random.uniform(0, 255, size=(len(classes), 3))
# Loading image
img = cv2.imread("playing_cards_image.jpg")
img = cv2.resize(img, None, fx=0.4, fy=0.4)
height, width, channels = img.shape
# Detecting objects
blob = cv2.dnn.blobFromImage(img, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
net.setInput(blob)
outs = net.forward(output_layers)
# Showing informations on the screen
class_ids = []
confidences = []
boxes = []
for out in outs:
print(out.shape)
for detection in out:
scores = detection[:]
class_id = np.argmax(scores)
confidence = scores[class_id]
if confidence > 0.5:
# Object detected
center_x = int(detection[0] * width)
center_y = int(detection[1] * height)
w = int(detection[2] * width)
h = int(detection[3] * height)
# Rectangle coordinates
x = int(center_x - w / 2)
y = int(center_y - h / 2)
boxes.append([x, y, w, h])
confidences.append(float(confidence))
class_ids.append(class_id)
indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)
font = cv2.FONT_HERSHEY_PLAIN
for j in range(len(boxes)):
if i in indexes:
x, y, w, h = boxes[i]
print(class_ids[i])
label = str(classes[class_ids[i]])
print(label)
color = colors[i]
cv2.rectangle(img, (x, y), (x + w, y + h), color, 2)
cv2.putText(img, label, (x, y + 30), font, 3, color, 3)
error:
0
Ah
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-46-adaf82305ab8> in <module>
6 label = str(classes[class_ids[i]])
7 print(label)
----> 8 color = colors[i]
9 cv2.rectangle(img, (x, y), (x + w, y + h), color, 2)
10 cv2.putText(img, label, (x, y + 30), font, 3, color, 3)
IndexError: index 52 is out of bounds for axis 0 with size 52