I have an application like this [1], with one display that shows real-time video from a Basler camera. I have already figured out how to connect to the Basler camera and show the video, but the video is not very smooth.
# Connect to a camera
for i in MainWindow.camera_db.all():
    if True:
        info = None
        for x in pylon.TlFactory.GetInstance().EnumerateDevices():
            if x.GetSerialNumber() == i['id']:
                info = x
                break
        if info is not None:
            camera = pylon.InstantCamera(pylon.TlFactory.GetInstance().CreateDevice(info))
            camera.Open()
            if MainWindow.viewer1 is None:
                MainWindow.viewer1 = BaslerOpenCVViewer(camera)
                logging.warning(f'Camera 1 - serial number: {i["id"]}-OK')
        else:
            logging.warning('Camera with {} serial number not found'.format(i['id']))
and then I tried
def update_frame(self):
    try:
        frame = MainWindow.viewer1.get_image()
        # frame = cv2.imread('test.jpg')
        self.load_display1(frame)  # take a frame and show it on MainWindow.display
        return frame
    except Exception as e:
        logging.warning(str(e))

self.time_get_image = QtCore.QTimer(self, interval=1)
self.time_get_image.timeout.connect(self.update_frame)  # call update_frame every 1 ms to get real-time video from the Basler camera, but it does not work well
self.time_get_image.start()
Is there another way to connect to the Basler camera in continuous mode and show the video in the application?
Create a label and send the image to the displayImage function; you will get the image.
from pypylon import pylon
import cv2

camera = pylon.InstantCamera(pylon.TlFactory.GetInstance().CreateFirstDevice())
camera.StartGrabbing(pylon.GrabStrategy_LatestImageOnly)
converter = pylon.ImageFormatConverter()
converter.OutputPixelFormat = pylon.PixelType_BGR8packed
converter.OutputBitAlignment = pylon.OutputBitAlignment_MsbAligned

while camera.IsGrabbing():
    grabResult = camera.RetrieveResult(5000, pylon.TimeoutHandling_ThrowException)
    if grabResult.GrabSucceeded():
        image = converter.Convert(grabResult)
        img = image.GetArray()
        self.displayImage(img)
        cv2.imshow("video", img)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

cv2.destroyAllWindows()
def displayImage(self, img):
    qformat = QImage.Format_Indexed8
    if len(img.shape) == 3:
        if img.shape[2] == 4:
            qformat = QImage.Format_RGBA8888
        else:
            qformat = QImage.Format_RGB888
    img = QImage(img, img.shape[1], img.shape[0], qformat)
    img = img.rgbSwapped()
    self.ui.Camera_lbl.setPixmap(QPixmap.fromImage(img))
    self.ui.Camera_lbl.setAlignment(QtCore.Qt.AlignHCenter | QtCore.Qt.AlignVCenter)
You can use the following code
from pypylon import pylon
import cv2

# connecting to the first available camera
camera = pylon.InstantCamera(pylon.TlFactory.GetInstance().CreateFirstDevice())

# grabbing continuously (video) with minimal delay
camera.StartGrabbing(pylon.GrabStrategy_LatestImageOnly)
converter = pylon.ImageFormatConverter()

# converting to OpenCV BGR format
converter.OutputPixelFormat = pylon.PixelType_BGR8packed
converter.OutputBitAlignment = pylon.OutputBitAlignment_MsbAligned

while camera.IsGrabbing():
    grabResult = camera.RetrieveResult(5000, pylon.TimeoutHandling_ThrowException)
    if grabResult.GrabSucceeded():
        # Access the image data
        image = converter.Convert(grabResult)
        img = image.GetArray()
        cv2.namedWindow('title', cv2.WINDOW_NORMAL)
        cv2.imshow('title', img)
        k = cv2.waitKey(1)
        if k == 27:
            break
    grabResult.Release()

# Releasing the resource
camera.StopGrabbing()
cv2.destroyAllWindows()
The code is taken from the pypylon GitHub repository: pypylon/samples/opencv.py
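To get a smooth stream inside the asker's PyQt window rather than an OpenCV window, a common pattern is to run the blocking grab loop in a worker thread and hand each frame to the GUI through a signal, instead of polling with a 1 ms QTimer. The sketch below is only an outline and is not from the original answers; FrameGrabber and frame_ready are hypothetical names, and load_display1 is the display slot from the question.

# Minimal sketch: grab continuously in a QThread and emit frames to the GUI thread.
# FrameGrabber and frame_ready are hypothetical names. Assumes pypylon and PyQt5.
import numpy as np
from pypylon import pylon
from PyQt5 import QtCore


class FrameGrabber(QtCore.QThread):
    frame_ready = QtCore.pyqtSignal(np.ndarray)  # emits BGR frames

    def __init__(self, camera, parent=None):
        super().__init__(parent)
        self.camera = camera
        self.converter = pylon.ImageFormatConverter()
        self.converter.OutputPixelFormat = pylon.PixelType_BGR8packed
        self.converter.OutputBitAlignment = pylon.OutputBitAlignment_MsbAligned

    def run(self):
        # LatestImageOnly drops stale frames, so the GUI always gets a fresh image.
        self.camera.StartGrabbing(pylon.GrabStrategy_LatestImageOnly)
        while self.camera.IsGrabbing() and not self.isInterruptionRequested():
            grabResult = self.camera.RetrieveResult(5000, pylon.TimeoutHandling_ThrowException)
            if grabResult.GrabSucceeded():
                self.frame_ready.emit(self.converter.Convert(grabResult).GetArray())
            grabResult.Release()
        self.camera.StopGrabbing()


# Usage inside the main window (hypothetical wiring):
# self.grabber = FrameGrabber(camera, self)
# self.grabber.frame_ready.connect(self.load_display1)  # delivered on the GUI thread
# self.grabber.start()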
I am currently working on instance segmentation. I followed these two tutorials:
https://haochen23.github.io/2020/06/fine-tune-mask-rcnn-pytorch.html
https://colab.research.google.com/github/dlmacedo/starter-academic/blob/master/content/courses/deeplearning/notebooks/pytorch/torchvision_finetuning_instance_segmentation.ipynb#scrollTo=mTgWtixZTs3X
However, these two tutorials work perfectly for a single class (person + background). In my case, I have two classes (person and car) plus background, and I couldn't find any resources on making Mask R-CNN work with multiple object classes.
Notice that:
I am using PyTorch (torchvision): torch==1.10.0+cu111, torchvision==0.11.0+cu111, torchaudio==0.10.0
I am using Pascal VOC annotations
I used the segmentation class masks (not the XML files) plus the images
and this is my dataset class
import os

import numpy as np
import torch
from PIL import Image


class PennFudanDataset(torch.utils.data.Dataset):
    def __init__(self, root, transforms=None):
        self.root = root
        self.transforms = transforms
        # load all image files, sorting them to
        # ensure that they are aligned
        self.imgs = list(sorted(os.listdir(os.path.join(root, "img"))))
        self.masks = list(sorted(os.listdir(os.path.join(root, "imgMask"))))

    def __getitem__(self, idx):
        # load images and masks
        img_path = os.path.join(self.root, "img", self.imgs[idx])
        mask_path = os.path.join(self.root, "imgMask", self.masks[idx])
        img = Image.open(img_path).convert("RGB")
        # note that we haven't converted the mask to RGB,
        # because each color corresponds to a different instance
        # with 0 being background
        mask = Image.open(mask_path)
        mask = np.array(mask)
        # instances are encoded as different colors
        obj_ids = np.unique(mask)
        # first id is the background, so remove it
        obj_ids = obj_ids[1:]
        # split the color-encoded mask into a set
        # of binary masks
        masks = mask == obj_ids[:, None, None]
        # get bounding box coordinates for each mask
        num_objs = len(obj_ids)
        boxes = []
        for i in range(num_objs):
            pos = np.where(masks[i])
            xmin = np.min(pos[1])
            xmax = np.max(pos[1])
            ymin = np.min(pos[0])
            ymax = np.max(pos[0])
            boxes.append([xmin, ymin, xmax, ymax])
        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        # there is only one class
        labels = torch.ones((num_objs,), dtype=torch.int64)
        masks = torch.as_tensor(masks, dtype=torch.uint8)
        image_id = torch.tensor([idx])
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        # suppose all instances are not crowd
        iscrowd = torch.zeros((num_objs,), dtype=torch.int64)
        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["masks"] = masks
        target["image_id"] = image_id
        target["area"] = area
        target["iscrowd"] = iscrowd
        if self.transforms is not None:
            img, target = self.transforms(img, target)
        return img, target

    def __len__(self):
        return len(self.imgs)
Can anyone help me?
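As a hedged sketch of the usual adaptation (this is not from the thread; it follows the torchvision finetuning tutorial linked above): the labels tensor has to carry one class id per instance instead of torch.ones(...), and the box and mask heads have to be rebuilt with num_classes = 3 (background, person, car). How each instance id in the mask PNG maps to a class id depends on how the masks were exported, so the mapping shown in the comments below is an assumption.

# Hedged sketch only (not from the original thread). Two foreground classes
# (person, car) plus background => num_classes = 3. The head replacement follows
# the standard torchvision finetuning recipe.
import torch
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor


def build_model(num_classes=3):  # background + person + car
    model = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True)
    # replace the box head
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    # replace the mask head
    in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
    model.roi_heads.mask_predictor = MaskRCNNPredictor(in_features_mask, 256, num_classes)
    return model


# In __getitem__, labels must carry one class id per instance instead of
# torch.ones(...). If, for example, person instances were encoded with pixel
# values below 100 and car instances with values of 100 or more (an assumed
# convention, adjust to your masks), it could look like:
# labels = torch.as_tensor(
#     [1 if obj_id < 100 else 2 for obj_id in obj_ids], dtype=torch.int64
# )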
In the following code, I retrieve a created blob URL which I intend to process. Could anyone suggest a tutorial that steps through how I would download the blob (which is a video), open it, and process each frame when this event is triggered?
You could refer to this article and the download_blob method to download the blob, and refer to here for processing each frame.
import json
import logging
import cv2
import azure.functions as func
from azure.storage.blob import BlobServiceClient, generate_blob_sas, AccessPolicy, BlobSasPermissions
from azure.core.exceptions import ResourceExistsError
from datetime import datetime, timedelta


def main(event: func.EventGridEvent):
    result = json.dumps({
        'id': event.id,
        'data': event.get_json(),
        'topic': event.topic,
        'subject': event.subject,
        'event_type': event.event_type,
    })
    logging.info('Python EventGrid trigger processed an event: %s', result)

    connect_string = "connect string of storage"
    DEST_FILE = "path to download the video"
    blob_service_client = BlobServiceClient.from_connection_string(connect_string)

    blob_url = event.get_json().get('url')
    logging.info('blob URL: %s', blob_url)
    blob_name = blob_url.split("/")[-1].split("?")[0]
    container_name = blob_url.split("/")[-2].split("?")[0]

    # Download blob to DEST_FILE
    blob_client = blob_service_client.get_blob_client(container=container_name, blob=blob_name)
    with open(DEST_FILE, "wb") as my_blob:
        download_stream = blob_client.download_blob()
        my_blob.write(download_stream.readall())

    # Process images of the video, frame by frame
    video_path = DEST_FILE  # the blob was written directly to DEST_FILE above
    logging.info('video path: %s', video_path)
    cap = cv2.VideoCapture(video_path)
    count = 0
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:  # stop once the video runs out of frames
            break
        cv2.imshow('window-name', frame)
        cv2.imwrite("frame%d.jpg" % count, frame)
        count = count + 1
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()  # destroy all opened windows
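One caveat, not part of the original answer: splitting the URL on "/" only works when the blob sits directly in the container root. A hedged alternative using the standard library's urllib.parse handles nested blob paths as well; split_blob_url is a hypothetical helper name.

# Hedged sketch: derive container and blob names from the Event Grid blob URL.
# Also works for nested paths such as .../mycontainer/videos/2021/clip.mp4.
from urllib.parse import urlparse


def split_blob_url(blob_url: str):
    # e.g. https://myaccount.blob.core.windows.net/mycontainer/videos/clip.mp4
    path = urlparse(blob_url).path.lstrip("/")
    container_name, _, blob_name = path.partition("/")
    return container_name, blob_name


# container_name, blob_name = split_blob_url(event.get_json().get('url'))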
Here is my simple code to display an image in a QGraphicsView (PyQt, Python 3.7). I want the image pixel coordinates when the mouse is pressed on the scene or window of the QGraphicsView / QGraphicsScene.
Mouse Press Function
Mouse Press Event Handler
def mousePressEvent(self):
    p = QtGui.QCursor.pos()
    print("pressed here: ", p)
Mouse Press Event caller
self.scene1.mousePressEvent = mousePressEvent
Main Code
import cv2
from PyQt5 import QtCore, QtGui, QtWidgets
from PyQt5.QtGui import *
from PyQt5.QtWidgets import QGraphicsScene, QAction


class Ui_MainWindow(object):
    def setupUi(self, MainWindow):
        MainWindow.setObjectName("MainWindow")
        MainWindow.resize(800, 600)
        self.centralwidget = QtWidgets.QWidget(MainWindow)
        self.centralwidget.setObjectName("centralwidget")
        self.graphicsView = QtWidgets.QGraphicsView(self.centralwidget)
        self.graphicsView.setGeometry(QtCore.QRect(20, 10, 761, 561))
        self.graphicsView.setObjectName("graphicsView")
        MainWindow.setCentralWidget(self.centralwidget)
        self.statusbar = QtWidgets.QStatusBar(MainWindow)
        self.statusbar.setObjectName("statusbar")
        MainWindow.setStatusBar(self.statusbar)
        self.retranslateUi(MainWindow)
        QtCore.QMetaObject.connectSlotsByName(MainWindow)

    # ---- Mouse Press Event Handler ---- #
    def mousePressEvent(self):
        p = QtGui.QCursor.pos()  # Here I want the image pixel coordinate (x, y); how can we get it?
        print("pressed here: ", p)

    def retranslateUi(self, MainWindow):
        _translate = QtCore.QCoreApplication.translate
        MainWindow.setWindowTitle(_translate("MainWindow", "MainWindow"))
        # -------------------------------------------------
        image = cv2.imread('lena.jpg')  # Read image
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        height, width = image.shape  # read image size
        self.image_disp = QImage(image.data, width, height, QImage.Format_Grayscale8)
        # -------------------------------------------------
        self.scene1 = QGraphicsScene()
        pixMap = QPixmap.fromImage(self.image_disp)
        self.scene1.addPixmap(pixMap)
        self.graphicsView.setScene(self.scene1)
        # ---- Mouse Press Event caller ---- #
        self.scene1.mousePressEvent = self.mousePressEvent


if __name__ == "__main__":
    import sys

    app = QtWidgets.QApplication(sys.argv)
    MainWindow = QtWidgets.QMainWindow()
    ui = Ui_MainWindow()
    ui.setupUi(MainWindow)
    MainWindow.show()
    sys.exit(app.exec_())
You should not modify the class generated by Qt Designer (1); instead, create another class that inherits from the appropriate widget and use the generated class as an interface.
Do not override the mousePressEvent method with self.scene1.mousePressEvent = mousePressEvent, because that discards the default implementation. Instead, create a class that inherits from QGraphicsScene, or use an event filter; in this case I will use the second method.
To obtain the position of the mouse with respect to the image (QGraphicsPixmapItem), you must use the transformations between the different elements of the Qt Graphics Framework.
import os

import cv2
from PyQt5 import QtCore, QtGui, QtWidgets


class Ui_MainWindow(object):
    def setupUi(self, MainWindow):
        MainWindow.setObjectName("MainWindow")
        MainWindow.resize(800, 600)
        self.centralwidget = QtWidgets.QWidget(MainWindow)
        self.centralwidget.setObjectName("centralwidget")
        self.graphicsView = QtWidgets.QGraphicsView(self.centralwidget)
        self.graphicsView.setGeometry(QtCore.QRect(20, 10, 761, 561))
        self.graphicsView.setObjectName("graphicsView")
        MainWindow.setCentralWidget(self.centralwidget)
        self.statusbar = QtWidgets.QStatusBar(MainWindow)
        self.statusbar.setObjectName("statusbar")
        MainWindow.setStatusBar(self.statusbar)
        self.retranslateUi(MainWindow)
        QtCore.QMetaObject.connectSlotsByName(MainWindow)

    def retranslateUi(self, MainWindow):
        _translate = QtCore.QCoreApplication.translate
        MainWindow.setWindowTitle(_translate("MainWindow", "MainWindow"))


class MainWindow(QtWidgets.QMainWindow, Ui_MainWindow):
    def __init__(self, parent=None):
        super().__init__(parent)
        self.setupUi(self)

        self.scene = QtWidgets.QGraphicsScene(self)
        self.graphicsView.setScene(self.scene)
        self.scene.installEventFilter(self)

        current_dir = os.path.dirname(os.path.realpath(__file__))
        filename = os.path.join(current_dir, "lena.jpg")

        image = cv2.imread(filename)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        height, width = image.shape
        image_disp = QtGui.QImage(
            image.data, width, height, QtGui.QImage.Format_Grayscale8
        )
        pixMap = QtGui.QPixmap.fromImage(image_disp)
        self.pixmap_item = self.scene.addPixmap(pixMap)

    def eventFilter(self, obj, event):
        if obj is self.scene and event.type() == QtCore.QEvent.GraphicsSceneMousePress:
            spf = event.scenePos()
            lpf = self.pixmap_item.mapFromScene(spf)
            brf = self.pixmap_item.boundingRect()
            if brf.contains(lpf):
                lp = lpf.toPoint()
                print(lp)
        return super().eventFilter(obj, event)


if __name__ == "__main__":
    import sys

    app = QtWidgets.QApplication(sys.argv)
    w = MainWindow()
    w.show()
    sys.exit(app.exec_())
(1) Using the Generated Code
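Since the question asks for the image pixel and not just the click position, a hedged extension of the eventFilter above (not part of the original answer) can look the value up in the NumPy array loaded with OpenCV, assuming __init__ also keeps a reference to it as self.image = image.

    def eventFilter(self, obj, event):
        # assumes __init__ also stored the grayscale array: self.image = image
        if obj is self.scene and event.type() == QtCore.QEvent.GraphicsSceneMousePress:
            lpf = self.pixmap_item.mapFromScene(event.scenePos())
            if self.pixmap_item.boundingRect().contains(lpf):
                lp = lpf.toPoint()
                x, y = lp.x(), lp.y()
                value = self.image[y, x]  # NumPy indexing: row (y) first, column (x) second
                print(f"pixel ({x}, {y}) -> {value}")
        return super().eventFilter(obj, event)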
I'm completely new to OpenCV and Tesseract.
I spent all day trying to write code that would parse the game duration from images like this: original image (the game duration is in the top-left corner).
I came up with code that manages to recognize the duration only sometimes (about 40% of all cases). Here it is:
try:
    from PIL import Image
except ImportError:
    import Image
import os
import cv2
import pytesseract
import re
import json


def non_digit_split(s):
    return filter(None, re.split(r'(\d+)', s))


def time_to_sec(min, sec):
    return (int(min) * 60 + int(sec)).__str__()


def process_img(image_url):
    img = cv2.resize(cv2.imread('./images/' + image_url), None, fx=5, fy=5, interpolation=cv2.INTER_CUBIC)
    str = pytesseract.image_to_string(img)
    if "WIN " in str:
        time = list(non_digit_split(str.split("WIN ", 1)[1][0:6].strip()))
        str = time_to_sec(time[0], time[2])
    else:
        str = 'Not recognized'
    return str


res = {}
img_list = os.listdir('./images')
print(img_list)
for i in img_list:
    res[i] = process_img(i)

with open('output.txt', 'w') as file:
    file.write(json.dumps(res))
Don't even ask how I came to resizing the image, but it helped a little.
I also tried cropping the image first, like this:
cropped image
but Tesseract couldn't find any text there.
I'm sure the issue I'm trying to solve is pretty easy. Can you please point me in the right direction? How should I preprocess the image so that Tesseract parses it correctly?
Thanks to DmitriiZ's comment, I managed to produce a working piece of code.
I made a preprocessor that outputs something like this:
Preprocessed image
Tesseract handles it just fine.
Here is the full code:
try:
    from PIL import Image
except ImportError:
    import Image
import os
import pytesseract
import json


def is_dark(image):
    pixels = image.getdata()
    black_thresh = 100
    nblack = 0
    for pixel in pixels:
        if (sum(pixel) / 3) < black_thresh:
            nblack += 1
    n = len(pixels)
    if (nblack / float(n)) > 0.5:
        return True
    else:
        return False


def preprocess(img):
    basewidth = 500
    wpercent = (basewidth / float(img.size[0]))
    hsize = int((float(img.size[1]) * float(wpercent)))
    # Enlarging image
    img = img.resize((basewidth, hsize), Image.ANTIALIAS)
    # Converting image to black and white
    img = img.convert("1", dither=Image.NONE)
    return img


def process_img(image_url):
    img = Image.open('./images/' + image_url)
    # Area we need to crop can be found in one of two different areas,
    # depending on which team won. You can replace that block and is_dark()
    # function by just img.crop().
    top_area = (287, 15, 332, 32)
    crop = img.crop(top_area)
    if is_dark(crop):
        bot_area = (287, 373, 332, 390)
        crop = img.crop(bot_area)
    img = preprocess(crop)
    str = pytesseract.image_to_string(img)
    return str


res = {}
img_list = os.listdir('./images')
print(img_list)
for i in img_list:
    res[i] = process_img(i)

with open('output.txt', 'w') as file:
    file.write(json.dumps(res))
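If recognition is still unreliable, one further tweak worth trying (not from the original answer, so verify it on your own images) is to tell Tesseract that the crop contains a single text line and to whitelist digits and the colon:

# Hedged example: constrain Tesseract to one text line and a digit/colon whitelist.
# --psm 7 means "treat the image as a single text line" (Tesseract 4+).
text = pytesseract.image_to_string(
    img,
    config='--psm 7 -c tessedit_char_whitelist=0123456789:'
)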
I use the cv2 and PIL modules to edit an image:
image = cv2.medianBlur(img_gray, 7)
image = Image.fromarray(image)
How can I show the image in PyQt4?
Use this code:
def pil2pixmap(self, im):
    if im.mode == "RGB":
        pass
    # elif im.mode == "L":
    #     im = im.convert("RGBA")
    data = im.convert("RGBA").tostring("raw", "RGBA")
    qim = QtGui.QImage(data, im.size[0], im.size[1], QtGui.QImage.Format_ARGB32)
    pixmap = QtGui.QPixmap.fromImage(qim)
    return pixmap
and it works.
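For completeness, a hedged usage sketch of how the question's image could be pushed through pil2pixmap and shown on a label inside the widget class (self.label is an assumed QLabel name, not from the original post):

# Hedged usage sketch (inside the widget class); self.label is an assumed QLabel.
image = cv2.medianBlur(img_gray, 7)   # img_gray as in the question
pil_image = Image.fromarray(image)    # grayscale array -> PIL image (mode "L")
self.label.setPixmap(self.pil2pixmap(pil_image))

Note that newer Pillow releases replaced Image.tostring with Image.tobytes, so pil2pixmap may need that one-line change on current Pillow versions.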