Adding dynamic plots to video - opencv

I'm trying to append dynamic graph plots to an existing video.
Currently I create a pyplot figure for each frame and concatenate the video frame with the figure (using np.from_buffer to extract the frame of each figure), which is very slow. In the end I use OpenCV's VideoWriter to export the concatenated frames into a video file.
I tried to use the FuncAnimation class, but I couldn't figure how to adopt it into my pipeline.
Below is my current code. Any way I could speed things up?
class DynamicGraph(ABC):
def __init__(self, data, height, dpi):
self._data = data.copy()
self.width, self.height = int(1.5 * height), height
self.filters = filters
self.dpi = dpi
self.data = data
nona = data[~np.isnan(data)]
self.ylim = (np.min(nona), np.max(nona))
def to_numpy(self, fig):
fig.tight_layout()
fig.canvas.draw()
frame = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)
frame = cv2.cvtColor(frame.reshape(fig.canvas.get_width_height()[::-1] + (3,)), cv2.COLOR_RGB2BGR)
plt.close(fig)
return frame
def __call__(self, i):
return self.plot(i)
#abstractmethod
def plot(self, i):
pass
class DynamicSignal(DynamicGraph):
def __init__(self, title, data, legend, xlabel, ylabel, window_size, height, filters=(), dpi=128):
self.title, self.legend, self.xlabel, self.ylabel = title, legend, xlabel, ylabel
self.window_size = window_size
self.radius = self.window_size // 2
self.ticks = 50
pad = np.zeros((self.radius, data.shape[1]))
super().__init__(data=np.concatenate((pad, data, pad), axis=0), height=height, filters=filters, dpi=dpi)
def plot(self, i):
fig, ax = plt.subplots(figsize=(self.width / self.dpi, self.height / self.dpi), dpi=self.dpi)
x = np.arange(i-self.radius, i+self.radius+1)
y = self.data[i:i+2*self.radius+1]
ax.plot(x, y)
mn, mx = x.min(), x.max()
xticks = np.linspace(*(np.round(np.array([mn, mx])/self.ticks) * self.ticks), (mx - mn) // self.ticks + 1)
ax.set(title=self.title, xlabel=self.xlabel, xlim=(mn, mx), xticks=xticks, ylabel=self.ylabel, ylim=self.ylim)
ax.axvline(x=i, color='r', linestyle='dotted')
ax.grid()
ax.legend(self.legend, loc='upper right')
return self.to_numpy(fig)
class VideoCreator:
def __init__(self, painters=(), graphs=()):
self.painters = painters
self.graphs = graphs
def process_frame(self, frame, i):
out_frame = frame.copy()
if any(self.graphs):
graphs = [graph(i) for graph in self.graphs]
out_frame = np.concatenate((out_frame, np.concatenate(graphs, axis=0)), axis=1)
return out_frame
def create_video(self, video_path, video_data, out_path, start=None, end=None):
fps, length, (width, height) = video_data['fps'], video_data['frame_count'], video_data['resolution']
start, end = int(0 if start is None else start), int(length if end is None else end)
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(out_path, fourcc, fps, width + self.graphs[0].width if any(self.graphs) else 0, height))
cap = cv2.VideoCapture(video_path)
cap.set(cv2.CAP_PROP_POS_FRAMES, start)
for i in tqdm(range(start, end), desc="Writing video result"):
ret, frame = cap.read()
frame = self.process_frame(frame, i)
out.write(frame)
cap.release()
out.release()

Related

Frame generation for video in python

I am using the Wand library to generate images. Next, I create a video from these frames. Each frame represents a set of animated objects, text, shapes, embedded images. To create a one-minute sequence, my machine takes about three minutes of working time. How can i use resources more efficiently and speed up execution using python?
from wand.color import Color
from wand.image import Image
from wand.drawing import Drawing
from wand.color import Color
from wand.font import Font
from pathlib import Path
resolution = "1080, 1920".split(",")
duration = "2"
framerate = "24"
width, height = int(resolution[0]), int(resolution[1])
amount = int(duration) * int(framerate)
temp_dir = Path().absolute() / "generator" / "temp_2"
font_path = "C:\\Windows\\Fonts\\arial.ttf"
src_path = Path().absolute() / "generator" / "src"
def framegen(width, height, amount) -> list:
frames = []
x_position = int(width/2)
y_position = int(height/2)
font_size = 80
text = "Hello world!"
background_pattern = Image(filename=src_path / "pattern.png")
for i in range(amount+1):
frames.append(Image(width=width, height=height,
background=Color("white")))
with Drawing() as draw:
# Draw pattern image
draw.composite(operator='over', left=0, top=0, width=background_pattern.width,
height=background_pattern.height, image=background_pattern)
draw.fill_color = Color("blue")
draw.rectangle(left=x_position-100, top=y_position -
100, right=x_position+100, bottom=y_position+100)
draw.font = font_path
draw.font_size = font_size
draw.fill_color = Color("black")
draw.text_alignment = "center"
draw.text(x_position, y_position, text)
draw(frames[i])
background_pattern.rotate(1)
y_position -= 5
font_size += 2
return frames
def save_frames(frames, temp_dir):
paths = []
for i, frame in enumerate(frames):
frame.save(filename=f"{temp_dir}/frame_{i}.png")
paths.append(f"{temp_dir}/frame_{i}.png")
return paths
if __name__ == "__main__":
frames = framegen(width, height, amount)
save_frames(frames, temp_dir)

UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7fd42c66d3b0>

I am trying to do an object detection problem and been working with aquarium dataset from roboflow. I have been trying to create a bounding box for the fishes, but I have getting the error:
UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7fd42c66d3b0>
I also tried to see what images are corrupted and ran a code
import PIL
from pathlib import Path
from PIL import UnidentifiedImageError
count = 0
path = Path("/content/drive/MyDrive/archive/Aquarium Combined").rglob("*.jpg")
for img_p in path:
try:
img = PIL.Image.open(img_p)
except PIL.UnidentifiedImageError:
print(img_p)
count +=1
print(count)
It has given me a count of 651 images, but my dataset has 662 images. I guess PIL doesn't know how to decode it or I don't know what the problem is. I will attach a sample image file name
/content/drive/MyDrive/archive/Aquarium Combined/test/IMG_2301_jpeg_jpg.rf.2c19ae5efbd1f8611b5578125f001695.jpg
Full traceback:
UnidentifiedImageError Traceback (most recent call last)
<ipython-input-31-2785d562a97e> in <module>()
4 sample[1]['boxes'][:, [1, 0, 3, 2]],
5 [classes[i] for i in sample[1]['labels']],
----> 6 width=4).permute(1, 2, 0)
7 )
3 frames
/usr/local/lib/python3.7/dist-packages/PIL/Image.py in open(fp, mode)
2894 if mode == "P":
2895 from . import ImagePalette
-> 2896
2897 im.palette = ImagePalette.ImagePalette("RGB", im.im.getpalette("RGB"))
2898 im.readonly = 1
UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7fd42c66d3b0>
Also I am providing the class functions"
class AquariumDetection(datasets.VisionDataset):
def __init__(
self,
root: str,
split = "train",
transform= None,
target_transform = None,
transforms = None,
) -> None:
super().__init__(root, transforms, transform, target_transform)
self.split = split
self.coco = COCO(os.path.join(root, split, "_annotations.coco.json"))
self.ids = list(sorted(self.coco.imgs.keys()))
self.ids = [id for id in self.ids if (len(self._load_target(id)) > 0)]
def _load_image(self, id: int) -> Image.Image:
path = self.coco.loadImgs(id)[0]["file_name"]
image = cv2.imread(os.path.join(self.root, self.split, path))
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
return image
def _load_target(self, id: int):
return self.coco.loadAnns(self.coco.getAnnIds(id))
def __getitem__(self, index: int):
id = self.ids[index]
image = self._load_image(id)
target = copy.deepcopy(self._load_target(id))
boxes = [t['bbox'] + [t['category_id']] for t in target]
if self.transforms is not None:
transformed = self.transforms(image=image, bboxes=boxes)
image = transformed['image']
boxes = transformed['bboxes']
new_boxes = []
for box in boxes:
xmin = box[0]
ymin = box[1]
xmax = xmin + box[2]
ymax = ymin + box[3]
new_boxes.append([ymin, xmin, ymax, xmax])
boxes = torch.tensor(new_boxes, dtype=torch.float32)
_, h, w = image.shape
targ = {}
targ["boxes"] = boxes
targ["labels"] = torch.tensor([t["category_id"] for t in target], dtype=torch.int64)
targ["image_id"] = torch.tensor([t["image_id"] for t in target])
targ["area"] = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
targ["iscrowd"] = torch.tensor([t["iscrowd"] for t in target], dtype=torch.int64)
targ["img_scale"] = torch.tensor([1.0])
targ['img_size'] = (h, w)
image = image.div(255)
normalize = T.Compose([T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])
return normalize(image), targ, index
def __len__(self) -> int:
return len(self.ids)

Dimension error in neural network model for classification

Below is the code for Hierarchical Attention Networks, taken from https://github.com/arunarn2/HierarchicalAttentionNetworks. The only difference in the code on the link and mine is that I have 3 classes for classification, whereas they are using 2
maxlen = 100
max_sentences = 15
max_words = 20000
embedding_dim = 100
validation_split = 0.2
#class defining the custom attention layer
class HierarchicalAttentionNetwork(Layer):
def __init__(self, attention_dim):
self.init = initializers.get('normal')
self.supports_masking = True
self.attention_dim = attention_dim
super(HierarchicalAttentionNetwork, self).__init__()
def build(self, input_shape):
assert len(input_shape) == 3
self.W = K.variable(self.init((input_shape[-1], self.attention_dim)))
self.b = K.variable(self.init((self.attention_dim,)))
self.u = K.variable(self.init((self.attention_dim, 1)))
self.trainable_weightss = [self.W, self.b, self.u]
super(HierarchicalAttentionNetwork, self).build(input_shape)
def compute_mask(self, inputs, mask=None):
return mask
def call(self, x, mask=None):
# size of x :[batch_size, sel_len, attention_dim]
# size of u :[batch_size, attention_dim]
# uit = tanh(xW+b)
uit = K.tanh(K.bias_add(K.dot(x, self.W), self.b))
ait = K.exp(K.squeeze(K.dot(uit, self.u), -1))
if mask is not None:
# Cast the mask to floatX to avoid float64 upcasting
ait *= K.cast(mask, K.floatx())
ait /= K.cast(K.sum(ait, axis=1, keepdims=True) + K.epsilon(), K.floatx())
weighted_input = x * K.expand_dims(ait)
output = K.sum(weighted_input, axis=1)
return output
def compute_output_shape(self, input_shape):
return input_shape[0], input_shape[-1]
# building Hierachical Attention network
embedding_matrix = np.random.random((len(word_index) + 1, embedding_dim))
for word, i in word_index.items():
embedding_vector = embeddings_index.get(word)
if embedding_vector is not None:
# words not found in embedding index will be all-zeros.
embedding_matrix[i] = embedding_vector
embedding_layer = Embedding(len(word_index) + 1, embedding_dim, weights=[embedding_matrix],
input_length=maxlen, trainable=True, mask_zero=True)
sentence_input = Input(shape=(maxlen,), dtype='int32')
embedded_sequences = embedding_layer(sentence_input)
lstm_word = Bidirectional(GRU(100, return_sequences=True))(embedded_sequences)
attn_word = HierarchicalAttentionNetwork(100)(lstm_word)
sentenceEncoder = Model(sentence_input, attn_word)
review_input = Input(shape=(max_sentences, maxlen), dtype='int32')
review_encoder = TimeDistributed(sentenceEncoder)(review_input)
lstm_sentence = Bidirectional(GRU(100, return_sequences=True))(review_encoder)
attn_sentence = HierarchicalAttentionNetwork(100)(lstm_sentence)
preds = Dense(3, activation='softmax')(attn_sentence)
model = Model(review_input, preds)
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])
print("model fitting - Hierachical attention network")
Following is the error I get. Please help me understand what the error means and how I can possibly resolve it.

Struggling with Normalization values when training model

I am trying to train an adversarial patch located at the bottom left corner of the image to cause a misclassification. Currently, I am using these parameters to normalize the CIFAR10 dataset.
transform = transforms.Compose([transforms.ToTensor(),transforms.Normalize((0.4914,0.4822,0.4465),(0.2023,0.1994,0.201))]
This would result in the images having a maximum and minimum value of around 2.55 and -2.55 respectively. However, I'm not sure how to work with this range when training my patch. I struggle between converting the patch from a range of (0,1) to (-2.55,2.55). Any help is appreciated!
My code for training is below: (I don't think its training properly for now)
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models
from torch.utils.data import DataLoader
from torch.utils.data.sampler import SubsetRandomSampler
import matplotlib.pyplot as plt
import numpy as np
from torch.autograd import Variable
import torchattacks
import random
import torch.nn.functional as F
dictionary ={
'0':'airplane',
'1':'automobile',
'2':'bird',
'3':'cat',
'4':'deer',
'5':'dog',
'6':'frog',
'7':'horse',
'8':'ship',
'9':'truck',
}
transform = transforms.Compose([transforms.ToTensor(),transforms.Normalize((0.4914,0.4822,0.4465),(0.2023,0.1994,0.201))])
#transform1 = transforms.Compose([transforms.ToTensor()])
normalize = transforms.Normalize((0.4914,0.4822,0.4465),(0.2023,0.1994,0.201))
mean =(0.4914,0.4822,0.4465)
std =(0.2023,0.1994,0.201)
inv_normalize = transforms.Normalize(
mean=[-0.4914/0.2023, -0.4822/0.1994, -0.4465/0.201],
std=[1/0.2023, 1/0.1994, 1/0.201])
batch_size = 1
trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
shuffle=False, num_workers=2)
testset = torchvision.datasets.CIFAR10(root='./data', train=False,
download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
shuffle=True, num_workers=2)
model = torch.hub.load("chenyaofo/pytorch-cifar-models", "cifar10_resnet20", pretrained=True)
model = model.cuda()
import os
os.environ["CUDA_VISIBLE_DEVICES"] = '0'
patch = np.random.rand(3,32,32)
model.eval()
def mask_generation(mask_type='rectangle', patch = patch, image_size=(3, 7, 7)):
applied_patch = np.zeros(image_size) #0,1
#patch = torch.tensor(patch)
#padding = (3,3,3,3)
#patch = F.pad(patch, padding)
if mask_type == 'rectangle':
rotation_angle = 0
for i in range(patch.shape[0]):
patch[i] = np.rot90(patch[i], rotation_angle)
x_location , y_location = 25,0
for i in range(patch.shape[0]):
applied_patch[:, x_location:x_location + patch.shape[1], y_location:y_location + patch.shape[2]] = patch
mask = applied_patch.copy()
mask[mask != 0] = 1.0
return patch , applied_patch, mask, x_location, y_location , rotation_angle
def patch_attack(image, applied_patch, mask, target, probability_threshold, model, lr, max_iteration):
applied_patch = torch.from_numpy(applied_patch)
mask = torch.from_numpy(mask)
image = inv_normalize(image)
target_probability, count = 0,0
perturbated_image = torch.mul(mask.type(torch.FloatTensor), applied_patch.type(torch.FloatTensor)) + torch.mul((1 - mask.type(torch.FloatTensor)), image.type(torch.FloatTensor))
perturbated_image = normalize(perturbated_image)
while target_probability < probability_threshold and count < max_iteration:
count += 1
# Optimize the patch
perturbated_image = Variable(perturbated_image.data, requires_grad=True)
per_image = perturbated_image.cuda()
output = model(per_image)
target_log_softmax = torch.nn.functional.log_softmax(output, dim=1)[0][target]
target_log_softmax.backward()
patch_grad = perturbated_image.grad.clone().cpu()
applied_patch = (lr * patch_grad) + applied_patch.type(torch.FloatTensor)
applied_patch = torch.clamp(applied_patch,0,1)
perturbated_image.grad.data.zero_()
# Test the patch
perturbated_image = torch.mul(mask.type(torch.FloatTensor), applied_patch.type(torch.FloatTensor)) + torch.mul((1-mask.type(torch.FloatTensor)), image.type(torch.FloatTensor))
perturbated_image = normalize(perturbated_image)
perturbated_image = perturbated_image.cuda()
output = model(perturbated_image)
target_probability = torch.nn.functional.softmax(output, dim=1).data[0][target]
perturbated_image = perturbated_image.detach().cpu().numpy()
applied_patch = applied_patch.cpu().numpy()
return perturbated_image, applied_patch
def test_patch(patch_type, target, patch, test_loader, model):
test_total, test_actual_total, test_success = 0, 0, 0
for (image, label) in test_loader:
test_total += label.shape[0]
assert image.shape[0] == 1, 'Only one picture should be loaded each time.'
image = image.cuda() #-3,3
label = label.cuda()
output = model(image)
_, predicted = torch.max(output.data, 1)
if predicted[0] != label and predicted[0].data.cpu().numpy() != target:
test_actual_total += 1
patch ,applied_patch, mask, x_location, y_location = mask_generation('rectangle', patch, (3, 32, 32))
applied_patch = torch.from_numpy(applied_patch)
mask = torch.from_numpy(mask)
mask = normalize(mask)
applied_patch = normalize(applied_patch)
perturbated_image = torch.mul(mask.type(torch.FloatTensor), applied_patch.type(torch.FloatTensor)) + torch.mul((1 - mask.type(torch.FloatTensor)), image.type(torch.FloatTensor))
perturbated_image = perturbated_image.cuda() #-3,3
output = model(perturbated_image)
_, predicted = torch.max(output.data, 1)
if predicted[0].data.cpu().numpy() == target:
test_success += 1
return test_success / test_actual_total
#training parameters
epochs = 1
target = 0
probability_threshold = 0.99
lr = 1/255
max_iteration = 1
runs = 0
for epoch in range(epochs):
train_total, train_actual_total, train_success = 0, 0, 0
for (image, label) in trainloader:
runs+=1
assert image.shape[0] == 1
image = image.cuda()
label = label.cuda()
train_total += label.shape[0]
output = model(image)
_, predicted = torch.max(output.data, 1)
if predicted[0] != label or predicted[0].data.cpu().numpy() != target:
train_actual_total += 1
patch , applied_patch, mask, x_location, y_location ,rotation_angle = mask_generation('rectangle', patch, (3, 32, 32))
perturbated_image, applied_patch = patch_attack(image, applied_patch, mask, target, probability_threshold, model, lr,max_iteration)
perturbated_image = torch.from_numpy(perturbated_image).cuda()
output = model(perturbated_image)
_, predicted = torch.max(output.data, 1)
if predicted[0].data.cpu().numpy() == target:
train_success += 1
patch = applied_patch[0][:, x_location:x_location + patch.shape[1], y_location:y_location + patch.shape[2]]
patch = np.array(patch)
To convert a number x in the range [0,1] to the range [-2.55,2.55]:
Multiply by size of final range / size of original range or in this case 5.1/1.0.
Add min of final range - min of starting range to the result, so in this case -2.55+0 = 0.

darknet_images.py doesn't detect any objects. Darknet YOLOv4

So I'm running the default darknet_images.py script in pycharm (the one from Alexey's github repo) and when I give the img path, the img shows but the bounding boxes don't. I tried to solve the problem but couldn't find a solution. I found that my predictions variable is empty(in the main() function:
image, detections = image_detection(image_name, network, class_names, class_colors, args.thresh )
Just in case I am missing something, I'll print here the darknet.py code and darknet_images.py code.
darknet.py:
#!python3
"""
Python 3 wrapper for identifying objects in images
Requires DLL compilation
Both the GPU and no-GPU version should be compiled; the no-GPU version should be renamed "yolo_cpp_dll_nogpu.dll".
On a GPU system, you can force CPU evaluation by any of:
- Set global variable DARKNET_FORCE_CPU to True
- Set environment variable CUDA_VISIBLE_DEVICES to -1
- Set environment variable "FORCE_CPU" to "true"
- Set environment variable "DARKNET_PATH" to path darknet lib .so (for Linux)
Directly viewing or returning bounding-boxed images requires scikit-image to be installed (`pip install scikit-image`)
Original *nix 2.7: https://github.com/pjreddie/darknet/blob/0f110834f4e18b30d5f101bf8f1724c34b7b83db/python/darknet.py
Windows Python 2.7 version: https://github.com/AlexeyAB/darknet/blob/fc496d52bf22a0bb257300d3c79be9cd80e722cb/build/darknet/x64/darknet.py
#author: Philip Kahn
#date: 20180503
"""
from ctypes import *
import math
import random
import os
class BOX(Structure):
_fields_ = [("x", c_float),
("y", c_float),
("w", c_float),
("h", c_float)]
class DETECTION(Structure):
_fields_ = [("bbox", BOX),
("classes", c_int),
("prob", POINTER(c_float)),
("mask", POINTER(c_float)),
("objectness", c_float),
("sort_class", c_int),
("uc", POINTER(c_float)),
("points", c_int),
("embeddings", POINTER(c_float)),
("embedding_size", c_int),
("sim", c_float),
("track_id", c_int)]
class DETNUMPAIR(Structure):
_fields_ = [("num", c_int),
("dets", POINTER(DETECTION))]
class IMAGE(Structure):
_fields_ = [("w", c_int),
("h", c_int),
("c", c_int),
("data", POINTER(c_float))]
class METADATA(Structure):
_fields_ = [("classes", c_int),
("names", POINTER(c_char_p))]
def network_width(net):
return lib.network_width(net)
def network_height(net):
return lib.network_height(net)
def bbox2points(bbox):
"""
From bounding box yolo format
to corner points cv2 rectangle
"""
x, y, w, h = bbox
xmin = int(round(x - (w / 2)))
xmax = int(round(x + (w / 2)))
ymin = int(round(y - (h / 2)))
ymax = int(round(y + (h / 2)))
return xmin, ymin, xmax, ymax
def class_colors(names):
"""
Create a dict with one random BGR color for each
class name
"""
return {name: (
random.randint(0, 255),
random.randint(0, 255),
random.randint(0, 255)) for name in names}
def load_network(config_file, data_file, weights, batch_size=1):
"""
load model description and weights from config files
args:
config_file (str): path to .cfg model file
data_file (str): path to .data model file
weights (str): path to weights
returns:
network: trained model
class_names
class_colors
"""
network = load_net_custom(
config_file.encode("ascii"),
weights.encode("ascii"), 0, batch_size)
metadata = load_meta(data_file.encode("ascii"))
class_names = [metadata.names[i].decode("ascii") for i in range(metadata.classes)]
colors = class_colors(class_names)
return network, class_names, colors
def print_detections(detections, coordinates=False):
print("\nObjects:")
for label, confidence, bbox in detections:
x, y, w, h = bbox
if coordinates:
print("{}: {}% (left_x: {:.0f} top_y: {:.0f} width: {:.0f} height: {:.0f})".format(label, confidence, x, y, w, h))
else:
print("{}: {}%".format(label, confidence))
def draw_boxes(detections, image, colors):
import cv2
for label, confidence, bbox in detections:
left, top, right, bottom = bbox2points(bbox)
cv2.rectangle(image, (left, top), (right, bottom), colors[label], 1)
cv2.putText(image, "{} [{:.2f}]".format(label, float(confidence)),
(left, top - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
colors[label], 2)
return image
def decode_detection(detections):
decoded = []
for label, confidence, bbox in detections:
confidence = str(round(confidence * 100, 2))
decoded.append((str(label), confidence, bbox))
return decoded
def remove_negatives(detections, class_names, num):
"""
Remove all classes with 0% confidence within the detection
"""
predictions = []
for j in range(num):
for idx, name in enumerate(class_names):
if detections[j].prob[idx] > 0:
bbox = detections[j].bbox
bbox = (bbox.x, bbox.y, bbox.w, bbox.h)
predictions.append((name, detections[j].prob[idx], (bbox)))
return predictions
def detect_image(network, class_names, image, thresh=.5, hier_thresh=.5, nms=.45):
"""
Returns a list with highest confidence class and their bbox
"""
pnum = pointer(c_int(0))
predict_image(network, image)
detections = get_network_boxes(network, image.w, image.h,
thresh, hier_thresh, None, 0, pnum, 0)
num = pnum[0]
if nms:
do_nms_sort(detections, num, len(class_names), nms)
predictions = remove_negatives(detections, class_names, num)
predictions = decode_detection(predictions)
free_detections(detections, num)
return sorted(predictions, key=lambda x: x[1])
# lib = CDLL("/home/pjreddie/documents/darknet/libdarknet.so", RTLD_GLOBAL)
# lib = CDLL("libdarknet.so", RTLD_GLOBAL)
hasGPU = True
if os.name == "nt":
cwd = os.path.dirname(__file__)
os.environ['PATH'] = cwd + ';' + os.environ['PATH']
winGPUdll = os.path.join(cwd, "yolo_cpp_dll.dll")
winNoGPUdll = os.path.join(cwd, "yolo_cpp_dll_nogpu.dll")
envKeys = list()
for k, v in os.environ.items():
envKeys.append(k)
try:
try:
tmp = os.environ["FORCE_CPU"].lower()
if tmp in ["1", "true", "yes", "on"]:
raise ValueError("ForceCPU")
else:
print("Flag value {} not forcing CPU mode".format(tmp))
except KeyError:
# We never set the flag
if 'CUDA_VISIBLE_DEVICES' in envKeys:
if int(os.environ['CUDA_VISIBLE_DEVICES']) < 0:
raise ValueError("ForceCPU")
try:
global DARKNET_FORCE_CPU
if DARKNET_FORCE_CPU:
raise ValueError("ForceCPU")
except NameError as cpu_error:
print(cpu_error)
if not os.path.exists(winGPUdll):
raise ValueError("NoDLL")
lib = CDLL(winGPUdll, RTLD_GLOBAL)
except (KeyError, ValueError):
hasGPU = False
if os.path.exists(winNoGPUdll):
lib = CDLL(winNoGPUdll, RTLD_GLOBAL)
print("Notice: CPU-only mode")
else:
# Try the other way, in case no_gpu was compile but not renamed
lib = CDLL(winGPUdll, RTLD_GLOBAL)
print("Environment variables indicated a CPU run, but we didn't find {}. Trying a GPU run anyway.".format(winNoGPUdll))
else:
lib = CDLL(os.path.join(
os.environ.get('DARKNET_PATH', './'),
"libdarknet.so"), RTLD_GLOBAL)
lib.network_width.argtypes = [c_void_p]
lib.network_width.restype = c_int
lib.network_height.argtypes = [c_void_p]
lib.network_height.restype = c_int
copy_image_from_bytes = lib.copy_image_from_bytes
copy_image_from_bytes.argtypes = [IMAGE,c_char_p]
predict = lib.network_predict_ptr
predict.argtypes = [c_void_p, POINTER(c_float)]
predict.restype = POINTER(c_float)
if hasGPU:
set_gpu = lib.cuda_set_device
set_gpu.argtypes = [c_int]
init_cpu = lib.init_cpu
make_image = lib.make_image
make_image.argtypes = [c_int, c_int, c_int]
make_image.restype = IMAGE
get_network_boxes = lib.get_network_boxes
get_network_boxes.argtypes = [c_void_p, c_int, c_int, c_float, c_float, POINTER(c_int), c_int, POINTER(c_int), c_int]
get_network_boxes.restype = POINTER(DETECTION)
make_network_boxes = lib.make_network_boxes
make_network_boxes.argtypes = [c_void_p]
make_network_boxes.restype = POINTER(DETECTION)
free_detections = lib.free_detections
free_detections.argtypes = [POINTER(DETECTION), c_int]
free_batch_detections = lib.free_batch_detections
free_batch_detections.argtypes = [POINTER(DETNUMPAIR), c_int]
free_ptrs = lib.free_ptrs
free_ptrs.argtypes = [POINTER(c_void_p), c_int]
network_predict = lib.network_predict_ptr
network_predict.argtypes = [c_void_p, POINTER(c_float)]
reset_rnn = lib.reset_rnn
reset_rnn.argtypes = [c_void_p]
load_net = lib.load_network
load_net.argtypes = [c_char_p, c_char_p, c_int]
load_net.restype = c_void_p
load_net_custom = lib.load_network_custom
load_net_custom.argtypes = [c_char_p, c_char_p, c_int, c_int]
load_net_custom.restype = c_void_p
free_network_ptr = lib.free_network_ptr
free_network_ptr.argtypes = [c_void_p]
free_network_ptr.restype = c_void_p
do_nms_obj = lib.do_nms_obj
do_nms_obj.argtypes = [POINTER(DETECTION), c_int, c_int, c_float]
do_nms_sort = lib.do_nms_sort
do_nms_sort.argtypes = [POINTER(DETECTION), c_int, c_int, c_float]
free_image = lib.free_image
free_image.argtypes = [IMAGE]
letterbox_image = lib.letterbox_image
letterbox_image.argtypes = [IMAGE, c_int, c_int]
letterbox_image.restype = IMAGE
load_meta = lib.get_metadata
lib.get_metadata.argtypes = [c_char_p]
lib.get_metadata.restype = METADATA
load_image = lib.load_image_color
load_image.argtypes = [c_char_p, c_int, c_int]
load_image.restype = IMAGE
rgbgr_image = lib.rgbgr_image
rgbgr_image.argtypes = [IMAGE]
predict_image = lib.network_predict_image
predict_image.argtypes = [c_void_p, IMAGE]
predict_image.restype = POINTER(c_float)
predict_image_letterbox = lib.network_predict_image_letterbox
predict_image_letterbox.argtypes = [c_void_p, IMAGE]
predict_image_letterbox.restype = POINTER(c_float)
network_predict_batch = lib.network_predict_batch
network_predict_batch.argtypes = [c_void_p, IMAGE, c_int, c_int, c_int,
c_float, c_float, POINTER(c_int), c_int, c_int]
network_predict_batch.restype = POINTER(DETNUMPAIR)
darknet_images.py:
import argparse
import os
import glob
import random
import darknet
import time
import cv2
import numpy as np
import darknet
def parser():
parser = argparse.ArgumentParser(description="YOLO Object Detection")
parser.add_argument("--input", type=str, default="",
help="image source. It can be a single image, a"
"txt with paths to them, or a folder. Image valid"
" formats are jpg, jpeg or png."
"If no input is given, ")
parser.add_argument("--batch_size", default=1, type=int,
help="number of images to be processed at the same time")
parser.add_argument("--weights", default="yolov4.weights",
help="yolo weights path")
parser.add_argument("--dont_show", action='store_true',
help="windown inference display. For headless systems")
parser.add_argument("--ext_output", action='store_true',
help="display bbox coordinates of detected objects")
parser.add_argument("--save_labels", action='store_true',
help="save detections bbox for each image in yolo format")
parser.add_argument("--config_file", default="./cfg/yolov4.cfg",
help="path to config file")
parser.add_argument("--data_file", default="./cfg/coco.data",
help="path to data file")
parser.add_argument("--thresh", type=float, default=.25,
help="remove detections with lower confidence")
return parser.parse_args()
def check_arguments_errors(args):
assert 0 < args.thresh < 1, "Threshold should be a float between zero and one (non-inclusive)"
if not os.path.exists(args.config_file):
raise(ValueError("Invalid config path {}".format(os.path.abspath(args.config_file))))
if not os.path.exists(args.weights):
raise(ValueError("Invalid weight path {}".format(os.path.abspath(args.weights))))
if not os.path.exists(args.data_file):
raise(ValueError("Invalid data file path {}".format(os.path.abspath(args.data_file))))
if args.input and not os.path.exists(args.input):
raise(ValueError("Invalid image path {}".format(os.path.abspath(args.input))))
def check_batch_shape(images, batch_size):
"""
Image sizes should be the same width and height
"""
shapes = [image.shape for image in images]
if len(set(shapes)) > 1:
raise ValueError("Images don't have same shape")
if len(shapes) > batch_size:
raise ValueError("Batch size higher than number of images")
return shapes[0]
def load_images(images_path):
"""
If image path is given, return it directly
For txt file, read it and return each line as image path
In other case, it's a folder, return a list with names of each
jpg, jpeg and png file
"""
input_path_extension = images_path.split('.')[-1]
if input_path_extension in ['jpg', 'jpeg', 'png']:
return [images_path]
elif input_path_extension == "txt":
with open(images_path, "r") as f:
return f.read().splitlines()
else:
return glob.glob(
os.path.join(images_path, "*.jpg")) + \
glob.glob(os.path.join(images_path, "*.png")) + \
glob.glob(os.path.join(images_path, "*.jpeg"))
def prepare_batch(images, network, channels=3):
width = darknet.network_width(network)
height = darknet.network_height(network)
darknet_images = []
for image in images:
image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
image_resized = cv2.resize(image_rgb, (width, height),
interpolation=cv2.INTER_LINEAR)
custom_image = image_resized.transpose(2, 0, 1)
darknet_images.append(custom_image)
batch_array = np.concatenate(darknet_images, axis=0)
batch_array = np.ascontiguousarray(batch_array.flat, dtype=np.float32)/255.0
darknet_images = batch_array.ctypes.data_as(darknet.POINTER(darknet.c_float))
return darknet.IMAGE(width, height, channels, darknet_images)
def image_detection(image_path, network, class_names, class_colors, thresh):
# Darknet doesn't accept numpy images.
# Create one with image we reuse for each detect
width = darknet.network_width(network)
height = darknet.network_height(network)
darknet_image = darknet.make_image(width, height, 3)
image = cv2.imread(image_path)
image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
image_resized = cv2.resize(image_rgb, (width, height),
interpolation=cv2.INTER_LINEAR)
darknet.copy_image_from_bytes(darknet_image, image_resized.tobytes())
detections = darknet.detect_image(network, class_names, darknet_image, thresh=thresh)
darknet.free_image(darknet_image)
image = darknet.draw_boxes(detections, image_resized, class_colors)
return cv2.cvtColor(image, cv2.COLOR_BGR2RGB), detections
def batch_detection(network, images, class_names, class_colors,
thresh=0.25, hier_thresh=.5, nms=.45, batch_size=4):
image_height, image_width, _ = check_batch_shape(images, batch_size)
darknet_images = prepare_batch(images, network)
batch_detections = darknet.network_predict_batch(network, darknet_images, batch_size, image_width,
image_height, thresh, hier_thresh, None, 0, 0)
batch_predictions = []
for idx in range(batch_size):
num = batch_detections[idx].num
detections = batch_detections[idx].dets
if nms:
darknet.do_nms_obj(detections, num, len(class_names), nms)
predictions = darknet.remove_negatives(detections, class_names, num)
images[idx] = darknet.draw_boxes(predictions, images[idx], class_colors)
batch_predictions.append(predictions)
darknet.free_batch_detections(batch_detections, batch_size)
return images, batch_predictions
def image_classification(image, network, class_names):
width = darknet.network_width(network)
height = darknet.network_height(network)
image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
image_resized = cv2.resize(image_rgb, (width, height),
interpolation=cv2.INTER_LINEAR)
darknet_image = darknet.make_image(width, height, 3)
darknet.copy_image_from_bytes(darknet_image, image_resized.tobytes())
detections = darknet.predict_image(network, darknet_image)
predictions = [(name, detections[idx]) for idx, name in enumerate(class_names)]
darknet.free_image(darknet_image)
return sorted(predictions, key=lambda x: -x[1])
def convert2relative(image, bbox):
"""
YOLO format use relative coordinates for annotation
"""
x, y, w, h = bbox
height, width, _ = image.shape
return x/width, y/height, w/width, h/height
def save_annotations(name, image, detections, class_names):
"""
Files saved with image_name.txt and relative coordinates
"""
file_name = os.path.splitext(name)[0] + ".txt"
with open(file_name, "w") as f:
for label, confidence, bbox in detections:
x, y, w, h = convert2relative(image, bbox)
label = class_names.index(label)
f.write("{} {:.4f} {:.4f} {:.4f} {:.4f} {:.4f}\n".format(label, x, y, w, h, float(confidence)))
def batch_detection_example():
args = parser()
check_arguments_errors(args)
batch_size = 3
random.seed(3) # deterministic bbox colors
network, class_names, class_colors = darknet.load_network(
args.config_file,
args.data_file,
args.weights,
batch_size=batch_size
)
image_names = ['data/horses.jpg', 'data/horses.jpg', 'data/eagle.jpg']
images = [cv2.imread(image) for image in image_names]
images, detections, = batch_detection(network, images, class_names,
class_colors, batch_size=batch_size)
for name, image in zip(image_names, images):
cv2.imwrite(name.replace("data/", ""), image)
print(detections)
def main():
args = parser()
check_arguments_errors(args)
random.seed(3) # deterministic bbox colors
network, class_names, class_colors = darknet.load_network(
args.config_file,
args.data_file,
args.weights,
batch_size=args.batch_size
)
images = load_images(args.input)
index = 0
while True:
# loop asking for new image paths if no list is given
if args.input:
if index >= len(images):
break
image_name = images[index]
else:
image_name = input("Enter Image Path: ")
prev_time = time.time()
image, detections = image_detection(
image_name, network, class_names, class_colors, args.thresh
)
if args.save_labels:
save_annotations(image_name, image, detections, class_names)
darknet.print_detections(detections, args.ext_output)
fps = int(1/(time.time() - prev_time))
print("FPS: {}".format(fps))
if not args.dont_show:
cv2.imshow('Inference', image)
if cv2.waitKey() & 0xFF == ord('q'):
break
index += 1
if __name__ == "__main__":
# unconmment next line for an example of batch processing
# batch_detection_example()
main()

Resources