I am trying to create strips from the MIT-BIH data at https://physionet.org/content/mitdb/1.0.0/. Somehow this strip logic seems to have an issue. Can anyone with experience with MIT-BIH data help me verify the logic?
def strip_validate(strip, pre_aux):
    sym_list = list(set(strip['symbol']))
    # print(sym_list)
    aux_note_list = list(set(strip['aux_note']))
    aux_note_list = list(filter(None, aux_note_list))
    curr_aux = pre_aux  # ''
    if len(aux_note_list) > 0:
        # print("start")
        if 'AFIB' in aux_note_list:
            curr_aux = 'AFIB'
            # print("curr_aux_previous_1 = ", curr_aux)
        elif 'AFL' in aux_note_list:
            curr_aux = 'AFL'
            # print("curr_aux_previous_2 = ", curr_aux)
        elif 'N' in aux_note_list:
            curr_aux = 'N'
            # print("curr_aux_previous_3 = ", curr_aux)
        elif len(aux_note_list) == 1:
            curr_aux = aux_note_list[0]
        else:
            curr_aux = pre_aux  # replace " " with curr_aux
    if curr_aux != 'AFIB' and curr_aux != 'AFL':
        other_symbols = pd.Series(sym_list)
        flag = sum(other_symbols != 'N')
        if flag > 0:
            other_symbols = other_symbols[other_symbols != 'N']
            other_symbols = other_symbols[other_symbols != '+']
            if len(other_symbols) > 0:
                curr_aux = curr_aux  ### Issue curr_aux = other_symbols.values[0], change: (= curr_aux)
            else:
                curr_aux = curr_aux  ### Issue curr_aux = other_symbols.values[0], change: (= curr_aux)
    if curr_aux == "AFIB":
        curr_aux = curr_aux.replace("AFIB", "A")
    elif curr_aux == "AFL":
        curr_aux = curr_aux.replace("AFL", "A")
    return curr_aux, flag
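For reference, this is roughly how I build each strip before calling the function above (a minimal sketch using the wfdb package; the field names are the ones strip_validate expects, and the slicing is simplified):

import wfdb

# Beat/rhythm annotations for record 100, read straight from PhysioNet.
ann = wfdb.rdann('100', 'atr', pn_dir='mitdb')

fs = 360             # MIT-BIH sampling frequency (Hz)
strip_len = 10 * fs  # 10-second strips
start = 0            # first strip only, as an example

in_strip = (ann.sample >= start) & (ann.sample < start + strip_len)
strip = {
    # beat symbols ('N', 'V', '+', ...) that fall inside this strip
    'symbol': [s for s, keep in zip(ann.symbol, in_strip) if keep],
    # rhythm labels come back as e.g. '(AFIB'; the leading '(' is stripped here
    'aux_note': [a.lstrip('(') for a, keep in zip(ann.aux_note, in_strip) if keep],
}

label, flag = strip_validate(strip, pre_aux='')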
I have a GNN that works for regression; however, I have changed the nature of the task from regression to classification. I thought it would be as simple as changing the loss function and the output size, but I am getting multiple errors. This is my code:
class GNN(torch.nn.Module):
    def __init__(self, gnn, n_layer, tfeature_len, dim, mlp_hidden_unit, feature_mode):
        super(GNN, self).__init__()
        self.gnn = gnn
        self.n_layer = n_layer
        self.tfeature_len = tfeature_len
        self.dim = dim
        self.gnn_layers = ModuleList([])
        if gnn in ['gcn', 'gat', 'sage', 'tag']:
            for i in range(n_layer):
                if gnn == 'gcn':
                    self.gnn_layers.append(GraphConv(in_feats=tfeature_len if i == 0 else dim,
                                                     out_feats=dim,
                                                     activation=None if i == n_layer - 1 else torch.relu))
                elif gnn == 'gat':
                    num_heads = 16  # make sure that dim is dividable by num_heads
                    self.gnn_layers.append(GATConv(in_feats=tfeature_len if i == 0 else dim,
                                                   out_feats=dim // num_heads,
                                                   activation=None if i == n_layer - 1 else torch.relu,
                                                   num_heads=num_heads))
                elif gnn == 'sage':
                    agg = 'pool'
                    self.gnn_layers.append(SAGEConv(in_feats=tfeature_len if i == 0 else dim,
                                                    out_feats=dim,
                                                    activation=None if i == n_layer - 1 else torch.relu,
                                                    aggregator_type=agg))
                elif gnn == 'tag':
                    hops = 2
                    self.gnn_layers.append(TAGConv(in_feats=tfeature_len if i == 0 else dim,
                                                   out_feats=dim,
                                                   activation=None if i == n_layer - 1 else torch.relu,
                                                   k=hops))
        elif gnn == 'sgc':
            self.gnn_layers.append(SGConv(in_feats=tfeature_len, out_feats=dim, k=n_layer))
        else:
            raise ValueError('unknown GNN model')
        self.factor = None
        self.pooling_layer = SumPooling()
        self.mlp_hidden_unit = mlp_hidden_unit
        self.feature_mode = feature_mode
        if self.feature_mode == 'concat':
            self.mlp_hidden_layer = torch.nn.Linear(2 * self.dim, self.mlp_hidden_unit)
        elif self.feature_mode == 'subtract':
            self.mlp_hidden_layer = torch.nn.Linear(self.dim, self.mlp_hidden_unit)
        else:
            raise ValueError('unknown feature mode')
        self.mlp_output_layer = torch.nn.Linear(self.mlp_hidden_unit, 2)

    def forward(self, graph1, graph2):
        graph1_embedding = self.calculate_embedding(graph1)
        graph2_embedding = self.calculate_embedding(graph2)
        if self.feature_mode == 'concat':
            hidden = relu(self.mlp_hidden_layer(torch.concat([graph1_embedding, graph2_embedding], dim=-1)))
        elif self.feature_mode == 'subtract':
            hidden = relu(self.mlp_hidden_layer(graph1_embedding - graph2_embedding))
        else:
            raise ValueError('unknown feature mode')
        output = self.mlp_output_layer(hidden)
        return output

    def calculate_embedding(self, graph):
        feature = graph.ndata['feature']
        h = one_hot(feature, num_classes=self.tfeature_len)
        h = torch.sum(h, dim=1, dtype=torch.float)
        for layer in self.gnn_layers:
            h = layer(graph, h)
            if self.gnn == 'gat':
                h = torch.reshape(h, [h.size()[0], -1])
        if self.factor is None:
            self.factor = math.sqrt(self.dim) / float(torch.mean(torch.linalg.norm(h, dim=1)))
        h *= self.factor
        graph_embedding = self.pooling_layer(graph, h)
        return graph_embedding
def train(data, model, optimizer):
    train_loader, val_loader, test_loader, tfeature_len = data
    loss_fn = torch.nn.CrossEntropyLoss()
    epoch = 23
    model = model.to(device)
    print('start training\n')
    #evaluate(model, 'train', train_loader)
    evaluate(model, 'val', val_loader)
    evaluate(model, 'test', test_loader)
    epoch_losses = []
    for i in range(epoch):
        print('epoch %d:' % i)
        epoch_loss = 0
        model.train()
        for graph1, graph2, target in train_loader:
            pred = torch.squeeze(model(graph1, graph2))
            loss = loss_fn(pred, target)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            epoch_loss += loss.detach().item()
        epoch_loss /= (iter + 1)
        print('Epoch {}, loss {:.4f}'.format(epoch, epoch_loss))
        epoch_losses.append(epoch_loss)
        #evaluate(model, 'train', train_loader)
        evaluate(model, 'val', val_loader)
        evaluate(model, 'test', test_loader)
        print()

def evaluate(model, mode, data):
    pred_list = []
    target_list = []
    model.eval()
    with torch.no_grad():
        for graph1, graph2, target in data:
            outputs = torch.softmax(model(graph1, graph2), 1)
            _, predicted = torch.max(outputs.data, 1)
            pred_list.append(predicted)
            target_list.append(target)
            #torch.sum(preds == targets).detach().cpu().numpy().
    pred_list = torch.concat(pred_list)
    target_list = torch.concat(target_list)
    #print('%s Acc: %.4f' % (mode (sklearn.metrics.accuracy_score(target_list, pred_list, normalize=False)) / len(pred_list) * 10
The accuracy is commented out at the moment because that, too, gave me an error. My first error, however, was the following:
start training
epoch 0:
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-84-9de6c6dbd2ee> in <module>
----> 1 train(data,model,optimizer)
3 frames
/usr/local/lib/python3.7/dist-packages/torch/nn/functional.py in cross_entropy(input, target, weight, size_average, ignore_index, reduce, reduction, label_smoothing)
3012 if size_average is not None or reduce is not None:
3013 reduction = _Reduction.legacy_get_string(size_average, reduce)
-> 3014 return torch._C._nn.cross_entropy_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index, label_smoothing)
3015
3016
RuntimeError: "nll_loss_forward_reduce_cuda_kernel_2d_index" not implemented for 'Float'
My targets are as follows:
targets = training_data['Class'].tolist()
targets = torch.Tensor(targets)
targets = targets.to(device)
It is just a list of 1s and 0s.
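For completeness, this is how I would cast them to integer class labels if that is what CrossEntropyLoss expects (a minimal sketch, I am not sure this is the right fix):

# torch.Tensor(...) produces float32 by default; class indices need to be torch.long.
targets = training_data['Class'].tolist()
targets = torch.tensor(targets, dtype=torch.long)  # 0/1 labels as integers, not floats
targets = targets.to(device)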
I call my model as follows:
model = GNN('sage', 3, tfeature_len, 2048, 100, 'subtract')
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
How can I fix this?
YOLOv5 detects perfectly when I run detect.py, but unfortunately with DeepSORT, track.py is neither tracking nor even detecting with the tracker. How do I set the parameters for my tracker?
yolov5:
>> python detect.py --source video.mp4 --weights best.pt
yolov5+deepsort:
>> python track.py --yolo-weights best.pt --source video.mp4 --strong-sort-weights osnet_x0_25_msmt17.pt --show-vid --imgsz 640 --hide-labels
import argparse
from email.headerregistry import ContentDispositionHeader
import os
from pkg_resources import fixup_namespace_packages
# limit the number of cpus used by high performance libraries
os.environ["OMP_NUM_THREADS"] = "1"
os.environ["OPENBLAS_NUM_THREADS"] = "1"
os.environ["MKL_NUM_THREADS"] = "1"
os.environ["VECLIB_MAXIMUM_THREADS"] = "1"
os.environ["NUMEXPR_NUM_THREADS"] = "1"
import sys
import numpy as np
from pathlib import Path
import torch
import torch.backends.cudnn as cudnn
FILE = Path(__file__).resolve()
ROOT = FILE.parents[0] # yolov5 strongsort root directory
WEIGHTS = ROOT / 'weights'
if str(ROOT) not in sys.path:
    sys.path.append(str(ROOT))  # add ROOT to PATH
if str(ROOT / 'yolov5') not in sys.path:
    sys.path.append(str(ROOT / 'yolov5'))  # add yolov5 ROOT to PATH
if str(ROOT / 'strong_sort') not in sys.path:
    sys.path.append(str(ROOT / 'strong_sort'))  # add strong_sort ROOT to PATH
ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative
import logging
from yolov5.models.common import DetectMultiBackend
from yolov5.utils.dataloaders import VID_FORMATS, LoadImages, LoadStreams
from yolov5.utils.general import (LOGGER, check_img_size, non_max_suppression, scale_coords, check_requirements, cv2,
                                  check_imshow, xyxy2xywh, increment_path, strip_optimizer, colorstr, print_args, check_file)
from yolov5.utils.torch_utils import select_device, time_sync
from yolov5.utils.plots import Annotator, colors, save_one_box
from strong_sort.utils.parser import get_config
from strong_sort.strong_sort import StrongSORT
# remove duplicated stream handler to avoid duplicated logging
logging.getLogger().removeHandler(logging.getLogger().handlers[0])
list_ball_cord = list()
#torch.no_grad()
def run(
        source='0',
        yolo_weights=WEIGHTS / 'yolov5m.pt',  # model.pt path(s),
        strong_sort_weights=WEIGHTS / 'osnet_x0_25_msmt17.pt',  # model.pt path,
        config_strongsort=ROOT / 'strong_sort/configs/strong_sort.yaml',
        imgsz=(640, 640),  # inference size (height, width)
        conf_thres=0.25,  # confidence threshold
        iou_thres=0.45,  # NMS IOU threshold
        max_det=1000,  # maximum detections per image
        device='',  # cuda device, i.e. 0 or 0,1,2,3 or cpu
        show_vid=False,  # show results
        save_txt=False,  # save results to *.txt
        save_conf=False,  # save confidences in --save-txt labels
        save_crop=False,  # save cropped prediction boxes
        save_vid=False,  # save confidences in --save-txt labels
        nosave=False,  # do not save images/videos
        classes=None,  # filter by class: --class 0, or --class 0 2 3
        agnostic_nms=False,  # class-agnostic NMS
        augment=False,  # augmented inference
        visualize=False,  # visualize features
        update=False,  # update all models
        project=ROOT / 'runs/track',  # save results to project/name
        name='exp',  # save results to project/name
        exist_ok=False,  # existing project/name ok, do not increment
        line_thickness=3,  # bounding box thickness (pixels)
        hide_labels=False,  # hide labels
        hide_conf=False,  # hide confidences
        hide_class=False,  # hide IDs
        half=False,  # use FP16 half-precision inference
        dnn=False,  # use OpenCV DNN for ONNX inference
):
    source = str(source)
    save_img = not nosave and not source.endswith('.txt')  # save inference images
    is_file = Path(source).suffix[1:] in (VID_FORMATS)
    is_url = source.lower().startswith(('rtsp://', 'rtmp://', 'http://', 'https://'))
    webcam = source.isnumeric() or source.endswith('.txt') or (is_url and not is_file)
    if is_url and is_file:
        source = check_file(source)  # download

    # Directories
    if not isinstance(yolo_weights, list):  # single yolo model
        exp_name = str(yolo_weights).rsplit('/', 1)[-1].split('.')[0]
    elif type(yolo_weights) is list and len(yolo_weights) == 1:  # single models after --yolo_weights
        exp_name = yolo_weights[0].split(".")[0]
    else:  # multiple models after --yolo_weights
        exp_name = 'ensemble'
    exp_name = name if name is not None else exp_name + "_" + str(strong_sort_weights).split('/')[-1].split('.')[0]
    save_dir = increment_path(Path(project) / exp_name, exist_ok=exist_ok)  # increment run
    (save_dir / 'tracks' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

    # Load model
    device = select_device(device)
    model = DetectMultiBackend(yolo_weights, device=device, dnn=dnn, data=None, fp16=half)
    stride, names, pt = model.stride, model.names, model.pt
    imgsz = check_img_size(imgsz, s=stride)  # check image size

    # Dataloader
    if webcam:
        show_vid = check_imshow()
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt)
        nr_sources = len(dataset)
    else:
        dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt)
        nr_sources = 1
    vid_path, vid_writer, txt_path = [None] * nr_sources, [None] * nr_sources, [None] * nr_sources

    # initialize StrongSORT
    cfg = get_config()
    cfg.merge_from_file(opt.config_strongsort)

    # Create as many strong sort instances as there are video sources
    strongsort_list = []
    for i in range(nr_sources):
        strongsort_list.append(
            StrongSORT(
                strong_sort_weights,
                device,
                max_dist=cfg.STRONGSORT.MAX_DIST,
                max_iou_distance=cfg.STRONGSORT.MAX_IOU_DISTANCE,
                max_age=cfg.STRONGSORT.MAX_AGE,
                n_init=cfg.STRONGSORT.N_INIT,
                nn_budget=cfg.STRONGSORT.NN_BUDGET,
                mc_lambda=cfg.STRONGSORT.MC_LAMBDA,
                ema_alpha=cfg.STRONGSORT.EMA_ALPHA,
            )
        )
    outputs = [None] * nr_sources

    # Run tracking
    model.warmup(imgsz=(1 if pt else nr_sources, 3, *imgsz))  # warmup
    dt, seen = [0.0, 0.0, 0.0, 0.0], 0
    curr_frames, prev_frames = [None] * nr_sources, [None] * nr_sources
    for frame_idx, (path, im, im0s, vid_cap, s) in enumerate(dataset):
        t1 = time_sync()
        im = torch.from_numpy(im).to(device)
        im = im.half() if half else im.float()  # uint8 to fp16/32
        im /= 255.0  # 0 - 255 to 0.0 - 1.0
        if len(im.shape) == 3:
            im = im[None]  # expand for batch dim
        t2 = time_sync()
        dt[0] += t2 - t1

        # Inference
        visualize = increment_path(save_dir / Path(path[0]).stem, mkdir=True) if opt.visualize else False
        pred = model(im, augment=opt.augment, visualize=visualize)
        t3 = time_sync()
        dt[1] += t3 - t2

        # Apply NMS
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, opt.classes, opt.agnostic_nms, max_det=opt.max_det)
        dt[2] += time_sync() - t3

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            seen += 1
            if webcam:  # nr_sources >= 1
                p, im0, _ = path[i], im0s[i].copy(), dataset.count
                p = Path(p)  # to Path
                s += f'{i}: '
                txt_file_name = p.name
                save_path = str(save_dir / p.name)  # im.jpg, vid.mp4, ...
            else:
                p, im0, _ = path, im0s.copy(), getattr(dataset, 'frame', 0)
                p = Path(p)  # to Path
                # video
                ### =============================================================================================
                ### ROI Rectangle ( I will use cv2.selectROI later )
                # left_roi = [(381,331), (647,336), (647,497), (334,492)]
                # right_roi = [(648,335), (914,338), (958,498), (646,495)]
                # table_roi = [(381,331), (914,338), (958,498), (334,492)]
                # table_roi = [(0,0), (1280,0), (1280,720), (0,720)]
                table_roi = [(381,331), (1280,0), (1280,720), (0,720)]
                cv2.polylines(im0, [np.array(table_roi, np.int32)], True, (0,0,255), 2)
                # cv2.polylines(im0, [np.array(right_roi, np.int32)],True, (0,0,255),2 )
                ### =============================================================================================
                if source.endswith(VID_FORMATS):
                    txt_file_name = p.stem
                    save_path = str(save_dir / p.name)  # im.jpg, vid.mp4, ...
                # folder with imgs
                else:
                    txt_file_name = p.parent.name  # get folder name containing current img
                    save_path = str(save_dir / p.parent.name)  # im.jpg, vid.mp4, ...
            curr_frames[i] = im0

            txt_path = str(save_dir / 'tracks' / txt_file_name)  # im.txt
            s += '%gx%g ' % im.shape[2:]  # print string
            imc = im0.copy() if save_crop else im0  # for save_crop
            annotator = Annotator(im0, line_width=2, pil=not ascii)
            if cfg.STRONGSORT.ECC:  # camera motion compensation
                strongsort_list[i].tracker.camera_update(prev_frames[i], curr_frames[i])

            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(im.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                xywhs = xyxy2xywh(det[:, 0:4])
                confs = det[:, 4]
                clss = det[:, 5]

                # pass detections to strongsort
                t4 = time_sync()
                outputs[i] = strongsort_list[i].update(xywhs.cpu(), confs.cpu(), clss.cpu(), im0)
                t5 = time_sync()
                dt[3] += t5 - t4

                # draw boxes for visualization
                if len(outputs[i]) > 0:
                    for j, (output, conf) in enumerate(zip(outputs[i], confs)):
                        ### ========================================================================================================
                        ### Results ROI
                        ### ========================================================================================================
                        # if output[5] == 0.0:
                        #     bboxes = output[0:4]
                        #     id = output[4]
                        #     cls = output[5]
                        #     center = int((((output[0]) + (output[2]))/2) , (((output[1]) + (output[3]))/2))
                        #     print("center",center)
                        """
                        - create rectangle left/right
                        - display ball cordinates
                        - intersect ball & rectangle left/right
                        """
                        ## ball cord..
                        if output[5] == 0.0:
                            # print("bbox----------", output[0:4])
                            print("class----------", output[5])
                            # print("id -------------", output[4])
                            print("=============================================")
                            # display ball rectangle
                            ## cv2.rectangle(im0,(int(output[0]),int(output[1])),(int(output[2]),int(output[3])),(0,255,0),2 )
                            ball_box = output[0:4]
                            list_ball_cord.append(ball_box)
                            bbox_left = output[0]
                            bbox_top = output[1]
                            bbox_w = output[2] - output[0]
                            bbox_h = output[3] - output[1]
                            # print("bbox_left--------", bbox_left)
                            # print("bbox_top--------", bbox_top)
                            # print("bbox_w--------", bbox_w)
                            # print("bbox_h--------", bbox_h)
                            ## ball center point
                            ball_cx = int(bbox_left + bbox_w / 2)
                            ball_cy = int(bbox_top + bbox_h / 2)
                            # cv2.circle(im0, (ball_cx,ball_cy),5, (0,0,255),-1)
                            # # ball detect only on table >> return three output +1-inside the table -1-outside the table 0-on the boundry
                            ball_on_table_res = cv2.pointPolygonTest(np.array(table_roi, np.int32), (int(ball_cx), int(ball_cy)), False)
                            if ball_on_table_res >= 0:
                                cv2.circle(im0, (ball_cx, ball_cy), 20, (0,0,0), -1)
                        ### ========================================================================================================

                        bboxes = output[0:4]
                        id = output[4]
                        cls = output[5]
                        # print("bboxes--------", bboxes)
                        # print("cls-----------", cls)

                        if save_txt:
                            # to MOT format
                            bbox_left = output[0]
                            bbox_top = output[1]
                            bbox_w = output[2] - output[0]
                            bbox_h = output[3] - output[1]
                            # Write MOT compliant results to file
                            with open(txt_path + '.txt', 'a') as f:
                                f.write(('%g ' * 10 + '\n') % (frame_idx + 1, id, bbox_left,  # MOT format
                                                               bbox_top, bbox_w, bbox_h, -1, -1, -1, i))

                        if save_vid or save_crop or show_vid:  # Add bbox to image
                            c = int(cls)  # integer class
                            id = int(id)  # integer id
                            label = None if hide_labels else (f'{id} {names[c]}' if hide_conf else
                                                              (f'{id} {conf:.2f}' if hide_class else f'{id} {names[c]} {conf:.2f}'))
                            annotator.box_label(bboxes, label, color=colors(c, True))
                            #####################print("label---------", label)
                            if save_crop:
                                txt_file_name = txt_file_name if (isinstance(path, list) and len(path) > 1) else ''
                                save_one_box(bboxes, imc, file=save_dir / 'crops' / txt_file_name / names[c] / f'{id}' / f'{p.stem}.jpg', BGR=True)

                fps_StrongSORT = 1 / (t5 - t4)
                fps_yolo = 1 / (t3 - t2)
                LOGGER.info(f'{s}Done. YOLO:({t3 - t2:.3f}s), StrongSORT:({t5 - t4:.3f}s), ')
                print("fps_StrongSORT-----", fps_StrongSORT)
                print("fps_yolo-----", fps_yolo)

            else:
                strongsort_list[i].increment_ages()
                LOGGER.info('No detections')

            # Stream results
            im0 = annotator.result()
            if show_vid:
                # im0 = cv2.resize(im0, (640,640))
                cv2.imshow(str(p), im0)
                cv2.waitKey(1)  # 1 millisecond

            # Save results (image with detections)
            if save_vid:
                if vid_path[i] != save_path:  # new video
                    vid_path[i] = save_path
                    if isinstance(vid_writer[i], cv2.VideoWriter):
                        vid_writer[i].release()  # release previous video writer
                    if vid_cap:  # video
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                    else:  # stream
                        fps, w, h = 30, im0.shape[1], im0.shape[0]
                    save_path = str(Path(save_path).with_suffix('.mp4'))  # force *.mp4 suffix on results videos
                    vid_writer[i] = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
                vid_writer[i].write(im0)

            prev_frames[i] = curr_frames[i]

    print("fffffffffffffffffffffffffffffffff----------------------------------------", list_ball_cord)

    # Print results
    t = tuple(x / seen * 1E3 for x in dt)  # speeds per image
    LOGGER.info(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS, %.1fms strong sort update per image at shape {(1, 3, *imgsz)}' % t)
    if save_txt or save_vid:
        s = f"\n{len(list(save_dir.glob('tracks/*.txt')))} tracks saved to {save_dir / 'tracks'}" if save_txt else ''
        LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}{s}")
    if update:
        strip_optimizer(yolo_weights)  # update model (to fix SourceChangeWarning)
def parse_opt():
    parser = argparse.ArgumentParser()
    parser.add_argument('--yolo-weights', nargs='+', type=str, default='v5best_bp.pt', help='model.pt path(s)')
    parser.add_argument('--strong-sort-weights', type=str, default=WEIGHTS / 'osnet_x0_25_msmt17.pt')
    parser.add_argument('--config-strongsort', type=str, default='strong_sort/configs/strong_sort.yaml')
    parser.add_argument('--source', type=str, default='0', help='file/dir/URL/glob, 0 for webcam')
    parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640], help='inference size h,w')
    parser.add_argument('--conf-thres', type=float, default=0.5, help='confidence threshold')
    parser.add_argument('--iou-thres', type=float, default=0.5, help='NMS IoU threshold')
    parser.add_argument('--max-det', type=int, default=1000, help='maximum detections per image')
    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--show-vid', action='store_true', help='display tracking video results')
    parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
    parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels')
    parser.add_argument('--save-crop', action='store_true', help='save cropped prediction boxes')
    parser.add_argument('--save-vid', action='store_true', help='save video tracking results')
    parser.add_argument('--nosave', action='store_true', help='do not save images/videos')
    # class 0 is person, 1 is bycicle, 2 is car... 79 is oven
    parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --classes 0, or --classes 0 2 3')
    parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
    parser.add_argument('--augment', action='store_true', help='augmented inference')
    parser.add_argument('--visualize', action='store_true', help='visualize features')
    parser.add_argument('--update', action='store_true', help='update all models')
    parser.add_argument('--project', default=ROOT / 'runs/track', help='save results to project/name')
    parser.add_argument('--name', default='exp', help='save results to project/name')
    parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
    parser.add_argument('--line-thickness', default=3, type=int, help='bounding box thickness (pixels)')
    parser.add_argument('--hide-labels', default=False, action='store_true', help='hide labels')
    parser.add_argument('--hide-conf', default=False, action='store_true', help='hide confidences')
    parser.add_argument('--hide-class', default=False, action='store_true', help='hide IDs')
    parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference')
    parser.add_argument('--dnn', action='store_true', help='use OpenCV DNN for ONNX inference')
    opt = parser.parse_args()
    opt.imgsz *= 2 if len(opt.imgsz) == 1 else 1  # expand
    print_args(vars(opt))
    return opt

def main(opt):
    check_requirements(requirements=ROOT / 'requirements.txt', exclude=('tensorboard', 'thop'))
    run(**vars(opt))

if __name__ == "__main__":
    opt = parse_opt()
    main(opt)
I am also using the same model and was facing the same issue.
Try annotating more images and increasing the image size to 1024. Also make sure to use the best YOLOv5 weights in YOLOv5+DeepSORT.
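For instance, both can be passed straight on the command line (just a sketch; the flags come from the parse_opt shown in the question):

python track.py --yolo-weights best.pt --source video.mp4 --imgsz 1024 --show-vid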
DISCLAIMER: I am the creator of https://github.com/mikel-brostrom/Yolov5_StrongSORT_OSNet
First of all:
Does Yolov5+StrongSORT+OSNet run correctly without your custom modifications?
Secondly:
Have you checked that you are loading the same weights for Yolov5 and Yolov5+StrongSORT+OSNet?
Moreover:
Why all the custom modifications? If you only want to track class 0, you can run the following command:
python track.py --source 0 --yolo-weights best.pt --classes 0
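And if you want to tune the tracker itself, the parameters live in strong_sort/configs/strong_sort.yaml; these are the keys your own code already reads from cfg.STRONGSORT (the values below are only illustrative placeholders, check the file shipped with the repo for the real defaults):

STRONGSORT:
  ECC: True              # camera motion compensation
  MC_LAMBDA: 0.995       # weight between appearance and motion cost in matching
  EMA_ALPHA: 0.9         # exponential moving average for appearance features
  MAX_DIST: 0.2          # appearance distance threshold for matching
  MAX_IOU_DISTANCE: 0.7  # IoU gating threshold
  MAX_AGE: 30            # frames a lost track is kept before deletion
  N_INIT: 3              # detections needed before a track is confirmed
  NN_BUDGET: 100         # appearance feature gallery size per track

Lowering N_INIT and raising MAX_AGE are usually the first things to try when a small, fast object keeps losing its track.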
I have been trying to make a Random Forest model for Sign Language classification. The Data Set is balanced and the accuracy is 98%, but it always predicts the same class.
This is how I get and process the data:
train_dir = "../input/asl-alphabet/asl_alphabet_train/asl_alphabet_train/"
test_dir = "../input/asl-alphabet/asl_alphabet_test/asl_alphabet_test/"
train_len = 87000
image_size=50
def get_data(folder):
    X = np.empty((train_len, image_size, image_size, 3), dtype=np.float32)
    y = np.empty((train_len), dtype=int)
    cnt = 0
    for folderName in os.listdir(folder):
        if not folderName.startswith('.'):
            if folderName in ['A']:
                label = 0
            elif folderName in ['B']:
                label = 1
            elif folderName in ['C']:
                label = 2
            elif folderName in ['D']:
                label = 3
            elif folderName in ['E']:
                label = 4
            elif folderName in ['F']:
                label = 5
            elif folderName in ['G']:
                label = 6
            elif folderName in ['H']:
                label = 7
            elif folderName in ['I']:
                label = 8
            elif folderName in ['J']:
                label = 9
            elif folderName in ['K']:
                label = 10
            elif folderName in ['L']:
                label = 11
            elif folderName in ['M']:
                label = 12
            elif folderName in ['N']:
                label = 13
            elif folderName in ['O']:
                label = 14
            elif folderName in ['P']:
                label = 15
            elif folderName in ['Q']:
                label = 16
            elif folderName in ['R']:
                label = 17
            elif folderName in ['S']:
                label = 18
            elif folderName in ['T']:
                label = 19
            elif folderName in ['U']:
                label = 20
            elif folderName in ['V']:
                label = 21
            elif folderName in ['W']:
                label = 22
            elif folderName in ['X']:
                label = 23
            elif folderName in ['Y']:
                label = 24
            elif folderName in ['Z']:
                label = 25
            elif folderName in ['del']:
                label = 26
            elif folderName in ['nothing']:
                label = 27
            elif folderName in ['space']:
                label = 28
            else:
                label = 29
            for image_filename in tqdm(os.listdir(folder + folderName)):
                img_file = cv2.imread(folder + folderName + '/' + image_filename)
                if img_file is not None:
                    img_file = skimage.transform.resize(img_file, (image_size, image_size, 3))
                    img_arr = np.asarray(img_file).reshape((-1, image_size, image_size, 3))
                    X[cnt] = img_arr
                    y[cnt] = label
                    cnt += 1
    return X, y
letters, labels= get_data(train_dir)
print("The shape of letters is : ", letters.shape)
#preprocessing
import tensorflow as tf
X_train, X_test, y_train, y_test = train_test_split(letters, labels, test_size=0.3, random_state=42, stratify=labels)
X_valid, X_train = X_train[:20000] / 255., X_train[20000:] / 255.
y_valid, y_train = y_train[:20000], y_train[20000:]
X_test = X_test / 255.
#reshape to 2d array
nsamples, nx, ny, nrgb = X_train.shape
x_train2 = X_train.reshape((nsamples,nx*ny*nrgb))
#so,eventually,model.predict() should also be a 2d input
nsamples, nx, ny, nrgb = X_test.shape
x_test2 = X_test.reshape((nsamples,nx*ny*nrgb))
This is how I made the model:
from sklearn.ensemble import RandomForestClassifier
model = RandomForestClassifier(random_state=42, n_jobs=-1, max_depth=20,
                               n_estimators=100, oob_score=True)
model.fit(x_train2,y_train)
y_pred=model.predict(x_test2)
y_pred
#y_pred = clf.predict(X_test)
accuracy_score(y_test, y_pred)
accuracy_score(y_pred,y_test)
print(classification_report(y_pred,y_test))
To test the predictions I use:
label_names = ['A','B','C','D','E','F','G','H','I','J','K','L','M','N',
               'O','P','Q','R','S','T','U','V','W','X','Y','Z','del','nothing','space']
img_path='../input/asl-alphabet/asl_alphabet_test/asl_alphabet_test/L_test.jpg'
img_arr=cv2.imread(img_path)
img_arr=cv2.resize(img_arr,(64,64))
#so,eventually,model.predict() should also be a 2d input
nx, ny, nrgb = img_arr.shape
img_arr2 = img_arr.reshape(1,(nx*ny*nrgb))
classes = ["A","B","C","D","E","F","G","H","I","J", "K", "L", "M" "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "nothing", "space"]
ans=model.predict(img_arr2)
print(label_names[ans[0]])
I have tried to implement grid search, but I wasn't able to make it work. (I am writing it on Kaggle and it says "Your notebook tried to allocate more memory than is available".)
I am quite new to this, so I am not completely certain about anything in my code.
You are missing the preprocessing of the images between training and testing. You need to apply the same steps to the test set so your model gets the same kind of inputs. Try the following:
label_names = ['A','B','C','D','E','F','G','H','I','J','K','L','M','N',
               'O','P','Q','R','S','T','U','V','W','X','Y','Z','del','nothing','space']
img_path='../input/asl-alphabet/asl_alphabet_test/asl_alphabet_test/L_test.jpg'
img_arr=cv2.imread(img_path)
# Preprocess test image
img_arr = skimage.transform.resize(img_arr, (50, 50, 3))
img_arr = img_arr/255.
#so,eventually,model.predict() should also be a 2d input
nx, ny, nrgb = img_arr.shape
img_arr2 = img_arr.reshape(1,(nx*ny*nrgb))
classes = ["A","B","C","D","E","F","G","H","I","J", "K", "L", "M" "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "nothing", "space"]
ans=model.predict(img_arr2)
print(label_names[ans[0]])
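To keep the two code paths from drifting apart again, you could also put the shared steps into one helper and call it both when building the training matrix and at prediction time (a small sketch, assuming the 50x50 setup from the question):

import cv2
import skimage.transform

IMAGE_SIZE = 50  # must match the size the forest was trained on

def preprocess(img_bgr):
    # Same steps as in training: skimage resize, scale, then flatten to one row.
    img = skimage.transform.resize(img_bgr, (IMAGE_SIZE, IMAGE_SIZE, 3))
    img = img / 255.
    return img.reshape(1, -1)

ans = model.predict(preprocess(cv2.imread(img_path)))
print(label_names[ans[0]])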
I'm running a Python script to detect and track persons, with satisfying results. The problem is that I only want to track persons and nothing else, but the Caffe model detects about 20 object classes, and the rectangles drawn for cars and other objects cover the whole picture. Is there any way to reduce the number of objects detected? Removing them from the script was not successful; only the labels disappeared.
#Import the neccesary libraries
import numpy as np
import argparse
import cv2

# construct the argument parse
parser = argparse.ArgumentParser(
    description='Script to run MobileNet-SSD object detection network ')
parser.add_argument("--video", help="path to video file. If empty, camera's stream will be used")
parser.add_argument("--prototxt", default="MobileNetSSD_deploy.prototxt",
                    help='Path to text network file: ''MobileNetSSD_deploy.prototxt for Caffe model or ')
parser.add_argument("--weights", default="MobileNetSSD_deploy.caffemodel",
                    help='Path to weights: ''MobileNetSSD_deploy.caffemodel for Caffe model or ')
parser.add_argument("--thr", default=0.2, type=float, help="confidence threshold to filter out weak detections")
args = parser.parse_args()

#Load the Caffe model
net = cv2.dnn.readNetFromCaffe(args.prototxt, args.weights)

# Labels of Network.
classNames = {0: 'background',
              1: 'aeroplane', 2: 'bicycle', 3: 'bird', 4: 'boat',
              5: 'bottle', 6: 'bus', 7: 'car', 8: 'cat', 9: 'chair',
              10: 'cow', 11: 'diningtable', 12: 'dog', 13: 'horse',
              14: 'motorbike', 15: 'person', 16: 'pottedplant',
              17: 'sheep', 18: 'sofa', 19: 'train', 20: 'tvmonitor'}

# Open video file or capture device.
if args.video:
    cap = cv2.VideoCapture(args.video)
else:
    cap = cv2.VideoCapture(0)
    # cap.open("http://192.168.0.17:8081")

while True:
    # Capture frame-by-frame
    ret, frame = cap.read()
    frame_resized = cv2.resize(frame, (300, 300))  # resize frame for prediction

    # MobileNet requires fixed dimensions for input image so we have to resize to 300x300 pixels.
    blob = cv2.dnn.blobFromImage(frame_resized, 0.007843, (300, 300), (127.5, 127.5, 127.5), False)
    #Set to network the input blob
    net.setInput(blob)
    #Prediction of network
    detections = net.forward()

    #Size of frame resize (300x300)
    cols = frame_resized.shape[1]
    rows = frame_resized.shape[0]

    #For get the class and location of object detected,
    # There is a fix index for class, location and confidence
    # value in #detections array .
    for i in range(detections.shape[2]):
        confidence = detections[0, 0, i, 2]  #Confidence of prediction
        if confidence > args.thr:  # Filter prediction
            class_id = int(detections[0, 0, i, 1])  # Class label

            # Object location
            xLeftBottom = int(detections[0, 0, i, 3] * cols)
            yLeftBottom = int(detections[0, 0, i, 4] * rows)
            xRightTop = int(detections[0, 0, i, 5] * cols)
            yRightTop = int(detections[0, 0, i, 6] * rows)

            # Factor for scale to original size of frame
            heightFactor = frame.shape[0] / 300.0
            widthFactor = frame.shape[1] / 300.0
            # Scale object detection to frame
            xLeftBottom = int(widthFactor * xLeftBottom)
            yLeftBottom = int(heightFactor * yLeftBottom)
            xRightTop = int(widthFactor * xRightTop)
            yRightTop = int(heightFactor * yRightTop)
            # Draw location of object
            cv2.rectangle(frame, (xLeftBottom, yLeftBottom), (xRightTop, yRightTop),
                          (0, 255, 0), 2)

            # Draw label and confidence of prediction in frame resized
            if class_id in classNames:
                label = classNames[class_id] + ": " + str(confidence)
                labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)

                yLeftBottom = max(yLeftBottom, labelSize[1])
                cv2.rectangle(frame, (xLeftBottom, yLeftBottom - labelSize[1]),
                              (xLeftBottom + labelSize[0], yLeftBottom + baseLine),
                              (255, 255, 255), cv2.FILLED)
                cv2.putText(frame, label, (xLeftBottom, yLeftBottom),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0))
                print(label)  #print class and confidence

    cv2.namedWindow("frame", cv2.WINDOW_NORMAL)
    cv2.imshow("frame", frame)
    if cv2.waitKey(1) >= 0:  # Break with ESC
        break
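What I am essentially after is to skip every detection that is not class 15 ('person' in the classNames above). This is roughly the filtering I have in mind (a minimal sketch, I am not sure it is the cleanest way):

# Inside the detection loop: keep only 'person' (class id 15) and drop everything else.
for i in range(detections.shape[2]):
    confidence = detections[0, 0, i, 2]
    class_id = int(detections[0, 0, i, 1])
    if confidence <= args.thr or class_id != 15:  # 15 == 'person' in classNames
        continue
    # ... draw the rectangle and label for this detection exactly as above ...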