Clipping rasters based on a ShapeFile using R - mapping

I am using the WorldPop Population Count for DR Congo as a GeoTIFF (.tif) file (see here). This single-band raster has about 5.17 million grid cells at 30 arc-second (approximately 1 km) resolution covering the entire country.
I would like to clip out just the two northeastern provinces (Ituri and Nord-Kivu) in R and save the clipped file as a .tif file. I am able to do this in QGIS (see here), but that workflow is more complicated than it needs to be. Instead, I would like to load the GADM boundaries inside the R workspace, overlay them on the population count raster, and clip out the two provinces.
Here is my preliminary attempt at importing the raster (.tif) file and the GADM boundaries (.rds) in R.
###########################################################################
# #
# Spatial tracking of the 2018-2020 Kivu Ebola outbreak in DRC #
# #
# This source code is issued under the GNU General Public License, v3.0. #
# #
# This script is free software; you can redistribute it and/or modify #
# it under the terms of the GNU General Public License as published by #
# the Free Software Foundation; either version 3.0 of the License, or #
# (at your option) any later version. #
# #
# See the GNU General Public License for more details. #
# #
# https://www.gnu.org/licenses/gpl-3.0.en.html #
###########################################################################
rm(list = ls())
#install.packages("raster", dependencies = T)
#install.packages("rgdal", dependencies = T)
#install.packages("ncdf4", dependencies = T)
#install.packages("rstudioapi", dependencies = T)
library(sp)
library(raster)
library(rgdal)
library(ncdf4)
library(rstudioapi)
# library(colorRamps)
# library(ggmap)
# library(ggplot2)
#----------------------------#
# Set your working directory #
#----------------------------#
setwd(dirname(rstudioapi::getActiveDocumentContext()$path)) # RStudio IDE preferred
getwd() # Path to your working directory
#----------------------------------------------------------------#
# Source 1: WorldPop UN-Adjusted Population Count GeoTIFF raster #
#----------------------------------------------------------------#
# Downloaded from https://www.worldpop.org/geodata/summary?id=35845
DRCWorldPop <- raster("cod_ppp_2020_1km_Aggregated_UNadj.tif")
DRCWorldPop # this original raster layer has 2,261 rows and 2,289 columns = 5,175,429 grid cells
dim(DRCWorldPop); length(DRCWorldPop); extent(DRCWorldPop)
names(DRCWorldPop) <- "Susceptible"
names(DRCWorldPop); res(DRCWorldPop); projection(DRCWorldPop)
summary(getValues(DRCWorldPop))
minValue(DRCWorldPop); maxValue(DRCWorldPop)
DRCWorldPop <- replace(DRCWorldPop, is.na(DRCWorldPop), 0)
summary(getValues(DRCWorldPop))
sum(getValues(DRCWorldPop))
nlayers(DRCWorldPop)
#---------------------#
# Source 2: From GADM #
#---------------------#
#?getData
drc <- getData("GADM", level=1, country="COD")
#drc$NAME_1 # List of all provinces
drc <- drc[drc$NAME_1 %in% c("Ituri", "Nord-Kivu"), ]
r <- raster(drc, resolution = res(DRCWorldPop)[1])
values(r) <- 0
r # this raster layer has 689 rows and 487 columns
extent(r)
plot(log(DRCWorldPop), main = "2020 UN-Adjusted Population Count \n for DR Congo (log-scale)", col=topo.colors(100), legend.width=2, legend.shrink=1, legend.args=list(text='Persons', side=4, font=2, line=2.5, cex=0.8), axes=T)
lines(drc, col="black", lwd=1)
# plot(drc, col = "light yellow")
# lines(drc, col="black", lwd=2)
#--------------------------------------------#
# Merging and Cropping Source 1 and Source 2 #
#--------------------------------------------#
# ?merge
merged <- merge(DRCWorldPop, r, tolerance = 0.07)
# Task: What is the significance of tolerance?
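# (In raster::merge(), 'tolerance' is the permissible difference in the origin and
#  resolution of the input rasters, expressed relative to the cell size; here it
#  allows the GADM-based template 'r', whose origin differs slightly from the
#  WorldPop grid, to be merged with DRCWorldPop.)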
# ?crop
cropped <- crop(merged, drc)
cropped # this raster layer has 689 rows and 487 columns
extent(cropped)
plot(log(cropped), main = "2020 UN-Adjusted Population Count \n for DR Congo (log-scale)", legend.width=2, legend.shrink=1, legend.args=list(text='Persons', side=4, font=2, line=2.5, cex=0.8), axes=T)
lines(drc, col="red", lwd=2)
# Task: How to change the plot Legend to make it more colorful?
# Task: The plot is in log-scale, how to construct a similar colorful plot in raw scale?
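# (One option for a raw-scale plot with a different palette, as an untested sketch:
#  plot(cropped, col = rev(terrain.colors(100)), main = "2020 UN-Adjusted Population Count \n for DR Congo")
#  lines(drc, col = "red", lwd = 2) )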
writeRaster(cropped, "cropped.tif", format = "GTiff", overwrite = TRUE)
croppedDRCWorldPop <- raster("cropped.tif")
croppedDRCWorldPop # this cropped raster layer has 689 rows and 487 columns
#--------------------------------#
# Aggregating the cropped raster #
#--------------------------------#
aggregationFactor <- 10 # aggregate blocks of 10 x 10 cells (~10 km, given the ~1 km input resolution)
DRC_aggr_count <- aggregate(croppedDRCWorldPop, fact = c(aggregationFactor, aggregationFactor), fun = sum, na.rm = TRUE)
DRC_aggr_count # this raster layer has 69 rows and 49 columns
names(DRC_aggr_count); res(DRC_aggr_count); projection(DRC_aggr_count)
dim(DRC_aggr_count); length(DRC_aggr_count); extent(DRC_aggr_count); isLonLat(DRC_aggr_count)
summary(getValues(DRC_aggr_count))
sum(getValues(DRC_aggr_count))
nrow(DRC_aggr_count)
ncol(DRC_aggr_count)
# ?xyFromCell
#xyFromCell(DRC_aggr_count, 1:ncell(DRC_aggr_count), spatial=FALSE)
# #xy for corners of a raster:
# xyFromCell(DRC_aggr_count, c(1, ncol(DRC_aggr_count), ncell(DRC_aggr_count)-ncol(DRC_aggr_count)+1, ncell(DRC_aggr_count)))
#
# xmin(DRC_aggr_count)
# xmax(DRC_aggr_count)
# ymin(DRC_aggr_count)
# ymax(DRC_aggr_count)
# origin(DRC_aggr_count)
# https://www.nationalgeographic.org/encyclopedia/latitude/
# https://www.nationalgeographic.org/encyclopedia/longitude/
plot(log(DRC_aggr_count), main = "2020 UN-Adjusted Population Count \n for DR Congo (log-scale)", legend.width=2, legend.shrink=1, legend.args=list(text='Persons', side=4, font=2, line=2.5, cex=0.8))
lines(drc, col="red", lwd=2)
text(32,3,"aggregated", xpd = TRUE)
# Task: How to change the plot Legend to make it more colorful?
# Task: The plot is in log-scale, how to construct a similar colorful plot in raw scale?
# library(RColorBrewer)
# warna <- brewer.pal(n = 11, name = "RdYlGn")
# #warna <- rev(warna)
# plot(log(DRC_aggr_count), col=palette(warna), main = "2020 UN-Adjusted Population Count \n for DR Congo (log-scale)", legend.width=2, legend.shrink=1, legend.args=list(text='Persons', side=4, font=2, line=2.5, cex=0.8))
# lines(drc, col="red", lwd=2)
# #plot.window(xlim=extent(DRC_aggr_count)[1:2], ylim=extent(DRC_aggr_count)[3:4])
#spplot(log(DRC_aggr_count))
#-----------------------------------#
# Export cropped raster to a netCDF #
#-----------------------------------#
if (require(ncdf4)) {
#rnc <- writeRaster(DRCWorldPop, filename ='Congo_full_0000.nc', format = "CDF", varname = "Susceptible", varunit = "Persons", longname = "Susceptible", overwrite = TRUE)
rnc_aggr_tif <- writeRaster(DRC_aggr_count, filename ='cod_ppp_2020_10km_Aggregated_UNadj.tif', format = "GTiff", varname = "Susceptible", varunit = "Persons", longname = "Susceptible", overwrite = TRUE)
rnc_aggr <- writeRaster(DRC_aggr_count, filename ='Congo_0000.nc', format = "CDF", varname = "Susceptible", varunit = "Persons", longname = "Susceptible", overwrite = TRUE)
}
I would like the cell values for the neighbouring countries (Uganda, Rwanda, South Sudan, Burundi) to be set equal to zero. The resulting .tif has 689 rows and 487 columns, i.e. 335,543 non-overlapping grid cells, each containing a population count. This is too big for my simulation purposes, so I aggregate the cropped raster by a factor of 10 to get a raster with 69 rows and 49 columns. Aggregation is done using the code above, and the result is saved as a NetCDF file and viewed with the Panoply tool. The base NetCDF file has one layer (the population count data, which we call "Susceptible"). To this NetCDF file we add additional epidemic compartments as follows.
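As an aside on zeroing out the neighbouring countries: a minimal sketch with the raster package (using the objects defined above; not part of the original workflow) is to mask against the province polygons with updatevalue = 0 instead of relying on merge():
cropped0 <- crop(DRCWorldPop, drc)
cropped0 <- mask(cropped0, drc, updatevalue = 0) # cells outside Ituri/Nord-Kivu become 0 rather than NA
writeRaster(cropped0, "cropped_zero.tif", format = "GTiff", overwrite = TRUE) # illustrative filename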
#---------------------------------------------------------#
# Adding more epidemic compartments to an existing netCDF #
#---------------------------------------------------------#
episim <- nc_open("Congo_0000.nc", write = TRUE)
episim$dim$latitude$vals; length(episim$dim$latitude$vals) # latitude is the vertical axis (rows, in our case)
episim$dim$longitude$vals; length(episim$dim$longitude$vals) # longitude is the horizontal axis (columns, in our case)
nrows <- length(episim$dim$latitude$vals)
ncols <- length(episim$dim$longitude$vals)
# Longitude is "East - West" means columns
# Latitude is "North - South" means rows
#ncatt_get(episim, 0, attname=NA, verbose=FALSE)
ncatt_put(episim, 0, "created_by", attval = c("Ashok Krishnamurthy"), verbose=FALSE)
ncatt_put(episim, 0, "contact", attval = c("Ashok Krishnamurthy <akrishnamurthy#mtroyal.ca>"), verbose=FALSE)
ncatt_put(episim, 0, "nRows", attval = nrows, verbose=FALSE)
ncatt_put(episim, 0, "nCols", attval = ncols, verbose=FALSE)
#Upper Left Corner pair is (first row, first column) = (27.22375, 3.674584)
ncatt_put(episim, 0, "ULCornerLongitude", attval = episim$dim$longitude$vals[1], verbose=FALSE)
ncatt_put(episim, 0, "ULCornerLatitude", attval = episim$dim$latitude$vals[1], verbose=FALSE)
#Lower Left Corner pair is (last row, first column) = (27.22375, -2.075416)
ncatt_put(episim, 0, "LLCornerLongitude", attval = episim$dim$longitude$vals[1], verbose=FALSE) #
ncatt_put(episim, 0, "LLCornerLatitude", attval = episim$dim$latitude$vals[nrows], verbose=FALSE) #
#ncatt_put(episim, 0, "cellSize", attval = abs(episim$dim$longitude$vals[1] - episim$dim$longitude$vals[2]), verbose=FALSE)
ncatt_put(episim, 0, "hcellSize", attval = res(DRC_aggr_count)[1], verbose=FALSE)
ncatt_put(episim, 0, "vcellSize", attval = res(DRC_aggr_count)[2], verbose=FALSE)
# episim$var[[1]] # str(episim$var[[1]]$dim) # str(episim$var[[1]])
#----------------------------------------------------------------#
# Adding new Epidemic State Variables to an existing netCDF file #
#----------------------------------------------------------------#
x <- ncdim_def(name = "longitude", units = "degrees_east", vals = episim$dim$longitude$vals, unlim = FALSE, create_dimvar = TRUE, calendar = NA, longname = "longitude")
y <- ncdim_def(name = "latitude", units = "degrees_north", vals = episim$dim$latitude$vals, unlim = FALSE, create_dimvar = TRUE, calendar = NA, longname = "latitude")
#?ncvar_def
Vaccinated <- ncvar_def(name = "Vaccinated", units = "Persons", dim = list(x,y), missval = NULL, prec = "float", shuffle=FALSE, compression=NA, chunksizes=NA, verbose=FALSE, longname = "Vaccinated")
Exposed <- ncvar_def(name = "Exposed", units = "Persons", dim = list(x,y), missval = NULL, prec = "float", shuffle=FALSE, compression=NA, chunksizes=NA, verbose=FALSE, longname = "Exposed")
Infected <- ncvar_def(name = "Infected", units = "Persons", dim = list(x,y), missval = NULL, prec = "float", shuffle=FALSE, compression=NA, chunksizes=NA, verbose=FALSE, longname = "Infected")
Recovered <- ncvar_def(name = "Recovered", units = "Persons", dim = list(x,y), missval = NULL, prec = "float", shuffle=FALSE, compression=NA, chunksizes=NA, verbose=FALSE, longname = "Recovered")
Dead <- ncvar_def(name = "Dead", units = "Persons", dim = list(x,y), missval = NULL, prec = "float", shuffle=FALSE, compression=NA, chunksizes=NA, verbose=FALSE, longname = "Dead")
Inhabitable <- ncvar_def(name = "Inhabitable", units = "Binary", dim = list(x,y), missval = NULL, prec = "integer", shuffle=FALSE, compression=NA, chunksizes=NA, verbose=FALSE, longname = "Inhabitable")
#ProvinceIdentifier <- ncvar_def(name = "ProvinceIdentifier", units = "Province", dim = list(x,y), missval = NULL, prec = "integer", shuffle=FALSE, compression=NA, chunksizes=NA, verbose=FALSE, longname = "ProvinceIdentifier")
# Epidemic State Variables (or Epidemic Compartments) are added in the following order
ncvar_add(episim, Vaccinated)
ncvar_add(episim, Exposed)
ncvar_add(episim, Infected)
ncvar_add(episim, Recovered)
ncvar_add(episim, Dead)
ncvar_add(episim, Inhabitable)
#ncvar_add(episim, ProvinceIdentifier)
currSusceptible <- ncvar_get(episim, episim$var[[2]]) # WorldPop
#ProvinceIdentifier <- ncvar_get(epiProvince, epiProvince$var[[2]]) # Sitansu
dim(currSusceptible) # 49 rows and 69 columns
dim(t(currSusceptible)) # 69 rows and 49 columns
# sum(currSusceptible)
# sum(currSusceptible>0)
# sum(currSusceptible == 0)
# max(currSusceptible)
# currSusceptible[ncols, nrows]
# currSusceptible[nrows, ncols] # Subscript out of bounds error! AS EXPECTED
# dim(ProvinceIdentifier)
# dim(t(ProvinceIdentifier))
# table(ProvinceIdentifier)
# I could use a combination of transpose and flip from raster package
currVaccinated <- currExposed <- currInfected <- currRecovered <- currDead <- currInhabitable <- matrix(0, length(episim$dim$longitude$vals),length(episim$dim$latitude$vals))
for(i in 1:ncols)
{ # ncols
for(j in 1:nrows)
{ # nrows
if (currSusceptible[i,j] > 0)
{
currInhabitable[i,j] <- 1
}
if (currSusceptible[i,j] == 0)
{
currInhabitable[i,j] <- 0
}
}
}
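# (Equivalent vectorized form of the loop above, for reference:
#  currInhabitable <- (currSusceptible > 0) * 1 )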
table(currInhabitable)
# Some cells in DRC have a population count equal to zero. Possibly forests, deserts or uninhabited areas
# currInhabitable
# 0 1
# 903 2478
nc_close(episim)
################################################################################################
episim <- nc_open("Congo_0000.nc", write = TRUE) # Fill values to an existing ncdf file
ncvar_put(episim, episim$var[[2]], currSusceptible)
ncvar_put(episim, episim$var[[3]], currVaccinated)
ncvar_put(episim, episim$var[[4]], currExposed)
ncvar_put(episim, episim$var[[5]], currInfected)
ncvar_put(episim, episim$var[[6]], currRecovered)
ncvar_put(episim, episim$var[[7]], currDead)
ncvar_put(episim, episim$var[[8]], currInhabitable)
# ncvar_put(episim, episim$var[[9]], ProvinceIdentifier)
cat(paste("The file", episim$filename, "has", episim$nvars, "variables"), fill=TRUE)
for (v in 1:episim$nvars) cat(paste("Variable ", v, " is ", episim$var[[v]]$name,".",sep=""), fill=TRUE)
#episim # class(episim) # str(episim)
nc_close(episim)
How am I able to overlay these two layers, clip out just the two provinces (Ituri and Nord-Kivu), and save the result back as a .tif file?

Here is a minimal, reproducible, self-contained example:
library(raster)
# example polygons
f <- system.file("external/lux.shp", package="raster")
p <- shapefile(f)
# example raster
r <- raster(p, res=0.01)
values(r) <- 1:ncell(r)
# select two areas
pp <- p[1:2, ]
# have a look
plot(r)
lines(p)
lines(pp, col="red", lwd=3)
Solution
x <- crop(r, pp)
m <- mask(x, pp)
plot(m)
lines(pp)
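If, as in the question, you want zeros rather than NA outside the selected polygons, and a GeoTIFF on disk, a small extension of the above (a sketch using the same objects; the filename is illustrative):
m0 <- mask(x, pp, updatevalue = 0) # cells outside 'pp' become 0 instead of NA
writeRaster(m0, "clipped.tif", overwrite = TRUE)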
Also see this question from only yesterday...

Related

DeepSORT is not tracking all classes: how to fine-tune my tracker (YOLOv5 + DeepSORT)

YOLOv5 detects perfectly when I run detect.py, but unfortunately with DeepSORT (track.py) it is not tracking, and not even detecting, with the tracker. How do I set the parameters for my tracker?
yolov5:
>> python detect.py --source video.mp4 --weights best.pt
yolov5+deepsort:
>> python track.py --yolo-weights best.pt --source video.mp4 --strong-sort-weights osnet_x0_25_msmt17.pt --show-vid --imgsz 640 --hide-labels
import argparse
from email.headerregistry import ContentDispositionHeader
import os
from pkg_resources import fixup_namespace_packages
# limit the number of cpus used by high performance libraries
os.environ["OMP_NUM_THREADS"] = "1"
os.environ["OPENBLAS_NUM_THREADS"] = "1"
os.environ["MKL_NUM_THREADS"] = "1"
os.environ["VECLIB_MAXIMUM_THREADS"] = "1"
os.environ["NUMEXPR_NUM_THREADS"] = "1"
import sys
import numpy as np
from pathlib import Path
import torch
import torch.backends.cudnn as cudnn
FILE = Path(__file__).resolve()
ROOT = FILE.parents[0] # yolov5 strongsort root directory
WEIGHTS = ROOT / 'weights'
if str(ROOT) not in sys.path:
sys.path.append(str(ROOT)) # add ROOT to PATH
if str(ROOT / 'yolov5') not in sys.path:
sys.path.append(str(ROOT / 'yolov5')) # add yolov5 ROOT to PATH
if str(ROOT / 'strong_sort') not in sys.path:
sys.path.append(str(ROOT / 'strong_sort')) # add strong_sort ROOT to PATH
ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative
import logging
from yolov5.models.common import DetectMultiBackend
from yolov5.utils.dataloaders import VID_FORMATS, LoadImages, LoadStreams
from yolov5.utils.general import (LOGGER, check_img_size, non_max_suppression, scale_coords, check_requirements, cv2,
check_imshow, xyxy2xywh, increment_path, strip_optimizer, colorstr, print_args, check_file)
from yolov5.utils.torch_utils import select_device, time_sync
from yolov5.utils.plots import Annotator, colors, save_one_box
from strong_sort.utils.parser import get_config
from strong_sort.strong_sort import StrongSORT
# remove duplicated stream handler to avoid duplicated logging
logging.getLogger().removeHandler(logging.getLogger().handlers[0])
list_ball_cord = list()
#torch.no_grad()
def run(
source='0',
yolo_weights=WEIGHTS / 'yolov5m.pt', # model.pt path(s),
strong_sort_weights=WEIGHTS / 'osnet_x0_25_msmt17.pt', # model.pt path,
config_strongsort=ROOT / 'strong_sort/configs/strong_sort.yaml',
imgsz=(640, 640), # inference size (height, width)
conf_thres=0.25, # confidence threshold
iou_thres=0.45, # NMS IOU threshold
max_det=1000, # maximum detections per image
device='', # cuda device, i.e. 0 or 0,1,2,3 or cpu
show_vid=False, # show results
save_txt=False, # save results to *.txt
save_conf=False, # save confidences in --save-txt labels
save_crop=False, # save cropped prediction boxes
save_vid=False, # save confidences in --save-txt labels
nosave=False, # do not save images/videos
classes=None, # filter by class: --class 0, or --class 0 2 3
agnostic_nms=False, # class-agnostic NMS
augment=False, # augmented inference
visualize=False, # visualize features
update=False, # update all models
project=ROOT / 'runs/track', # save results to project/name
name='exp', # save results to project/name
exist_ok=False, # existing project/name ok, do not increment
line_thickness=3, # bounding box thickness (pixels)
hide_labels=False, # hide labels
hide_conf=False, # hide confidences
hide_class=False, # hide IDs
half=False, # use FP16 half-precision inference
dnn=False, # use OpenCV DNN for ONNX inference
):
source = str(source)
save_img = not nosave and not source.endswith('.txt') # save inference images
is_file = Path(source).suffix[1:] in (VID_FORMATS)
is_url = source.lower().startswith(('rtsp://', 'rtmp://', 'http://', 'https://'))
webcam = source.isnumeric() or source.endswith('.txt') or (is_url and not is_file)
if is_url and is_file:
source = check_file(source) # download
# Directories
if not isinstance(yolo_weights, list): # single yolo model
exp_name = str(yolo_weights).rsplit('/', 1)[-1].split('.')[0]
elif type(yolo_weights) is list and len(yolo_weights) == 1: # single models after --yolo_weights
exp_name = yolo_weights[0].split(".")[0]
else: # multiple models after --yolo_weights
exp_name = 'ensemble'
exp_name = name if name is not None else exp_name + "_" + str(strong_sort_weights).split('/')[-1].split('.')[0]
save_dir = increment_path(Path(project) / exp_name, exist_ok=exist_ok) # increment run
(save_dir / 'tracks' if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir
# Load model
device = select_device(device)
model = DetectMultiBackend(yolo_weights, device=device, dnn=dnn, data=None, fp16=half)
stride, names, pt = model.stride, model.names, model.pt
imgsz = check_img_size(imgsz, s=stride) # check image size
# Dataloader
if webcam:
show_vid = check_imshow()
cudnn.benchmark = True # set True to speed up constant image size inference
dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt)
nr_sources = len(dataset)
else:
dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt)
nr_sources = 1
vid_path, vid_writer, txt_path = [None] * nr_sources, [None] * nr_sources, [None] * nr_sources
# initialize StrongSORT
cfg = get_config()
cfg.merge_from_file(opt.config_strongsort)
# Create as many strong sort instances as there are video sources
strongsort_list = []
for i in range(nr_sources):
strongsort_list.append(
StrongSORT(
strong_sort_weights,
device,
max_dist=cfg.STRONGSORT.MAX_DIST,
max_iou_distance=cfg.STRONGSORT.MAX_IOU_DISTANCE,
max_age=cfg.STRONGSORT.MAX_AGE,
n_init=cfg.STRONGSORT.N_INIT,
nn_budget=cfg.STRONGSORT.NN_BUDGET,
mc_lambda=cfg.STRONGSORT.MC_LAMBDA,
ema_alpha=cfg.STRONGSORT.EMA_ALPHA,
)
)
outputs = [None] * nr_sources
# Run tracking
model.warmup(imgsz=(1 if pt else nr_sources, 3, *imgsz)) # warmup
dt, seen = [0.0, 0.0, 0.0, 0.0], 0
curr_frames, prev_frames = [None] * nr_sources, [None] * nr_sources
for frame_idx, (path, im, im0s, vid_cap, s) in enumerate(dataset):
t1 = time_sync()
im = torch.from_numpy(im).to(device)
im = im.half() if half else im.float() # uint8 to fp16/32
im /= 255.0 # 0 - 255 to 0.0 - 1.0
if len(im.shape) == 3:
im = im[None] # expand for batch dim
t2 = time_sync()
dt[0] += t2 - t1
# Inference
visualize = increment_path(save_dir / Path(path[0]).stem, mkdir=True) if opt.visualize else False
pred = model(im, augment=opt.augment, visualize=visualize)
t3 = time_sync()
dt[1] += t3 - t2
# Apply NMS
pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, opt.classes, opt.agnostic_nms, max_det=opt.max_det)
dt[2] += time_sync() - t3
# Process detections
for i, det in enumerate(pred): # detections per image
seen += 1
if webcam: # nr_sources >= 1
p, im0, _ = path[i], im0s[i].copy(), dataset.count
p = Path(p) # to Path
s += f'{i}: '
txt_file_name = p.name
save_path = str(save_dir / p.name) # im.jpg, vid.mp4, ...
else:
p, im0, _ = path, im0s.copy(), getattr(dataset, 'frame', 0)
p = Path(p) # to Path
# video
### =============================================================================================
### ROI Rectangle ( I will use cv2.selectROI later )
# left_roi = [(381,331), (647,336), (647,497), (334,492)]
# right_roi = [(648,335), (914,338), (958,498), (646,495)]
# table_roi = [(381,331), (914,338), (958,498), (334,492)]
# table_roi = [(0,0), (1280,0), (1280,720), (0,720)]
table_roi = [(381,331), (1280,0), (1280,720), (0,720)]
cv2.polylines(im0, [np.array(table_roi, np.int32)],True, (0,0,255),2 )
# cv2.polylines(im0, [np.array(right_roi, np.int32)],True, (0,0,255),2 )
### =============================================================================================
if source.endswith(VID_FORMATS):
txt_file_name = p.stem
save_path = str(save_dir / p.name) # im.jpg, vid.mp4, ...
# folder with imgs
else:
txt_file_name = p.parent.name # get folder name containing current img
save_path = str(save_dir / p.parent.name) # im.jpg, vid.mp4, ...
curr_frames[i] = im0
txt_path = str(save_dir / 'tracks' / txt_file_name) # im.txt
s += '%gx%g ' % im.shape[2:] # print string
imc = im0.copy() if save_crop else im0 # for save_crop
annotator = Annotator(im0, line_width=2, pil=not ascii)
if cfg.STRONGSORT.ECC: # camera motion compensation
strongsort_list[i].tracker.camera_update(prev_frames[i], curr_frames[i])
if det is not None and len(det):
# Rescale boxes from img_size to im0 size
det[:, :4] = scale_coords(im.shape[2:], det[:, :4], im0.shape).round()
# Print results
for c in det[:, -1].unique():
n = (det[:, -1] == c).sum() # detections per class
s += f"{n} {names[int(c)]}{'s' * (n > 1)}, " # add to string
xywhs = xyxy2xywh(det[:, 0:4])
confs = det[:, 4]
clss = det[:, 5]
# pass detections to strongsort
t4 = time_sync()
outputs[i] = strongsort_list[i].update(xywhs.cpu(), confs.cpu(), clss.cpu(), im0)
t5 = time_sync()
dt[3] += t5 - t4
# draw boxes for visualization
if len(outputs[i]) > 0:
for j, (output, conf) in enumerate(zip(outputs[i], confs)):
### ========================================================================================================================================================
### Results ROI
### ========================================================================================================================================================
# if output[5] == 0.0:
# bboxes = output[0:4]
# id = output[4]
# cls = output[5]
# center = int((((output[0]) + (output[2]))/2) , (((output[1]) + (output[3]))/2))
# print("center",center)
"""
- create rectangle left/right
- display ball cordinates
- intersect ball & rectangle left/right
"""
## ball cord..
if output[5] == 0.0:
# print("bbox----------", output[0:4])
print("class----------", output[5])
# print("id -------------", output[4])
print("=============================================")
# display ball rectangle
## cv2.rectangle(im0,(int(output[0]),int(output[1])),(int(output[2]),int(output[3])),(0,255,0),2 )
ball_box = output[0:4]
list_ball_cord.append(ball_box)
bbox_left = output[0]
bbox_top = output[1]
bbox_w = output[2] - output[0]
bbox_h = output[3] - output[1]
# print("bbox_left--------", bbox_left)
# print("bbox_top--------", bbox_top)
# print("bbox_w--------", bbox_w)
# print("bbox_h--------", bbox_h)
## ball center point
ball_cx = int(bbox_left + bbox_w /2)
ball_cy = int(bbox_top + bbox_h /2)
# cv2.circle(im0, (ball_cx,ball_cy),5, (0,0,255),-1)
# # ball detect only on table >> return three output +1-inside the table -1-outside the table 0-on the boundry
ball_on_table_res = cv2.pointPolygonTest(np.array(table_roi,np.int32), (int(ball_cx),int(ball_cy)), False)
if ball_on_table_res >= 0:
cv2.circle(im0, (ball_cx,ball_cy),20, (0,0,0),-1)
### ========================================================================================================================================================
bboxes = output[0:4]
id = output[4]
cls = output[5]
# print("bboxes--------", bboxes)
# print("cls-----------", cls)
if save_txt:
# to MOT format
bbox_left = output[0]
bbox_top = output[1]
bbox_w = output[2] - output[0]
bbox_h = output[3] - output[1]
# Write MOT compliant results to file
with open(txt_path + '.txt', 'a') as f:
f.write(('%g ' * 10 + '\n') % (frame_idx + 1, id, bbox_left, # MOT format
bbox_top, bbox_w, bbox_h, -1, -1, -1, i))
if save_vid or save_crop or show_vid: # Add bbox to image
c = int(cls) # integer class
id = int(id) # integer id
label = None if hide_labels else (f'{id} {names[c]}' if hide_conf else \
(f'{id} {conf:.2f}' if hide_class else f'{id} {names[c]} {conf:.2f}'))
annotator.box_label(bboxes, label, color=colors(c, True))
#####################print("label---------", label)
if save_crop:
txt_file_name = txt_file_name if (isinstance(path, list) and len(path) > 1) else ''
save_one_box(bboxes, imc, file=save_dir / 'crops' / txt_file_name / names[c] / f'{id}' / f'{p.stem}.jpg', BGR=True)
fps_StrongSORT = 1 / (t5-t4)
fps_yolo = 1/ (t3-t2)
LOGGER.info(f'{s}Done. YOLO:({t3 - t2:.3f}s), StrongSORT:({t5 - t4:.3f}s), ')
print("fps_StrongSORT-----", fps_StrongSORT)
print("fps_yolo-----", fps_yolo)
else:
strongsort_list[i].increment_ages()
LOGGER.info('No detections')
# Stream results
im0 = annotator.result()
if show_vid:
# im0 = cv2.resize(im0, (640,640))
cv2.imshow(str(p), im0)
cv2.waitKey(1) # 1 millisecond
# Save results (image with detections)
if save_vid:
if vid_path[i] != save_path: # new video
vid_path[i] = save_path
if isinstance(vid_writer[i], cv2.VideoWriter):
vid_writer[i].release() # release previous video writer
if vid_cap: # video
fps = vid_cap.get(cv2.CAP_PROP_FPS)
w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
else: # stream
fps, w, h = 30, im0.shape[1], im0.shape[0]
save_path = str(Path(save_path).with_suffix('.mp4')) # force *.mp4 suffix on results videos
vid_writer[i] = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
vid_writer[i].write(im0)
prev_frames[i] = curr_frames[i]
print("fffffffffffffffffffffffffffffffff----------------------------------------",list_ball_cord)
# Print results
t = tuple(x / seen * 1E3 for x in dt) # speeds per image
LOGGER.info(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS, %.1fms strong sort update per image at shape {(1, 3, *imgsz)}' % t)
if save_txt or save_vid:
s = f"\n{len(list(save_dir.glob('tracks/*.txt')))} tracks saved to {save_dir / 'tracks'}" if save_txt else ''
LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}{s}")
if update:
strip_optimizer(yolo_weights) # update model (to fix SourceChangeWarning)
def parse_opt():
parser = argparse.ArgumentParser()
parser.add_argument('--yolo-weights', nargs='+', type=str, default='v5best_bp.pt', help='model.pt path(s)')
parser.add_argument('--strong-sort-weights', type=str, default=WEIGHTS / 'osnet_x0_25_msmt17.pt')
parser.add_argument('--config-strongsort', type=str, default='strong_sort/configs/strong_sort.yaml')
parser.add_argument('--source', type=str, default='0', help='file/dir/URL/glob, 0 for webcam')
parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640], help='inference size h,w')
parser.add_argument('--conf-thres', type=float, default=0.5, help='confidence threshold')
parser.add_argument('--iou-thres', type=float, default=0.5, help='NMS IoU threshold')
parser.add_argument('--max-det', type=int, default=1000, help='maximum detections per image')
parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
parser.add_argument('--show-vid', action='store_true', help='display tracking video results')
parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels')
parser.add_argument('--save-crop', action='store_true', help='save cropped prediction boxes')
parser.add_argument('--save-vid', action='store_true', help='save video tracking results')
parser.add_argument('--nosave', action='store_true', help='do not save images/videos')
# class 0 is person, 1 is bycicle, 2 is car... 79 is oven
parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --classes 0, or --classes 0 2 3')
parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
parser.add_argument('--augment', action='store_true', help='augmented inference')
parser.add_argument('--visualize', action='store_true', help='visualize features')
parser.add_argument('--update', action='store_true', help='update all models')
parser.add_argument('--project', default=ROOT / 'runs/track', help='save results to project/name')
parser.add_argument('--name', default='exp', help='save results to project/name')
parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
parser.add_argument('--line-thickness', default=3, type=int, help='bounding box thickness (pixels)')
parser.add_argument('--hide-labels', default=False, action='store_true', help='hide labels')
parser.add_argument('--hide-conf', default=False, action='store_true', help='hide confidences')
parser.add_argument('--hide-class', default=False, action='store_true', help='hide IDs')
parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference')
parser.add_argument('--dnn', action='store_true', help='use OpenCV DNN for ONNX inference')
opt = parser.parse_args()
opt.imgsz *= 2 if len(opt.imgsz) == 1 else 1 # expand
print_args(vars(opt))
return opt
def main(opt):
check_requirements(requirements=ROOT / 'requirements.txt', exclude=('tensorboard', 'thop'))
run(**vars(opt))
if __name__ == "__main__":
opt = parse_opt()
main(opt)
I am also using the same model and was facing the same issue.
Try annotating more images and increasing the image size to 1024. Also make sure to use the best YOLOv5 weights in yolov5+deepsort.
DISCLAIMER: I am the creator of https://github.com/mikel-brostrom/Yolov5_StrongSORT_OSNet
First of all:
Does Yolov5+StrongSORT+OSNet run correctly without your custom modifications?
Secondly:
Have you checked that you are loading the same weights for Yolov5 and Yolov5+StrongSORT+OSNet?
Moreover:
Why all the custom modifications? If you only want to track class 0, then you can run the following command:
python track.py --source 0 --yolo-weights best.pt --classes 0

How to create multiline string sf object to create a time slider in Leaflet map

I'm trying to create a leaflet map which shows different movement paths for different months of the year. I.e. I've got a dataset showing multiple journeys per month and I want to display the movement paths separately for each month using the addTimeslider feature of the leaflet.extras2 package.
To do so I have been trying to adapt the code posted by SymbolixAU I found here: leaflet add multiple polylines
This code uses sf functions including st_linestring to create an object that can be supplied to a addPolylines leaflet function to show all movement paths at once.
I'm pretty sure for my purposes (showing data separately for each month) I have to use st_multilinestring, which takes a list of matrices containing the coordinates for multiple polylines per row (with one row per month) rather than a single polyline per row.
Once I have that, I think I could supply that object to the addTimeslider function of leaflet.extras2 to achieve what I need. I'm quite sure of this because when I used the sf object created using st_linestring in the addTimeslider feature, I was able to use the time slider on the map to show individual movement paths one at a time.
However, I have been trying for hours and haven't been successful. Would be hugely grateful for any pointers, please and thank you.
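For reference, here is a minimal sketch of what I understand st_multilinestring expects (a list of coordinate matrices, one per path); the coordinates are taken from the first two rows of the example data below:
library(sf)
seg1 <- rbind(c(-112.0298, 33.40693), c(-112.0343, 33.40687)) # start -> finish, row 1
seg2 <- rbind(c(-111.9255, 33.64672), c(-111.9303, 33.64776)) # start -> finish, row 2
march <- st_sfc(st_multilinestring(list(seg1, seg2)), crs = 4326) # one MULTILINESTRING holding both March paths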
Some example data:
#load packages
library(dplyr)
library(leaflet)
library(leaflet.extras2)
library(sf)
library(data.table)
# create the example dataset
data <- structure(list(arrival_month = structure(c(3L, 3L, 4L, 4L, 4L,
5L, 5L, 6L, 6L, 6L), .Label = c("January", "February", "March",
"April", "May", "June", "July", "August", "September", "October",
"November", "December"), class = c("ordered", "factor")), start_lat = c(33.40693,
33.64672, 33.57127, 33.42848, 33.54936, 33.53418, 33.60399, 33.49554,
33.5056, 33.61696), start_long = c(-112.0298, -111.9255, -112.049,
-112.0998, -112.0912, -112.0911, -111.9273, -111.9687, -112.0563,
-111.9866), finish_lat...4 = c(33.40687, 33.64776, 33.57125,
33.42853, 33.54893, 33.53488, 33.60401, 33.49647, 33.5056, 33.61654
), finish_lat...5 = c(-112.0343, -111.9303, -112.0481, -112.0993,
-112.0912, -112.0911, -111.931, -111.9711, -112.0541, -111.986
)), row.names = c(NA, -10L), class = c("data.table", "data.frame"
))
My attempt at the code:
# Convert the data into a list of matrices for each month
mnths <- c("May","March","April","June")
mat_list <- list()
for (i in mnths) {
month <- as.matrix(data %>% filter(arrival_month == i) %>% select(-1))
mat_list[[i]] <- month
}
# convert to an sf object
data_DT <- setDT(data)
sf <- data_DT[
, {
geometry <- sf::st_multilinestring(x = mat_list)
geometry <- sf::st_sfc(geometry)
geometry <- sf::st_sf(geometry = geometry)
}
, by = arrival_month
]
sf <- sf::st_as_sf(sf)
This yields the following result:
It's not correct because each row contains the coordinates for all the months, rather than just for the month in the respective row. I'm at a loss as to where to go from here - any help would be hugely appreciated.
Thanks
I would do it slightly differently today, making use of {sfheaders} to build the linestrings.
library(sfheaders)
library(sf)
library(data.table)
setDT( data )
data[, line_id := .I ] ## Assuming each row is a line
## create a long-form of the data
dt_line <- rbindlist(
list(
data[, .(arrival_month, line_id, lon = start_long, lat = start_lat, sequence = 1)]
, data[, .(arrival_month, line_id, lon = finish_lat...5, lat = finish_lat...4, sequence = 2)] ## I think 'finish_lat...5' is actually the 'long'
)
)
setorder(dt_line, line_id, sequence)
sf <- sfheaders::sf_multilinestring(
obj = dt_line
, x = "lon"
, y = "lat"
, multilinestring_id = "arrival_month"
, linestring_id = "line_id"
, keep = T
)
sf::st_crs( sf ) <- 4326 ## Assuming lon/lat coordinates (EPSG:4326, WGS 84)
# Simple feature collection with 4 features and 2 fields
# Geometry type: MULTILINESTRING
# Dimension: XY
# Bounding box: xmin: -112.0998 ymin: 33.40687 xmax: -111.9255 ymax: 33.64776
# Geodetic CRS: WGS 84
#. arrival_month sequence geometry
# 1 3 1 MULTILINESTRING ((-112.0298...
# 2 4 1 MULTILINESTRING ((-112.049 ...
# 3 5 1 MULTILINESTRING ((-112.0911...
# 4 6 1 MULTILINESTRING ((-111.9687...
Note that arrival_month has been re-coded to its underlying factor codes.
I managed to adapt the code provided by SymbolixAU to suit my purposes, by using sf_multilinestring instead of sf_linestring:
## Generate data
data <- structure(list(arrival_month = structure(c(3L, 3L, 4L, 4L, 4L,
5L, 5L, 6L, 6L, 6L), .Label = c("January", "February", "March",
"April", "May", "June", "July", "August", "September", "October",
"November", "December"), class = c("ordered", "factor")), start_lat = c(33.40693,
33.64672, 33.57127, 33.42848, 33.54936, 33.53418, 33.60399, 33.49554,
33.5056, 33.61696), start_long = c(-112.0298, -111.9255, -112.049,
-112.0998, -112.0912, -112.0911, -111.9273, -111.9687, -112.0563,
-111.9866), finish_lat...4 = c(33.40687, 33.64776, 33.57125,
33.42853, 33.54893, 33.53488, 33.60401, 33.49647, 33.5056, 33.61654
), finish_lat...5 = c(-112.0343, -111.9303, -112.0481, -112.0993,
-112.0912, -112.0911, -111.931, -111.9711, -112.0541, -111.986
)), row.names = c(NA, -10L), class = c("data.table", "data.frame"
))
## Convert to a data.table and add an id column
setDT(data)
data[, line_id := .I ]
## create a long-form of the data
dt_line <- rbindlist(
list(
data[, .(arrival_month, line_id, lon = start_long, lat = start_lat, sequence = 1)]
, data[, .(arrival_month, line_id, lon = finish_lat...5, lat = finish_lat...4, sequence = 2)] ## I think 'finish_lat...5' is actually the 'long'
)
)
setorder(dt_line, line_id, sequence)
## Create multistring sf object
sf <- sfheaders::sf_multilinestring(
obj = dt_line
, x = "lon"
, y = "lat"
, linestring_id = "line_id"
, multilinestring_id = "arrival_month"
, keep = TRUE
)
sf::st_as_sf(sf)
## Convert arrival_month back to factor
sf$arrival_month <- as.factor(sf$arrival_month)
Might not be the most elegant but this does the job.
The key was to use sf_multilinestring and include/specify both linestring_id and multilinestring_id to distinguish between separate sets of polylines.
This is the result:
Now when this sf object is used in addTimeslider it behaves as desired.
Credit to SymbolixAU for most of the code and for bringing my attention to the sfheaders package.
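To close the loop on the time-slider step, here is a rough, untested sketch of feeding this sf object to leaflet.extras2::addTimeslider(), assuming (as the timesliderOptions() defaults suggest) that it picks up a column named time; mapping arrival_month to a Date here is purely illustrative:
library(leaflet)
library(leaflet.extras2)
sf$time <- as.Date(paste0("2022-", sf$arrival_month, "-01")) # illustrative dates only
leaflet(sf) %>%
  addTiles() %>%
  addTimeslider(data = sf, options = timesliderOptions(position = "topright"))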

Variable importance from a tidymodels/stacks output?

Is it possible to retrieve the variable importance for one, many, or the full stacked model after running tidymodels/stacks? This is not yet supported by the VIP package, but is there an alternative method to extracting that information?
Using the bulk of the blog post from Simon Couch here, this is what I am generally trying to attempt, except that I will use random forests and SVMs and then try to retrieve variable importance from them.
library(tidyverse)
library(tidymodels)
library(stacks)
library(vip)
wind_raw <- read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-10-27/wind-turbine.csv')
wind <-
wind_raw %>%
dplyr::select(
province_territory,
total_project_capacity_mw,
turbine_rated_capacity_kw = turbine_rated_capacity_k_w,
rotor_diameter_m,
hub_height_m,
year = commissioning_date
) %>%
group_by(province_territory) %>%
mutate(
year = as.numeric(year),
province_territory = case_when(
n() < 50 ~ "Other",
TRUE ~ province_territory
)
) %>%
filter(!is.na(year)) %>%
ungroup() %>%
drop_na(turbine_rated_capacity_kw)
# split into training and testing sets
set.seed(1)
wind_split <- initial_split(wind)
wind_train <- training(wind_split)
wind_test <- testing(wind_split)
# use a 5-fold cross-validation
set.seed(1)
folds <- rsample::vfold_cv(wind_train, v = 5)
# set up a basic recipe
wind_rec <-
recipe(turbine_rated_capacity_kw ~ ., data = wind_train) %>%
step_impute_knn(all_predictors()) %>%
step_dummy(all_nominal()) %>%
step_zv(all_predictors())
# define a minimal workflow
wind_wflow <-
workflow() %>%
add_recipe(wind_rec)
ctrl_grid <- control_stack_grid() # control object used by tune_grid() below
rf_spec <-
rand_forest(mtry = tune(),
min_n = tune(),
trees = 1000) %>%
set_mode('regression') %>%
set_engine("ranger", importance = "impurity")
# add it to a workflow
rf_wflow <-
wind_wflow %>%
add_model(rf_spec)
# tune cost and rand_forest and fit to the 5-fold cv
set.seed(1)
rf_res <-
tune_grid(
rf_wflow ,
resamples = folds,
grid = 5,
control = ctrl_grid
)
# define a model using parsnip
svm_spec <-
svm_rbf(
cost = tune(),
rbf_sigma = tune()
) %>%
set_engine("kernlab") %>%
set_mode("regression")
# add it to a workflow
svm_wflow <-
wind_wflow %>%
add_model(svm_spec)
# tune cost and rbf_sigma and fit to the 5-fold cv
set.seed(1)
svm_res <-
tune_grid(
svm_wflow,
resamples = folds,
grid = 5,
control = ctrl_grid
)
# add the models to the stack
wind_data_st <-
stacks() %>%
add_candidates(rf_res) %>%
add_candidates(svm_res) %>%
blend_predictions() %>%
fit_members()
# attempt to plot the variable importance of the stacked model
wind_data_st %>%
vip()
I get Error: Model-specific variable importance scores are currently not available for this type of model., which is self-explanatory, but is there a workaround to extract this information, maybe outside of vip? Is it possible to pluck out one of the viable models that went into the stack to be evaluated? Does anyone know if vip is planning to put out a solution to this? Thanks in advance!
I've had a similar issue, and what I've done is make a tibble of variable importance for each member of the stack, then normalize them onto the same scale, and multiply by their relative weight in the stack to have a summed total relative importance.
I couldn't reproduce your code, but here's an example of what you can try...
After you've run blend_predictions(), you can extract the weights. Then create a tibble for each member that includes a column for Variable and a column for Importance. Then join them together and you'll have the weighted importance.
library("DALEX")
library("dplyr")
library("tidymodels")
colnames(fifa)
fifa_small <- fifa %>%
select(value_eur, age,
attacking_crossing:attacking_volleys,
defending_marking:defending_sliding_tackle) %>%
as_tibble() %>% dplyr::slice_sample(n = 1000)
fifa_small_folds <- vfold_cv(fifa_small, v = 8, repeats = 1)
fifa_small_folds
basic_rec <-
recipe(value_eur ~ ., data = fifa_small) %>%
step_nzv(all_numeric_predictors()) %>%
step_normalize(all_numeric(), -all_outcomes())
model1 <-
boost_tree(trees = 1000) %>%
set_engine('xgboost', importance = TRUE) %>%
set_mode('regression')
model2 <-
linear_reg(penalty = 0.1, mixture = 1) %>%
set_engine('glmnet')
model3 <-
linear_reg(penalty = tune(), mixture = 0) %>%
set_engine('glmnet')
wfs <-
workflow_set(
preproc = list(basic_rec),
models = list(model1, model2, model3),
cross = T )
wfs
doParallel::registerDoParallel()
wfs_rs <-
workflow_map(
wfs,
"tune_grid",
resamples = fifa_small_folds,
grid = 10,
control = control_grid(save_pred = TRUE,
parallel_over = "everything",
save_workflow = TRUE
) )
doParallel::stopImplicitCluster()
library(stacks)
tidymodels_prefer()
wfs_stack <-
stacks() %>%
add_candidates(wfs_rs)
blend_ens <- blend_predictions(wfs_stack, penalty = 10^seq(-2, 0, length = 10))
blend_ens
ens1_wt <- stacks:::top_coefs(blend_ens) %>% slice(1) %>% pull(weight)
ens2_wt <- stacks:::top_coefs(blend_ens) %>% slice(2) %>% pull(weight)
## Get the workflowset
individ_ens1_best_fit <- extract_workflow(wfs_rs, id = "recipe_boost_tree")
## extract the tuned results from the workflow
individ_ens1_best_tuned <- wfs_rs[wfs_rs$wflow_id == "recipe_boost_tree",
"result"][[1]][[1]]
individ_ens1_lowest_rmse <- individ_ens1_best_tuned %>%
show_best("rmse") %>%
slice(1)
## fit the final model
individ_ens1_best_final <- finalize_workflow(individ_ens1_best_fit, individ_ens1_lowest_rmse)
individ_ens1_bestfinal_1 <- individ_ens1_best_final %>% fit(fifa_small)
individ_ens1_vi_tbl <- individ_ens1_bestfinal_1 %>%
extract_fit_parsnip() %>%
vip::vi() %>%
mutate(
ens1_Importance = abs(Importance),
Variable = factor(Variable), .keep = "unused")
blend_ens
ens2_config <- ens_name_fn(blend_ens, 2) ## ens_name_fn()/ens_id_fn() are the author's own helpers, not defined in this snippet (and not used below)
ens2_id <- ens_id_fn(blend_ens, 2)
## Get the workflowset
individ_ens2_best_fit <- extract_workflow(wfs_rs, id = "recipe_linear_reg_3")
## extract the tuned results from the best workflow
individ_ens2_best_tuned <- wfs_rs[wfs_rs$wflow_id == "recipe_linear_reg_3",
"result"][[1]][[1]]
individ_ens2_lowest_rmse <- individ_ens2_best_tuned %>%
show_best("rmse") %>% filter(.config == "Preprocessor1_Model01") %>% slice(1)
## fit the final model
individ_ens2_best_final <- finalize_workflow(individ_ens2_best_fit, individ_ens2_lowest_rmse)
individ_ens2_bestfinal_1 <- individ_ens2_best_final %>% fit(fifa_small)
individ_ens2_vi_tbl <- individ_ens2_bestfinal_1 %>%
extract_fit_parsnip() %>%
vip::vi(lambda = individ_ens2_lowest_rmse$penalty) %>% # include lambda for lasso or ridge
mutate(
ens2_Importance = abs(Importance),
Variable = factor(Variable), .keep = "unused")
ens_vi_joined <- individ_ens1_vi_tbl %>%
left_join(individ_ens2_vi_tbl, by = c("Variable")) %>%
mutate(across(2:ncol(.), ~ifelse(is.na(.), 0, .)),
ens1_normed = ens1_Importance/ sum(ens1_Importance),
ens2_normed = ens2_Importance/ sum(ens2_Importance),
ens1_wted = ens1_normed * ens1_wt,
ens2_wted = ens2_normed * ens2_wt,
) %>%
rowwise() %>%
mutate(summed_importance = sum(c_across(ends_with("wted"))) ) %>%
ungroup() %>%
mutate(
total_importance = summed_importance/ sum(summed_importance), #normalized
)
ens_vi_joined %>% select(Variable, total_importance) %>%
ggplot(aes(total_importance, fct_reorder(Variable, total_importance)))+
geom_col()
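As for the question's side query about plucking a single member model out of the stack for evaluation, here is a sketch. It assumes the fitted stack keeps its member workflows in an internal member_fits list (so wind_data_st$member_fits with the question's objects), and that the chosen element is the ranger member trained with importance = "impurity":
library(vip)
rf_member <- wind_data_st$member_fits[[1]] # which element is the ranger member is an assumption
rf_member %>%
  extract_fit_parsnip() %>%
  vip()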

Crosstalk links broken by second Leaflet addCircleMarkers call

I am working with multivariate data, linking Leaflet and d3scatter plots. It works well for one variable. If I try to include a second variable in Leaflet via a second addCircleMarkers call plus addLayersControl, then the SharedData links break: the filtering doesn't work and the brushing doesn't work. Thanks in advance.
An MWE is attached:
library("crosstalk")
library("d3scatter")
library("leaflet")
Long <- c(117.4,117.5,117.6)
Lat<- c(-33.7,-33.8,-33.9)
var1 <- c(21,22,23)
var2 <- c(31,32,33)
species <- c(8,9,10)
df1<- data.frame(Long, Lat, var1, var2, species)
sdf1 <- SharedData$new(df1)
col_1 <- c( "yellow" ,"black" ,"orange")
col_2 <- c("red" ,"green" ,"blue")
l <- leaflet(sdf1)%>%
setView(117.5, -33.8, 10) %>%
addCircleMarkers(radius = 1, color = col_1, group = "1") %>%
# addCircleMarkers(radius = 1, color = col_2, group = "2") %>%
# PROBLEM - adding the second "addCircleMarkers" enables the overlayGroups but
# it breaks the link between the plots and breaks the filter
addLayersControl(overlayGroups=c("1","2"))
m <- list(l, filter_checkbox("unique_id_for_species", "Animal Species", sdf1, ~species))
n <- list(d3scatter(sdf1, ~var2, ~var1, color = ~species, x_lim = c(30,40), y_lim = c(20,25), width="70%", height=200),
d3scatter(sdf1, ~var1, ~var2, color = ~species, y_lim = c(30,40), x_lim = c(20,25), width="70%", height=200))
bscols(m, n)

Forecasting using multiple seasonal STL and ARIMA

I am attempting to forecast half-hourly electricity data. The method I am using is to decompose the electricity consumption data using mstl() from the forecast package by Rob Hyndman and then forecast the seasonally adjusted data using ARIMA.
df <- IntervalData %>% select(CONSUMPTION_MW)
length_test_set = 17520
h = 17520
# create msts object with daily, weekly and monthly seasonality
data_msts <- msts(df, seasonal.periods=c(48,48*7,365/12*48))
train_msts = msts(df[1:(nrow(df)-length_test_set),],seasonal.periods=c(48,48*7,365/12*48))
test_msts = msts(df[((nrow(df)-length_test_set)+1):(nrow(df)),],seasonal.periods=c(48,48*7,365/12*48))
fit_mstl = mstl(train_msts, iterate = 4, s.window = 19, robust = TRUE)
fcast_arima=forecast(fit_mstl,method='arima',h=h)
How do I specify the order of my ARIMA model, e.g. ARIMA(2,1,6)?
You will need to write your own forecast function like this (using fake data so it can be reproduced).
library(forecast)
df <- data.frame(y=rnorm(50000))
length_test_set <- 17520
h <- 17520
# create msts object with daily, weekly and monthly seasonality
data_msts <- msts(df, seasonal.periods = c(48, 48*7, 365/12*48))
train_msts <- msts(df[1:(nrow(df) - length_test_set), ], seasonal.periods = c(48, 48 * 7, 365 / 12 * 48))
test_msts <- msts(df[((nrow(df) - length_test_set) + 1):(nrow(df)), ], seasonal.periods = c(48, 48 * 7, 365 / 12 * 48))
fit_mstl <- mstl(train_msts, iterate = 4, s.window = 19, robust = TRUE)
# Function to fit specific ARIMA model and return forecasts
arima_forecast <- function(x, h, level, order, ...) {
fit <- Arima(x, order=order, seasonal = c(0,0,0), ...)
return(forecast(fit, h = h, level = level))
}
# Example using an ARIMA(3,0,0) model
fcast_arima <- forecast(fit_mstl, forecastfunction=arima_forecast, h = h, order=c(3,0,0))
Created on 2020-07-25 by the reprex package (v0.3.0)
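For the specific order asked about in the question, ARIMA(2,1,6), the same helper applies: the extra order argument is passed straight through to arima_forecast() by forecast():
fcast_216 <- forecast(fit_mstl, forecastfunction = arima_forecast, h = h, order = c(2, 1, 6))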
