Hello guys I have a offboard code which give about 50 setpoints to drone. It draws spiral with that setpoints. My problem is I couldnt get smooth travel. In every setpoint drone gives a high roll or pitch instant and then floats to the next setpoint. Is there a way to have stable velocity while passing the setpoints. Here is the code:
#include <px4_msgs/msg/offboard_control_mode.hpp>
#include <px4_msgs/msg/trajectory_setpoint.hpp>
#include <px4_msgs/msg/timesync.hpp>
#include <px4_msgs/msg/vehicle_command.hpp>
#include <px4_msgs/msg/vehicle_control_mode.hpp>
#include <px4_msgs/msg/vehicle_local_position.hpp>
#include <rclcpp/rclcpp.hpp>
#include <stdint.h>
#include <chrono>
#include <iostream>
#include "std_msgs/msg/string.hpp"
#include <math.h>
float X;
float Y;
using namespace std::chrono;
using namespace std::chrono_literals;
using namespace px4_msgs::msg;
class setpoint : public rclcpp::Node {
setpoint() : Node("setpoint") {
offboard_control_mode_publisher_ =
this->create_publisher<OffboardControlMode>("fmu/offboard_control_mode/in", 10);
trajectory_setpoint_publisher_ =
this->create_publisher<TrajectorySetpoint>("fmu/trajectory_setpoint/in", 10);
vehicle_command_publisher_ =
this->create_publisher<VehicleCommand>("fmu/vehicle_command/in", 10);
// get common timestamp
timesync_sub_ =
this->create_subscription<px4_msgs::msg::Timesync>("fmu/timesync/out", 10,
[this](const px4_msgs::msg::Timesync::UniquePtr msg) {
offboard_setpoint_counter_ = 0;
auto sendCommands = [this]() -> void {
if (offboard_setpoint_counter_ == 10) {
// Change to Offboard mode after 10 setpoints
this->publish_vehicle_command(VehicleCommand::VEHICLE_CMD_DO_SET_MODE, 1, 6);
// Arm the vehicle
subscription_ = this->create_subscription<px4_msgs::msg::VehicleLocalPosition>(
[this](const px4_msgs::msg::VehicleLocalPosition::UniquePtr msg) {
X = msg->x;
Y = msg->y;
std::cout << "\n\n\n\n\n\n\n\n\n\n";
std::cout << "RECEIVED VEHICLE GPS POSITION DATA" << std::endl;
std::cout << "==================================" << std::endl;
std::cout << "ts: " << msg->timestamp << std::endl;
//std::cout << "lat: " << msg->x << std::endl;
//std::cout << "lon: " << msg->y << std::endl;
std::cout << "lat: " << X << std::endl;
std::cout << "lon: " << Y << std::endl;
std::cout << "waypoint: " << waypoints[waypointIndex][0] << std::endl;
std::cout << "waypoint: " << waypoints[waypointIndex][1] << std::endl;
if((waypoints[waypointIndex][0] + 0.3 > X && waypoints[waypointIndex][0] - 0.3 < X)&&(waypoints[waypointIndex][1] + 0.3 > Y && waypoints[waypointIndex][1] - 0.3 < Y)){
if (waypointIndex >= waypoints.size())
//waypointIndex = 0;
RCLCPP_INFO(this->get_logger(), "Next waypoint: %.2f %.2f %.2f", waypoints[waypointIndex][0], waypoints[waypointIndex][1], waypoints[waypointIndex][2]);
// offboard_control_mode needs to be paired with trajectory_setpoint
// stop the counter after reaching 11
if (offboard_setpoint_counter_ < 11) {
auto nextWaypoint = [this]() -> void {
if (waypointIndex >= waypoints.size())
waypointIndex = 0;
RCLCPP_INFO(this->get_logger(), "Next waypoint: %.2f %.2f %.2f", waypoints[waypointIndex][0], waypoints[waypointIndex][1], waypoints[waypointIndex][2]);
commandTimer = this->create_wall_timer(100ms, sendCommands);
//waypointTimer = this->create_wall_timer(2s, nextWaypoint); //EA
void arm() const;
void disarm() const;
void topic_callback() const;
std::vector<std::vector<float>> waypoints = {{0,0,-5,},
}; // Land
int waypointIndex = 0;
rclcpp::TimerBase::SharedPtr commandTimer;
rclcpp::TimerBase::SharedPtr waypointTimer;
rclcpp::Publisher<OffboardControlMode>::SharedPtr offboard_control_mode_publisher_;
rclcpp::Publisher<TrajectorySetpoint>::SharedPtr trajectory_setpoint_publisher_;
rclcpp::Publisher<VehicleCommand>::SharedPtr vehicle_command_publisher_;
rclcpp::Subscription<px4_msgs::msg::Timesync>::SharedPtr timesync_sub_;
rclcpp::Subscription<px4_msgs::msg::VehicleLocalPosition>::SharedPtr subscription_;
std::atomic<uint64_t> timestamp_; //!< common synced timestamped
uint64_t offboard_setpoint_counter_; //!< counter for the number of setpoints sent
void publish_offboard_control_mode() const;
void publish_trajectory_setpoint() const;
void publish_vehicle_command(uint16_t command, float param1 = 0.0,
float param2 = 0.0) const;
void setpoint::arm() const {
publish_vehicle_command(VehicleCommand::VEHICLE_CMD_COMPONENT_ARM_DISARM, 1.0);
RCLCPP_INFO(this->get_logger(), "Arm command send");
void setpoint::disarm() const {
publish_vehicle_command(VehicleCommand::VEHICLE_CMD_COMPONENT_ARM_DISARM, 0.0);
RCLCPP_INFO(this->get_logger(), "Disarm command send");
void setpoint::publish_offboard_control_mode() const {
OffboardControlMode msg{};
msg.timestamp = timestamp_.load();
msg.position = true;
msg.velocity = false;
msg.acceleration = false;
msg.attitude = false;
msg.body_rate = false;
void setpoint::publish_trajectory_setpoint() const {
TrajectorySetpoint msg{};
msg.timestamp = timestamp_.load();
msg.position = {waypoints[waypointIndex][0],waypoints[waypointIndex][1],waypoints[waypointIndex][2]};
msg.yaw = std::nanf("0"); //-3.14; // [-PI:PI]
void setpoint::publish_vehicle_command(uint16_t command, float param1,
float param2) const {
VehicleCommand msg{};
msg.timestamp = timestamp_.load();
msg.param1 = param1;
msg.param2 = param2;
msg.command = command;
msg.target_system = 1;
msg.target_component = 1;
msg.source_system = 1;
msg.source_component = 1;
msg.from_external = true;
int main(int argc, char* argv[]) {
std::cout << "Starting setpoint node..." << std::endl;
setvbuf(stdout, NULL, _IONBF, BUFSIZ);
rclcpp::init(argc, argv);
return 0;
We send the setpoints to the controller by giving reference points. The aircraft will then try to maneuver to the given points via its control strategy (usually PID). Therefore, to have a smooth maneuver, it is usually suggested to give a series of discrete points between two waypoints, i.e., trajectory which parameterized by time. It should then solve the abrupt motion of your UAV. I'm no expert, but I hope this helps.
I have some code to draw a line between two points on an image which are selected by mouse, and then to display a histogram.
However, when I press q as required by code I get an error saying R6010 abort() has been called and saying VC++ run time error.
Please advise me how I can find this error.
#include <vector>
#include "opencv2/highgui/highgui.hpp"
#include <opencv\cv.h>
#include <iostream>
using namespace cv;
using namespace std;
struct Data_point
int x;
unsigned short int y;
int PlotMeNow(unsigned short int *values, unsigned int nSamples)
std::vector<Data_point> graph(nSamples);
for (unsigned int i = 0; i < nSamples; i++)
graph[i].x = i;
graph[i].y = values[i];
cv::Size imageSize(5000, 500); // your window size
cv::Mat image(imageSize, CV_8UC1);
if (image.empty()) //check whether the image is valid or not
std::cout << "Error : Image cannot be created..!!" << std::endl;
system("pause"); //wait for a key press
return 0;
std::cout << "Good job : Image created successfully..!!" << std::endl;
// tru to do some ofesseting so the graph do not hide on x or y axis
Data_point dataOffset;
dataOffset.x = 20;
// we have to mirror the y axis!
dataOffset.y = 5000;
for (unsigned int i = 0; i<nSamples; ++i)
graph[i].x = (graph[i].x + dataOffset.x) * 3;
graph[i].y = (graph[i].y + dataOffset.y) / 200;
// draw the samples
for (unsigned int i = 0; i<nSamples - 1; ++i)
cv::Point2f p1;
p1.x = graph[i].x;
p1.y = graph[i].y;
cv::Point2f p2;
p2.x = graph[i + 1].x;
p2.y = graph[i + 1].y;
cv::line(image, p1, p2, 'r', 1, 4, 0);
cv::namedWindow("MyWindow1", CV_WINDOW_AUTOSIZE); //create a window with the name "MyWindow"
cv::imshow("MyWindow1", image); //display the image which is stored in the 'img' in the "MyWindow" window
while (true)
char c = cv::waitKey(10);
if (c == 'q')
destroyWindow("MyWindow"); //destroy the window with the name, "MyWindow"
return 0;
void IterateLine(const Mat& image, vector<ushort>& linePixels, Point p2, Point p1, int* count1)
LineIterator it(image, p2, p1, 8);
for (int i = 0; i < it.count; i++, it++)
linePixels.push_back(image.at<ushort>(it.pos())); //doubt
*count1 = it.count;
//working line with mouse
void onMouse(int evt, int x, int y, int flags, void* param)
std::vector<cv::Point>* ptPtr = (std::vector<cv::Point>*)param;
ptPtr->push_back(cv::Point(x, y));
void drawline(Mat image, std::vector<Point>& points)
cv::namedWindow("Output Window");
cv::setMouseCallback("Output Window", onMouse, (void*)&points);
int X1 = 0, Y1 = 0, X2 = 0, Y2 = 0;
while (1)
cv::imshow("Output Window", image);
if (points.size() > 1) //we have 2 points
for (auto it = points.begin(); it != points.end(); ++it)
//just for testing that we are getting pixel values
X1 = points[0].x;
X2 = points[1].x;
Y1 = points[0].y;
Y2 = points[1].y;
// Draw a line
line(image, Point(X1, Y1), Point(X2, Y2), 'r', 2, 8);
cv::imshow("Output Window", image);
//exit image window
while (true)
char c = cv::waitKey(10);
if (c == 'q')
destroyWindow("Output Window");
void show_histogram_image(Mat img1)
int sbins = 65536;
int histSize[] = { sbins };
float sranges[] = { 0, 65536 };
const float* ranges[] = { sranges };
cv::MatND hist;
int channels[] = { 0 };
cv::calcHist(&img1, 1, channels, cv::Mat(), // do not use mask
hist, 1, histSize, ranges,
true, // the histogram is uniform
double maxVal = 0;
minMaxLoc(hist, 0, &maxVal, 0, 0);
int xscale = 10;
int yscale = 10;
cv::Mat hist_image;
hist_image = cv::Mat::zeros(65536, sbins*xscale, CV_16UC1);
for int s = 0; s < sbins; s++)
float binVal = hist.at<float>(s, 0);
int intensity = cvRound(binVal * 65535 / maxVal);
rectangle(hist_image, cv::Point(s*xscale, hist_image.rows),
cv::Point((s + 1)*xscale - 1, hist_image.rows - intensity),
cv::Scalar::all(65535), 1);
imshow("Histogram", hist_image);
int main()
vector<Point> points1;
Mat img = cvLoadImage("desert.jpg");
if (img.empty()) //check whether the image is valid or not
cout << "Error : Image cannot be read..!!" << endl;
system("pause"); //wait for a key press
return -1;
//Draw the line
drawline(img, points1);
//now check the collected points
Mat img1 = cvLoadImage("desert.jpg");
if (img1.empty()) //check whether the image is valid or not
cout << "Error : Image cannot be read..!!" << endl;
system("pause"); //wait for a key press
return -1;
int *t = new int;
IterateLine( img1, linePixels, points1[1], points1[0], t );
PlotMeNow(&linePixels[0], t[0]);
delete t;
return 0;
This is one of the bad smells in your code:
void IterateLine(const Mat& image, vector<ushort>& linePixels, Point p2, Point p1, int* count1)
linePixels.push_back(image.at<ushort>(it.pos())); //doubt
Now image is a CV_8UC3 image (from Mat img1 = cvLoadImage("desert.jpg");, but you are accessing here like it is CV_16UC1, so what gets put in linePixels is garbage. This will almost certainly cause PlotMeNow() to draw outside its image and corrupt something, which is probably why your code is crashing.
Sine it is very unclear what your code is trying to do, I can't suggest what you should have here instead.
I have just managed to do this, you only have to put "-1" to your loop limit:
for (unsigned int i = 0; i < nSamples-1; i++)
graph[i].x = i;
graph[i].y = values[i];
I'm trying to implement the Particle Swarm Optimization on CUDA. I'm partially initializing data arrays on host, then I allocate memory on CUDA and copy it there, and then try to proceed with the initialization.
The problem is, when I'm trying to modify array element like so
__global__ void kernelInit(
float* X,
size_t pitch,
int width,
float X_high,
float X_low
) {
// Silly, but pretty reliable way to address array elements
unsigned int tid = blockIdx.x * blockDim.x + threadIdx.x;
int r = tid / width;
int c = tid % width;
float* pElement = (float*)((char*)X + r * pitch) + c;
*pElement = *pElement * (X_high - X_low) - X_low;
//*pElement = (X_high - X_low) - X_low;
It corrupts the values and gives me 1.#INF00 as array element. When I uncomment the last line *pElement = (X_high - X_low) - X_low; and comment the previous, it works as expected: I get values like 15.36 and so on.
I believe the problem is either with my memory allocation and copying, and/or with adressing the specific array element. I read the CUDA manual about these both topics, but I can't spot the error: I still get corrupt array if I do anything with the element of the array. For example, *pElement = *pElement * 2 gives unreasonable big results like 779616...00000000.00000 when the initial pElement is expected to be just a float in [0;1].
Here is the full source. Initialization of arrays begins in main (bottom of the source), then f1 function does the work for CUDA and launches the initialization kernel kernelInit:
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <math.h>
#include <cuda.h>
#include <cuda_runtime.h>
const unsigned f_n = 3;
const unsigned n = 2;
const unsigned p = 64;
typedef struct {
unsigned k_max;
float c1;
float c2;
unsigned p;
float inertia_factor;
float Ef;
float X_low[f_n];
float X_high[f_n];
float X_min[n][f_n];
} params_t;
typedef void (*kernelWrapperType) (
float *X,
float *X_highVec,
float *V,
float *X_best,
float *Y,
float *Y_best,
float *X_swarmBest,
bool &termination,
const float &inertia,
const params_t *params,
const unsigned &f
typedef float (*twoArgsFuncType) (
float x1,
float x2
__global__ void kernelInit(
float* X,
size_t pitch,
int width,
float X_high,
float X_low
) {
// Silly, but pretty reliable way to address array elements
unsigned int tid = blockIdx.x * blockDim.x + threadIdx.x;
int r = tid / width;
int c = tid % width;
float* pElement = (float*)((char*)X + r * pitch) + c;
*pElement = *pElement * (X_high - X_low) - X_low;
//*pElement = (X_high - X_low) - X_low;
__device__ float kernelF1(
float x1,
float x2
) {
float y = pow(x1, 2.f) + pow(x2, 2.f);
return y;
void f1(
float *X,
float *X_highVec,
float *V,
float *X_best,
float *Y,
float *Y_best,
float *X_swarmBest,
bool &termination,
const float &inertia,
const params_t *params,
const unsigned &f
) {
float *X_d = NULL;
float *Y_d = NULL;
unsigned length = n * p;
const cudaChannelFormatDesc desc = cudaCreateChannelDesc<float4>();
size_t pitch;
size_t dpitch;
cudaError_t err;
unsigned width = n;
unsigned height = p;
err = cudaMallocPitch (&X_d, &dpitch, width * sizeof(float), height);
pitch = n * sizeof(float);
err = cudaMemcpy2D(X_d, dpitch, X, pitch, width * sizeof(float), height, cudaMemcpyHostToDevice);
err = cudaMalloc (&Y_d, sizeof(float) * p);
err = cudaMemcpy (Y_d, Y, sizeof(float) * p, cudaMemcpyHostToDevice);
dim3 threads; threads.x = 32;
dim3 blocks; blocks.x = (length/threads.x) + 1;
kernelInit<<<threads,blocks>>>(X_d, dpitch, width, params->X_high[f], params->X_low[f]);
err = cudaMemcpy2D(X, pitch, X_d, dpitch, n*sizeof(float), p, cudaMemcpyDeviceToHost);
err = cudaFree(X_d);
err = cudaMemcpy(Y, Y_d, sizeof(float) * p, cudaMemcpyDeviceToHost);
err = cudaFree(Y_d);
float F1(
float x1,
float x2
) {
float y = pow(x1, 2.f) + pow(x2, 2.f);
return y;
* Generates random float in [0.0; 1.0]
float frand(){
return (float)rand()/(float)RAND_MAX;
* This is the main routine which declares and initializes the integer vector, moves it to the device, launches kernel
* brings the result vector back to host and dumps it on the console.
int main() {
const params_t params = {
{-5.12, -2.048, -5.12},
{5.12, 2.048, 5.12},
{{0, 1, 0}, {0, 1, 0}}
float X[p][n];
float X_highVec[n];
float V[p][n];
float X_best[p][n];
float Y[p] = {0};
float Y_best[p] = {0};
float X_swarmBest[n];
kernelWrapperType F_wrapper[f_n] = {&f1, &f1, &f1};
twoArgsFuncType F[f_n] = {&F1, &F1, &F1};
for (unsigned f = 0; f < f_n; f++) {
printf("Optimizing function #%u\n", f);
srand ( time(NULL) );
for (unsigned i = 0; i < p; i++)
for (unsigned j = 0; j < n; j++)
X[i][j] = X_best[i][j] = frand();
for (int i = 0; i < n; i++)
X_highVec[i] = params.X_high[f];
for (unsigned i = 0; i < p; i++)
for (unsigned j = 0; j < n; j++)
V[i][j] = frand();
for (unsigned i = 0; i < p; i++)
Y_best[i] = F[f](X[i][0], X[i][1]);
for (unsigned i = 0; i < n; i++)
X_swarmBest[i] = params.X_high[f];
float y_swarmBest = F[f](X_highVec[0], X_highVec[1]);
bool termination = false;
float inertia = 1.;
for (unsigned k = 0; k < params.k_max; k++) {
F_wrapper[f]((float *)X, X_highVec, (float *)V, (float *)X_best, Y, Y_best, X_swarmBest, termination, inertia, ¶ms, f);
for (unsigned i = 0; i < p; i++)
for (unsigned j = 0; j < n; j++)
printf("%f\t", X[i][j]);
printf("F = %f\n", Y[i]);
Update: I tried adding error handling like so
err = cudaMallocPitch (&X_d, &dpitch, width * sizeof(float), height);
if (err != cudaSuccess) {
fprintf(stderr, cudaGetErrorString(err));
after each API call, but it gave me nothing and didn't return (I still get all the results and program works to the end).
This is an unnecessarily complex piece of code for what should be a simple repro case, but this immediately jumps out:
const unsigned n = 2;
const unsigned p = 64;
unsigned length = n * p
dim3 threads; threads.x = 32;
dim3 blocks; blocks.x = (length/threads.x) + 1;
kernelInit<<<threads,blocks>>>(X_d, dpitch, width, params->X_high[f], params->X_low[f]);
So you are firstly computing the incorrect number of blocks, and then reversing the order of the blocks per grid and threads per block arguments in the kernel launch. That may well lead to out of bounds memory access, either hosing something in GPU memory or causing an unspecified launch failure, which your lack of error handling might not be catching. There is a tool called cuda-memcheck which has been shipped with the toolkit since about CUDA 3.0. If you run it, it will give you valgrind style memory access violation reports. You should get into the habit of using it, if you are not already doing so.
As for infinite values, that is to be expected isn't it? Your code starts with values in (0,1), and then does
X[i] = X[i] * (5.12--5.12) - -5.12
100 times, which is the rough equivalent of multiplying by 10^100, which is then followed by
X[i] = X[i] * (2.048--2.048) - -2.048
100 times, which is the rough equivalent of multiplying by 4^100, finally followed by
X[i] = X[i] * (5.12--5.12) - -5.12
again. So your results should be of the order of 1E250, which is much larger than the maximum 3.4E38 which is the rough upper limit of representable numbers in IEEE 754 single precision.