Received throughput issue with saturated traffic

I am using the NS-3 (v3.13) Wi-Fi model in an infrastructure topology configured as follows (simulation file attached):
Single AP (BSS)
Multiple STAs (stations)
Application duration = 10s
Saturated downlink traffic (OnOffApplication with OnTime=2s and OffTime=0) from AP to all STAs
Phy: 802.11a
Default YansWifiChannelHelper and YansWifiPhyHelper
Rate control: ConstantRateWifiManager
Mobility: ConstantPositionMobilityModel (STAs are positioned on a circle of 2 meters radius around the AP)
Everything works fine until the traffic saturates the channel: at a high bitrate, when the number of STAs per BSS grows large, some STAs do not receive a single byte!
Experiments:
OnOffApplication DataRate = 60Mb/s, Phy DataMode=OfdmRate54Mbps, 30 STAs: one STA receives packets at 7.2Mb/s and another at 15.3Mb/s; the other 28 STAs receive nothing.
OnOffApplication DataRate = 60Mb/s, DataMode=OfdmRate6Mbps, 30 STAs: one STA receives packets at 1.95Mb/s and another at 4.3Mb/s; the other 28 STAs receive nothing.
I think the problem comes from the OnOffApplication configuration; how should I configure it to simulate full-buffer downlink traffic?
Thanks in advance for any suggestion.
#include "ns3/core-module.h"
#include "ns3/point-to-point-module.h"
#include "ns3/network-module.h"
#include "ns3/applications-module.h"
#include "ns3/wifi-module.h"
#include "ns3/mobility-module.h"
#include "ns3/csma-module.h"
#include "ns3/internet-module.h"
#include "ns3/flow-monitor-helper.h"
#include "ns3/flow-monitor-module.h"
#include "ns3/applications-module.h"
#include "ns3/internet-module.h"
#include "ns3/gnuplot.h"
#include "ns3/constant-velocity-helper.h"
#include "ns3/integer.h"
#include "ns3/mpi-interface.h"
#include "math.h"
#include <iostream>
/**
* PARAMETERS
*/
#define StaNb 30
#define Distance 2
#define Duration 10
#define DataRate 90000000
#define PacketSize 1500
#define couleur(param) printf("\033[%sm",param)
using namespace ns3;
class Experiment {
public:
Experiment();
void CreateArchi(void);
void CreateApplis();
private:
Ptr<ListPositionAllocator> positionAllocAp;
Ptr<ListPositionAllocator> positionAllocSta;
Ptr<GridPositionAllocator> positionAllocStaCouloir;
Ptr<RandomDiscPositionAllocator> positionAllocStaAmphi;
std::vector<Ptr<ConstantPositionMobilityModel> > constant;
NodeContainer m_wifiAP, m_wifiQSta;
NetDeviceContainer m_APDevice;
NetDeviceContainer m_QStaDevice;
YansWifiChannelHelper m_channel;
Ptr<YansWifiChannel> channel;
YansWifiPhyHelper m_phyLayer_Sta, m_phyLayer_AP;
WifiHelper m_wifi;
QosWifiMacHelper m_macSta, m_macAP;
InternetStackHelper m_stack;
Ipv4InterfaceContainer m_StaInterface;
Ipv4InterfaceContainer m_ApInterface;
Ssid m_ssid;
};
Experiment::Experiment() {
positionAllocStaCouloir = CreateObject<GridPositionAllocator>();
positionAllocAp = CreateObject<ListPositionAllocator>();
positionAllocSta = CreateObject<ListPositionAllocator>();
positionAllocStaAmphi = CreateObject<RandomDiscPositionAllocator>();
m_wifi = WifiHelper::Default();
constant.resize(StaNb + 1);
for (int i = 0; i < StaNb + 1; i++) {
constant[i] = CreateObject<ConstantPositionMobilityModel>();
}
}
void Experiment::CreateArchi(void) {
m_wifiQSta.Create(StaNb);
m_wifiAP.Create(1);
m_ssid = Ssid("BSS_circle");
m_channel = YansWifiChannelHelper::Default();
channel = m_channel.Create();
m_wifi.SetStandard(WIFI_PHY_STANDARD_80211a);
m_wifi.SetRemoteStationManager("ns3::ConstantRateWifiManager", "DataMode",
StringValue("OfdmRate6Mbps"));
m_phyLayer_Sta = YansWifiPhyHelper::Default();
m_phyLayer_AP = YansWifiPhyHelper::Default();
m_phyLayer_Sta.SetChannel(channel);
m_phyLayer_AP.SetChannel(channel);
positionAllocAp->Add(Vector3D(0.0, 0.0, 0.0));
MobilityHelper mobilityAp;
mobilityAp.SetPositionAllocator(positionAllocAp);
mobilityAp.SetMobilityModel("ns3::ConstantPositionMobilityModel");
mobilityAp.Install(m_wifiAP.Get(0));
constant[0]->SetPosition(Vector3D(0.0, 0.0, 0.0));
float deltaAngle = 2 * M_PI / StaNb;
float angle = 0.0;
double x = 0.0;
double y = 0.0;
for (int i = 0; i < StaNb; i++) {
x = cos(angle) * Distance;
y = sin(angle) * Distance;
positionAllocSta->Add(Vector3D(x, y, 0.0));
MobilityHelper mobilitySta;
mobilitySta.SetPositionAllocator(positionAllocSta);
mobilitySta.SetMobilityModel("ns3::ConstantPositionMobilityModel");
mobilitySta.Install(m_wifiQSta.Get(i));
constant[i]->SetPosition(Vector3D(x, y, 0.0));
angle += deltaAngle;
}
m_macSta = QosWifiMacHelper::Default();
m_macSta.SetType("ns3::StaWifiMac", "ActiveProbing", BooleanValue(true),
"Ssid", SsidValue(m_ssid));
m_macAP = QosWifiMacHelper::Default();
m_macAP.SetType("ns3::ApWifiMac", "Ssid", SsidValue(m_ssid),
"BeaconInterval", TimeValue(Time(std::string("100ms"))));
m_APDevice.Add(m_wifi.Install(m_phyLayer_AP, m_macAP, m_wifiAP));
for (int i = 0; i < StaNb; i++) {
m_QStaDevice.Add(
m_wifi.Install(m_phyLayer_Sta, m_macSta, m_wifiQSta.Get(i)));
}
m_stack.Install(m_wifiAP);
m_stack.Install(m_wifiQSta);
Ipv4AddressHelper address;
address.SetBase("192.168.1.0", "255.255.255.0");
m_ApInterface.Add(address.Assign(m_APDevice.Get(0)));
for (int i = 0; i < StaNb; i++) {
m_StaInterface.Add(address.Assign(m_QStaDevice.Get(i)));
}
Ipv4GlobalRoutingHelper::PopulateRoutingTables();
}
void Experiment::CreateApplis() {
ApplicationContainer source;
OnOffHelper onoff("ns3::UdpSocketFactory", Address());
onoff.SetAttribute("OnTime", RandomVariableValue(ConstantVariable(2)));
onoff.SetAttribute("OffTime", RandomVariableValue(ConstantVariable(0)));
onoff.SetAttribute("DataRate", StringValue("500kb/s"));
for (int i = 0; i < StaNb; i++) {
AddressValue remoteAddress(
InetSocketAddress(m_StaInterface.GetAddress(i), 5010));
onoff.SetAttribute("Remote", remoteAddress);
source.Add(onoff.Install(m_wifiAP.Get(0)));
}
source.Start(Seconds(3.0));
source.Stop(Seconds(Duration));
ApplicationContainer sinks;
PacketSinkHelper packetSinkHelper("ns3::UdpSocketFactory",
Address(InetSocketAddress(Ipv4Address::GetAny(), 5010)));
for (int i = 0; i < StaNb; i++) {
sinks.Add(packetSinkHelper.Install(m_wifiQSta.Get(i)));
}
sinks.Start(Seconds(3.0));
sinks.Stop(Seconds(Duration));
}
int main(int argc, char *argv[]) {
Experiment exp = Experiment();
Config::SetDefault("ns3::WifiRemoteStationManager::RtsCtsThreshold",
StringValue("2346"));
exp.CreateArchi();
exp.CreateApplis();
FlowMonitorHelper flowmon;
Ptr<FlowMonitor> monitor = flowmon.InstallAll();
Simulator::Stop(Seconds(Duration));
Simulator::Run();
monitor->CheckForLostPackets();
Ptr<Ipv4FlowClassifier> classifier = DynamicCast<Ipv4FlowClassifier>(
flowmon.GetClassifier());
std::map<FlowId, FlowMonitor::FlowStats> stats = monitor->GetFlowStats();
int c = 0;
for (std::map<FlowId, FlowMonitor::FlowStats>::const_iterator i =
stats.begin(); i != stats.end(); ++i) {
Ipv4FlowClassifier::FiveTuple t = classifier->FindFlow(i->first);
std::cout << "Flux " << i->first << " (" << t.sourceAddress << " -> "
<< t.destinationAddress << ")\n";
std::cout << " Tx Bytes : " << i->second.txBytes << "\n";
std::cout << " Rx Bytes : " << i->second.rxBytes << "\n";
couleur("33");
std::cout << " Bitrate : "
<< i->second.rxBytes * 8.0
/ (i->second.timeLastRxPacket.GetSeconds()
- i->second.timeFirstRxPacket.GetSeconds())
/ 1000000 << " Mbps\n\n";
couleur("0");
if (i->second.rxBytes > 0)
c++;
}
std::cout << " Number of receiving nodes : " << c << "\n";
Simulator::Destroy();
}

I think the medium is too busy.
You need to tune down the OnOff data rate, e.g. to 1 Mb/s. In practice, even full-buffer 720p video needs no more than about 1 Mb/s.
You may also check tracing with pcap, ASCII, or NetAnim to see whether packets are being dropped, never sent at all, or whether there is a bug in your code.
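As a rough illustration, here is a minimal sketch (reusing the helper names from the question's code; the per-flow rate is an assumption, not a tested value) that keeps the aggregate offered load below the 6 Mb/s PHY rate instead of saturating every flow, and enables pcap tracing on the AP:
// Hypothetical tuning sketch: with 30 STAs sharing an OfdmRate6Mbps channel,
// keep the sum of the per-flow rates well under the PHY rate.
onoff.SetAttribute("DataRate", StringValue("150kb/s")); // 30 flows -> ~4.5 Mb/s total
onoff.SetAttribute("PacketSize", UintegerValue(PacketSize));
m_phyLayer_AP.EnablePcap("ap", m_APDevice.Get(0)); // check what actually leaves the AP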

Related

I am trying to have stable flight with px4 and ros2 offboard control

Hello guys, I have offboard code which gives about 50 setpoints to the drone; it draws a spiral with those setpoints. My problem is that I can't get smooth travel: at every setpoint the drone makes an instant high roll or pitch and then floats to the next setpoint. Is there a way to keep a stable velocity while passing through the setpoints? Here is the code:
#include <px4_msgs/msg/offboard_control_mode.hpp>
#include <px4_msgs/msg/trajectory_setpoint.hpp>
#include <px4_msgs/msg/timesync.hpp>
#include <px4_msgs/msg/vehicle_command.hpp>
#include <px4_msgs/msg/vehicle_control_mode.hpp>
#include <px4_msgs/msg/vehicle_local_position.hpp>
#include <rclcpp/rclcpp.hpp>
#include <stdint.h>
#include <chrono>
#include <iostream>
#include "std_msgs/msg/string.hpp"
#include <math.h>
float X;
float Y;
using namespace std::chrono;
using namespace std::chrono_literals;
using namespace px4_msgs::msg;
class setpoint : public rclcpp::Node {
public:
setpoint() : Node("setpoint") {
offboard_control_mode_publisher_ =
this->create_publisher<OffboardControlMode>("fmu/offboard_control_mode/in", 10);
trajectory_setpoint_publisher_ =
this->create_publisher<TrajectorySetpoint>("fmu/trajectory_setpoint/in", 10);
vehicle_command_publisher_ =
this->create_publisher<VehicleCommand>("fmu/vehicle_command/in", 10);
// get common timestamp
timesync_sub_ =
this->create_subscription<px4_msgs::msg::Timesync>("fmu/timesync/out", 10,
[this](const px4_msgs::msg::Timesync::UniquePtr msg) {
timestamp_.store(msg->timestamp);
});
offboard_setpoint_counter_ = 0;
auto sendCommands = [this]() -> void {
if (offboard_setpoint_counter_ == 10) {
// Change to Offboard mode after 10 setpoints
this->publish_vehicle_command(VehicleCommand::VEHICLE_CMD_DO_SET_MODE, 1, 6);
// Arm the vehicle
this->arm();
}
//-------------
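// NOTE: this subscription is re-created on every 100 ms timer tick; creating
// it once (e.g. in the constructor) would avoid the repeated setup cost.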
subscription_ = this->create_subscription<px4_msgs::msg::VehicleLocalPosition>(
"/fmu/vehicle_local_position/out",
#ifdef ROS_DEFAULT_API
10,
#endif
[this](const px4_msgs::msg::VehicleLocalPosition::UniquePtr msg) {
X = msg->x;
Y = msg->y;
std::cout << "\n\n\n\n\n\n\n\n\n\n";
std::cout << "RECEIVED VEHICLE GPS POSITION DATA" << std::endl;
std::cout << "==================================" << std::endl;
std::cout << "ts: " << msg->timestamp << std::endl;
//std::cout << "lat: " << msg->x << std::endl;
//std::cout << "lon: " << msg->y << std::endl;
std::cout << "lat: " << X << std::endl;
std::cout << "lon: " << Y << std::endl;
std::cout << "waypoint: " << waypoints[waypointIndex][0] << std::endl;
std::cout << "waypoint: " << waypoints[waypointIndex][1] << std::endl;
if((waypoints[waypointIndex][0] + 0.3 > X && waypoints[waypointIndex][0] - 0.3 < X)&&(waypoints[waypointIndex][1] + 0.3 > Y && waypoints[waypointIndex][1] - 0.3 < Y)){
waypointIndex++;
if (waypointIndex >= waypoints.size())
exit(0);
//waypointIndex = 0;
RCLCPP_INFO(this->get_logger(), "Next waypoint: %.2f %.2f %.2f", waypoints[waypointIndex][0], waypoints[waypointIndex][1], waypoints[waypointIndex][2]);
}
});
//--------------
// offboard_control_mode needs to be paired with trajectory_setpoint
publish_offboard_control_mode();
publish_trajectory_setpoint();
// stop the counter after reaching 11
if (offboard_setpoint_counter_ < 11) {
offboard_setpoint_counter_++;
}
};
/*
auto nextWaypoint = [this]() -> void {
waypointIndex++;
if (waypointIndex >= waypoints.size())
waypointIndex = 0;
RCLCPP_INFO(this->get_logger(), "Next waypoint: %.2f %.2f %.2f", waypoints[waypointIndex][0], waypoints[waypointIndex][1], waypoints[waypointIndex][2]);
};
*/
commandTimer = this->create_wall_timer(100ms, sendCommands);
//waypointTimer = this->create_wall_timer(2s, nextWaypoint); //EA
}
void arm() const;
void disarm() const;
void topic_callback() const;
private:
std::vector<std::vector<float>> waypoints = {{0,0,-5,},
{2,0,-5,},
{2.35216,0.476806,-5,},
{2.57897,1.09037,-5,},
{2.64107,1.80686,-5,},
{2.50814,2.58248,-5,},
{2.16121,3.36588,-5,},
{1.59437,4.10097,-5,},
{0.815842,4.73016,-5,},
{-0.151838,5.19778,-5,},
{-1.27233,5.45355,-5,},
{-2.49688,5.45578,-5,},
{-3.76641,5.17438,-5,},
{-5.01428,4.59315,-5,},
{-6.1696,3.71161,-5,},
{-7.16089,2.54591,-5,},
{-7.91994,1.12896,-5,},
{-8.38568,-0.490343,-5,},
{-8.50782,-2.24876,-5,},
{-8.25018,-4.07119,-5,},
{-7.59329,-5.87384,-5,},
{-6.53644,-7.56803,-5,},
{-5.09871,-9.06439,-5,},
{-3.31919,-10.2773,-5,},
{-1.25611,-11.1293,-5,},
{1.01499,-11.5555,-5,},
{3.40395,-11.5071,-5,},
{5.8096,-10.9548,-5,},
{8.12407,-9.89139,-5,},
{10.2375,-8.33272,-5,},
{12.0431,-6.31859,-5,},
{13.4424,-3.91182,-5,},
{14.3502,-1.19649,-5,},
{14.6991,1.72493,-5,},
{14.4435,4.73543,-5,},
{13.5626,7.70817,-5,},
{12.0624,10.5118,-5,},
{9.97696,13.0162,-5,},
{7.36759,15.0983,-5,},
{4.32167,16.6482,-5,},
{0.949612,17.5744,-5,},
{-2.619,17.8084,-5,},
{-6.24045,17.3094,-5,},
{-9.76262,16.0665,-5,},
{-13.0314,14.1004,-5,},
{-15.8974,11.4644,-5,},
{-18.2226,8.24237,-5,},
{-19.8868,4.54696,-5,},
{-20.7936,0.515337,-5,},
{-20.8754,-3.69574,-5,},
{-20.0972,-7.91595,-5,},
{-20.8754,-3.69574,-5,},
{-20.7936,0.515337,-5,},
{-19.8868,4.54696,-5,},
{-18.2226,8.24237,-5,},
{-15.8974,11.4644,-5,},
{-13.0314,14.1004,-5,},
{-9.76262,16.0665,-5,},
{-6.24045,17.3094,-5,},
{-2.619,17.8084,-5,},
{0.949612,17.5744,-5,},
{4.32167,16.6482,-5,},
{7.36759,15.0983,-5,},
{9.97696,13.0162,-5,},
{12.0624,10.5118,-5,},
{13.5626,7.70817,-5,},
{14.4435,4.73543,-5,},
{14.6991,1.72493,-5,},
{14.3502,-1.19649,-5,},
{13.4424,-3.91182,-5,},
{12.0431,-6.31859,-5,},
{10.2375,-8.33272,-5,},
{8.12407,-9.89139,-5,},
{5.8096,-10.9548,-5,},
{3.40395,-11.5071,-5,},
{1.01499,-11.5555,-5,},
{-1.25611,-11.1293,-5,},
{-3.31919,-10.2773,-5,},
{-5.09871,-9.06439,-5,},
{-6.53644,-7.56803,-5,},
{-7.59329,-5.87384,-5,},
{-8.25018,-4.07119,-5,},
{-8.50782,-2.24876,-5,},
{-8.38568,-0.490343,-5,},
{-7.91994,1.12896,-5,},
{-7.16089,2.54591,-5,},
{-6.1696,3.71161,-5,},
{-5.01428,4.59315,-5,},
{-3.76641,5.17438,-5,},
{-2.49688,5.45578,-5,},
{-1.27233,5.45355,-5,},
{-0.151838,5.19778,-5,},
{0.815842,4.73016,-5,},
{1.59437,4.10097,-5,},
{2.16121,3.36588,-5,},
{2.50814,2.58248,-5,},
{2.64107,1.80686,-5,},
{2.57897,1.09037,-5,},
{2.35216,0.476806,-5,},
{2,0,-5,},
{0,0,-5,},
{0,0,0,}
}; // Land
int waypointIndex = 0;
rclcpp::TimerBase::SharedPtr commandTimer;
rclcpp::TimerBase::SharedPtr waypointTimer;
rclcpp::Publisher<OffboardControlMode>::SharedPtr offboard_control_mode_publisher_;
rclcpp::Publisher<TrajectorySetpoint>::SharedPtr trajectory_setpoint_publisher_;
rclcpp::Publisher<VehicleCommand>::SharedPtr vehicle_command_publisher_;
rclcpp::Subscription<px4_msgs::msg::Timesync>::SharedPtr timesync_sub_;
//
rclcpp::Subscription<px4_msgs::msg::VehicleLocalPosition>::SharedPtr subscription_;
//
std::atomic<uint64_t> timestamp_; //!< common synced timestamped
uint64_t offboard_setpoint_counter_; //!< counter for the number of setpoints sent
void publish_offboard_control_mode() const;
void publish_trajectory_setpoint() const;
void publish_vehicle_command(uint16_t command, float param1 = 0.0,
float param2 = 0.0) const;
};
void setpoint::arm() const {
publish_vehicle_command(VehicleCommand::VEHICLE_CMD_COMPONENT_ARM_DISARM, 1.0);
RCLCPP_INFO(this->get_logger(), "Arm command send");
}
void setpoint::disarm() const {
publish_vehicle_command(VehicleCommand::VEHICLE_CMD_COMPONENT_ARM_DISARM, 0.0);
RCLCPP_INFO(this->get_logger(), "Disarm command send");
}
void setpoint::publish_offboard_control_mode() const {
OffboardControlMode msg{};
msg.timestamp = timestamp_.load();
msg.position = true;
msg.velocity = false;
msg.acceleration = false;
msg.attitude = false;
msg.body_rate = false;
offboard_control_mode_publisher_->publish(msg);
}
void setpoint::publish_trajectory_setpoint() const {
TrajectorySetpoint msg{};
msg.timestamp = timestamp_.load();
msg.position = {waypoints[waypointIndex][0],waypoints[waypointIndex][1],waypoints[waypointIndex][2]};
msg.yaw = std::nanf("0"); //-3.14; // [-PI:PI]
trajectory_setpoint_publisher_->publish(msg);
}
void setpoint::publish_vehicle_command(uint16_t command, float param1,
float param2) const {
VehicleCommand msg{};
msg.timestamp = timestamp_.load();
msg.param1 = param1;
msg.param2 = param2;
msg.command = command;
msg.target_system = 1;
msg.target_component = 1;
msg.source_system = 1;
msg.source_component = 1;
msg.from_external = true;
vehicle_command_publisher_->publish(msg);
}
int main(int argc, char* argv[]) {
std::cout << "Starting setpoint node..." << std::endl;
setvbuf(stdout, NULL, _IONBF, BUFSIZ);
rclcpp::init(argc, argv);
rclcpp::spin(std::make_shared<setpoint>());
rclcpp::shutdown();
return 0;
}
We send the setpoints to the controller by giving reference points. The aircraft then tries to maneuver to the given points via its control strategy (usually PID). Therefore, to get a smooth maneuver, it is usually suggested to give a series of discrete points between two waypoints, i.e., a trajectory parameterized by time. That should resolve the abrupt motion of your UAV. I'm no expert, but I hope this helps.
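For instance, here is a minimal sketch of that idea (a hypothetical helper, not part of the question's code): linearly interpolate intermediate setpoints between consecutive waypoints and publish one per 100 ms timer tick, so the controller tracks a dense, time-parameterized path instead of jumping between sparse points:
#include <array>
#include <vector>
// Hypothetical helper: returns `steps` points evenly spaced from a to b
// (excluding a, including b); publish one per timer tick before advancing
// to the next waypoint.
std::vector<std::array<float, 3>> Interpolate(const std::array<float, 3>& a,
                                              const std::array<float, 3>& b,
                                              int steps) {
  std::vector<std::array<float, 3>> pts;
  for (int s = 1; s <= steps; ++s) {
    float t = static_cast<float>(s) / steps;
    pts.push_back({a[0] + t * (b[0] - a[0]),
                   a[1] + t * (b[1] - a[1]),
                   a[2] + t * (b[2] - a[2])});
  }
  return pts;
}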

why openmp is not working faster?

I am using OpenMP to convert an image from RGB to grayscale, but OpenMP is not faster than the plain version, and I wonder why. Why is a single thread faster than multiple threads? How can I speed it up?
here is the code:
#include <iostream>
#include <omp.h>
#include <opencv2/opencv.hpp>
using namespace std;
using namespace cv;
int main() {
Mat image = imread("../family.jpg");
Mat gray(image.rows, image.cols, CV_8UC1);
clock_t start, stop;
start = clock();
for (int i = 0; i < image.rows; i++)
for (int j = 0; j < image.cols; j++) {
gray.at<uchar>(i,j) = image.at<cv::Vec3b>(i,j)[0]*0.114
+ image.at<cv::Vec3b>(i,j)[1]*0.587
+ image.at<cv::Vec3b>(i,j)[2]*0.299;
}
stop = clock() - start;
cout << "time: " << stop * 1.0 / CLOCKS_PER_SEC << endl;
start = clock();
omp_set_num_threads(4);
#pragma omp parallel
{
#pragma omp for
for (int i = 0; i < image.rows; i++)
for (int j = 0; j < image.cols; j++) {
gray.at<uchar>(i,j) = image.at<cv::Vec3b>(i,j)[0]*0.114
+ image.at<cv::Vec3b>(i,j)[1]*0.587
+ image.at<cv::Vec3b>(i,j)[2]*0.299;
}
}
stop = clock() - start;
cout << "time: " << stop * 1.0 / CLOCKS_PER_SEC << endl;
return 0;
}
result:
normal time: 0.035253
openmp time: 0.069894
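A likely explanation (as the note in the code above says) is the timer itself: clock() accumulates CPU time over all threads, so the parallel version is charged four threads' worth of time. Here is a minimal re-timing sketch of the same pixel loop with omp_get_wtime(), which measures wall-clock time (variable names as in the question):
double t0 = omp_get_wtime();
#pragma omp parallel for
for (int i = 0; i < image.rows; i++)
    for (int j = 0; j < image.cols; j++) {
        const cv::Vec3b& px = image.at<cv::Vec3b>(i, j);
        gray.at<uchar>(i, j) = px[0] * 0.114 + px[1] * 0.587 + px[2] * 0.299;
    }
double t1 = omp_get_wtime();
cout << "wall time: " << (t1 - t0) << " s" << endl;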

How to turn OpenCV_GPUMat into CUdeviceptr?

I was modifying the NvTranscoder project from Video_Codec_SDK_8.0.14 in order to add some signal-processing work to the video frames.
However, I encountered some problems when turning the GpuMat into a CUdeviceptr, and I was wondering how to do that conversion: after performing the blurring function I have emphasized below, I want to turn the processed Mat back into a CUdeviceptr.
Besides, the part converting the CUdeviceptr into a GpuMat is also wrong, as it reports that gpuInput cannot read memory.
Can anyone also point out other problems with my implementation?
The code is as follows:
#include <time.h>
#ifdef _WIN32
#include <windows.h>
#else
#include <pthread.h>
#endif
#include <stdio.h>
#include <string.h>
#include "dynlink_cuda.h" // <cuda.h>
#include "VideoDecoder.h"
#include "VideoEncoder.h"
#include "../common/inc/nvUtils.h"
#include <opencv2/opencv.hpp>
#include "opencv2/gpu/gpu.hpp"
using namespace cv;
#ifdef _WIN32
DWORD WINAPI DecodeProc(LPVOID lpParameter)
{
CudaDecoder* pDecoder = (CudaDecoder*)lpParameter;
pDecoder->Start();
return 0;
}
#else
void* DecodeProc(void *arg)
{
CudaDecoder* pDecoder = (CudaDecoder*)arg;
pDecoder->Start();
return NULL;
}
#endif
int MatchFPS(const float fpsRatio, int decodedFrames, int encodedFrames)
{
if (fpsRatio < 1.f) {
// need to drop frame
if (decodedFrames * fpsRatio < (encodedFrames + 1)) {
return -1;
}
}
else if (fpsRatio > 1.f) {
// need to duplicate frame
int duplicate = 0;
while (decodedFrames*fpsRatio > encodedFrames + duplicate + 1) {
duplicate++;
}
return duplicate;
}
return 0;
}
void PrintHelp()
{
printf("Usage : NvTranscoder \n"
"-i <string> Specify input .h264 file\n"
"-o <string> Specify output bitstream file\n"
"\n### Optional parameters ###\n"
"-size <int int> Specify output resolution <width height>\n"
"-codec <integer> Specify the codec \n"
" 0: H264\n"
" 1: HEVC\n"
"-preset <string> Specify the preset for encoder settings\n"
" hq : nvenc HQ \n"
" hp : nvenc HP \n"
" lowLatencyHP : nvenc low latency HP \n"
" lowLatencyHQ : nvenc low latency HQ \n"
" lossless : nvenc Lossless HP \n"
"-fps <integer> Specify encoding frame rate\n"
"-goplength <integer> Specify gop length\n"
"-numB <integer> Specify number of B frames\n"
"-bitrate <integer> Specify the encoding average bitrate\n"
"-vbvMaxBitrate <integer> Specify the vbv max bitrate\n"
"-vbvSize <integer> Specify the encoding vbv/hrd buffer size\n"
"-rcmode <integer> Specify the rate control mode\n"
" 0: Constant QP mode\n"
" 1: Variable bitrate mode\n"
" 2: Constant bitrate mode\n"
" 8: low-delay CBR, high quality\n"
" 16: CBR, high quality (slower)\n"
" 32: VBR, high quality (slower)\n"
"-qp <integer> Specify qp for Constant QP mode\n"
"-i_qfactor <float> Specify qscale difference between I-frames and P-frames\n"
"-b_qfactor <float> Specify qscale difference between P-frames and B-frames\n"
"-i_qoffset <float> Specify qscale offset between I-frames and P-frames\n"
"-b_qoffset <float> Specify qscale offset between P-frames and B-frames\n"
"-deviceID <integer> Specify the GPU device on which encoding will take place\n"
"-help Prints Help Information\n\n"
);
}
int main(int argc, char* argv[])
{
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
typedef HMODULE CUDADRIVER;
#else
typedef void *CUDADRIVER;
#endif
CUDADRIVER hHandleDriver = 0;
__cu(cuInit(0, __CUDA_API_VERSION, hHandleDriver));
__cu(cuvidInit(0));
EncodeConfig encodeConfig = { 0 };
encodeConfig.endFrameIdx = INT_MAX;
encodeConfig.bitrate = 5000000;
encodeConfig.rcMode = NV_ENC_PARAMS_RC_CONSTQP;
encodeConfig.gopLength = NVENC_INFINITE_GOPLENGTH;
encodeConfig.codec = NV_ENC_H264;
encodeConfig.fps = 0;
encodeConfig.qp = 28;
encodeConfig.i_quant_factor = DEFAULT_I_QFACTOR;
encodeConfig.b_quant_factor = DEFAULT_B_QFACTOR;
encodeConfig.i_quant_offset = DEFAULT_I_QOFFSET;
encodeConfig.b_quant_offset = DEFAULT_B_QOFFSET;
encodeConfig.presetGUID = NV_ENC_PRESET_DEFAULT_GUID;
encodeConfig.pictureStruct = NV_ENC_PIC_STRUCT_FRAME;
NVENCSTATUS nvStatus = CNvHWEncoder::ParseArguments(&encodeConfig, argc, argv);
if (nvStatus != NV_ENC_SUCCESS)
{
PrintHelp();
return 1;
}
if (!encodeConfig.inputFileName || !encodeConfig.outputFileName)
{
PrintHelp();
return 1;
}
encodeConfig.fOutput = fopen(encodeConfig.outputFileName, "wb");
if (encodeConfig.fOutput == NULL)
{
PRINTERR("Failed to create \"%s\"\n", encodeConfig.outputFileName);
return 1;
}
//init cuda
CUcontext cudaCtx;
CUdevice device;
__cu(cuDeviceGet(&device, encodeConfig.deviceID));
__cu(cuCtxCreate(&cudaCtx, CU_CTX_SCHED_AUTO, device));
CUcontext curCtx;
CUvideoctxlock ctxLock;
__cu(cuCtxPopCurrent(&curCtx));
__cu(cuvidCtxLockCreate(&ctxLock, curCtx));
CudaDecoder* pDecoder = new CudaDecoder;
FrameQueue* pFrameQueue = new CUVIDFrameQueue(ctxLock);
pDecoder->InitVideoDecoder(encodeConfig.inputFileName, ctxLock, pFrameQueue, encodeConfig.width, encodeConfig.height);
int decodedW, decodedH, decodedFRN, decodedFRD, isProgressive;
pDecoder->GetCodecParam(&decodedW, &decodedH, &decodedFRN, &decodedFRD, &isProgressive);
if (decodedFRN <= 0 || decodedFRD <= 0) {
decodedFRN = 30;
decodedFRD = 1;
}
if(encodeConfig.width <= 0 || encodeConfig.height <= 0) {
encodeConfig.width = decodedW;
encodeConfig.height = decodedH;
}
float fpsRatio = 1.f;
if (encodeConfig.fps <= 0) {
encodeConfig.fps = decodedFRN / decodedFRD;
}
else {
fpsRatio = (float)encodeConfig.fps * decodedFRD / decodedFRN;
}
encodeConfig.pictureStruct = (isProgressive ? NV_ENC_PIC_STRUCT_FRAME : 0);
pFrameQueue->init(encodeConfig.width, encodeConfig.height);
VideoEncoder* pEncoder = new VideoEncoder(ctxLock);
assert(pEncoder->GetHWEncoder());
nvStatus = pEncoder->GetHWEncoder()->Initialize(cudaCtx, NV_ENC_DEVICE_TYPE_CUDA);
if (nvStatus != NV_ENC_SUCCESS)
return 1;
encodeConfig.presetGUID = pEncoder->GetHWEncoder()->GetPresetGUID(encodeConfig.encoderPreset, encodeConfig.codec);
printf("Encoding input : \"%s\"\n", encodeConfig.inputFileName);
printf(" output : \"%s\"\n", encodeConfig.outputFileName);
printf(" codec : \"%s\"\n", encodeConfig.codec == NV_ENC_HEVC ? "HEVC" : "H264");
printf(" size : %dx%d\n", encodeConfig.width, encodeConfig.height);
printf(" bitrate : %d bits/sec\n", encodeConfig.bitrate);
printf(" vbvMaxBitrate : %d bits/sec\n", encodeConfig.vbvMaxBitrate);
printf(" vbvSize : %d bits\n", encodeConfig.vbvSize);
printf(" fps : %d frames/sec\n", encodeConfig.fps);
printf(" rcMode : %s\n", encodeConfig.rcMode == NV_ENC_PARAMS_RC_CONSTQP ? "CONSTQP" :
encodeConfig.rcMode == NV_ENC_PARAMS_RC_VBR ? "VBR" :
encodeConfig.rcMode == NV_ENC_PARAMS_RC_CBR ? "CBR" :
encodeConfig.rcMode == NV_ENC_PARAMS_RC_VBR_MINQP ? "VBR MINQP (deprecated)" :
encodeConfig.rcMode == NV_ENC_PARAMS_RC_CBR_LOWDELAY_HQ ? "CBR_LOWDELAY_HQ" :
encodeConfig.rcMode == NV_ENC_PARAMS_RC_CBR_HQ ? "CBR_HQ" :
encodeConfig.rcMode == NV_ENC_PARAMS_RC_VBR_HQ ? "VBR_HQ" : "UNKNOWN");
if (encodeConfig.gopLength == NVENC_INFINITE_GOPLENGTH)
printf(" goplength : INFINITE GOP \n");
else
printf(" goplength : %d \n", encodeConfig.gopLength);
printf(" B frames : %d \n", encodeConfig.numB);
printf(" QP : %d \n", encodeConfig.qp);
printf(" preset : %s\n", (encodeConfig.presetGUID == NV_ENC_PRESET_LOW_LATENCY_HQ_GUID) ? "LOW_LATENCY_HQ" :
(encodeConfig.presetGUID == NV_ENC_PRESET_LOW_LATENCY_HP_GUID) ? "LOW_LATENCY_HP" :
(encodeConfig.presetGUID == NV_ENC_PRESET_HQ_GUID) ? "HQ_PRESET" :
(encodeConfig.presetGUID == NV_ENC_PRESET_HP_GUID) ? "HP_PRESET" :
(encodeConfig.presetGUID == NV_ENC_PRESET_LOSSLESS_HP_GUID) ? "LOSSLESS_HP" : "LOW_LATENCY_DEFAULT");
printf("\n");
nvStatus = pEncoder->GetHWEncoder()->CreateEncoder(&encodeConfig);
if (nvStatus != NV_ENC_SUCCESS)
return 1;
nvStatus = pEncoder->AllocateIOBuffers(&encodeConfig);
if (nvStatus != NV_ENC_SUCCESS)
return 1;
unsigned long long lStart, lEnd, lFreq;
NvQueryPerformanceCounter(&lStart);
//start decoding thread
#ifdef _WIN32
HANDLE decodeThread = CreateThread(NULL, 0, DecodeProc, (LPVOID)pDecoder, 0, NULL);
#else
pthread_t pid;
pthread_create(&pid, NULL, DecodeProc, (void*)pDecoder);
#endif
//start encoding thread
int frmProcessed = 0;
int frmActual = 0;
while(!(pFrameQueue->isEndOfDecode() && pFrameQueue->isEmpty()) ) {
CUVIDPARSERDISPINFO pInfo;
if(pFrameQueue->dequeue(&pInfo)) {
CUdeviceptr dMappedFrame = 0;
unsigned int pitch;
CUVIDPROCPARAMS oVPP = { 0 };
oVPP.progressive_frame = pInfo.progressive_frame;
oVPP.second_field = 0;
oVPP.top_field_first = pInfo.top_field_first;
oVPP.unpaired_field = (pInfo.progressive_frame == 1 || pInfo.repeat_first_field <= 1);
cuvidMapVideoFrame(pDecoder->GetDecoder(), pInfo.picture_index, &dMappedFrame, &pitch, &oVPP);
// vvvvv emphasized: wrapping the mapped CUdeviceptr as a GpuMat and blurring vvvvv
gpu::GpuMat gpuInput = gpu::GpuMat(decodedH, decodedW, CV_8UC3, (void*)dMappedFrame, pitch);
gpu::GpuMat d_dst;
gpu::GpuMat d_buf;
gpu::GaussianBlur(gpuInput, d_dst, cv::Size(3, 3), 0);
cv::Mat result;
d_dst.download(result);
// ^^^^^ end of emphasized section ^^^^^
EncodeFrameConfig stEncodeConfig = { 0 };
NV_ENC_PIC_STRUCT picType = (pInfo.progressive_frame || pInfo.repeat_first_field >= 2 ? NV_ENC_PIC_STRUCT_FRAME :
(pInfo.top_field_first ? NV_ENC_PIC_STRUCT_FIELD_TOP_BOTTOM : NV_ENC_PIC_STRUCT_FIELD_BOTTOM_TOP));
// vvvvv emphasized: passing the processed (host) data to the encoder vvvvv
stEncodeConfig.dptr = result.data;//dMappedFrame;
// ^^^^^ end of emphasized section ^^^^^
stEncodeConfig.pitch = pitch;
stEncodeConfig.width = encodeConfig.width;
stEncodeConfig.height = encodeConfig.height;
int dropOrDuplicate = MatchFPS(fpsRatio, frmProcessed, frmActual);
for (int i = 0; i <= dropOrDuplicate; i++) {
pEncoder->EncodeFrame(&stEncodeConfig, picType);
frmActual++;
}
frmProcessed++;
// vvvvv emphasized: unmapping the decoded frame vvvvv
cuvidUnmapVideoFrame(pDecoder->GetDecoder(), dMappedFrame);
// ^^^^^ end of emphasized section ^^^^^
pFrameQueue->releaseFrame(&pInfo);
}
}
pEncoder->EncodeFrame(NULL, NV_ENC_PIC_STRUCT_FRAME, true);
#ifdef _WIN32
WaitForSingleObject(decodeThread, INFINITE);
#else
pthread_join(pid, NULL);
#endif
if (pEncoder->GetEncodedFrames() > 0)
{
NvQueryPerformanceCounter(&lEnd);
NvQueryPerformanceFrequency(&lFreq);
double elapsedTime = (double)(lEnd - lStart)/(double)lFreq;
printf("Total time: %fms, Decoded Frames: %d, Encoded Frames: %d, Average FPS: %f\n",
elapsedTime * 1000,
pDecoder->m_decodedFrames,
pEncoder->GetEncodedFrames(),
(float)pEncoder->GetEncodedFrames() / elapsedTime);
}
pEncoder->Deinitialize();
delete pDecoder;
delete pEncoder;
delete pFrameQueue;
cuvidCtxLockDestroy(ctxLock);
__cu(cuCtxDestroy(cudaCtx));
return 0;
}
I run it with the arguments "-i C:\test\input.h264 -o C:\test\output.h264 -size 352 288".
The decoded frame is in NV12 format.
What exactly is the CUDA error code returned? Can you check for CUDA errors and post them? This post will help you.
Is the member dptr expecting a device pointer while you are giving it a pointer to data stored on the host (a cv::Mat instead of a cv::cuda::GpuMat)? Can you try this:
stEncodeConfig.dptr = d_dst.data;
As I pointed out in the comments of your linked post, the decoded frame is in a raw format (i.e. YUV, etc.). You're providing the CV_8UC3 image format, which is incorrect.
To test whether GpuMat is being created from CUdeviceptr:
cv::cuda::GpuMat dimg(cv::Size(decodedW, decodedH),CV_8UC1, (void*)(dMappedFrame),pitch);
cv::Mat img;
dimg.download(img);
cv::imshow("Decoded Frame", img);
cv::waitKey(1);
For further details, see this link.
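Putting those pieces together, here is a minimal sketch (assuming the NV12 layout mentioned above and the old cv::gpu API from the question; untested): wrap only the luma plane of the mapped frame as CV_8UC1, blur it on the GPU, copy the result back, and keep handing the encoder the original device pointer rather than host data:
// Assumption: dMappedFrame points to an NV12 frame; only the Y (luma) plane
// is processed here, and the interleaved UV plane is left untouched.
gpu::GpuMat lumaIn(decodedH, decodedW, CV_8UC1, (void*)dMappedFrame, pitch);
gpu::GpuMat lumaBlurred;
gpu::GaussianBlur(lumaIn, lumaBlurred, cv::Size(3, 3), 0);
lumaBlurred.copyTo(lumaIn); // write the result back into the mapped device memory
stEncodeConfig.dptr = dMappedFrame; // still a device pointer, as NVENC expects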

How to increase BatchSize with Tensorflow's C++ API?

I took the code in https://gist.github.com/kyrs/9adf86366e9e4f04addb (which takes an OpenCV cv::Mat image as input and converts it to a tensor) and I use it to label images with the model inception_v3_2016_08_28_frozen.pb from the TensorFlow tutorial (https://www.tensorflow.org/tutorials/image_recognition#usage_with_the_c_api). Everything worked fine when using a batch size of 1. However, when I increase the batch size to 2 (or greater), the size of finalOutput (which is of type std::vector<tensorflow::Tensor>) is zero.
Here's the code to reproduce the error:
// Only for VisualStudio
#define COMPILER_MSVC
#define NOMINMAX
#include <string>
#include <iostream>
#include <fstream>
#include <opencv2/opencv.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include "tensorflow/core/public/session.h"
#include "tensorflow/core/platform/env.h"
#include "tensorflow/core/framework/tensor.h"
int batchSize = 2;
int height = 299;
int width = 299;
int depth = 3;
int mean = 0;
int stdev = 255;
// Set image paths
cv::String pathFilenameImg1 = "D:/IMGS/grace_hopper.jpg";
cv::String pathFilenameImg2 = "D:/IMGS/lenna.jpg";
// Set model paths
std::string graphFile = "D:/Tensorflow/models/inception_v3_2016_08_28_frozen.pb";
std::string labelfile = "D:/Tensorflow/models/imagenet_slim_labels.txt";
std::string InputName = "input";
std::string OutputName = "InceptionV3/Predictions/Reshape_1";
void read_prepare_image(cv::String pathImg, cv::Mat &imgPrepared) {
// Read Color image:
cv::Mat imgBGR = cv::imread(pathImg);
// Now we resize the image to fit Model's expected sizes:
cv::Size s(height, width);
cv::Mat imgResized;
cv::resize(imgBGR, imgResized, s, 0, 0, cv::INTER_CUBIC);
// Convert the image to float and normalize data:
imgResized.convertTo(imgPrepared, CV_32FC1);
imgPrepared = imgPrepared - mean;
imgPrepared = imgPrepared / stdev;
}
int main()
{
// Read and prepare images using OpenCV:
cv::Mat img1, img2;
read_prepare_image(pathFilenameImg1, img1);
read_prepare_image(pathFilenameImg2, img2);
// creating a Tensor for storing the data
tensorflow::Tensor input_tensor(tensorflow::DT_FLOAT, tensorflow::TensorShape({ batchSize, height, width, depth }));
auto input_tensor_mapped = input_tensor.tensor<float, 4>();
// Copy images data into the tensor:
for (int b = 0; b < batchSize; ++b) {
const float * source_data;
if (b == 0)
source_data = (float*)img1.data;
else
source_data = (float*)img2.data;
for (int y = 0; y < height; ++y) {
const float* source_row = source_data + (y * width * depth);
for (int x = 0; x < width; ++x) {
const float* source_pixel = source_row + (x * depth);
const float* source_B = source_pixel + 0;
const float* source_G = source_pixel + 1;
const float* source_R = source_pixel + 2;
input_tensor_mapped(b, y, x, 0) = *source_R;
input_tensor_mapped(b, y, x, 1) = *source_G;
input_tensor_mapped(b, y, x, 2) = *source_B;
}
}
}
// Load the graph:
tensorflow::GraphDef graph_def;
ReadBinaryProto(tensorflow::Env::Default(), graphFile, &graph_def);
// create a session with the graph
std::unique_ptr<tensorflow::Session> session_inception(tensorflow::NewSession(tensorflow::SessionOptions()));
session_inception->Create(graph_def);
// run the loaded graph
std::vector<tensorflow::Tensor> finalOutput;
session_inception->Run({ { InputName,input_tensor } }, { OutputName }, {}, &finalOutput);
// Get Top 5 classes:
std::cerr << "final output size = " << finalOutput.size() << std::endl;
tensorflow::Tensor output = std::move(finalOutput.at(0));
auto scores = output.flat<float>();
std::cerr << "scores size=" << scores.size() << std::endl;
std::ifstream label(labelfile);
std::string line;
std::vector<std::pair<float, std::string>> sorted;
for (unsigned int i = 0; i <= 1000; ++i) {
std::getline(label, line);
sorted.emplace_back(scores(i), line);
}
std::sort(sorted.begin(), sorted.end());
std::reverse(sorted.begin(), sorted.end());
std::cout << "size of the sorted file is " << sorted.size() << std::endl;
for (unsigned int i = 0; i< 5; ++i)
std::cout << "The output of the current graph has category " << sorted[i].second << " with probability " << sorted[i].first << std::endl;
}
Am I missing anything? Any ideas?
Thanks in advance!
I had the same problem. When I changed to the model used in https://github.com/tensorflow/tensorflow/tree/master/tensorflow/tools/benchmark (a different version of Inception), bigger batch sizes worked correctly.
Notice you need to change the input size from 299,299,3 to 224,224,3, and the input and output layer names to input:0 and output:0.
Probably the graph in the protobuf file had a fixed batch size of 1, and I was only changing the shape of the input, not the graph. The graph has to accept a variable batch size by setting the shape to (None, width, height, channels). This is done when you freeze the graph; since the graph we have is already frozen, there is no way to change the batch size at this point.
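To check this from the C++ side, here is a minimal sketch (assuming the input node is a Placeholder carrying a shape attribute) that inspects the frozen graph's batch dimension right after ReadBinaryProto:
// Print the batch dimension recorded in the frozen graph:
// -1 means variable, 1 means the graph was frozen with a fixed batch of 1.
for (const tensorflow::NodeDef& node : graph_def.node()) {
    if (node.name() == InputName && node.attr().count("shape")) {
        const tensorflow::TensorShapeProto& shape = node.attr().at("shape").shape();
        if (shape.dim_size() > 0)
            std::cerr << "input batch dim: " << shape.dim(0).size() << std::endl;
    }
}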

Openni opencv kinect Bad Memory allocation

Basically I've got a loop which goes through all the Kinect's depth pixels. If a pixel is greater than 3000mm, it sets the pixel value to black.
For some reason this works only at close range while pointed at a wall. If I pull the Kinect back (giving it a larger area to scan), I get a bad memory allocation error. My code can be found below; I get the bad memory allocation error inside the try/catch statement. Most of the code is from the OpenCV Kinect samples here and here.
I figured out the problem: the depth values are stored in a flat array rather than a matrix, so I need a better way of mapping a pixel's (x, y) coordinates to a location in that array than i = x + y*640.
#include <opencv.hpp>
#include <iostream>
#include <string>
#include <stdio.h>
#include <OpenNI.h>
using namespace std;
using namespace cv;
int main()
{
openni::Device device;
openni::VideoStream depth;
const char* device_uri = openni::ANY_DEVICE;
openni::Status ret = openni::OpenNI::initialize();
// Open
ret =device.open( device_uri );
ret = depth.create( device, openni::SENSOR_DEPTH );
if ( ret == openni::STATUS_OK )
{
// Start Depth
depth.start();
}
// Get Depth Stream Min-Max Value
int minDepthValue = depth.getMinPixelValue();
int maxDepthValue = depth.getMaxPixelValue();
//cout << "Depth min-Max Value : " << minDepthValue << "-" << maxDepthValue << endl;
// Frame Information Reference
openni::VideoFrameRef depthFrame;
// Get Sensor Resolution Information
int dImgWidth = depth.getVideoMode().getResolutionX();
int dImgHeight = depth.getVideoMode().getResolutionY();
// Depth Image Matrix
cv::Mat dImg = cv::Mat( dImgHeight, dImgWidth, CV_8UC3 );
Mat grey(480, 640, CV_8UC1);
for(;;)
{
depth.readFrame( &depthFrame );
openni::DepthPixel* depthImgRaw = (openni::DepthPixel*)depthFrame.getData();
for ( int i = 0 ; i < ( depthFrame.getDataSize() / sizeof( openni::DepthPixel ) ) ; i++ )
{
int idx = i * 3; // Grayscale
unsigned char* data = &dImg.data[idx];
int gray_scale = ( ( depthImgRaw[i] * 255 ) / ( maxDepthValue - minDepthValue ) );
data[0] = (unsigned char)~gray_scale;
data[1] = (unsigned char)~gray_scale;
data[2] = (unsigned char)~gray_scale;
}
openni::DepthPixel* depthpixels = (openni::DepthPixel*)depthFrame.getData();
cvtColor(dImg, grey, CV_RGB2GRAY);
int i ;
try{
for( int y =0; y < 480 ; y++){
//getting in to each pixel in a row
for(int x = 0; x < 640; x++){
//getting out the corresponding pixel value from the array
i = x+y*640;
if (depthpixels[i] >3000)
{
grey.at<unsigned char>(x,y) = 0;
}
}
}
}catch(exception e)
{cout << e.what() <<endl ;
cout <<depthpixels[i] <<endl ;
cout << i <<endl ;
}
// cv:imshow( "depth", dImg );
imshow("dpeth2", grey);
int k = cvWaitKey( 30 ); // About 30fps
if ( k == 0x1b )
break;
}
// Destroy Streams
depth.destroy();
// Close Device
device.close();
// Shutdown OpenNI
openni::OpenNI::shutdown();
return 0;
}
Solved the problem simply by swapping my x and y around (cv::Mat::at takes (row, column), i.e. (y, x), not (x, y)):
for( y =0; y < 480 ; y++)
{
//getting in to each pixel in a row
for( x = 0; x < 640; x++)
{
if (depthpixels[i]>1500)
{
grey.at<unsigned char >(y,x) = 0;
}
if (depthpixels[i] <500)
{
grey.at<unsigned char >(y,x) = 0;
}
i++;
}
}
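For reference, the same fix written with explicit index math instead of a running counter (equivalent at the 640x480 resolution used above):
for (int y = 0; y < 480; y++) {
    for (int x = 0; x < 640; x++) {
        int i = y * 640 + x; // flat-array index of pixel (x, y)
        if (depthpixels[i] > 1500 || depthpixels[i] < 500)
            grey.at<unsigned char>(y, x) = 0; // row (y) first, then column (x)
    }
}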
