Received throughput issue with saturated traffic - network-programming
I am using the NS3 (v3.13) Wi-Fi model in an infrastructure topology configured as follows (simulation file below):
Single AP (BSS)
Multiple STAs (stations)
Application duration = 10s
Saturated downlink traffic (OnOffApplication with OnTime=2s and OffTime=0) from AP to all STAs
Phy: 802.11a
Default YansWifiChannelHelper and YansWifiPhyHelper
Rate control: ConstantRateWifiManager
Mobility: ConstantPositionMobilityModel (STAs are positioned on a circle of 2 meters radius around the AP)
Everything generally works, but at a high bitrate (saturated traffic), when the number of STAs per BSS grows large, some STAs do not receive a single byte!
Experiments:
With OnOffApplication DataRate = 60Mb/s, PHY DataMode = OfdmRate54Mbps and 30 STAs: one STA receives packets at 7.2 Mb/s and another at 15.3 Mb/s (the other 28 STAs receive nothing).
With OnOffApplication DataRate = 60Mb/s, DataMode = OfdmRate6Mbps and 30 STAs: one STA receives packets at 1.95 Mb/s and another at 4.3 Mb/s (the other 28 STAs receive nothing).
I think the problem comes from the OnOffApplication configuration; how should I configure it to simulate full-buffer downlink traffic?
Thanks in advance for any suggestion.
#include "ns3/core-module.h"
#include "ns3/point-to-point-module.h"
#include "ns3/network-module.h"
#include "ns3/applications-module.h"
#include "ns3/wifi-module.h"
#include "ns3/mobility-module.h"
#include "ns3/csma-module.h"
#include "ns3/internet-module.h"
#include "ns3/flow-monitor-helper.h"
#include "ns3/flow-monitor-module.h"
#include "ns3/applications-module.h"
#include "ns3/internet-module.h"
#include "ns3/gnuplot.h"
#include "ns3/constant-velocity-helper.h"
#include "ns3/integer.h"
#include "ns3/mpi-interface.h"
#include "math.h"
#include <iostream>
/**
* PARAMETERS
*/
#define StaNb 30
#define Distance 2
#define Duration 10
#define DataRate 90000000 // note: unused below; the OnOff rate is set via a string attribute
#define PacketSize 1500 // note: unused below
#define couleur(param) printf("\033[%sm",param) // "couleur" (French for colour): sets the ANSI terminal colour
using namespace ns3;
class Experiment {
public:
Experiment();
void CreateArchi(void);
void CreateApplis();
private:
Ptr<ListPositionAllocator> positionAllocAp;
Ptr<ListPositionAllocator> positionAllocSta;
Ptr<GridPositionAllocator> positionAllocStaCouloir;
Ptr<RandomDiscPositionAllocator> positionAllocStaAmphi;
std::vector<Ptr<ConstantPositionMobilityModel> > constant;
NodeContainer m_wifiAP, m_wifiQSta;
NetDeviceContainer m_APDevice;
NetDeviceContainer m_QStaDevice;
YansWifiChannelHelper m_channel;
Ptr<YansWifiChannel> channel;
YansWifiPhyHelper m_phyLayer_Sta, m_phyLayer_AP;
WifiHelper m_wifi;
QosWifiMacHelper m_macSta, m_macAP;
InternetStackHelper m_stack;
Ipv4InterfaceContainer m_StaInterface;
Ipv4InterfaceContainer m_ApInterface;
Ssid m_ssid;
};
Experiment::Experiment() {
positionAllocStaCouloir = CreateObject<GridPositionAllocator>();
positionAllocAp = CreateObject<ListPositionAllocator>();
positionAllocSta = CreateObject<ListPositionAllocator>();
positionAllocStaAmphi = CreateObject<RandomDiscPositionAllocator>();
m_wifi = WifiHelper::Default();
constant.resize(StaNb + 1);
for (int i = 0; i < StaNb + 1; i++) {
constant[i] = CreateObject<ConstantPositionMobilityModel>();
}
}
void Experiment::CreateArchi(void) {
m_wifiQSta.Create(StaNb);
m_wifiAP.Create(1);
m_ssid = Ssid("BSS_circle");
m_channel = YansWifiChannelHelper::Default();
channel = m_channel.Create();
m_wifi.SetStandard(WIFI_PHY_STANDARD_80211a);
m_wifi.SetRemoteStationManager("ns3::ConstantRateWifiManager", "DataMode",
StringValue("OfdmRate6Mbps"));
m_phyLayer_Sta = YansWifiPhyHelper::Default();
m_phyLayer_AP = YansWifiPhyHelper::Default();
m_phyLayer_Sta.SetChannel(channel);
m_phyLayer_AP.SetChannel(channel);
positionAllocAp->Add(Vector3D(0.0, 0.0, 0.0));
MobilityHelper mobilityAp;
mobilityAp.SetPositionAllocator(positionAllocAp);
mobilityAp.SetMobilityModel("ns3::ConstantPositionMobilityModel");
mobilityAp.Install(m_wifiAP.Get(0));
constant[0]->SetPosition(Vector3D(0.0, 0.0, 0.0));
float deltaAngle = 2 * M_PI / StaNb;
float angle = 0.0;
double x = 0.0;
double y = 0.0;
for (int i = 0; i < StaNb; i++) {
x = cos(angle) * Distance;
y = sin(angle) * Distance;
positionAllocSta->Add(Vector3D(x, y, 0.0));
MobilityHelper mobilitySta;
mobilitySta.SetPositionAllocator(positionAllocSta);
mobilitySta.SetMobilityModel("ns3::ConstantPositionMobilityModel");
mobilitySta.Install(m_wifiQSta.Get(i));
constant[i + 1]->SetPosition(Vector3D(x, y, 0.0)); // constant[0] is the AP, so STA i maps to constant[i + 1]
angle += deltaAngle;
}
m_macSta = QosWifiMacHelper::Default();
m_macSta.SetType("ns3::StaWifiMac", "ActiveProbing", BooleanValue(true),
"Ssid", SsidValue(m_ssid));
m_macAP = QosWifiMacHelper::Default();
m_macAP.SetType("ns3::ApWifiMac", "Ssid", SsidValue(m_ssid),
"BeaconInterval", TimeValue(Time(std::string("100ms"))));
m_APDevice.Add(m_wifi.Install(m_phyLayer_AP, m_macAP, m_wifiAP));
for (int i = 0; i < StaNb; i++) {
m_QStaDevice.Add(
m_wifi.Install(m_phyLayer_Sta, m_macSta, m_wifiQSta.Get(i)));
}
m_stack.Install(m_wifiAP);
m_stack.Install(m_wifiQSta);
Ipv4AddressHelper address;
address.SetBase("192.168.1.0", "255.255.255.0");
m_ApInterface.Add(address.Assign(m_APDevice.Get(0)));
for (int i = 0; i < StaNb; i++) {
m_StaInterface.Add(address.Assign(m_QStaDevice.Get(i)));
}
Ipv4GlobalRoutingHelper::PopulateRoutingTables();
}
void Experiment::CreateApplis() {
ApplicationContainer source;
OnOffHelper onoff("ns3::UdpSocketFactory", Address());
onoff.SetAttribute("OnTime", RandomVariableValue(ConstantVariable(2)));
onoff.SetAttribute("OffTime", RandomVariableValue(ConstantVariable(0)));
onoff.SetAttribute("DataRate", StringValue("500kb/s"));
for (int i = 0; i < StaNb; i++) {
AddressValue remoteAddress(
InetSocketAddress(m_StaInterface.GetAddress(i), 5010));
onoff.SetAttribute("Remote", remoteAddress);
source.Add(onoff.Install(m_wifiAP.Get(0)));
}
// Start()/Stop() act on the whole container, so one call outside the loop suffices.
source.Start(Seconds(3.0));
source.Stop(Seconds(Duration));
ApplicationContainer sinks;
PacketSinkHelper packetSinkHelper("ns3::UdpSocketFactory",
Address(InetSocketAddress(Ipv4Address::GetAny(), 5010)));
for (int i = 0; i < StaNb; i++) {
sinks.Add(packetSinkHelper.Install(m_wifiQSta.Get(i)));
}
sinks.Start(Seconds(3.0));
sinks.Stop(Seconds(Duration));
}
int main(int argc, char *argv[]) {
Experiment exp = Experiment();
Config::SetDefault("ns3::WifiRemoteStationManager::RtsCtsThreshold",
StringValue("2346"));
exp.CreateArchi();
exp.CreateApplis();
FlowMonitorHelper flowmon;
Ptr<FlowMonitor> monitor = flowmon.InstallAll();
Simulator::Stop(Seconds(Duration));
Simulator::Run();
monitor->CheckForLostPackets();
Ptr<Ipv4FlowClassifier> classifier = DynamicCast<Ipv4FlowClassifier>(
flowmon.GetClassifier());
std::map<FlowId, FlowMonitor::FlowStats> stats = monitor->GetFlowStats();
int c = 0;
for (std::map<FlowId, FlowMonitor::FlowStats>::const_iterator i =
stats.begin(); i != stats.end(); ++i) {
Ipv4FlowClassifier::FiveTuple t = classifier->FindFlow(i->first);
std::cout << "Flux " << i->first << " (" << t.sourceAddress << " -> "
<< t.destinationAddress << ")\n";
std::cout << " Tx Bytes : " << i->second.txBytes << "\n";
std::cout << " Rx Bytes : " << i->second.rxBytes << "\n";
couleur("33");
std::cout << " Bitrate : "
<< i->second.rxBytes * 8.0
/ (i->second.timeLastRxPacket.GetSeconds()
- i->second.timeFirstRxPacket.GetSeconds())
/ 1000000 << " Mbps\n\n";
couleur("0");
if (i->second.rxBytes > 0)
c++;
}
std::cout << " Number of receiving nodes : " << c << "\n";
Simulator::Destroy();
}
I think the medium is too busy.
You need to tune down the OnOff data rate, e.g. to 1 Mb/s. In practice, a full-buffer 720p video stream needs no more than about 1 Mb/s.
You may also check the traces (pcap, ASCII, or NetAnim) to see whether packets are being dropped, packets are never sent, or there is a bug in your code.
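For reference, a minimal sketch of how the lower rate and the tracing could be wired into the attached program (the names onoff, m_phyLayer_AP, and m_APDevice come from the question's code; the trace file names are illustrative):
// Reduce the offered load per flow, as suggested above.
onoff.SetAttribute("DataRate", StringValue("1Mb/s"));
// Capture what the AP's device actually sends and receives.
m_phyLayer_AP.EnablePcap("bss-ap", m_APDevice.Get(0));
// ASCII traces for every device installed through this PHY helper.
AsciiTraceHelper asciiTrace;
m_phyLayer_AP.EnableAsciiAll(asciiTrace.CreateFileStream("bss.tr"));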
Related
I am trying to have stable flight with PX4 and ROS 2 offboard control
Hello guys, I have an offboard code which gives about 50 setpoints to the drone. It draws a spiral with those setpoints. My problem is I couldn't get smooth travel: at every setpoint the drone gives a sharp roll or pitch and then floats to the next setpoint. Is there a way to keep a stable velocity while passing the setpoints? Here is the code:
#include <px4_msgs/msg/offboard_control_mode.hpp>
#include <px4_msgs/msg/trajectory_setpoint.hpp>
#include <px4_msgs/msg/timesync.hpp>
#include <px4_msgs/msg/vehicle_command.hpp>
#include <px4_msgs/msg/vehicle_control_mode.hpp>
#include <px4_msgs/msg/vehicle_local_position.hpp>
#include <rclcpp/rclcpp.hpp>
#include <stdint.h>
#include <chrono>
#include <iostream>
#include "std_msgs/msg/string.hpp"
#include <math.h>

float X;
float Y;

using namespace std::chrono;
using namespace std::chrono_literals;
using namespace px4_msgs::msg;

class setpoint : public rclcpp::Node {
public:
    setpoint() : Node("setpoint") {
        offboard_control_mode_publisher_ = this->create_publisher<OffboardControlMode>("fmu/offboard_control_mode/in", 10);
        trajectory_setpoint_publisher_ = this->create_publisher<TrajectorySetpoint>("fmu/trajectory_setpoint/in", 10);
        vehicle_command_publisher_ = this->create_publisher<VehicleCommand>("fmu/vehicle_command/in", 10);

        // get common timestamp
        timesync_sub_ = this->create_subscription<px4_msgs::msg::Timesync>("fmu/timesync/out", 10,
            [this](const px4_msgs::msg::Timesync::UniquePtr msg) {
                timestamp_.store(msg->timestamp);
            });

        offboard_setpoint_counter_ = 0;

        auto sendCommands = [this]() -> void {
            if (offboard_setpoint_counter_ == 10) {
                // Change to Offboard mode after 10 setpoints
                this->publish_vehicle_command(VehicleCommand::VEHICLE_CMD_DO_SET_MODE, 1, 6);
                // Arm the vehicle
                this->arm();
            }
            //-------------
            subscription_ = this->create_subscription<px4_msgs::msg::VehicleLocalPosition>(
                "/fmu/vehicle_local_position/out",
#ifdef ROS_DEFAULT_API
                10,
#endif
                [this](const px4_msgs::msg::VehicleLocalPosition::UniquePtr msg) {
                    X = msg->x;
                    Y = msg->y;
                    std::cout << "\n\n\n\n\n\n\n\n\n\n";
                    std::cout << "RECEIVED VEHICLE GPS POSITION DATA" << std::endl;
                    std::cout << "==================================" << std::endl;
                    std::cout << "ts: " << msg->timestamp << std::endl;
                    //std::cout << "lat: " << msg->x << std::endl;
                    //std::cout << "lon: " << msg->y << std::endl;
                    std::cout << "lat: " << X << std::endl;
                    std::cout << "lon: " << Y << std::endl;
                    std::cout << "waypoint: " << waypoints[waypointIndex][0] << std::endl;
                    std::cout << "waypoint: " << waypoints[waypointIndex][1] << std::endl;
                    if ((waypoints[waypointIndex][0] + 0.3 > X && waypoints[waypointIndex][0] - 0.3 < X)
                        && (waypoints[waypointIndex][1] + 0.3 > Y && waypoints[waypointIndex][1] - 0.3 < Y)) {
                        waypointIndex++;
                        if (waypointIndex >= waypoints.size())
                            exit(0);
                        //waypointIndex = 0;
                        RCLCPP_INFO(this->get_logger(), "Next waypoint: %.2f %.2f %.2f",
                            waypoints[waypointIndex][0], waypoints[waypointIndex][1], waypoints[waypointIndex][2]);
                    }
                });
            //--------------
            // offboard_control_mode needs to be paired with trajectory_setpoint
            publish_offboard_control_mode();
            publish_trajectory_setpoint();
            // stop the counter after reaching 11
            if (offboard_setpoint_counter_ < 11) {
                offboard_setpoint_counter_++;
            }
        };
        /*
        auto nextWaypoint = [this]() -> void {
            waypointIndex++;
            if (waypointIndex >= waypoints.size())
                waypointIndex = 0;
            RCLCPP_INFO(this->get_logger(), "Next waypoint: %.2f %.2f %.2f",
                waypoints[waypointIndex][0], waypoints[waypointIndex][1], waypoints[waypointIndex][2]);
        };
        */
        commandTimer = this->create_wall_timer(100ms, sendCommands);
        //waypointTimer = this->create_wall_timer(2s, nextWaypoint); //EA
    }

    void arm() const;
    void disarm() const;
    void topic_callback() const;

private:
    std::vector<std::vector<float>> waypoints = {
        {0,0,-5,}, {2,0,-5,}, {2.35216,0.476806,-5,}, {2.57897,1.09037,-5,}, {2.64107,1.80686,-5,},
        {2.50814,2.58248,-5,}, {2.16121,3.36588,-5,}, {1.59437,4.10097,-5,}, {0.815842,4.73016,-5,}, {-0.151838,5.19778,-5,},
        {-1.27233,5.45355,-5,}, {-2.49688,5.45578,-5,}, {-3.76641,5.17438,-5,}, {-5.01428,4.59315,-5,}, {-6.1696,3.71161,-5,},
        {-7.16089,2.54591,-5,}, {-7.91994,1.12896,-5,}, {-8.38568,-0.490343,-5,}, {-8.50782,-2.24876,-5,}, {-8.25018,-4.07119,-5,},
        {-7.59329,-5.87384,-5,}, {-6.53644,-7.56803,-5,}, {-5.09871,-9.06439,-5,}, {-3.31919,-10.2773,-5,}, {-1.25611,-11.1293,-5,},
        {1.01499,-11.5555,-5,}, {3.40395,-11.5071,-5,}, {5.8096,-10.9548,-5,}, {8.12407,-9.89139,-5,}, {10.2375,-8.33272,-5,},
        {12.0431,-6.31859,-5,}, {13.4424,-3.91182,-5,}, {14.3502,-1.19649,-5,}, {14.6991,1.72493,-5,}, {14.4435,4.73543,-5,},
        {13.5626,7.70817,-5,}, {12.0624,10.5118,-5,}, {9.97696,13.0162,-5,}, {7.36759,15.0983,-5,}, {4.32167,16.6482,-5,},
        {0.949612,17.5744,-5,}, {-2.619,17.8084,-5,}, {-6.24045,17.3094,-5,}, {-9.76262,16.0665,-5,}, {-13.0314,14.1004,-5,},
        {-15.8974,11.4644,-5,}, {-18.2226,8.24237,-5,}, {-19.8868,4.54696,-5,}, {-20.7936,0.515337,-5,}, {-20.8754,-3.69574,-5,},
        {-20.0972,-7.91595,-5,}, {-20.8754,-3.69574,-5,}, {-20.7936,0.515337,-5,}, {-19.8868,4.54696,-5,}, {-18.2226,8.24237,-5,},
        {-15.8974,11.4644,-5,}, {-13.0314,14.1004,-5,}, {-9.76262,16.0665,-5,}, {-6.24045,17.3094,-5,}, {-2.619,17.8084,-5,},
        {0.949612,17.5744,-5,}, {4.32167,16.6482,-5,}, {7.36759,15.0983,-5,}, {9.97696,13.0162,-5,}, {12.0624,10.5118,-5,},
        {13.5626,7.70817,-5,}, {14.4435,4.73543,-5,}, {14.6991,1.72493,-5,}, {14.3502,-1.19649,-5,}, {13.4424,-3.91182,-5,},
        {12.0431,-6.31859,-5,}, {10.2375,-8.33272,-5,}, {8.12407,-9.89139,-5,}, {5.8096,-10.9548,-5,}, {3.40395,-11.5071,-5,},
        {1.01499,-11.5555,-5,}, {-1.25611,-11.1293,-5,}, {-3.31919,-10.2773,-5,}, {-5.09871,-9.06439,-5,}, {-6.53644,-7.56803,-5,},
        {-7.59329,-5.87384,-5,}, {-8.25018,-4.07119,-5,}, {-8.50782,-2.24876,-5,}, {-8.38568,-0.490343,-5,}, {-7.91994,1.12896,-5,},
        {-7.16089,2.54591,-5,}, {-6.1696,3.71161,-5,}, {-5.01428,4.59315,-5,}, {-3.76641,5.17438,-5,}, {-2.49688,5.45578,-5,},
        {-1.27233,5.45355,-5,}, {-0.151838,5.19778,-5,}, {0.815842,4.73016,-5,}, {1.59437,4.10097,-5,}, {2.16121,3.36588,-5,},
        {2.50814,2.58248,-5,}, {2.64107,1.80686,-5,}, {2.57897,1.09037,-5,}, {2.35216,0.476806,-5,}, {2,0,-5,},
        {0,0,-5,}, {0,0,0,}
    }; // Land
    int waypointIndex = 0;
    rclcpp::TimerBase::SharedPtr commandTimer;
    rclcpp::TimerBase::SharedPtr waypointTimer;
    rclcpp::Publisher<OffboardControlMode>::SharedPtr offboard_control_mode_publisher_;
    rclcpp::Publisher<TrajectorySetpoint>::SharedPtr trajectory_setpoint_publisher_;
    rclcpp::Publisher<VehicleCommand>::SharedPtr vehicle_command_publisher_;
    rclcpp::Subscription<px4_msgs::msg::Timesync>::SharedPtr timesync_sub_;
    rclcpp::Subscription<px4_msgs::msg::VehicleLocalPosition>::SharedPtr subscription_;
    std::atomic<uint64_t> timestamp_; //!< common synced timestamped
    uint64_t offboard_setpoint_counter_; //!< counter for the number of setpoints sent

    void publish_offboard_control_mode() const;
    void publish_trajectory_setpoint() const;
    void publish_vehicle_command(uint16_t command, float param1 = 0.0, float param2 = 0.0) const;
};

void setpoint::arm() const {
    publish_vehicle_command(VehicleCommand::VEHICLE_CMD_COMPONENT_ARM_DISARM, 1.0);
    RCLCPP_INFO(this->get_logger(), "Arm command send");
}

void setpoint::disarm() const {
    publish_vehicle_command(VehicleCommand::VEHICLE_CMD_COMPONENT_ARM_DISARM, 0.0);
    RCLCPP_INFO(this->get_logger(), "Disarm command send");
}

void setpoint::publish_offboard_control_mode() const {
    OffboardControlMode msg{};
    msg.timestamp = timestamp_.load();
    msg.position = true;
    msg.velocity = false;
    msg.acceleration = false;
    msg.attitude = false;
    msg.body_rate = false;
    offboard_control_mode_publisher_->publish(msg);
}

void setpoint::publish_trajectory_setpoint() const {
    TrajectorySetpoint msg{};
    msg.timestamp = timestamp_.load();
    msg.position = {waypoints[waypointIndex][0], waypoints[waypointIndex][1], waypoints[waypointIndex][2]};
    msg.yaw = std::nanf("0"); //-3.14; // [-PI:PI]
    trajectory_setpoint_publisher_->publish(msg);
}

void setpoint::publish_vehicle_command(uint16_t command, float param1, float param2) const {
    VehicleCommand msg{};
    msg.timestamp = timestamp_.load();
    msg.param1 = param1;
    msg.param2 = param2;
    msg.command = command;
    msg.target_system = 1;
    msg.target_component = 1;
    msg.source_system = 1;
    msg.source_component = 1;
    msg.from_external = true;
    vehicle_command_publisher_->publish(msg);
}

int main(int argc, char* argv[]) {
    std::cout << "Starting setpoint node..." << std::endl;
    setvbuf(stdout, NULL, _IONBF, BUFSIZ);
    rclcpp::init(argc, argv);
    rclcpp::spin(std::make_shared<setpoint>());
    rclcpp::shutdown();
    return 0;
}
We send the setpoints to the controller by giving reference points. The aircraft then tries to maneuver to the given points via its control strategy (usually PID). Therefore, to get a smooth maneuver, it is usually suggested to give a series of discrete points between two waypoints, i.e., a trajectory parameterized by time, as sketched below. That should resolve the abrupt motion of your UAV. I'm no expert, but I hope this helps.
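To illustrate that idea, here is a minimal sketch (not from the original code; interpolate, from, to, and steps are hypothetical names) that generates evenly spaced intermediate setpoints between two waypoints, which could then be published one per timer tick:
#include <array>
#include <vector>

// Linearly interpolate `steps` setpoints between two waypoints so the
// controller follows a time-parameterized path instead of jumping.
std::vector<std::array<float, 3>> interpolate(const std::array<float, 3>& from,
                                              const std::array<float, 3>& to,
                                              int steps) {
    std::vector<std::array<float, 3>> points;
    for (int s = 1; s <= steps; ++s) {
        float t = static_cast<float>(s) / static_cast<float>(steps); // 0 < t <= 1
        points.push_back({from[0] + t * (to[0] - from[0]),
                          from[1] + t * (to[1] - from[1]),
                          from[2] + t * (to[2] - from[2])});
    }
    return points;
}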
Why is OpenMP not working faster?
I am using OpenMP to convert an image from RGB to grayscale, but OpenMP is not faster than the normal version, and I wonder why. Why is a single thread faster than multiple threads? How can I speed it up? Here is the code:
#include <iostream>
#include <omp.h>
#include <opencv2/opencv.hpp>

using namespace std;
using namespace cv;

int main() {
    Mat image = imread("../family.jpg");
    Mat gray(image.rows, image.cols, CV_8UC1);
    time_t start, stop;

    start = clock();
    for (int i = 0; i < image.rows; i++)
        for (int j = 0; j < image.cols; j++) {
            gray.at<uchar>(i,j) = image.at<cv::Vec3b>(i,j)[0]*0.114
                                + image.at<cv::Vec3b>(i,j)[1]*0.587
                                + image.at<cv::Vec3b>(i,j)[2]*0.299;
        }
    stop = clock() - start;
    cout << "time: " << stop * 1.0 / CLOCKS_PER_SEC << endl;

    start = clock();
    omp_set_num_threads(4);
    #pragma omp parallel
    {
        #pragma omp for
        for (int i = 0; i < image.rows; i++)
            for (int j = 0; j < image.cols; j++) {
                gray.at<uchar>(i,j) = image.at<cv::Vec3b>(i,j)[0]*0.114
                                    + image.at<cv::Vec3b>(i,j)[1]*0.587
                                    + image.at<cv::Vec3b>(i,j)[2]*0.299;
            }
    }
    stop = clock() - start;
    cout << "time: " << stop * 1.0 / CLOCKS_PER_SEC << endl;
    return 0;
}
Result:
normal time: 0.035253
openmp time: 0.069894
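One caveat about the benchmark itself (an observation, not part of the original post): on Linux, clock() returns CPU time summed over all threads, so a parallel region can report a larger value even when wall-clock time went down. omp_get_wtime() measures wall time instead; a sketch reusing the image and gray Mats from the question:
// Time the parallel loop with wall-clock time rather than accumulated CPU time.
double t0 = omp_get_wtime();
#pragma omp parallel for
for (int i = 0; i < image.rows; i++)
    for (int j = 0; j < image.cols; j++) {
        gray.at<uchar>(i, j) = image.at<cv::Vec3b>(i, j)[0] * 0.114
                             + image.at<cv::Vec3b>(i, j)[1] * 0.587
                             + image.at<cv::Vec3b>(i, j)[2] * 0.299;
    }
double t1 = omp_get_wtime();
cout << "wall time: " << (t1 - t0) << " s" << endl;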
How to turn an OpenCV GpuMat into a CUdeviceptr?
I was modifying the NVTranscoder project from the Video_Codec_SDK_8.0.14 in order to add some signal-processing work to the video frames. However, I encountered some problems when turning the GpuMat into a CUdeviceptr. I was wondering how I can turn the GpuMat into a CUdeviceptr. After I perform the blurring function (emphasized below), I want to turn the processed Mat into a CUdeviceptr. Besides, the part converting the CUdeviceptr into a GpuMat is also wrong, as it shows that gpuInput cannot read memory. Can anyone also point out other problems in my implementation? The code is as follows:
#include <time.h>
#ifdef _WIN32
#include <windows.h>
#else
#include <pthread.h>
#endif
#include <stdio.h>
#include <string.h>
#include "dynlink_cuda.h" // <cuda.h>
#include "VideoDecoder.h"
#include "VideoEncoder.h"
#include "../common/inc/nvUtils.h"
#include <opencv2/opencv.hpp>
#include "opencv2/gpu/gpu.hpp"

using namespace cv;

#ifdef _WIN32
DWORD WINAPI DecodeProc(LPVOID lpParameter) {
    CudaDecoder* pDecoder = (CudaDecoder*)lpParameter;
    pDecoder->Start();
    return 0;
}
#else
void* DecodeProc(void *arg) {
    CudaDecoder* pDecoder = (CudaDecoder*)arg;
    pDecoder->Start();
    return NULL;
}
#endif

int MatchFPS(const float fpsRatio, int decodedFrames, int encodedFrames) {
    if (fpsRatio < 1.f) {
        // need to drop frame
        if (decodedFrames * fpsRatio < (encodedFrames + 1)) {
            return -1;
        }
    } else if (fpsRatio > 1.f) {
        // need to duplicate frame
        int duplicate = 0;
        while (decodedFrames*fpsRatio > encodedFrames + duplicate + 1) {
            duplicate++;
        }
        return duplicate;
    }
    return 0;
}

void PrintHelp() {
    printf("Usage : NvTranscoder \n"
        "-i <string> Specify input .h264 file\n"
        "-o <string> Specify output bitstream file\n"
        "\n### Optional parameters ###\n"
        "-size <int int> Specify output resolution <width height>\n"
        "-codec <integer> Specify the codec \n"
        " 0: H264\n"
        " 1: HEVC\n"
        "-preset <string> Specify the preset for encoder settings\n"
        " hq : nvenc HQ \n"
        " hp : nvenc HP \n"
        " lowLatencyHP : nvenc low latency HP \n"
        " lowLatencyHQ : nvenc low latency HQ \n"
        " lossless : nvenc Lossless HP \n"
        "-fps <integer> Specify encoding frame rate\n"
        "-goplength <integer> Specify gop length\n"
        "-numB <integer> Specify number of B frames\n"
        "-bitrate <integer> Specify the encoding average bitrate\n"
        "-vbvMaxBitrate <integer> Specify the vbv max bitrate\n"
        "-vbvSize <integer> Specify the encoding vbv/hrd buffer size\n"
        "-rcmode <integer> Specify the rate control mode\n"
        " 0: Constant QP mode\n"
        " 1: Variable bitrate mode\n"
        " 2: Constant bitrate mode\n"
        " 8: low-delay CBR, high quality\n"
        " 16: CBR, high quality (slower)\n"
        " 32: VBR, high quality (slower)\n"
        "-qp <integer> Specify qp for Constant QP mode\n"
        "-i_qfactor <float> Specify qscale difference between I-frames and P-frames\n"
        "-b_qfactor <float> Specify qscale difference between P-frames and B-frames\n"
        "-i_qoffset <float> Specify qscale offset between I-frames and P-frames\n"
        "-b_qoffset <float> Specify qscale offset between P-frames and B-frames\n"
        "-deviceID <integer> Specify the GPU device on which encoding will take place\n"
        "-help Prints Help Information\n\n"
    );
}

int main(int argc, char* argv[]) {
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
    typedef HMODULE CUDADRIVER;
#else
    typedef void *CUDADRIVER;
#endif
    CUDADRIVER hHandleDriver = 0;
    __cu(cuInit(0, __CUDA_API_VERSION, hHandleDriver));
    __cu(cuvidInit(0));

    EncodeConfig encodeConfig = { 0 };
    encodeConfig.endFrameIdx = INT_MAX;
    encodeConfig.bitrate = 5000000;
    encodeConfig.rcMode = NV_ENC_PARAMS_RC_CONSTQP;
    encodeConfig.gopLength = NVENC_INFINITE_GOPLENGTH;
    encodeConfig.codec = NV_ENC_H264;
    encodeConfig.fps = 0;
    encodeConfig.qp = 28;
    encodeConfig.i_quant_factor = DEFAULT_I_QFACTOR;
    encodeConfig.b_quant_factor = DEFAULT_B_QFACTOR;
    encodeConfig.i_quant_offset = DEFAULT_I_QOFFSET;
    encodeConfig.b_quant_offset = DEFAULT_B_QOFFSET;
    encodeConfig.presetGUID = NV_ENC_PRESET_DEFAULT_GUID;
    encodeConfig.pictureStruct = NV_ENC_PIC_STRUCT_FRAME;

    NVENCSTATUS nvStatus = CNvHWEncoder::ParseArguments(&encodeConfig, argc, argv);
    if (nvStatus != NV_ENC_SUCCESS) {
        PrintHelp();
        return 1;
    }
    if (!encodeConfig.inputFileName || !encodeConfig.outputFileName) {
        PrintHelp();
        return 1;
    }
    encodeConfig.fOutput = fopen(encodeConfig.outputFileName, "wb");
    if (encodeConfig.fOutput == NULL) {
        PRINTERR("Failed to create \"%s\"\n", encodeConfig.outputFileName);
        return 1;
    }

    //init cuda
    CUcontext cudaCtx;
    CUdevice device;
    __cu(cuDeviceGet(&device, encodeConfig.deviceID));
    __cu(cuCtxCreate(&cudaCtx, CU_CTX_SCHED_AUTO, device));
    CUcontext curCtx;
    CUvideoctxlock ctxLock;
    __cu(cuCtxPopCurrent(&curCtx));
    __cu(cuvidCtxLockCreate(&ctxLock, curCtx));

    CudaDecoder* pDecoder = new CudaDecoder;
    FrameQueue* pFrameQueue = new CUVIDFrameQueue(ctxLock);
    pDecoder->InitVideoDecoder(encodeConfig.inputFileName, ctxLock, pFrameQueue, encodeConfig.width, encodeConfig.height);

    int decodedW, decodedH, decodedFRN, decodedFRD, isProgressive;
    pDecoder->GetCodecParam(&decodedW, &decodedH, &decodedFRN, &decodedFRD, &isProgressive);
    if (decodedFRN <= 0 || decodedFRD <= 0) {
        decodedFRN = 30;
        decodedFRD = 1;
    }
    if (encodeConfig.width <= 0 || encodeConfig.height <= 0) {
        encodeConfig.width = decodedW;
        encodeConfig.height = decodedH;
    }
    float fpsRatio = 1.f;
    if (encodeConfig.fps <= 0) {
        encodeConfig.fps = decodedFRN / decodedFRD;
    } else {
        fpsRatio = (float)encodeConfig.fps * decodedFRD / decodedFRN;
    }
    encodeConfig.pictureStruct = (isProgressive ? NV_ENC_PIC_STRUCT_FRAME : 0);
    pFrameQueue->init(encodeConfig.width, encodeConfig.height);

    VideoEncoder* pEncoder = new VideoEncoder(ctxLock);
    assert(pEncoder->GetHWEncoder());
    nvStatus = pEncoder->GetHWEncoder()->Initialize(cudaCtx, NV_ENC_DEVICE_TYPE_CUDA);
    if (nvStatus != NV_ENC_SUCCESS)
        return 1;
    encodeConfig.presetGUID = pEncoder->GetHWEncoder()->GetPresetGUID(encodeConfig.encoderPreset, encodeConfig.codec);

    printf("Encoding input : \"%s\"\n", encodeConfig.inputFileName);
    printf(" output : \"%s\"\n", encodeConfig.outputFileName);
    printf(" codec : \"%s\"\n", encodeConfig.codec == NV_ENC_HEVC ? "HEVC" : "H264");
    printf(" size : %dx%d\n", encodeConfig.width, encodeConfig.height);
    printf(" bitrate : %d bits/sec\n", encodeConfig.bitrate);
    printf(" vbvMaxBitrate : %d bits/sec\n", encodeConfig.vbvMaxBitrate);
    printf(" vbvSize : %d bits\n", encodeConfig.vbvSize);
    printf(" fps : %d frames/sec\n", encodeConfig.fps);
    printf(" rcMode : %s\n", encodeConfig.rcMode == NV_ENC_PARAMS_RC_CONSTQP ? "CONSTQP" :
        encodeConfig.rcMode == NV_ENC_PARAMS_RC_VBR ? "VBR" :
        encodeConfig.rcMode == NV_ENC_PARAMS_RC_CBR ? "CBR" :
        encodeConfig.rcMode == NV_ENC_PARAMS_RC_VBR_MINQP ? "VBR MINQP (deprecated)" :
        encodeConfig.rcMode == NV_ENC_PARAMS_RC_CBR_LOWDELAY_HQ ? "CBR_LOWDELAY_HQ" :
        encodeConfig.rcMode == NV_ENC_PARAMS_RC_CBR_HQ ? "CBR_HQ" :
        encodeConfig.rcMode == NV_ENC_PARAMS_RC_VBR_HQ ? "VBR_HQ" : "UNKNOWN");
    if (encodeConfig.gopLength == NVENC_INFINITE_GOPLENGTH)
        printf(" goplength : INFINITE GOP \n");
    else
        printf(" goplength : %d \n", encodeConfig.gopLength);
    printf(" B frames : %d \n", encodeConfig.numB);
    printf(" QP : %d \n", encodeConfig.qp);
    printf(" preset : %s\n", (encodeConfig.presetGUID == NV_ENC_PRESET_LOW_LATENCY_HQ_GUID) ? "LOW_LATENCY_HQ" :
        (encodeConfig.presetGUID == NV_ENC_PRESET_LOW_LATENCY_HP_GUID) ? "LOW_LATENCY_HP" :
        (encodeConfig.presetGUID == NV_ENC_PRESET_HQ_GUID) ? "HQ_PRESET" :
        (encodeConfig.presetGUID == NV_ENC_PRESET_HP_GUID) ? "HP_PRESET" :
        (encodeConfig.presetGUID == NV_ENC_PRESET_LOSSLESS_HP_GUID) ? "LOSSLESS_HP" : "LOW_LATENCY_DEFAULT");
    printf("\n");

    nvStatus = pEncoder->GetHWEncoder()->CreateEncoder(&encodeConfig);
    if (nvStatus != NV_ENC_SUCCESS)
        return 1;
    nvStatus = pEncoder->AllocateIOBuffers(&encodeConfig);
    if (nvStatus != NV_ENC_SUCCESS)
        return 1;

    unsigned long long lStart, lEnd, lFreq;
    NvQueryPerformanceCounter(&lStart);

    //start decoding thread
#ifdef _WIN32
    HANDLE decodeThread = CreateThread(NULL, 0, DecodeProc, (LPVOID)pDecoder, 0, NULL);
#else
    pthread_t pid;
    pthread_create(&pid, NULL, DecodeProc, (void*)pDecoder);
#endif

    //start encoding thread
    int frmProcessed = 0;
    int frmActual = 0;
    while (!(pFrameQueue->isEndOfDecode() && pFrameQueue->isEmpty())) {
        CUVIDPARSERDISPINFO pInfo;
        if (pFrameQueue->dequeue(&pInfo)) {
            CUdeviceptr dMappedFrame = 0;
            unsigned int pitch;
            CUVIDPROCPARAMS oVPP = { 0 };
            oVPP.progressive_frame = pInfo.progressive_frame;
            oVPP.second_field = 0;
            oVPP.top_field_first = pInfo.top_field_first;
            oVPP.unpaired_field = (pInfo.progressive_frame == 1 || pInfo.repeat_first_field <= 1);
            cuvidMapVideoFrame(pDecoder->GetDecoder(), pInfo.picture_index, &dMappedFrame, &pitch, &oVPP);

            // vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
            gpu::GpuMat gpuInput = gpu::GpuMat(decodedH, decodedW, CV_8UC3, (void*)dMappedFrame, pitch);
            gpu::GpuMat d_dst;
            gpu::GpuMat d_buf;
            gpu::GaussianBlur(gpuInput, d_dst, cv::Size(3, 3), 0);
            cv::Mat result;
            d_dst.download(result);
            // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

            EncodeFrameConfig stEncodeConfig = { 0 };
            NV_ENC_PIC_STRUCT picType = (pInfo.progressive_frame || pInfo.repeat_first_field >= 2 ?
                NV_ENC_PIC_STRUCT_FRAME :
                (pInfo.top_field_first ? NV_ENC_PIC_STRUCT_FIELD_TOP_BOTTOM : NV_ENC_PIC_STRUCT_FIELD_BOTTOM_TOP));

            // vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
            stEncodeConfig.dptr = result.data; //dMappedFrame;
            // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
            stEncodeConfig.pitch = pitch;
            stEncodeConfig.width = encodeConfig.width;
            stEncodeConfig.height = encodeConfig.height;

            int dropOrDuplicate = MatchFPS(fpsRatio, frmProcessed, frmActual);
            for (int i = 0; i <= dropOrDuplicate; i++) {
                pEncoder->EncodeFrame(&stEncodeConfig, picType);
                frmActual++;
            }
            frmProcessed++;

            // vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
            cuvidUnmapVideoFrame(pDecoder->GetDecoder(), dMappedFrame);
            // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
            pFrameQueue->releaseFrame(&pInfo);
        }
    }
    pEncoder->EncodeFrame(NULL, NV_ENC_PIC_STRUCT_FRAME, true);

#ifdef _WIN32
    WaitForSingleObject(decodeThread, INFINITE);
#else
    pthread_join(pid, NULL);
#endif

    if (pEncoder->GetEncodedFrames() > 0) {
        NvQueryPerformanceCounter(&lEnd);
        NvQueryPerformanceFrequency(&lFreq);
        double elapsedTime = (double)(lEnd - lStart) / (double)lFreq;
        printf("Total time: %fms, Decoded Frames: %d, Encoded Frames: %d, Average FPS: %f\n",
            elapsedTime * 1000, pDecoder->m_decodedFrames, pEncoder->GetEncodedFrames(),
            (float)pEncoder->GetEncodedFrames() / elapsedTime);
    }
    pEncoder->Deinitialize();
    delete pDecoder;
    delete pEncoder;
    delete pFrameQueue;
    cuvidCtxLockDestroy(ctxLock);
    __cu(cuCtxDestroy(cudaCtx));
    return 0;
}
I run it with the arguments "-i C:\test\input.h264 -o C:\test\output.h264 -size 352 288". The decoded frame is in NV12 format.
What exactly is the CUDA error code returned? Can you check for CUDA errors and post them? This post will help you. Is the member dptr expecting a device pointer while you are giving it a pointer to data stored on the host (a cv::Mat instead of a cv::cuda::GpuMat)? Can you try this: stEncodeConfig.dptr = dst.data;
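For the error check suggested above, here is a small sketch for driver-API calls such as cuvidMapVideoFrame (the CHECK_CU macro name is illustrative; cuGetErrorString is a CUDA driver-API function):
#include <cstdio>
#include <cuda.h>

// Print a readable message whenever a driver-API call fails.
#define CHECK_CU(call) \
    do { \
        CUresult r = (call); \
        if (r != CUDA_SUCCESS) { \
            const char* msg = NULL; \
            cuGetErrorString(r, &msg); \
            fprintf(stderr, "CUDA error %d: %s\n", (int)r, msg ? msg : "unknown"); \
        } \
    } while (0)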
As I pointed out in the comments of your linked post, the decoded frame is in raw format (i.e. YUV, etc.). You're providing the CV_8UC3 image format, which is incorrect. To test whether the GpuMat is being created from the CUdeviceptr:
cv::cuda::GpuMat dimg(cv::Size(decodedW, decodedH), CV_8UC1, (void*)(dMappedFrame), pitch);
cv::Mat img;
dimg.download(img);
cv::imshow("Decoded Frame", img);
cv::waitKey(1);
For further details, see this link.
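On the other half of the question (going back from a GpuMat to a CUdeviceptr), no copy is needed in principle, because GpuMat::data already points to device memory. A sketch, assuming the blurred frame d_dst from the question is kept on the GPU rather than downloaded:
#include <cstdint> // for uintptr_t

// GpuMat::data is already a device pointer; cast it rather than downloading.
// Note the encoder must then use the GpuMat's own row pitch (step), which
// may differ from the decoder's pitch.
CUdeviceptr dptr = (CUdeviceptr)(uintptr_t)d_dst.data;
size_t dstPitch = d_dst.step;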
How to increase BatchSize with Tensorflow's C++ API?
I took the code in https://gist.github.com/kyrs/9adf86366e9e4f04addb (which takes an OpenCV cv::Mat image as input and converts it to a tensor) and I use it to label images with the model inception_v3_2016_08_28_frozen.pb from the TensorFlow tutorial (https://www.tensorflow.org/tutorials/image_recognition#usage_with_the_c_api). Everything worked fine with a batch size of 1. However, when I increase the batch size to 2 (or greater), the size of finalOutput (which is of type std::vector<tensorflow::Tensor>) is zero. Here's the code to reproduce the error:
// Only for Visual Studio
#define COMPILER_MSVC
#define NOMINMAX

#include <string>
#include <iostream>
#include <fstream>
#include <opencv2/opencv.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include "tensorflow/core/public/session.h"
#include "tensorflow/core/platform/env.h"
#include "tensorflow/core/framework/tensor.h"

int batchSize = 2;
int height = 299;
int width = 299;
int depth = 3;
int mean = 0;
int stdev = 255;

// Set image paths
cv::String pathFilenameImg1 = "D:/IMGS/grace_hopper.jpg";
cv::String pathFilenameImg2 = "D:/IMGS/lenna.jpg";

// Set model paths
std::string graphFile = "D:/Tensorflow/models/inception_v3_2016_08_28_frozen.pb";
std::string labelfile = "D:/Tensorflow/models/imagenet_slim_labels.txt";
std::string InputName = "input";
std::string OutputName = "InceptionV3/Predictions/Reshape_1";

void read_prepare_image(cv::String pathImg, cv::Mat &imgPrepared) {
    // Read Color image:
    cv::Mat imgBGR = cv::imread(pathImg);
    // Now we resize the image to fit Model's expected sizes:
    cv::Size s(height, width);
    cv::Mat imgResized;
    cv::resize(imgBGR, imgResized, s, 0, 0, cv::INTER_CUBIC);
    // Convert the image to float and normalize data:
    imgResized.convertTo(imgPrepared, CV_32FC1);
    imgPrepared = imgPrepared - mean;
    imgPrepared = imgPrepared / stdev;
}

int main() {
    // Read and prepare images using OpenCV:
    cv::Mat img1, img2;
    read_prepare_image(pathFilenameImg1, img1);
    read_prepare_image(pathFilenameImg2, img2);

    // creating a Tensor for storing the data
    tensorflow::Tensor input_tensor(tensorflow::DT_FLOAT,
        tensorflow::TensorShape({ batchSize, height, width, depth }));
    auto input_tensor_mapped = input_tensor.tensor<float, 4>();

    // Copy images data into the tensor:
    for (int b = 0; b < batchSize; ++b) {
        const float * source_data;
        if (b == 0)
            source_data = (float*)img1.data;
        else
            source_data = (float*)img2.data;
        for (int y = 0; y < height; ++y) {
            const float* source_row = source_data + (y * width * depth);
            for (int x = 0; x < width; ++x) {
                const float* source_pixel = source_row + (x * depth);
                const float* source_B = source_pixel + 0;
                const float* source_G = source_pixel + 1;
                const float* source_R = source_pixel + 2;
                input_tensor_mapped(b, y, x, 0) = *source_R;
                input_tensor_mapped(b, y, x, 1) = *source_G;
                input_tensor_mapped(b, y, x, 2) = *source_B;
            }
        }
    }

    // Load the graph:
    tensorflow::GraphDef graph_def;
    ReadBinaryProto(tensorflow::Env::Default(), graphFile, &graph_def);

    // create a session with the graph
    std::unique_ptr<tensorflow::Session> session_inception(
        tensorflow::NewSession(tensorflow::SessionOptions()));
    session_inception->Create(graph_def);

    // run the loaded graph
    std::vector<tensorflow::Tensor> finalOutput;
    session_inception->Run({ { InputName, input_tensor } }, { OutputName }, {}, &finalOutput);

    // Get Top 5 classes:
    std::cerr << "final output size = " << finalOutput.size() << std::endl;
    tensorflow::Tensor output = std::move(finalOutput.at(0));
    auto scores = output.flat<float>();
    std::cerr << "scores size=" << scores.size() << std::endl;

    std::ifstream label(labelfile);
    std::string line;
    std::vector<std::pair<float, std::string>> sorted;
    for (unsigned int i = 0; i <= 1000; ++i) {
        std::getline(label, line);
        sorted.emplace_back(scores(i), line);
    }
    std::sort(sorted.begin(), sorted.end());
    std::reverse(sorted.begin(), sorted.end());
    std::cout << "size of the sorted file is " << sorted.size() << std::endl;
    for (unsigned int i = 0; i < 5; ++i)
        std::cout << "The output of the current graph has category " << sorted[i].second
                  << " with probability " << sorted[i].first << std::endl;
}
Do I miss anything? Any ideas? Thanks in advance!
I had the same problem. When I changed to the model used in https://github.com/tensorflow/tensorflow/tree/master/tensorflow/tools/benchmark (a different version of Inception), bigger batch sizes worked correctly. Note that you need to change the input size from 299,299,3 to 224,224,3, and the input and output layer names to input:0 and output:0.
Probably the graph in the protobuf file had a fixed batch size of 1, and I was only changing the shape of the input, not the graph. The graph has to accept a variable batch size by setting the shape to (None, width, height, channels). This is done when you freeze the graph. Since the graph we have is already frozen, there is no way to change the batch size at this point.
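A concrete way to surface that mismatch (a debugging sketch, not from the original post): Session::Run returns a tensorflow::Status that the reproduction code discards, and with a fixed-batch graph the status message names the offending shape instead of leaving finalOutput silently empty. Using the names from the question's code:
std::vector<tensorflow::Tensor> finalOutput;
tensorflow::Status status = session_inception->Run(
    { { InputName, input_tensor } }, { OutputName }, {}, &finalOutput);
if (!status.ok()) {
    // Prints the shape/dimension error reported by the graph.
    std::cerr << status.ToString() << std::endl;
    return 1;
}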
OpenNI OpenCV Kinect bad memory allocation
Basically I've got a loop which goes through all the Kinect's depth pixels. If they are greater than 3000 mm, it sets the pixel value to black. For some reason this only works at close range while pointed at a wall. If I pull the Kinect back (giving it a larger area to scan), I get a bad memory allocation error inside the try/catch statement below. Most of the code is from the OpenCV Kinect samples here and here. I figured out the problem: the depth values are stored in an array instead of a matrix, so I need a better way of finding which array location the x,y of a pixel (starting from 1,1) points to, instead of i = x + y*640.
#include <opencv.hpp>
#include <iostream>
#include <string>
#include <stdio.h>
#include <OpenNI.h>

using namespace std;
using namespace cv;

int main() {
    openni::Device device;
    openni::VideoStream depth;
    const char* device_uri = openni::ANY_DEVICE;
    openni::Status ret = openni::OpenNI::initialize();

    // Open
    ret = device.open(device_uri);
    ret = depth.create(device, openni::SENSOR_DEPTH);
    if (ret == openni::STATUS_OK) {
        // Start Depth
        depth.start();
    }

    // Get Depth Stream Min-Max Value
    int minDepthValue = depth.getMinPixelValue();
    int maxDepthValue = depth.getMaxPixelValue();
    //cout << "Depth min-Max Value : " << minDepthValue << "-" << maxDepthValue << endl;

    // Frame Information Reference
    openni::VideoFrameRef depthFrame;

    // Get Sensor Resolution Information
    int dImgWidth = depth.getVideoMode().getResolutionX();
    int dImgHeight = depth.getVideoMode().getResolutionY();

    // Depth Image Matrix
    cv::Mat dImg = cv::Mat(dImgHeight, dImgWidth, CV_8UC3);
    Mat grey = cvCreateImage(cvSize(640, 480), 8, 1);

    for (;;) {
        depth.readFrame(&depthFrame);
        openni::DepthPixel* depthImgRaw = (openni::DepthPixel*)depthFrame.getData();
        for (int i = 0; i < (depthFrame.getDataSize() / sizeof(openni::DepthPixel)); i++) {
            int idx = i * 3;
            // Grayscale
            unsigned char* data = &dImg.data[idx];
            int gray_scale = ((depthImgRaw[i] * 255) / (maxDepthValue - minDepthValue));
            data[0] = (unsigned char)~gray_scale;
            data[1] = (unsigned char)~gray_scale;
            data[2] = (unsigned char)~gray_scale;
        }
        openni::DepthPixel* depthpixels = (openni::DepthPixel*)depthFrame.getData();
        cvtColor(dImg, grey, CV_RGB2GRAY);
        int i;
        try {
            for (int y = 0; y < 480; y++) {
                // getting in to each pixel in a row
                for (int x = 0; x < 640; x++) {
                    // getting out the corresponding pixel value from the array
                    i = x + y * 640;
                    if (depthpixels[i] > 3000) {
                        grey.at<unsigned char>(x, y) = 0;
                    }
                }
            }
        } catch (exception e) {
            cout << e.what() << endl;
            cout << depthpixels[i] << endl;
            cout << i << endl;
        }
        //cv::imshow("depth", dImg);
        imshow("dpeth2", grey);
        int k = cvWaitKey(30); // About 30fps
        if (k == 0x1b)
            break;
    }

    // Destroy Streams
    depth.destroy();
    // Close Device
    device.close();
    // Shutdown OpenNI
    openni::OpenNI::shutdown();
    return 0;
}
Solved the problem simply by swapping my x and y around:
for (y = 0; y < 480; y++) {
    // getting in to each pixel in a row
    for (x = 0; x < 640; x++) {
        if (depthpixels[i] > 1500) {
            grey.at<unsigned char>(y, x) = 0;
        }
        if (depthpixels[i] < 500) {
            grey.at<unsigned char>(y, x) = 0;
        }
        i++;
    }
}
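As an alternative that avoids manual index arithmetic altogether, the raw depth buffer can be wrapped in a cv::Mat and thresholded with a mask (a sketch, assuming the usual 640x480 frame; openni::DepthPixel is a 16-bit value):
// Wrap the raw 16-bit depth buffer without copying it.
cv::Mat depth16(480, 640, CV_16UC1, (void*)depthpixels);
grey.setTo(0, depth16 > 1500); // black out pixels farther than 1500 mm
grey.setTo(0, depth16 < 500);  // black out pixels closer than 500 mm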