Loading Texture2D data in DirectX 11 Compute Shader - directx

I am trying to read some data from a texture2d in DirectX11 compute shader, however, the 'Load' function of a texture2D object keeps returning 0 even though the texture object is filled with the same float number.
This is a 160 * 120 texture2d with DXGI_FORMAT_R32G32B32A32_FLOAT. The following code is how I created this resource:
HRESULT TestResources(ID3D11Device* pd3dDevice, ID3D11DeviceContext* pImmediateContext) {
float *test = new float[4 * 80 * 60 * 4]; // 80 * 60, 4 channels, 1 big texture contains 4 80 * 60 subimage
for (int i = 0; i < 4 * 80 * 60 * 4; i++) test[i] = 0.7f;
HRESULT hr = S_OK;
D3D11_TEXTURE2D_DESC RTtextureDesc;
ZeroMemory(&RTtextureDesc, sizeof(D3D11_TEXTURE2D_DESC));
RTtextureDesc.Width = 160;
RTtextureDesc.Height = 120;
RTtextureDesc.MipLevels = 1;
RTtextureDesc.ArraySize = 1;
RTtextureDesc.Format = DXGI_FORMAT_R32G32B32A32_FLOAT;
RTtextureDesc.SampleDesc.Count = 1;
RTtextureDesc.SampleDesc.Quality = 0;
RTtextureDesc.Usage = D3D11_USAGE_DYNAMIC;
RTtextureDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE;
RTtextureDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
RTtextureDesc.MiscFlags = 0;
D3D11_SUBRESOURCE_DATA InitData;
InitData.pSysMem = test;
InitData.SysMemPitch = sizeof(float) * 4;
V_RETURN(pd3dDevice->CreateTexture2D(&RTtextureDesc, &InitData, &m_pInputTex2Ds));
//V_RETURN(pd3dDevice->CreateTexture2D(&RTtextureDesc, NULL, &m_pInputTex2Ds));
D3D11_SHADER_RESOURCE_VIEW_DESC SRViewDesc;
ZeroMemory(&SRViewDesc, sizeof(SRViewDesc));
SRViewDesc.Format = RTtextureDesc.Format;
SRViewDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D;
SRViewDesc.Texture2D.MostDetailedMip = 0;
SRViewDesc.Texture2D.MipLevels = 1;
V_RETURN(pd3dDevice->CreateShaderResourceView(m_pInputTex2Ds, &SRViewDesc, &m_pInputTexSRV));
delete[] test;
return hr;
}
And then I try to run dispatch with X = Y = 2 and Z = 1 like the following:
void ComputeShaderReduction::ExecuteComputeShader(ID3D11DeviceContext* pd3dImmediateContext, UINT uInputNum, ID3D11UnorderedAccessView** ppUAVInputs, UINT X, UINT Y, UINT Z) {
pd3dImmediateContext->CSSetShader(m_pComputeShader, nullptr, 0);
pd3dImmediateContext->CSSetShaderResources(0, 1, &m_pInputTexSRV); // test code
pd3dImmediateContext->CSSetUnorderedAccessViews(0, uInputNum, ppUAVInputs, nullptr);
//pd3dImmediateContext->CSSetUnorderedAccessViews(0, 1, &m_pGPUOutUAVs, nullptr);
pd3dImmediateContext->UpdateSubresource(m_pConstBuf, 0, nullptr, &m_ConstBuf, 0, 0);
pd3dImmediateContext->CSSetConstantBuffers(0, 1, &m_pConstBuf);
pd3dImmediateContext->Dispatch(X, Y, Z);
pd3dImmediateContext->CSSetShader(nullptr, nullptr, 0);
ID3D11UnorderedAccessView* ppUAViewnullptr[1] = { nullptr };
pd3dImmediateContext->CSSetUnorderedAccessViews(0, 1, ppUAViewnullptr, nullptr);
ID3D11ShaderResourceView* ppSRVnullptr[1] = { nullptr };
pd3dImmediateContext->CSSetShaderResources(0, 1, ppSRVnullptr);
ID3D11Buffer* ppCBnullptr[1] = { nullptr };
pd3dImmediateContext->CSSetConstantBuffers(0, 1, ppCBnullptr);
}
And I wrote a very simple CS shader to try to get the data in the texture2d and out it. So, the compute shader looks like this:
#define subimg_dim_x 80
#define subimg_dim_y 60
Texture2D<float4> BufferIn : register(t0);
StructuredBuffer<float> Test: register(t1);
RWStructuredBuffer<float> BufferOut : register(u0);
groupshared float sdata[subimg_dim_x];
[numthreads(subimg_dim_x, 1, 1)]
void CSMain(uint3 DTid : SV_DispatchThreadID,
uint3 threadIdx : SV_GroupThreadID,
uint3 groupIdx : SV_GroupID) {
sdata[threadIdx.x] = 0.0;
GroupMemoryBarrierWithGroupSync();
if (threadIdx.x == 0) {
float4 num = BufferIn.Load(uint3(groupIdx.x, groupIdx.y, 1));
//BufferOut[groupIdx.y * 2 + groupIdx.x] = 2.0; //This one gives me 2.0 as output in the console
BufferOut[groupIdx.y * 2 + groupIdx.x] = num.x; //This one keeps giving me 0.0 and in the texture, r = g = b = a = 0.7 or x = y = z = w = 0.7, so it suppose to print 0.7 in the console.
}
GroupMemoryBarrierWithGroupSync();
}
I think the way I print the CS shader result on CPU end is correct.
void ComputeShaderReduction::CopyToCPUBuffer(ID3D11Device* pdevice, ID3D11DeviceContext* pd3dImmediateContext, ID3D11Buffer* pGPUOutBufs) {
D3D11_BUFFER_DESC desc;
ZeroMemory(&desc, sizeof(desc));
pGPUOutBufs->GetDesc(&desc);
desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
desc.Usage = D3D11_USAGE_STAGING;
desc.BindFlags = 0;
desc.MiscFlags = 0;
if (!m_pCPUOutBufs && SUCCEEDED(pdevice->CreateBuffer(&desc, nullptr, &m_pCPUOutBufs))) {
pd3dImmediateContext->CopyResource(m_pCPUOutBufs, pGPUOutBufs);
}
else pd3dImmediateContext->CopyResource(m_pCPUOutBufs, pGPUOutBufs);
D3D11_MAPPED_SUBRESOURCE MappedResource;
float *p;
pd3dImmediateContext->Map(m_pCPUOutBufs, 0, D3D11_MAP_READ, 0, &MappedResource);
p = (float*)MappedResource.pData;
for (int i = 0; i < 4; i++) printf("%d %f\n", i, p[i]);
pd3dImmediateContext->Unmap(m_pCPUOutBufs, 0);
printf("\n");
}
The buffer that bind to UAV has only 4 elements. So, if all the float numbers in my texture2d are 0.7, I should have 4 0.7s get printed in CopyToCPUBuffer function instead of 0.0s.
Is anyone know what could be wrong in my code or can someone provide me an entire example or a tutorial that shows how to read DirectX 11 texture2d's data in compute shader correctly?
Thanks in advance.

The following is wrong for a start. The Pitch of your input data is the number of bytes per row of the texture, not per pixel.
InitData.SysMemPitch = sizeof(float) * 4;
Secondly:
float4 num = BufferIn.Load(uint3(groupIdx.x, groupIdx.y, 1));
You're trying to load data from the 2nd mip of the texture, it only has 1 mip level.

Related

reading GPU resource data by CPU

i am learning directx11 these days. and i have been stuck in compute shader section.
so i made four resource and three corresponding view.
immutable input buffer = {1,1,1,1,1} / SRV
immutable input buffer = {2,2,2,2,2} / SRV
output buffer / UAV
staging buffer for reading / No View
and i succeeded to create all things, and dispatch cs function, and copy data from output buffer to staging buffer, and i read/check data.
// INPUT BUFFER1--------------------------------------------------
const int dataSize = 5;
D3D11_BUFFER_DESC vb_dest;
vb_dest.ByteWidth = sizeof(float) * dataSize;
vb_dest.StructureByteStride = sizeof(float);
vb_dest.BindFlags = D3D11_BIND_SHADER_RESOURCE;
vb_dest.Usage = D3D11_USAGE_IMMUTABLE;
vb_dest.CPUAccessFlags = 0;
vb_dest.MiscFlags = 0;
float v1_float[dataSize] = { 1,1,1,1,1 };
D3D11_SUBRESOURCE_DATA v1_data;
v1_data.pSysMem = static_cast<void*>(v1_float);
device->CreateBuffer(
&vb_dest,
&v1_data,
valueBuffer1.GetAddressOf());
D3D11_SHADER_RESOURCE_VIEW_DESC srv_desc;
srv_desc.Format = DXGI_FORMAT_R32_FLOAT;
srv_desc.ViewDimension = D3D11_SRV_DIMENSION_BUFFER;
srv_desc.Buffer.FirstElement = 0;
srv_desc.Buffer.NumElements = dataSize;
srv_desc.Buffer.ElementWidth = sizeof(float);
device->CreateShaderResourceView(
valueBuffer1.Get(),
&srv_desc,
inputSRV1.GetAddressOf());
// INPUT BUFFER2-----------------------------------------------------------
float v2_float[dataSize] = { 2,2,2,2,2 };
D3D11_SUBRESOURCE_DATA v2_data;
v2_data.pSysMem = static_cast<void*>(v2_float);
device->CreateBuffer(
&vb_dest,
&v2_data,
valueBuffer2.GetAddressOf());
device->CreateShaderResourceView(
valueBuffer2.Get(),
&srv_desc,
inputSRV2.GetAddressOf());
// OUTPUT BUFFER-----------------------------------------------------------
D3D11_BUFFER_DESC ov_desc;
ov_desc.ByteWidth = sizeof(float) * dataSize;
ov_desc.StructureByteStride = sizeof(float);
ov_desc.BindFlags = D3D11_BIND_UNORDERED_ACCESS;
ov_desc.Usage = D3D11_USAGE_DEFAULT;
ov_desc.CPUAccessFlags = 0;
ov_desc.MiscFlags = 0;
device->CreateBuffer(
&ov_desc,
nullptr,
outputResource.GetAddressOf());
D3D11_UNORDERED_ACCESS_VIEW_DESC outputUAV_desc;
outputUAV_desc.Format = DXGI_FORMAT_R32_FLOAT;
outputUAV_desc.ViewDimension = D3D11_UAV_DIMENSION_BUFFER;
outputUAV_desc.Buffer.FirstElement = 0;
outputUAV_desc.Buffer.NumElements = dataSize;
outputUAV_desc.Buffer.Flags = 0;
device->CreateUnorderedAccessView(
outputResource.Get(),
&outputUAV_desc,
outputUAV.GetAddressOf());
// BUFFER FOR COPY-----------------------------------------------------------
D3D11_BUFFER_DESC rb_desc;
rb_desc.ByteWidth = sizeof(float) * dataSize;
rb_desc.StructureByteStride = sizeof(float);
rb_desc.Usage = D3D11_USAGE_STAGING;
rb_desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
rb_desc.BindFlags = 0;
rb_desc.MiscFlags = 0;
device->CreateBuffer(
&rb_desc,
nullptr,
readResource.GetAddressOf());
// DISPATCH and COPY and GET DATA
dContext->CSSetShaderResources(0, 1, inputSRV1.GetAddressOf());
dContext->CSSetShaderResources(1, 1, inputSRV2.GetAddressOf());
dContext->CSSetUnorderedAccessViews(0, 1, outputUAV.GetAddressOf(), nullptr);
dContext->CSSetShader(cs.Get(), nullptr, 0);
dContext->Dispatch(1, 1, 1);
dContext->CopyResource(readResource.Get(), outputResource.Get());
D3D11_MAPPED_SUBRESOURCE mappedResource2;
ZeroMemory(&mappedResource2, sizeof(D3D11_MAPPED_SUBRESOURCE));
R_CHECK(dContext->Map(readResource.Get(), 0, D3D11_MAP_READ, 0, &mappedResource2));
float* data = static_cast<float*>(mappedResource2.pData);
for (int i = 0; i < 5; ++i)
{
int a = data[i];
}
and this is compute shader code
StructuredBuffer<float> inputA : register(t0);
StructuredBuffer<float> inputB : register(t1);
RWStructuredBuffer<float> output : register(u0);
[numthreads(5, 1, 1)]
void main(int3 id : SV_DispatchThreadID)
{
output[id.x] = inputA[id.x] + inputB[id.x];
}
in CS, it's adding two input buffer data and store into output buffer.
so expected answer would be {3,3,3,3,3}.
but the result is {3,0,0,0,0} only first idx has proper answer.
any advice would be amazing.
dContext->CopyResource(readResource.Get(), outputResource.Get());
D3D11_MAPPED_SUBRESOURCE mappedResource2;
ZeroMemory(&mappedResource2, sizeof(D3D11_MAPPED_SUBRESOURCE));
R_CHECK(dContext->Map(readResource.Get(), 0, D3D11_MAP_READ, 0, &mappedResource2));
float* data = static_cast<float*>(mappedResource2.pData);
for (int i = 0; i < 5; ++i)
{
int a = data[i];
}
this code should be like this.
CopyResource();
Map();
Declare and allocate 'data'
zeromemory(data);
memcopy(data, resource's pointer);
unMap();
for some reason, i have to use the memcopy instead of just reading resource directly with the pointer that i get from mapping.

DirectX 11 Render to Texture Issue

In my code, i am making 2 rectangles,
Rectangle1: Rendering On a Texture.
Rectangle2: Rendering On Back Buffer.
I am trying to do programmable blending,so need to access the destination pixel in pixel shader.
In my code,
I am creating a texture like below:
d3d11Device->CreateTexture2D(&textureDesc, NULL, &renderTargetTextureMap);
After this i am creating Render Target view of the texture.
d3d11Device->CreateRenderTargetView(renderTargetTextureMap, &renderTargetViewDesc, &renderTargetViewMap);
After this i am declaring vertex and pixel shader.
Then in my draw call,
i am performing following thing:
float bgColor[4] = {0.0f, 0.0f,0.0f, 1.0f };
d3d11DevCon->ClearRenderTargetView(renderTargetViewMap, bgColor);
float bgColor2[4] = { 0.0f, 1.0f, 0.0f, 1.0f };
////////////////////////////////////////////////Buffer 1///////////////////////////////////////////////////////////////
//Set the vertex buffer
UINT stride = sizeof(Vertex);
UINT offset = 0;
///////////////////////////////////////////////////////////Buffer 2//////////////////////////////////////////////////////////////////
d3d11DevCon->IASetIndexBuffer(d2dIndexBuffer, DXGI_FORMAT_R32_UINT, 0);
d3d11DevCon->IASetVertexBuffers(0, 1, &triangleVertBuffer, &stride, &offset);
////Draw the triangle
d3d11DevCon->DrawIndexed(6, 0, 0);
I assume that since i have set my render target view as renderTargetViewMap, so my draw call will render to texture only.
Now i am rendering to my backbuffer:
////////////////////////////////////////////
d3d11DevCon->OMSetRenderTargets(1, &renderTargetView, NULL);
d3d11DevCon->PSSetShaderResources(0, 1, &shaderResourceViewMap);
//d3d11DevCon->ClearRenderTargetView(renderTargetView, bgColor2);
d3d11DevCon->IASetIndexBuffer(d2dIndexBuffer2, DXGI_FORMAT_R32_UINT, 0);
d3d11DevCon->IASetVertexBuffers(0, 1, &triangleVertBuffer2, &stride, &offset);
////Draw the triangle
d3d11DevCon->DrawIndexed(6, 0, 0);
//Present the backbuffer to the screen
SwapChain->Present(0, 0);
So, in this way my rendering is happening.
Issue Face:
In my pixel shader,
VS_OUTPUT VS(float4 inPos : POSITION, float4 inColor : COLOR)
{
VS_OUTPUT output;
output.Pos = inPos;
output.Color = inColor;
return output;
}
float4 PS(VS_OUTPUT input) : SV_TARGET
{
float2 temp;
temp = input.Pos;
float4 diffuse = ObjTexture.Sample(ObjSamplerState,0.5+0.5*temp);
return input.Color + diffuse;
}
Here the diffuse is comming out to be equal to my bgcolor which i have set when rendering to texture
float bgColor[4] = {0.0f, 0.0f,0.0f, 1.0f };
d3d11DevCon->ClearRenderTargetView(renderTargetViewMap, bgColor);
I have also drawn a rectangle on it, but those pixels i am not able to access.
How can i access the pixel of rectangle that i have drawn on rendering to texture.
This is Issue Image
Desired Result
Shader File: Effect.fx
struct VS_OUTPUT
{
float4 Pos : SV_POSITION;
float4 Color : COLOR;
};
Texture2D ObjTexture;
SamplerState ObjSamplerState;
VS_OUTPUT VS(float4 inPos : POSITION, float4 inColor : COLOR)
{
VS_OUTPUT output;
output.Pos = inPos;
output.Color = inColor;
return output;
}
float4 PS(VS_OUTPUT input) : SV_TARGET
{
float2 temp;
temp = input.Pos;
float4 diffuse = ObjTexture.Sample(ObjSamplerState,0.5+0.5*temp);
return input.Color + diffuse;
}
Edit - 1:
With my latest change in code, i am able to blend my rectangle 2 with rectangle 1, but one issue i am facing is that when i blend then major part of my rectangle 2 is changes to yellow (red + green) on edges only i am able to see the actual green color.
Modified code:
main.cpp
//Include and link appropriate libraries and headers//
#pragma comment(lib, "d3d11.lib")
#pragma comment(lib, "d3dx11.lib")
#pragma comment(lib, "d3dx10.lib")
#include <windows.h>
#include <d3d11.h>
#include <d3dx11.h>
#include <d3dx10.h>
#include <xnamath.h>
//Global Declarations - Interfaces//
IDXGISwapChain* SwapChain;
ID3D11Device* d3d11Device;
ID3D11DeviceContext* d3d11DevCon;
ID3D11RenderTargetView* renderTargetView;
ID3D11Buffer* triangleVertBuffer;
ID3D11Buffer* triangleVertBuffer2;
ID3D11VertexShader* VS;
ID3D11PixelShader* PS;
ID3D10Blob* VS_Buffer;
ID3D10Blob* PS_Buffer;
ID3D11InputLayout* vertLayout;
XMMATRIX mapView;
XMMATRIX mapProjection;
XMVECTOR DefaultForward = XMVectorSet(0.0f, 0.0f, 1.0f, 0.0f);
//Global Declarations - Others//
LPCTSTR WndClassName = L"firstwindow";
HWND hwnd = NULL;
HRESULT hr;
const int Width = 800;
const int Height = 600;
bool InitializeDirect3d11App(HINSTANCE hInstance)
{
//Describe our Buffer
DXGI_MODE_DESC bufferDesc;
ZeroMemory(&bufferDesc, sizeof(DXGI_MODE_DESC));
bufferDesc.Width = Width;
bufferDesc.Height = Height;
bufferDesc.RefreshRate.Numerator = 60;
bufferDesc.RefreshRate.Denominator = 1;
bufferDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
bufferDesc.ScanlineOrdering = DXGI_MODE_SCANLINE_ORDER_UNSPECIFIED;
bufferDesc.Scaling = DXGI_MODE_SCALING_UNSPECIFIED;
//Describe our SwapChain
DXGI_SWAP_CHAIN_DESC swapChainDesc;
ZeroMemory(&swapChainDesc, sizeof(DXGI_SWAP_CHAIN_DESC));
swapChainDesc.BufferDesc = bufferDesc;
swapChainDesc.SampleDesc.Count = 1;
swapChainDesc.SampleDesc.Quality = 0;
swapChainDesc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT;
swapChainDesc.BufferCount = 1;
swapChainDesc.OutputWindow = hwnd;
swapChainDesc.Windowed = TRUE;
swapChainDesc.SwapEffect = DXGI_SWAP_EFFECT_DISCARD;
//Create our SwapChain
hr = D3D11CreateDeviceAndSwapChain(NULL, D3D_DRIVER_TYPE_HARDWARE, NULL, NULL, NULL, NULL,
D3D11_SDK_VERSION, &swapChainDesc, &SwapChain, &d3d11Device, NULL, &d3d11DevCon);
//Create our BackBuffer
ID3D11Texture2D* BackBuffer;
hr = SwapChain->GetBuffer(0, __uuidof(ID3D11Texture2D), (void**)&BackBuffer);
//Create our Render Target
hr = d3d11Device->CreateRenderTargetView(BackBuffer, NULL, &renderTargetView);
BackBuffer->Release();
////////////////////////////////////////////////////////////////////////EXPERIMENT AREA//////////////////////////////////////////////////////////////////////////////////////
ZeroMemory(&textureDesc, sizeof(textureDesc));
// Setup the texture description.
// We will have our map be a square
// We will need to have this texture bound as a render target AND a shader resource
textureDesc.Width = Width/ 3.9729999999999999999999999999999;
textureDesc.Height = Height/3.9729999999999999999999999999999;
textureDesc.MipLevels = 1;
textureDesc.ArraySize = 1;
textureDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
textureDesc.SampleDesc.Count = 1;
textureDesc.SampleDesc.Quality = 0;
textureDesc.Usage = D3D11_USAGE_DEFAULT;
textureDesc.BindFlags = D3D11_BIND_RENDER_TARGET | D3D11_BIND_SHADER_RESOURCE;
textureDesc.CPUAccessFlags = 0;
textureDesc.MiscFlags = 0;
d3d11Device->CreateTexture2D(&textureDesc, NULL, &renderTargetTextureMap);
// Setup the description of the render target view.
renderTargetViewDesc.Format = textureDesc.Format;
renderTargetViewDesc.ViewDimension = D3D11_RTV_DIMENSION_TEXTURE2D;
renderTargetViewDesc.Texture2D.MipSlice = 0;
// Create the render target view.
d3d11Device->CreateRenderTargetView(renderTargetTextureMap, &renderTargetViewDesc, &renderTargetViewMap);
/////////////////////// Map's Shader Resource View
// Setup the description of the shader resource view.
shaderResourceViewDesc.Format = textureDesc.Format;
shaderResourceViewDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D;
shaderResourceViewDesc.Texture2D.MostDetailedMip = 0;
shaderResourceViewDesc.Texture2D.MipLevels = 1;
// Create the shader resource view.
d3d11Device->CreateShaderResourceView(renderTargetTextureMap, &shaderResourceViewDesc, &shaderResourceViewMap);
d3d11DevCon->OMSetRenderTargets(1, &renderTargetViewMap, NULL);
d3d11DevCon->PSSetShaderResources(0, 1, &shaderResourceViewMap);
return true;
}
void CleanUp()
{
//Release the COM Objects we created
SwapChain->Release();
d3d11Device->Release();
d3d11DevCon->Release();
renderTargetView->Release();
triangleVertBuffer->Release();
VS->Release();
PS->Release();
VS_Buffer->Release();
PS_Buffer->Release();
vertLayout->Release();
}
bool InitScene()
{
//Compile Shaders from shader file
hr = D3DX11CompileFromFile(L"Effect.fx", 0, 0, "VS", "vs_5_0", 0, 0, 0, &VS_Buffer, 0, 0);
hr = D3DX11CompileFromFile(L"Effect.fx", 0, 0, "PS", "ps_5_0", 0, 0, 0, &PS_Buffer, 0, 0);
//Create the Shader Objects
hr = d3d11Device->CreateVertexShader(VS_Buffer->GetBufferPointer(), VS_Buffer->GetBufferSize(), NULL, &VS);
hr = d3d11Device->CreatePixelShader(PS_Buffer->GetBufferPointer(), PS_Buffer->GetBufferSize(), NULL, &PS);
//Set Vertex and Pixel Shaders
d3d11DevCon->VSSetShader(VS, 0, 0);
d3d11DevCon->PSSetShader(PS, 0, 0);
//Create the Input Layout
hr = d3d11Device->CreateInputLayout(layout, numElements, VS_Buffer->GetBufferPointer(),
VS_Buffer->GetBufferSize(), &vertLayout);
//Set the Input Layout
d3d11DevCon->IASetInputLayout(vertLayout);
//Set Primitive Topology
d3d11DevCon->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
//Create the Viewport
D3D11_VIEWPORT viewport;
ZeroMemory(&viewport, sizeof(D3D11_VIEWPORT));
viewport.TopLeftX = 0;
viewport.TopLeftY = 0;
viewport.Width = 800;
viewport.Height = 600;
//Set the Viewport
d3d11DevCon->RSSetViewports(1, &viewport);
////////////////***********************First Texture Vertex Buffer *******************************/////////////////////////////
//Create the vertex buffer
Vertex v[] =
{
Vertex(-0.5f, -0.5f, 0.0f, 1.0f,0.0f,0.0f, 1.0f),
Vertex(-0.5f, 0.5f, 0.0f, 1.0f,0.0f,0.0f, 1.0f),
Vertex(0.5f, 0.5f, 0.0f, 1.0f, 0.0f,0.0f, 1.0f),
Vertex(0.5f, -0.5f, 0.0f, 1.0f,0.0f, 0.0f, 1.0f),
};
DWORD indices[] = {
// Front Face
0, 1, 3,
1, 2, 3,
};
D3D11_BUFFER_DESC indexBufferDesc;
ZeroMemory(&indexBufferDesc, sizeof(indexBufferDesc));
indexBufferDesc.Usage = D3D11_USAGE_DEFAULT;
indexBufferDesc.ByteWidth = sizeof(DWORD) * 2 * 3;
indexBufferDesc.BindFlags = D3D11_BIND_INDEX_BUFFER;
indexBufferDesc.CPUAccessFlags = 0;
indexBufferDesc.MiscFlags = 0;
D3D11_SUBRESOURCE_DATA iinitData;
iinitData.pSysMem = indices;
d3d11Device->CreateBuffer(&indexBufferDesc, &iinitData, &d2dIndexBuffer);
D3D11_BUFFER_DESC vertexBufferDesc;
ZeroMemory(&vertexBufferDesc, sizeof(vertexBufferDesc));
vertexBufferDesc.Usage = D3D11_USAGE_DEFAULT;
vertexBufferDesc.ByteWidth = sizeof(Vertex) * 4;
vertexBufferDesc.BindFlags = D3D11_BIND_VERTEX_BUFFER;
vertexBufferDesc.CPUAccessFlags = 0;
vertexBufferDesc.MiscFlags = 0;
D3D11_SUBRESOURCE_DATA vertexBufferData;
ZeroMemory(&vertexBufferData, sizeof(vertexBufferData));
vertexBufferData.pSysMem = v;
hr = d3d11Device->CreateBuffer(&vertexBufferDesc, &vertexBufferData, &triangleVertBuffer);
////////////////////////////////////////////////////// Second Vertex.
Vertex v2[] = {
// positions // colors // texture coords
Vertex(1.0f, 1.0f, 0.0f,0.0f,1.0f,0.0f,1.0f), // top right
Vertex(1.0f,0.0f, 0.0f,0.0f,1.0f,0.0f,1.0f), // bottom right
Vertex(0.0f,0.0f, 0.0f,0.0f,1.0f,0.0f,1.0f), // bottom left
Vertex(0.0f, 1.0, 0.0f,0.0f, 1.0f,0.0f,1.0f) // top left
};
DWORD indices2[] = {
// Front Face
0, 1, 2,
0, 2, 3,
};
D3D11_BUFFER_DESC indexBufferDesc2;
ZeroMemory(&indexBufferDesc2, sizeof(indexBufferDesc2));
indexBufferDesc2.Usage = D3D11_USAGE_DEFAULT;
indexBufferDesc2.ByteWidth = sizeof(DWORD) * 2 * 3;
indexBufferDesc2.BindFlags = D3D11_BIND_INDEX_BUFFER;
indexBufferDesc2.CPUAccessFlags = 0;
indexBufferDesc2.MiscFlags = 0;
D3D11_SUBRESOURCE_DATA iinitData2;
iinitData2.pSysMem = indices2;
d3d11Device->CreateBuffer(&indexBufferDesc2, &iinitData2, &d2dIndexBuffer2);
D3D11_BUFFER_DESC vertexBufferDesc2;
ZeroMemory(&vertexBufferDesc2, sizeof(vertexBufferDesc2));
vertexBufferDesc2.Usage = D3D11_USAGE_DEFAULT;
vertexBufferDesc2.ByteWidth = sizeof(Vertex) * 4;
vertexBufferDesc2.BindFlags = D3D11_BIND_VERTEX_BUFFER;
vertexBufferDesc2.CPUAccessFlags = 0;
vertexBufferDesc2.MiscFlags = 0;
D3D11_SUBRESOURCE_DATA vertexBufferData2;
ZeroMemory(&vertexBufferData2, sizeof(vertexBufferData2));
vertexBufferData2.pSysMem = v2;
hr = d3d11Device->CreateBuffer(&vertexBufferDesc2, &vertexBufferData2, &triangleVertBuffer2);
UINT stride = sizeof(Vertex);
UINT offset = 0;
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
return true;
}
Shader File:
struct VS_OUTPUT
{
float4 Pos : SV_POSITION;
float4 Color : COLOR;
};
Texture2D ObjTexture;
SamplerState ObjSamplerState;
VS_OUTPUT VS(float4 inPos : POSITION, float4 inColor : COLOR)
{
VS_OUTPUT output;
output.Pos = inPos;
output.Color = inColor;
return output;
}
float4 PS(VS_OUTPUT input) : SV_TARGET
{
float2 temp;
temp = input.Pos;
float4 diffuse = ObjTexture.Sample(ObjSamplerState,0.5*temp);
return input.Color + diffuse ;
}
return DefWindowProc(hwnd,
msg,
wParam,
lParam);
}
Edit 3:
My vertex Structure:
struct Vertex //Overloaded Vertex Structure
{
Vertex() {}
Vertex(float x, float y, float z,
float cr, float cg, float cb, float ca)
: pos(x, y, z), color(cr, cg, cb, ca) {}
XMFLOAT3 pos;
XMFLOAT4 color;
};
Input description:
D3D11_INPUT_ELEMENT_DESC layout[] =
{
{ "POSITION", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, 0, D3D11_INPUT_PER_VERTEX_DATA, 0 },
{ "COLOR", 0, DXGI_FORMAT_R32G32B32A32_FLOAT, 0, 12, D3D11_INPUT_PER_VERTEX_DATA, 0 },
};
I think so issue is when i pass my texture and read it in diffuse in pixel shader, then mapping of my texture on back buffer is not same and hence i am only finding red color from 2nd scan line and hence resultant zero is produced..?
Edit 3:
My initialization code:
struct Vertex //Overloaded Vertex Structure
{
Vertex() {}
Vertex(float x, float y, float z,
float cr, float cg, float cb, float ca)
: pos(x, y, z), color(cr, cg, cb, ca) {}
XMFLOAT3 pos;
XMFLOAT4 color;
};
ID3D11Texture2D* renderTargetTextureMap;
ID3D11RenderTargetView* renderTargetViewMap;
ID3D11ShaderResourceView* shaderResourceViewMap;
ID3D11SamplerState* CubesTexSamplerState;
ID3D11Buffer *d2dIndexBuffer;
ID3D11Buffer *d2dIndexBuffer2;
D3D11_TEXTURE2D_DESC textureDesc;
D3D11_RENDER_TARGET_VIEW_DESC renderTargetViewDesc;
D3D11_SHADER_RESOURCE_VIEW_DESC shaderResourceViewDesc;
D3D11_INPUT_ELEMENT_DESC layout[] =
{
{ "POSITION", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, 0, D3D11_INPUT_PER_VERTEX_DATA, 0 },
{ "COLOR", 0, DXGI_FORMAT_R32G32B32A32_FLOAT, 0, 12, D3D11_INPUT_PER_VERTEX_DATA, 0 },
};
UINT numElements = ARRAYSIZE(layout);
bool InitializeDirect3d11App(HINSTANCE hInstance)
{
//Describe our Buffer
DXGI_MODE_DESC bufferDesc;
ZeroMemory(&bufferDesc, sizeof(DXGI_MODE_DESC));
bufferDesc.Width = Width;
bufferDesc.Height = Height;
bufferDesc.RefreshRate.Numerator = 60;
bufferDesc.RefreshRate.Denominator = 1;
bufferDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
bufferDesc.ScanlineOrdering = DXGI_MODE_SCANLINE_ORDER_UNSPECIFIED;
bufferDesc.Scaling = DXGI_MODE_SCALING_UNSPECIFIED;
//Describe our SwapChain
DXGI_SWAP_CHAIN_DESC swapChainDesc;
ZeroMemory(&swapChainDesc, sizeof(DXGI_SWAP_CHAIN_DESC));
swapChainDesc.BufferDesc = bufferDesc;
swapChainDesc.SampleDesc.Count = 1;
swapChainDesc.SampleDesc.Quality = 0;
swapChainDesc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT;
swapChainDesc.BufferCount = 1;
swapChainDesc.OutputWindow = hwnd;
swapChainDesc.Windowed = TRUE;
swapChainDesc.SwapEffect = DXGI_SWAP_EFFECT_DISCARD;
//Create our SwapChain
hr = D3D11CreateDeviceAndSwapChain(NULL, D3D_DRIVER_TYPE_HARDWARE, NULL, NULL, NULL, NULL,
D3D11_SDK_VERSION, &swapChainDesc, &SwapChain, &d3d11Device, NULL, &d3d11DevCon);
//Create our BackBuffer
ID3D11Texture2D* BackBuffer;
hr = SwapChain->GetBuffer(0, __uuidof(ID3D11Texture2D), (void**)&BackBuffer);
//Create our Render Target
hr = d3d11Device->CreateRenderTargetView(BackBuffer, NULL, &renderTargetView);
BackBuffer->Release();
////////////////////////////////////////////////////////////////////////EXPERIMENT AREA//////////////////////////////////////////////////////////////////////////////////////
ZeroMemory(&textureDesc, sizeof(textureDesc));
// Setup the texture description.
// We will have our map be a square
// We will need to have this texture bound as a render target AND a shader resource
textureDesc.Width = Width/2;
textureDesc.Height = Height/2;
textureDesc.MipLevels = 1;
textureDesc.ArraySize = 1;
textureDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
textureDesc.SampleDesc.Count = 1;
textureDesc.SampleDesc.Quality = 0;
textureDesc.Usage = D3D11_USAGE_DEFAULT;
textureDesc.BindFlags = D3D11_BIND_RENDER_TARGET | D3D11_BIND_SHADER_RESOURCE;
textureDesc.CPUAccessFlags = 0;
textureDesc.MiscFlags = 0;
d3d11Device->CreateTexture2D(&textureDesc, NULL, &renderTargetTextureMap);
// Setup the description of the render target view.
renderTargetViewDesc.Format = textureDesc.Format;
renderTargetViewDesc.ViewDimension = D3D11_RTV_DIMENSION_TEXTURE2D;
renderTargetViewDesc.Texture2D.MipSlice = 0;
// Create the render target view.
d3d11Device->CreateRenderTargetView(renderTargetTextureMap, &renderTargetViewDesc, &renderTargetView);
/////////////////////// Map's Shader Resource View
// Setup the description of the shader resource view.
shaderResourceViewDesc.Format = textureDesc.Format;
shaderResourceViewDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D;
shaderResourceViewDesc.Texture2D.MostDetailedMip = 0;
shaderResourceViewDesc.Texture2D.MipLevels = 1;
// Create the shader resource view.
d3d11Device->CreateShaderResourceView(renderTargetTextureMap, &shaderResourceViewDesc, &shaderResourceViewMap);
//d3d11DevCon->OMSetRenderTargets(1, &renderTargetViewMap, NULL);
//d3d11DevCon->PSSetShaderResources(0, 1, &shaderResourceViewMap);
return true;
}
void CleanUp()
{
//Release the COM Objects we created
SwapChain->Release();
d3d11Device->Release();
d3d11DevCon->Release();
renderTargetView->Release();
triangleVertBuffer->Release();
VS->Release();
PS->Release();
VS_Buffer->Release();
PS_Buffer->Release();
vertLayout->Release();
}
bool InitScene()
{
//Compile Shaders from shader file
hr = D3DX11CompileFromFile(L"Effect.fx", 0, 0, "VS", "vs_5_0", 0, 0, 0, &VS_Buffer, 0, 0);
hr = D3DX11CompileFromFile(L"Effect.fx", 0, 0, "PS", "ps_5_0", 0, 0, 0, &PS_Buffer, 0, 0);
//Create the Shader Objects
hr = d3d11Device->CreateVertexShader(VS_Buffer->GetBufferPointer(), VS_Buffer->GetBufferSize(), NULL, &VS);
hr = d3d11Device->CreatePixelShader(PS_Buffer->GetBufferPointer(), PS_Buffer->GetBufferSize(), NULL, &PS);
//Set Vertex and Pixel Shaders
d3d11DevCon->VSSetShader(VS, 0, 0);
d3d11DevCon->PSSetShader(PS, 0, 0);
//Create the Input Layout
hr = d3d11Device->CreateInputLayout(layout, numElements, VS_Buffer->GetBufferPointer(),
VS_Buffer->GetBufferSize(), &vertLayout);
//Set the Input Layout
d3d11DevCon->IASetInputLayout(vertLayout);
//Set Primitive Topology
d3d11DevCon->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
//Create the Viewport
D3D11_VIEWPORT viewport;
ZeroMemory(&viewport, sizeof(D3D11_VIEWPORT));
viewport.TopLeftX = 0;
viewport.TopLeftY = 0;
viewport.Width = 800;
viewport.Height = 600;
//Set the Viewport
d3d11DevCon->RSSetViewports(1, &viewport);
////////////////***********************First Texture Vertex Buffer *******************************/////////////////////////////
//Create the vertex buffer
Vertex v[] =
{
Vertex(-0.35f, -0.35f, 0.0f, 1.0f,0.0f,0.0f, 1.0f),
Vertex(-0.35f, 0.35f, 0.0f, 1.0f,0.0f,0.0f, 1.0f),
Vertex(0.35f, 0.35f, 0.0f, 1.0f, 0.0f,0.0f, 1.0f),
Vertex(0.35f, -0.35f, 0.0f, 1.0f,0.0f, 0.0f, 1.0f),
};
DWORD indices[] = {
// Front Face
0, 1, 3,
1, 2, 3,
};
D3D11_BUFFER_DESC indexBufferDesc;
ZeroMemory(&indexBufferDesc, sizeof(indexBufferDesc));
indexBufferDesc.Usage = D3D11_USAGE_DEFAULT;
indexBufferDesc.ByteWidth = sizeof(DWORD) * 2 * 3;
indexBufferDesc.BindFlags = D3D11_BIND_INDEX_BUFFER;
indexBufferDesc.CPUAccessFlags = 0;
indexBufferDesc.MiscFlags = 0;
D3D11_SUBRESOURCE_DATA iinitData;
iinitData.pSysMem = indices;
d3d11Device->CreateBuffer(&indexBufferDesc, &iinitData, &d2dIndexBuffer);
D3D11_BUFFER_DESC vertexBufferDesc;
ZeroMemory(&vertexBufferDesc, sizeof(vertexBufferDesc));
vertexBufferDesc.Usage = D3D11_USAGE_DEFAULT;
vertexBufferDesc.ByteWidth = sizeof(Vertex) * 4;
vertexBufferDesc.BindFlags = D3D11_BIND_VERTEX_BUFFER;
vertexBufferDesc.CPUAccessFlags = 0;
vertexBufferDesc.MiscFlags = 0;
D3D11_SUBRESOURCE_DATA vertexBufferData;
ZeroMemory(&vertexBufferData, sizeof(vertexBufferData));
vertexBufferData.pSysMem = v;
hr = d3d11Device->CreateBuffer(&vertexBufferDesc, &vertexBufferData, &triangleVertBuffer);
////////////////////////////////////////////////////// Second Vertex.
Vertex v2[] = {
// positions // colors // texture coords
Vertex(1.0f, 1.0f, 0.0f,0.0f,1.0f,0.0f,1.0f), // top right
Vertex(1.0f,0.0f, 0.0f,0.0f,1.0f,0.0f,1.0f), // bottom right
Vertex(0.0f,0.0f, 0.0f,0.0f,1.0f,0.0f,1.0f), // bottom left
Vertex(0.0f, 1.0, 0.0f,0.0f, 1.0f,0.0f,1.0f) // top left
};
DWORD indices2[] = {
// Front Face
0, 1, 2,
0, 2, 3,
};
D3D11_BUFFER_DESC indexBufferDesc2;
ZeroMemory(&indexBufferDesc2, sizeof(indexBufferDesc2));
indexBufferDesc2.Usage = D3D11_USAGE_DEFAULT;
indexBufferDesc2.ByteWidth = sizeof(DWORD) * 2 * 3;
indexBufferDesc2.BindFlags = D3D11_BIND_INDEX_BUFFER;
indexBufferDesc2.CPUAccessFlags = 0;
indexBufferDesc2.MiscFlags = 0;
D3D11_SUBRESOURCE_DATA iinitData2;
iinitData2.pSysMem = indices2;
d3d11Device->CreateBuffer(&indexBufferDesc2, &iinitData2, &d2dIndexBuffer2);
D3D11_BUFFER_DESC vertexBufferDesc2;
ZeroMemory(&vertexBufferDesc2, sizeof(vertexBufferDesc2));
vertexBufferDesc2.Usage = D3D11_USAGE_DEFAULT;
vertexBufferDesc2.ByteWidth = sizeof(Vertex) * 4;
vertexBufferDesc2.BindFlags = D3D11_BIND_VERTEX_BUFFER;
vertexBufferDesc2.CPUAccessFlags = 0;
vertexBufferDesc2.MiscFlags = 0;
D3D11_SUBRESOURCE_DATA vertexBufferData2;
ZeroMemory(&vertexBufferData2, sizeof(vertexBufferData2));
vertexBufferData2.pSysMem = v2;
hr = d3d11Device->CreateBuffer(&vertexBufferDesc2, &vertexBufferData2, &triangleVertBuffer2);
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
return true;
}
Edit 4:
Added draw scene code:
void DrawScene()
{
//Set the vertex buffer
UINT stride = sizeof(Vertex);
UINT offset = 0;
///////////////////////////////////////////////////////////Buffer 2//////////////////////////////////////////////////////////////////
d3d11DevCon->IASetIndexBuffer(d2dIndexBuffer, DXGI_FORMAT_R32_UINT, 0);
d3d11DevCon->IASetVertexBuffers(0, 1, &triangleVertBuffer, &stride, &offset);
////Draw the triangle
d3d11DevCon->DrawIndexed(6, 0, 0);
d3d11DevCon->OMSetRenderTargets(1, &renderTargetView, NULL);
////////////////////////////////////////////
d3d11DevCon->PSSetShaderResources(0, 1, &shaderResourceViewMap);
d3d11DevCon->IASetIndexBuffer(d2dIndexBuffer2, DXGI_FORMAT_R32_UINT, 0);
d3d11DevCon->IASetVertexBuffers(0, 1, &triangleVertBuffer2, &stride, &offset);
////Draw the triangle
d3d11DevCon->DrawIndexed(6, 0, 0);
//Present the backbuffer to the screen
SwapChain->Present(0, 0);
}
In order to calculate correct texture coordinates from the vertex Position, you need to:
1.Create the render target texture with Width and Height (not the halves):
textureDesc.Width = Width; // /2; // Do not use half width
textureDesc.Height = Height; // /2; // Do not use half height
2.Divide the position to a float2(1 / Width, 1 / Height) in your pixel shader, like this:
float2 tex = input.Pos * float2(1.0f / 800.0f, 1.0f / 600.0f);
float4 diffuse = ObjTexture.Sample(ObjSamplerState, tex);
return input.Color + diffuse;

Converting cv::Mat to MTLTexture

An intermediate step of my current project requires conversion of opencv's cv::Mat to MTLTexture, the texture container of Metal. I need to store the Floats in the Mat as Floats in the texture; my project cannot quite afford the loss of precision.
This is my attempt at such a conversion.
- (id<MTLTexture>)texForMat:(cv::Mat)image context:(MBEContext *)context
{
id<MTLTexture> texture;
int width = image.cols;
int height = image.rows;
Float32 *rawData = (Float32 *)calloc(height * width * 4,sizeof(float));
int bytesPerPixel = 4;
int bytesPerRow = bytesPerPixel * width;
float r, g, b,a;
for(int i = 0; i < height; i++)
{
Float32* imageData = (Float32*)(image.data + image.step * i);
for(int j = 0; j < width; j++)
{
r = (Float32)(imageData[4 * j]);
g = (Float32)(imageData[4 * j + 1]);
b = (Float32)(imageData[4 * j + 2]);
a = (Float32)(imageData[4 * j + 3]);
rawData[image.step * (i) + (4 * j)] = r;
rawData[image.step * (i) + (4 * j + 1)] = g;
rawData[image.step * (i) + (4 * j + 2)] = b;
rawData[image.step * (i) + (4 * j + 3)] = a;
}
}
MTLTextureDescriptor *textureDescriptor = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:MTLPixelFormatRGBA16Float
width:width
height:height
mipmapped:NO];
texture = [context.device newTextureWithDescriptor:textureDescriptor];
MTLRegion region = MTLRegionMake2D(0, 0, width, height);
[texture replaceRegion:region mipmapLevel:0 withBytes:rawData bytesPerRow:bytesPerRow];
free(rawData);
return texture;
}
But it doesn't seem to be working. It reads zeroes every time from the Mat, and throws up EXC_BAD_ACCESS. I need the MTLTexture in MTLPixelFormatRGBA16Float to keep the precision.
Thanks for considering this issue.
One problem here is you’re loading up rawData with Float32s but your texture is RGBA16Float, so the data will be corrupted (16Float is half the size of Float32). This shouldn’t cause your crash, but it’s an issue you’ll have to deal with.
Also as “chappjc” noted you’re using ‘image.step’ when writing your data out, but that buffer should be contiguous and not ever have a step that’s not just (width * bytesPerPixel).

JavaCV creating and drawing grayscale one channel histogram

i am new to this website, please let me know if i have made any mistake on my post.
I have some questions regarding calculating and drawing histogram in javacv. Below are the codes that i have written based on some information that i have searched:
There is this error that i get: OpenCV Error: One of arguments' values is out of range (index is out of range) in unknown function, file ......\src\opencv\modules\core\src\array.cpp, line 1691
private CvHistogram getHistogram(IplImage image) {//get histogram data, input has been converted to grayscale beforehand
IplImage[] hsvImage1 = {image};
//bins and value-range
int numberOfBins = 256;
float minRange = 0.0f;
float maxRange = 255.0f;
// Allocate histogram object
int dims = 1;
int[] sizes = new int[]{numberOfBins};
int histType = CV_HIST_ARRAY;
float[] minMax = new float[]{minRange, maxRange};
float[][] ranges = new float[][]{minMax};
CvHistogram hist = cvCreateHist(dims, sizes, histType, ranges, 1);
cvCalcHist(hsvImage1, hist, 0, null);
return hist;
}
private IplImage DrawHistogram(CvHistogram hist, IplImage image) {//draw histogram
int scaleX = 1;
int scaleY = 1;
int i;
float[] max_value = {0};
int[] int_value = {0};
cvGetMinMaxHistValue(hist, max_value, max_value, int_value, int_value);//get min and max value for histogram
IplImage imgHist = cvCreateImage(cvSize(256, image.height() ),IPL_DEPTH_8U,1);//create image to store histogram
cvZero(imgHist);
CvPoint pts = new CvPoint(5);
for (i = 0; i < 256; i++) {//draw the histogram
float value = opencv_legacy.cvQueryHistValue_1D(hist, i);
float nextValue = opencv_legacy.cvQueryHistValue_1D(hist, i + 1);
pts.position(0).x(i * scaleX).y(image.height() * scaleY);
pts.position(1).x(i * scaleX + scaleX).y(image.height() * scaleY);
pts.position(2).x(i * scaleX + scaleX).y((int)((image.height() - nextValue * image.height() /max_value[0]) * scaleY));
pts.position(3).x(i * scaleX).y((int)((image.height() - value * image.height() / max_value[0]) * scaleY));
pts.position(4).x(i * scaleX).y(image.height() * scaleY);
cvFillConvexPoly(imgHist, pts.position(0), 5, CvScalar.RED, CV_AA, 0);
}
return imgHist;
}
I have tried searching few links that i provided at the bottom, however, each of them are in different language, therefore i am not sure i have converted them to java correctly. To be honest there are few things i doubt, will be glad if any advice can be provided, such as:
float[] max_value = {0}; // i referred to the internet and it helps me to getby syntax error in cvGetMinMaxHistValue() , not sure if it will cause logic error
pts.position(3).x(i * scaleX).y((int)((image.height() - value * image.height() / max_value[0]) * scaleY)); // i put int to downcast it to the type the pts will recognise, and one more thing is max_value[0] is 0, wondering if it will cause logical error due to division
Links used:
//use this
public CvHistogram getHistogram(IplImage image) {//get histogram data, input has been converted to grayscale beforehand
IplImageArray hsvImage1 = splitChannels(image);
//bins and value-range
int numberOfBins = 256;
float minRange = 0.0f;
float maxRange = 255.0f;
// Allocate histogram object
int dims = 1;
int[] sizes = new int[]{numberOfBins};
int histType = CV_HIST_ARRAY;
float[] minMax = new float[]{minRange, maxRange};
float[][] ranges = new float[][]{minMax};
CvHistogram hist = cvCreateHist(dims, sizes, histType, ranges, 1);
cvCalcHist(hsvImage1, hist, 0, null);
return hist;
}
private IplImageArray splitChannels(IplImage hsvImage) {
CvSize size = hsvImage.cvSize();
int depth = hsvImage.depth();
IplImage channel0 = cvCreateImage(size, depth, 1);
IplImage channel1 = cvCreateImage(size, depth, 1);
IplImage channel2 = cvCreateImage(size, depth, 1);
cvSplit(hsvImage, channel0, channel1, channel2, null);
return new IplImageArray(channel0, channel1, channel2);
}
Your error is in this part:
for (i = 0; i < 256; i++) {//draw the histogram
float value = opencv_legacy.cvQueryHistValue_1D(hist, i);
float nextValue = opencv_legacy.cvQueryHistValue_1D(hist, i + 1);
You use i+1 and it causes the error out of range, you can use your for until 255 to correct it.
I hope I helped you. GL

Why do operations with an array corrupt the values?

I'm trying to implement the Particle Swarm Optimization on CUDA. I'm partially initializing data arrays on host, then I allocate memory on CUDA and copy it there, and then try to proceed with the initialization.
The problem is, when I'm trying to modify array element like so
__global__ void kernelInit(
float* X,
size_t pitch,
int width,
float X_high,
float X_low
) {
// Silly, but pretty reliable way to address array elements
unsigned int tid = blockIdx.x * blockDim.x + threadIdx.x;
int r = tid / width;
int c = tid % width;
float* pElement = (float*)((char*)X + r * pitch) + c;
*pElement = *pElement * (X_high - X_low) - X_low;
//*pElement = (X_high - X_low) - X_low;
}
It corrupts the values and gives me 1.#INF00 as array element. When I uncomment the last line *pElement = (X_high - X_low) - X_low; and comment the previous, it works as expected: I get values like 15.36 and so on.
I believe the problem is either with my memory allocation and copying, and/or with adressing the specific array element. I read the CUDA manual about these both topics, but I can't spot the error: I still get corrupt array if I do anything with the element of the array. For example, *pElement = *pElement * 2 gives unreasonable big results like 779616...00000000.00000 when the initial pElement is expected to be just a float in [0;1].
Here is the full source. Initialization of arrays begins in main (bottom of the source), then f1 function does the work for CUDA and launches the initialization kernel kernelInit:
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <math.h>
#include <cuda.h>
#include <cuda_runtime.h>
const unsigned f_n = 3;
const unsigned n = 2;
const unsigned p = 64;
typedef struct {
unsigned k_max;
float c1;
float c2;
unsigned p;
float inertia_factor;
float Ef;
float X_low[f_n];
float X_high[f_n];
float X_min[n][f_n];
} params_t;
typedef void (*kernelWrapperType) (
float *X,
float *X_highVec,
float *V,
float *X_best,
float *Y,
float *Y_best,
float *X_swarmBest,
bool &termination,
const float &inertia,
const params_t *params,
const unsigned &f
);
typedef float (*twoArgsFuncType) (
float x1,
float x2
);
__global__ void kernelInit(
float* X,
size_t pitch,
int width,
float X_high,
float X_low
) {
// Silly, but pretty reliable way to address array elements
unsigned int tid = blockIdx.x * blockDim.x + threadIdx.x;
int r = tid / width;
int c = tid % width;
float* pElement = (float*)((char*)X + r * pitch) + c;
*pElement = *pElement * (X_high - X_low) - X_low;
//*pElement = (X_high - X_low) - X_low;
}
__device__ float kernelF1(
float x1,
float x2
) {
float y = pow(x1, 2.f) + pow(x2, 2.f);
return y;
}
void f1(
float *X,
float *X_highVec,
float *V,
float *X_best,
float *Y,
float *Y_best,
float *X_swarmBest,
bool &termination,
const float &inertia,
const params_t *params,
const unsigned &f
) {
float *X_d = NULL;
float *Y_d = NULL;
unsigned length = n * p;
const cudaChannelFormatDesc desc = cudaCreateChannelDesc<float4>();
size_t pitch;
size_t dpitch;
cudaError_t err;
unsigned width = n;
unsigned height = p;
err = cudaMallocPitch (&X_d, &dpitch, width * sizeof(float), height);
pitch = n * sizeof(float);
err = cudaMemcpy2D(X_d, dpitch, X, pitch, width * sizeof(float), height, cudaMemcpyHostToDevice);
err = cudaMalloc (&Y_d, sizeof(float) * p);
err = cudaMemcpy (Y_d, Y, sizeof(float) * p, cudaMemcpyHostToDevice);
dim3 threads; threads.x = 32;
dim3 blocks; blocks.x = (length/threads.x) + 1;
kernelInit<<<threads,blocks>>>(X_d, dpitch, width, params->X_high[f], params->X_low[f]);
err = cudaMemcpy2D(X, pitch, X_d, dpitch, n*sizeof(float), p, cudaMemcpyDeviceToHost);
err = cudaFree(X_d);
err = cudaMemcpy(Y, Y_d, sizeof(float) * p, cudaMemcpyDeviceToHost);
err = cudaFree(Y_d);
}
float F1(
float x1,
float x2
) {
float y = pow(x1, 2.f) + pow(x2, 2.f);
return y;
}
/*
* Generates random float in [0.0; 1.0]
*/
float frand(){
return (float)rand()/(float)RAND_MAX;
}
/*
* This is the main routine which declares and initializes the integer vector, moves it to the device, launches kernel
* brings the result vector back to host and dumps it on the console.
*/
int main() {
const params_t params = {
100,
0.5,
0.5,
p,
0.98,
0.01,
{-5.12, -2.048, -5.12},
{5.12, 2.048, 5.12},
{{0, 1, 0}, {0, 1, 0}}
};
float X[p][n];
float X_highVec[n];
float V[p][n];
float X_best[p][n];
float Y[p] = {0};
float Y_best[p] = {0};
float X_swarmBest[n];
kernelWrapperType F_wrapper[f_n] = {&f1, &f1, &f1};
twoArgsFuncType F[f_n] = {&F1, &F1, &F1};
for (unsigned f = 0; f < f_n; f++) {
printf("Optimizing function #%u\n", f);
srand ( time(NULL) );
for (unsigned i = 0; i < p; i++)
for (unsigned j = 0; j < n; j++)
X[i][j] = X_best[i][j] = frand();
for (int i = 0; i < n; i++)
X_highVec[i] = params.X_high[f];
for (unsigned i = 0; i < p; i++)
for (unsigned j = 0; j < n; j++)
V[i][j] = frand();
for (unsigned i = 0; i < p; i++)
Y_best[i] = F[f](X[i][0], X[i][1]);
for (unsigned i = 0; i < n; i++)
X_swarmBest[i] = params.X_high[f];
float y_swarmBest = F[f](X_highVec[0], X_highVec[1]);
bool termination = false;
float inertia = 1.;
for (unsigned k = 0; k < params.k_max; k++) {
F_wrapper[f]((float *)X, X_highVec, (float *)V, (float *)X_best, Y, Y_best, X_swarmBest, termination, inertia, &params, f);
}
for (unsigned i = 0; i < p; i++)
{
for (unsigned j = 0; j < n; j++)
{
printf("%f\t", X[i][j]);
}
printf("F = %f\n", Y[i]);
}
getchar();
}
}
Update: I tried adding error handling like so
err = cudaMallocPitch (&X_d, &dpitch, width * sizeof(float), height);
if (err != cudaSuccess) {
fprintf(stderr, cudaGetErrorString(err));
exit(1);
}
after each API call, but it gave me nothing and didn't return (I still get all the results and program works to the end).
This is an unnecessarily complex piece of code for what should be a simple repro case, but this immediately jumps out:
const unsigned n = 2;
const unsigned p = 64;
unsigned length = n * p
dim3 threads; threads.x = 32;
dim3 blocks; blocks.x = (length/threads.x) + 1;
kernelInit<<<threads,blocks>>>(X_d, dpitch, width, params->X_high[f], params->X_low[f]);
So you are firstly computing the incorrect number of blocks, and then reversing the order of the blocks per grid and threads per block arguments in the kernel launch. That may well lead to out of bounds memory access, either hosing something in GPU memory or causing an unspecified launch failure, which your lack of error handling might not be catching. There is a tool called cuda-memcheck which has been shipped with the toolkit since about CUDA 3.0. If you run it, it will give you valgrind style memory access violation reports. You should get into the habit of using it, if you are not already doing so.
As for infinite values, that is to be expected isn't it? Your code starts with values in (0,1), and then does
X[i] = X[i] * (5.12--5.12) - -5.12
100 times, which is the rough equivalent of multiplying by 10^100, which is then followed by
X[i] = X[i] * (2.048--2.048) - -2.048
100 times, which is the rough equivalent of multiplying by 4^100, finally followed by
X[i] = X[i] * (5.12--5.12) - -5.12
again. So your results should be of the order of 1E250, which is much larger than the maximum 3.4E38 which is the rough upper limit of representable numbers in IEEE 754 single precision.

Resources